Merge pull request #339 from lnis-uofu/micro_benchmark

Micro benchmark
This commit is contained in:
tangxifan 2021-06-22 17:42:03 -06:00 committed by GitHub
commit 931dc21687
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 351 additions and 32 deletions

View File

@ -210,6 +210,7 @@ jobs:
- name: fpga_bitstream_reg_test
- name: fpga_sdc_reg_test
- name: fpga_spice_reg_test
- name: micro_benchmark_reg_test
- name: quicklogic_reg_test
- name: vtr_benchmark_reg_test
- name: iwls_benchmark_reg_test
@ -257,6 +258,7 @@ jobs:
- name: fpga_bitstream_reg_test
- name: fpga_sdc_reg_test
- name: fpga_spice_reg_test
- name: micro_benchmark_reg_test
- name: quicklogic_reg_test
- name: vtr_benchmark_reg_test
- name: iwls_benchmark_reg_test

View File

@ -0,0 +1,22 @@
/////////////////////////////////////////
// Functionality: 4-input AND
// Author: Xifan Tang
////////////////////////////////////////
`timescale 1ns / 1ps
module and4(
a,
b,
c,
d,
e);
input wire a;
input wire b;
input wire c;
input wire d;
output wire e;
assign e = a & b & c & d;
endmodule

View File

@ -0,0 +1,25 @@
///////////////////////////////////////////
// Functionality: Counter with asynchronous reset
// Author: Xifan Tang
////////////////////////////////////////
module counter (
clk,
reset,
result
);
input clk;
input reset;
output [127:0] result;
reg [127:0] result;
always @(posedge clk or posedge reset)
begin
if (reset)
result = 0;
else
result = result + 1;
end
endmodule

View File

@ -0,0 +1,25 @@
module counter_tb;
reg clk, reset;
wire [127:0] result;
counter DUT(
.clk(clk),
.reset(reset),
.result(result)
);
initial begin
#0 reset = 1'b1; clk = 1'b0;
#100 reset = 1'b0;
end
always begin
#10 clk = ~clk;
end
initial begin
#5000 $stop;
end
endmodule

View File

@ -0,0 +1,25 @@
///////////////////////////////////////////
// Functionality: Counter with asynchronous reset
// Author: Xifan Tang
////////////////////////////////////////
module counter (
clk,
resetb,
result
);
input clk;
input resetb;
output [127:0] result;
reg [127:0] result;
always @(posedge clk or negedge resetb)
begin
if (~resetb)
result = 0;
else
result = result + 1;
end
endmodule

View File

@ -0,0 +1,25 @@
module counter_tb;
reg clk, resetb;
wire [127:0] result;
counter DUT(
.clk(clk),
.resetb(resetb),
.result(result)
);
initial begin
#0 reset = 1'b0; clk = 1'b0;
#100 reset = 1'b1;
end
always begin
#10 clk = ~clk;
end
initial begin
#5000 $stop;
end
endmodule

View File

@ -0,0 +1,22 @@
//-------------------------------------------------------
// Functionality: A 18-bit multiply-acculumate circuit
// Author: Xifan Tang
//-------------------------------------------------------
module mac_18(a, b, c, out);
parameter DATA_WIDTH = 18; /* declare a parameter. default required */
input [DATA_WIDTH - 1 : 0] a, b, c;
output [DATA_WIDTH - 1 : 0] out;
assign out = a * b + c;
endmodule

View File

@ -0,0 +1,22 @@
//-------------------------------------------------------
// Functionality: A 20-bit multiply-acculumate circuit
// Author: Xifan Tang
//-------------------------------------------------------
module mac_20(a, b, c, out);
parameter DATA_WIDTH = 20; /* declare a parameter. default required */
input [DATA_WIDTH - 1 : 0] a, b, c;
output [DATA_WIDTH - 1 : 0] out;
assign out = a * b + c;
endmodule

View File

@ -0,0 +1,22 @@
//-------------------------------------------------------
// Functionality: A 36-bit multiply-acculumate circuit
// Author: Xifan Tang
//-------------------------------------------------------
module mac_36(a, b, c, out);
parameter DATA_WIDTH = 4; /* declare a parameter. default required */
input [DATA_WIDTH - 1 : 0] a, b, c;
output [DATA_WIDTH - 1 : 0] out;
assign out = a * b + c;
endmodule

View File

@ -0,0 +1,25 @@
//-------------------------------------------------------
// Functionality:
// - A 8-bit multiply-acculumate circuit
// - A 9-bit multiply-acculumate circuit
// Author: Xifan Tang
//-------------------------------------------------------
module mac_8_9(a, b, c, out);
parameter DATA_WIDTH = 18; /* declare a parameter. default required */
input [DATA_WIDTH - 1 : 0] a, b, c;
output [DATA_WIDTH - 1 : 0] out;
assign out[8:0] = a[8:0] * b[8:0] + c[8:0];
assign out[17:9] = a[17:9] * b[17:9] + c[17:9];
endmodule

View File

@ -0,0 +1,22 @@
//-------------------------------------------------------
// Functionality: A 9-bit multiply-acculumate circuit
// Author: Xifan Tang
//-------------------------------------------------------
module mac_9(a, b, c, out);
parameter DATA_WIDTH = 9; /* declare a parameter. default required */
input [DATA_WIDTH - 1 : 0] a, b, c;
output [DATA_WIDTH - 1 : 0] out;
assign out = a * b + c;
endmodule

View File

@ -37,7 +37,7 @@ repack #--verbose
# Build the bitstream
# - Output the fabric-independent bitstream to a file
build_architecture_bitstream --verbose --write_file fabric_indepenent_bitstream.xml
build_architecture_bitstream --verbose
# Build fabric-dependent bitstream
build_fabric_bitstream --verbose
@ -46,9 +46,7 @@ build_fabric_bitstream --verbose
# - Enable the use of explicit port mapping in Verilog netlist
write_fabric_verilog --file ./SRC \
--explicit_port_mapping \
--include_timing \
--include_signal_init
# --support_icarus_simulator
--include_timing
# Write the Verilog testbench for FPGA fabric
# - We suggest the use of same output directory as fabric Verilog netlists
@ -59,16 +57,6 @@ write_fabric_verilog --file ./SRC \
write_preconfigured_fabric_wrapper --file ./SRC --support_icarus_simulator
write_preconfigured_testbench --file ./SRC --reference_benchmark_file_path ${REFERENCE_VERILOG_TESTBENCH} --support_icarus_simulator
# Write the SDC files for PnR backend
# - Turn on every options here
write_pnr_sdc --file ./SDC
# Write SDC to disable timing for configure ports
write_sdc_disable_timing_configure_ports --file ./SDC/disable_configure_ports.sdc
# Write the SDC to run timing analysis for a mapped FPGA fabric
write_analysis_sdc --file ./SDC_analysis
# Finish and exit OpenFPGA
exit

View File

@ -136,9 +136,3 @@ run-task fpga_verilog/fully_connected_output_crossbar --debug --show_thread_logs
echo -e "Testing through channels in tileable routing";
run-task fpga_verilog/thru_channel/thru_narrow_tile --debug --show_thread_logs
run-task fpga_verilog/thru_channel/thru_wide_tile --debug --show_thread_logs
# Verify MCNC big20 benchmark suite with ModelSim
# Please make sure you have ModelSim installed in the environment
# Otherwise, it will fail
#run-task fpga_verilog/mcnc_big20 --debug --show_thread_logs --maxthreads 20
#python3 openfpga_flow/scripts/run_modelsim.py mcnc_big20 --run_sim

View File

@ -0,0 +1,17 @@
#!/bin/bash
set -e
source openfpga.sh
PYTHON_EXEC=python3.8
###############################################
# OpenFPGA Shell with VPR8
##############################################
echo -e "Micro benchmark regression tests";
run-task benchmark_sweep/counter --debug --show_thread_logs
run-task benchmark_sweep/mac_units --debug --show_thread_logs
# Verify MCNC big20 benchmark suite with ModelSim
# Please make sure you have ModelSim installed in the environment
# Otherwise, it will fail
run-task benchmark_sweep/mcnc_big20 --debug --show_thread_logs
#python3 openfpga_flow/scripts/run_modelsim.py mcnc_big20 --run_sim

View File

@ -9,29 +9,45 @@
[GENERAL]
run_engine=openfpga_shell
power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml
power_analysis = true
power_analysis = false
spice_output=false
verilog_output=true
timeout_each_job = 20*60
fpga_flow=vpr_blif
timeout_each_job = 5*60
fpga_flow=yosys_vpr
[OpenFPGA_SHELL]
openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/implicit_verilog_example_script.openfpga
openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k6_N10_40nm_openfpga.xml
openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml
openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/iwls_benchmark_example_script.openfpga
openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k6_frac_N10_adder_chain_dpram8K_dsp36_fracff_40nm_openfpga.xml
openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/fixed_sim_openfpga.xml
# Yosys script parameters
yosys_cell_sim_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_fracff_40nm_cell_sim.v
yosys_dff_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_fracff_40nm_dff_map.v
yosys_bram_map_rules=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_bram.txt
yosys_bram_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_bram_map.v
yosys_dsp_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_dsp_map.v
yosys_dsp_map_parameters=-D DSP_A_MAXWIDTH=36 -D DSP_B_MAXWIDTH=36 -D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 -D DSP_NAME=mult_36x36
# VPR parameters
# # Use a fixed routing channel width to save runtime
vpr_route_chan_width=50
[ARCHITECTURES]
arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k6_N10_40nm.xml
arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_fracff_40nm.xml
[BENCHMARKS]
bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/counter/counter_post_yosys.blif
bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/counter/counter.v
bench1=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/counter_async_reset/counter.v
bench2=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/counter_128bit_async_reset/counter.v
bench3=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/counter_128bit_async_resetb/counter.v
[SYNTHESIS_PARAM]
bench_yosys_common=${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_dsp_dff_flow.ys
#bench_yosys_rewrite_common=${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_yosys_vpr_flow_with_rewrite.ys;${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_rewrite_flow.ys
bench0_top = counter
bench0_act = ${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/counter/counter_pre_vpr.act
bench0_verilog = ${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/counter/counter_output_verilog.v
bench0_chan_width = 300
bench1_top = counter
bench2_top = counter
bench3_top = counter
[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH]
end_flow_with_test=
#end_flow_with_test=
#vpr_fpga_verilog_formal_verification_top_netlist=

View File

@ -0,0 +1,67 @@
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Configuration file for running experiments
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# timeout_each_job : FPGA Task script splits fpga flow into multiple jobs
# Each job execute fpga_flow script on combination of architecture & benchmark
# timeout_each_job is timeout for each job
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
[GENERAL]
run_engine=openfpga_shell
power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml
power_analysis = false
spice_output=false
verilog_output=true
timeout_each_job = 20*60
fpga_flow=yosys_vpr
[OpenFPGA_SHELL]
openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/iwls_benchmark_example_script.openfpga
openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k6_frac_N10_adder_chain_dpram8K_dsp36_fracff_40nm_openfpga.xml
openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/fixed_sim_openfpga.xml
# Yosys script parameters
yosys_cell_sim_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_fracff_40nm_cell_sim.v
yosys_dff_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_fracff_40nm_dff_map.v
yosys_bram_map_rules=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_bram.txt
yosys_bram_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_bram_map.v
yosys_dsp_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_dsp_map.v
yosys_dsp_map_parameters=-D DSP_A_MAXWIDTH=36 -D DSP_B_MAXWIDTH=36 -D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 -D DSP_NAME=mult_36x36
# VPR parameters
# Use a fixed routing channel width to save runtime
vpr_route_chan_width=300
[ARCHITECTURES]
arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_fracff_40nm.xml
[BENCHMARKS]
bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_2/mac_2.v
bench1=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_4/mac_4.v
bench2=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_6/mac_6.v
bench3=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_8/mac_8.v
bench4=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_8_9/mac_8_9.v
bench5=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_9/mac_9.v
bench6=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_12/mac_12.v
bench7=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_16/mac_16.v
bench8=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_18/mac_18.v
bench9=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_20/mac_20.v
bench10=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_32/mac_32.v
bench11=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_36/mac_36.v
[SYNTHESIS_PARAM]
bench_yosys_common=${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_dsp_dff_flow.ys
bench0_top = mac_2
bench1_top = mac_4
bench2_top = mac_6
bench3_top = mac_8
bench4_top = mac_8_9
bench5_top = mac_9
bench6_top = mac_12
bench7_top = mac_16
bench8_top = mac_18
bench9_top = mac_20
bench10_top = mac_32
bench11_top = mac_36
[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH]
#end_flow_with_test=
#vpr_fpga_verilog_formal_verification_top_netlist=