diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 753b4c77f..77ccb1527 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -210,6 +210,7 @@ jobs: - name: fpga_bitstream_reg_test - name: fpga_sdc_reg_test - name: fpga_spice_reg_test + - name: micro_benchmark_reg_test - name: quicklogic_reg_test - name: vtr_benchmark_reg_test - name: iwls_benchmark_reg_test @@ -257,6 +258,7 @@ jobs: - name: fpga_bitstream_reg_test - name: fpga_sdc_reg_test - name: fpga_spice_reg_test + - name: micro_benchmark_reg_test - name: quicklogic_reg_test - name: vtr_benchmark_reg_test - name: iwls_benchmark_reg_test diff --git a/openfpga_flow/benchmarks/micro_benchmark/and4/and4.v b/openfpga_flow/benchmarks/micro_benchmark/and4/and4.v new file mode 100644 index 000000000..4b71607f6 --- /dev/null +++ b/openfpga_flow/benchmarks/micro_benchmark/and4/and4.v @@ -0,0 +1,22 @@ +///////////////////////////////////////// +// Functionality: 4-input AND +// Author: Xifan Tang +//////////////////////////////////////// +`timescale 1ns / 1ps + +module and4( + a, + b, + c, + d, + e); + +input wire a; +input wire b; +input wire c; +input wire d; +output wire e; + +assign e = a & b & c & d; + +endmodule diff --git a/openfpga_flow/benchmarks/micro_benchmark/counter_128bit_async_reset/counter.v b/openfpga_flow/benchmarks/micro_benchmark/counter_128bit_async_reset/counter.v new file mode 100644 index 000000000..4a3542eec --- /dev/null +++ b/openfpga_flow/benchmarks/micro_benchmark/counter_128bit_async_reset/counter.v @@ -0,0 +1,25 @@ +/////////////////////////////////////////// +// Functionality: Counter with asynchronous reset +// Author: Xifan Tang +//////////////////////////////////////// + +module counter ( + clk, + reset, + result +); + + input clk; + input reset; + output [127:0] result; + + reg [127:0] result; + + always @(posedge clk or posedge reset) + begin + if (reset) + result = 0; + else + result = result + 1; + end +endmodule diff --git a/openfpga_flow/benchmarks/micro_benchmark/counter_128bit_async_reset/counter_tb.v b/openfpga_flow/benchmarks/micro_benchmark/counter_128bit_async_reset/counter_tb.v new file mode 100644 index 000000000..4d11e9e0b --- /dev/null +++ b/openfpga_flow/benchmarks/micro_benchmark/counter_128bit_async_reset/counter_tb.v @@ -0,0 +1,25 @@ +module counter_tb; + + reg clk, reset; + wire [127:0] result; + + counter DUT( + .clk(clk), + .reset(reset), + .result(result) + ); + + initial begin + #0 reset = 1'b1; clk = 1'b0; + #100 reset = 1'b0; + end + + always begin + #10 clk = ~clk; + end + + initial begin + #5000 $stop; + end + +endmodule diff --git a/openfpga_flow/benchmarks/micro_benchmark/counter_128bit_async_resetb/counter.v b/openfpga_flow/benchmarks/micro_benchmark/counter_128bit_async_resetb/counter.v new file mode 100644 index 000000000..628ec4c08 --- /dev/null +++ b/openfpga_flow/benchmarks/micro_benchmark/counter_128bit_async_resetb/counter.v @@ -0,0 +1,25 @@ +/////////////////////////////////////////// +// Functionality: Counter with asynchronous reset +// Author: Xifan Tang +//////////////////////////////////////// + +module counter ( + clk, + resetb, + result +); + + input clk; + input resetb; + output [127:0] result; + + reg [127:0] result; + + always @(posedge clk or negedge resetb) + begin + if (~resetb) + result = 0; + else + result = result + 1; + end +endmodule diff --git a/openfpga_flow/benchmarks/micro_benchmark/counter_128bit_async_resetb/counter_tb.v b/openfpga_flow/benchmarks/micro_benchmark/counter_128bit_async_resetb/counter_tb.v new file mode 100644 index 000000000..c96557b4e --- /dev/null +++ b/openfpga_flow/benchmarks/micro_benchmark/counter_128bit_async_resetb/counter_tb.v @@ -0,0 +1,25 @@ +module counter_tb; + + reg clk, resetb; + wire [127:0] result; + + counter DUT( + .clk(clk), + .resetb(resetb), + .result(result) + ); + + initial begin + #0 reset = 1'b0; clk = 1'b0; + #100 reset = 1'b1; + end + + always begin + #10 clk = ~clk; + end + + initial begin + #5000 $stop; + end + +endmodule diff --git a/openfpga_flow/benchmarks/micro_benchmark/mac/mac_18/mac_18.v b/openfpga_flow/benchmarks/micro_benchmark/mac/mac_18/mac_18.v new file mode 100644 index 000000000..74a935e8d --- /dev/null +++ b/openfpga_flow/benchmarks/micro_benchmark/mac/mac_18/mac_18.v @@ -0,0 +1,22 @@ +//------------------------------------------------------- +// Functionality: A 18-bit multiply-acculumate circuit +// Author: Xifan Tang +//------------------------------------------------------- + +module mac_18(a, b, c, out); +parameter DATA_WIDTH = 18; /* declare a parameter. default required */ +input [DATA_WIDTH - 1 : 0] a, b, c; +output [DATA_WIDTH - 1 : 0] out; + +assign out = a * b + c; + +endmodule + + + + + + + + + diff --git a/openfpga_flow/benchmarks/micro_benchmark/mac/mac_20/mac_20.v b/openfpga_flow/benchmarks/micro_benchmark/mac/mac_20/mac_20.v new file mode 100644 index 000000000..f7be3eff9 --- /dev/null +++ b/openfpga_flow/benchmarks/micro_benchmark/mac/mac_20/mac_20.v @@ -0,0 +1,22 @@ +//------------------------------------------------------- +// Functionality: A 20-bit multiply-acculumate circuit +// Author: Xifan Tang +//------------------------------------------------------- + +module mac_20(a, b, c, out); +parameter DATA_WIDTH = 20; /* declare a parameter. default required */ +input [DATA_WIDTH - 1 : 0] a, b, c; +output [DATA_WIDTH - 1 : 0] out; + +assign out = a * b + c; + +endmodule + + + + + + + + + diff --git a/openfpga_flow/benchmarks/micro_benchmark/mac/mac_36/mac_36.v b/openfpga_flow/benchmarks/micro_benchmark/mac/mac_36/mac_36.v new file mode 100644 index 000000000..669bc4747 --- /dev/null +++ b/openfpga_flow/benchmarks/micro_benchmark/mac/mac_36/mac_36.v @@ -0,0 +1,22 @@ +//------------------------------------------------------- +// Functionality: A 36-bit multiply-acculumate circuit +// Author: Xifan Tang +//------------------------------------------------------- + +module mac_36(a, b, c, out); +parameter DATA_WIDTH = 4; /* declare a parameter. default required */ +input [DATA_WIDTH - 1 : 0] a, b, c; +output [DATA_WIDTH - 1 : 0] out; + +assign out = a * b + c; + +endmodule + + + + + + + + + diff --git a/openfpga_flow/benchmarks/micro_benchmark/mac/mac_8_9/mac_8_9.v b/openfpga_flow/benchmarks/micro_benchmark/mac/mac_8_9/mac_8_9.v new file mode 100644 index 000000000..05df02fac --- /dev/null +++ b/openfpga_flow/benchmarks/micro_benchmark/mac/mac_8_9/mac_8_9.v @@ -0,0 +1,25 @@ +//------------------------------------------------------- +// Functionality: +// - A 8-bit multiply-acculumate circuit +// - A 9-bit multiply-acculumate circuit +// Author: Xifan Tang +//------------------------------------------------------- + +module mac_8_9(a, b, c, out); +parameter DATA_WIDTH = 18; /* declare a parameter. default required */ +input [DATA_WIDTH - 1 : 0] a, b, c; +output [DATA_WIDTH - 1 : 0] out; + +assign out[8:0] = a[8:0] * b[8:0] + c[8:0]; +assign out[17:9] = a[17:9] * b[17:9] + c[17:9]; + +endmodule + + + + + + + + + diff --git a/openfpga_flow/benchmarks/micro_benchmark/mac/mac_9/mac_9.v b/openfpga_flow/benchmarks/micro_benchmark/mac/mac_9/mac_9.v new file mode 100644 index 000000000..664df33f4 --- /dev/null +++ b/openfpga_flow/benchmarks/micro_benchmark/mac/mac_9/mac_9.v @@ -0,0 +1,22 @@ +//------------------------------------------------------- +// Functionality: A 9-bit multiply-acculumate circuit +// Author: Xifan Tang +//------------------------------------------------------- + +module mac_9(a, b, c, out); +parameter DATA_WIDTH = 9; /* declare a parameter. default required */ +input [DATA_WIDTH - 1 : 0] a, b, c; +output [DATA_WIDTH - 1 : 0] out; + +assign out = a * b + c; + +endmodule + + + + + + + + + diff --git a/openfpga_flow/openfpga_shell_scripts/mcnc_example_script.openfpga b/openfpga_flow/openfpga_shell_scripts/mcnc_example_script.openfpga index 56b5c9caf..7f2984f96 100644 --- a/openfpga_flow/openfpga_shell_scripts/mcnc_example_script.openfpga +++ b/openfpga_flow/openfpga_shell_scripts/mcnc_example_script.openfpga @@ -37,7 +37,7 @@ repack #--verbose # Build the bitstream # - Output the fabric-independent bitstream to a file -build_architecture_bitstream --verbose --write_file fabric_indepenent_bitstream.xml +build_architecture_bitstream --verbose # Build fabric-dependent bitstream build_fabric_bitstream --verbose @@ -46,9 +46,7 @@ build_fabric_bitstream --verbose # - Enable the use of explicit port mapping in Verilog netlist write_fabric_verilog --file ./SRC \ --explicit_port_mapping \ - --include_timing \ - --include_signal_init -# --support_icarus_simulator + --include_timing # Write the Verilog testbench for FPGA fabric # - We suggest the use of same output directory as fabric Verilog netlists @@ -59,16 +57,6 @@ write_fabric_verilog --file ./SRC \ write_preconfigured_fabric_wrapper --file ./SRC --support_icarus_simulator write_preconfigured_testbench --file ./SRC --reference_benchmark_file_path ${REFERENCE_VERILOG_TESTBENCH} --support_icarus_simulator -# Write the SDC files for PnR backend -# - Turn on every options here -write_pnr_sdc --file ./SDC - -# Write SDC to disable timing for configure ports -write_sdc_disable_timing_configure_ports --file ./SDC/disable_configure_ports.sdc - -# Write the SDC to run timing analysis for a mapped FPGA fabric -write_analysis_sdc --file ./SDC_analysis - # Finish and exit OpenFPGA exit diff --git a/openfpga_flow/regression_test_scripts/fpga_verilog_reg_test.sh b/openfpga_flow/regression_test_scripts/fpga_verilog_reg_test.sh index 70137dd77..9c51ef6ea 100755 --- a/openfpga_flow/regression_test_scripts/fpga_verilog_reg_test.sh +++ b/openfpga_flow/regression_test_scripts/fpga_verilog_reg_test.sh @@ -136,9 +136,3 @@ run-task fpga_verilog/fully_connected_output_crossbar --debug --show_thread_logs echo -e "Testing through channels in tileable routing"; run-task fpga_verilog/thru_channel/thru_narrow_tile --debug --show_thread_logs run-task fpga_verilog/thru_channel/thru_wide_tile --debug --show_thread_logs - -# Verify MCNC big20 benchmark suite with ModelSim -# Please make sure you have ModelSim installed in the environment -# Otherwise, it will fail -#run-task fpga_verilog/mcnc_big20 --debug --show_thread_logs --maxthreads 20 -#python3 openfpga_flow/scripts/run_modelsim.py mcnc_big20 --run_sim diff --git a/openfpga_flow/regression_test_scripts/micro_benchmark_reg_test.sh b/openfpga_flow/regression_test_scripts/micro_benchmark_reg_test.sh new file mode 100755 index 000000000..28b24aef3 --- /dev/null +++ b/openfpga_flow/regression_test_scripts/micro_benchmark_reg_test.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -e +source openfpga.sh +PYTHON_EXEC=python3.8 +############################################### +# OpenFPGA Shell with VPR8 +############################################## +echo -e "Micro benchmark regression tests"; +run-task benchmark_sweep/counter --debug --show_thread_logs +run-task benchmark_sweep/mac_units --debug --show_thread_logs + +# Verify MCNC big20 benchmark suite with ModelSim +# Please make sure you have ModelSim installed in the environment +# Otherwise, it will fail +run-task benchmark_sweep/mcnc_big20 --debug --show_thread_logs +#python3 openfpga_flow/scripts/run_modelsim.py mcnc_big20 --run_sim diff --git a/openfpga_flow/tasks/benchmark_sweep/counter/config/task.conf b/openfpga_flow/tasks/benchmark_sweep/counter/config/task.conf index 9d5a92747..eb0079516 100644 --- a/openfpga_flow/tasks/benchmark_sweep/counter/config/task.conf +++ b/openfpga_flow/tasks/benchmark_sweep/counter/config/task.conf @@ -9,29 +9,45 @@ [GENERAL] run_engine=openfpga_shell power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml -power_analysis = true +power_analysis = false spice_output=false verilog_output=true -timeout_each_job = 20*60 -fpga_flow=vpr_blif +timeout_each_job = 5*60 +fpga_flow=yosys_vpr [OpenFPGA_SHELL] -openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/implicit_verilog_example_script.openfpga -openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k6_N10_40nm_openfpga.xml -openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml +openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/iwls_benchmark_example_script.openfpga +openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k6_frac_N10_adder_chain_dpram8K_dsp36_fracff_40nm_openfpga.xml +openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/fixed_sim_openfpga.xml +# Yosys script parameters +yosys_cell_sim_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_fracff_40nm_cell_sim.v +yosys_dff_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_fracff_40nm_dff_map.v +yosys_bram_map_rules=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_bram.txt +yosys_bram_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_bram_map.v +yosys_dsp_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_dsp_map.v +yosys_dsp_map_parameters=-D DSP_A_MAXWIDTH=36 -D DSP_B_MAXWIDTH=36 -D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 -D DSP_NAME=mult_36x36 +# VPR parameters +# # Use a fixed routing channel width to save runtime +vpr_route_chan_width=50 [ARCHITECTURES] -arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k6_N10_40nm.xml +arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_fracff_40nm.xml [BENCHMARKS] -bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/counter/counter_post_yosys.blif +bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/counter/counter.v +bench1=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/counter_async_reset/counter.v +bench2=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/counter_128bit_async_reset/counter.v +bench3=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/counter_128bit_async_resetb/counter.v [SYNTHESIS_PARAM] +bench_yosys_common=${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_dsp_dff_flow.ys +#bench_yosys_rewrite_common=${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_yosys_vpr_flow_with_rewrite.ys;${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_rewrite_flow.ys + bench0_top = counter -bench0_act = ${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/counter/counter_pre_vpr.act -bench0_verilog = ${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/counter/counter_output_verilog.v -bench0_chan_width = 300 +bench1_top = counter +bench2_top = counter +bench3_top = counter [SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH] -end_flow_with_test= +#end_flow_with_test= #vpr_fpga_verilog_formal_verification_top_netlist= diff --git a/openfpga_flow/tasks/benchmark_sweep/mac_units/config/task.conf b/openfpga_flow/tasks/benchmark_sweep/mac_units/config/task.conf new file mode 100644 index 000000000..0fddaa91a --- /dev/null +++ b/openfpga_flow/tasks/benchmark_sweep/mac_units/config/task.conf @@ -0,0 +1,67 @@ +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +# Configuration file for running experiments +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +# timeout_each_job : FPGA Task script splits fpga flow into multiple jobs +# Each job execute fpga_flow script on combination of architecture & benchmark +# timeout_each_job is timeout for each job +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + +[GENERAL] +run_engine=openfpga_shell +power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml +power_analysis = false +spice_output=false +verilog_output=true +timeout_each_job = 20*60 +fpga_flow=yosys_vpr + +[OpenFPGA_SHELL] +openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/iwls_benchmark_example_script.openfpga +openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k6_frac_N10_adder_chain_dpram8K_dsp36_fracff_40nm_openfpga.xml +openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/fixed_sim_openfpga.xml +# Yosys script parameters +yosys_cell_sim_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_fracff_40nm_cell_sim.v +yosys_dff_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_fracff_40nm_dff_map.v +yosys_bram_map_rules=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_bram.txt +yosys_bram_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_bram_map.v +yosys_dsp_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_dsp_map.v +yosys_dsp_map_parameters=-D DSP_A_MAXWIDTH=36 -D DSP_B_MAXWIDTH=36 -D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 -D DSP_NAME=mult_36x36 +# VPR parameters +# Use a fixed routing channel width to save runtime +vpr_route_chan_width=300 + +[ARCHITECTURES] +arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_fracff_40nm.xml + +[BENCHMARKS] +bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_2/mac_2.v +bench1=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_4/mac_4.v +bench2=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_6/mac_6.v +bench3=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_8/mac_8.v +bench4=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_8_9/mac_8_9.v +bench5=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_9/mac_9.v +bench6=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_12/mac_12.v +bench7=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_16/mac_16.v +bench8=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_18/mac_18.v +bench9=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_20/mac_20.v +bench10=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_32/mac_32.v +bench11=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac/mac_36/mac_36.v + +[SYNTHESIS_PARAM] +bench_yosys_common=${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_dsp_dff_flow.ys +bench0_top = mac_2 +bench1_top = mac_4 +bench2_top = mac_6 +bench3_top = mac_8 +bench4_top = mac_8_9 +bench5_top = mac_9 +bench6_top = mac_12 +bench7_top = mac_16 +bench8_top = mac_18 +bench9_top = mac_20 +bench10_top = mac_32 +bench11_top = mac_36 + +[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH] +#end_flow_with_test= +#vpr_fpga_verilog_formal_verification_top_netlist=