From 1185f7b8bf8685e91b325d26687a2373d9036733 Mon Sep 17 00:00:00 2001 From: tangxifan Date: Sat, 20 Mar 2021 17:05:30 -0600 Subject: [PATCH 01/38] [Script] Add a template yosys script to enable DSP mapping --- .../misc/ys_tmpl_yosys_vpr_bram_dsp_flow.ys | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_dsp_flow.ys diff --git a/openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_dsp_flow.ys b/openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_dsp_flow.ys new file mode 100644 index 000000000..ebe101ed2 --- /dev/null +++ b/openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_dsp_flow.ys @@ -0,0 +1,102 @@ +# Yosys synthesis script for ${TOP_MODULE} + +######################### +# Parse input files +######################### +# Read verilog files +${READ_VERILOG_FILE} +# Read technology library +read_verilog -lib -specify ${YOSYS_CELL_SIM_VERILOG} + +######################### +# Prepare for synthesis +######################### +# Identify top module from hierarchy +hierarchy -check -top ${TOP_MODULE} +# - Convert process blocks to AST +proc +# Flatten all the gates/primitives +flatten +# Identify tri-state buffers from 'z' signal in AST +# with follow-up optimizations to clean up AST +tribuf -logic +opt_expr +opt_clean +# demote inout ports to input or output port +# with follow-up optimizations to clean up AST +deminout +opt + +opt_expr +opt_clean +check +opt +wreduce -keepdc +peepopt +pmuxtree +opt_clean + +######################## +# Map multipliers +# Inspired from synth_xilinx.cc +######################### +techmap -map +/mul2dsp.v -map ${YOSYS_DSP_MAP_VERILOG} ${YOSYS_DSP_MAP_PARAMETERS} +select a:mul2dsp +setattr -unset mul2dsp +opt_expr -fine +wreduce +select -clear +chtype -set $mul t:$__soft_mul # Extract arithmetic functions + +######################### +# Run coarse synthesis +######################### +# Run a tech map with default library +techmap +alumacc +share +opt +fsm +# Run a quick follow-up optimization to 
sweep out unused nets/signals +opt -fast +# Optimize any memory cells by merging share-able ports and collecting all the ports belonging to memory cells +memory -nomap +opt_clean + +######################### +# Map logics to BRAMs +######################### +memory_bram -rules ${YOSYS_BRAM_MAP_RULES} +techmap -map ${YOSYS_BRAM_MAP_VERILOG} +opt -fast -mux_undef -undriven -fine +memory_map +opt -undriven -fine + +######################### +# Map flip-flops +######################### +techmap -map +/adff2dff.v +opt_expr -mux_undef +simplemap +opt_expr +opt_merge +opt_rmdff +opt_clean +opt + +######################### +# Map LUTs +######################### +abc -lut ${LUT_SIZE} + +######################### +# Check and show statistics +######################### +hierarchy -check +stat + +######################### +# Output netlists +######################### +opt_clean -purge +write_blif ${OUTPUT_BLIF} From 911979a731fe810c0e61d16db2e76460d4c65bac Mon Sep 17 00:00:00 2001 From: tangxifan Date: Sat, 20 Mar 2021 18:04:59 -0600 Subject: [PATCH 02/38] [Arch] Update heterogenous architecture for vtr benchmark by adding mult36 --- ...der_chain_dpram8K_dsp36_40nm_openfpga.xml} | 14 ++++ ...leable_adder_chain_dpram8K_dsp36_40nm.xml} | 80 +++++++++++++++++++ ...egister_scan_chain_mem16K_depop50_12nm.xml | 11 ++- 3 files changed, 104 insertions(+), 1 deletion(-) rename openfpga_flow/openfpga_arch/{k6_frac_N10_adder_chain_dpram8K_40nm_openfpga.xml => k6_frac_N10_adder_chain_dpram8K_dsp36_40nm_openfpga.xml} (93%) rename openfpga_flow/vpr_arch/{k6_frac_N10_tileable_adder_chain_dpram8K_40nm.xml => k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm.xml} (91%) diff --git a/openfpga_flow/openfpga_arch/k6_frac_N10_adder_chain_dpram8K_40nm_openfpga.xml b/openfpga_flow/openfpga_arch/k6_frac_N10_adder_chain_dpram8K_dsp36_40nm_openfpga.xml similarity index 93% rename from openfpga_flow/openfpga_arch/k6_frac_N10_adder_chain_dpram8K_40nm_openfpga.xml rename to 
openfpga_flow/openfpga_arch/k6_frac_N10_adder_chain_dpram8K_dsp36_40nm_openfpga.xml index 37beb67bf..ed8f0134c 100644 --- a/openfpga_flow/openfpga_arch/k6_frac_N10_adder_chain_dpram8K_40nm_openfpga.xml +++ b/openfpga_flow/openfpga_arch/k6_frac_N10_adder_chain_dpram8K_dsp36_40nm_openfpga.xml @@ -206,6 +206,16 @@ + + + + + + + + + + @@ -265,6 +275,10 @@ + + + + diff --git a/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_chain_dpram8K_40nm.xml b/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm.xml similarity index 91% rename from openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_chain_dpram8K_40nm.xml rename to openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm.xml index d697a1e8d..a34b29c22 100644 --- a/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_chain_dpram8K_40nm.xml +++ b/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm.xml @@ -138,6 +138,15 @@ + + + + + + + + + @@ -196,6 +205,23 @@ memory.waddr[9:5] memory.raddr[9:5] memory.data_in[7:4] memory.ren memory.data_out[7:4] + + + + + + + + + + + + mult_36.b[0:9] mult_36.b[10:35] mult_36.out[36:71] + + mult_36.a[0:9] mult_36.a[10:35] mult_36.out[0:35] + + + @@ -208,6 +234,8 @@ + + @@ -686,6 +714,58 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_register_scan_chain_mem16K_depop50_12nm.xml b/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_register_scan_chain_mem16K_depop50_12nm.xml index baada7911..b6dad0a8f 100755 --- a/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_register_scan_chain_mem16K_depop50_12nm.xml +++ b/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_register_scan_chain_mem16K_depop50_12nm.xml @@ -193,7 +193,16 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10e-12 + + + 10e-12 + + + + + + + + + 10e-12 + + + 10e-12 + + + + + + + + + 10e-12 + + + 10e-12 + + + + + + + + + 
10e-12 + + + 10e-12 + + + + + + + + + + + + 10e-12 5e-12 + + + 10e-12 5e-12 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/openfpga_flow/vpr_arch/k4_frac_N8_tileable_reset_softadder_register_scan_chain_dsp8_nonLR_caravel_io_skywater130nm.xml b/openfpga_flow/vpr_arch/k4_frac_N8_tileable_reset_softadder_register_scan_chain_dsp8_nonLR_caravel_io_skywater130nm.xml new file mode 100644 index 000000000..fea61541c --- /dev/null +++ b/openfpga_flow/vpr_arch/k4_frac_N8_tileable_reset_softadder_register_scan_chain_dsp8_nonLR_caravel_io_skywater130nm.xml @@ -0,0 +1,906 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io_top.outpad io_top.inpad + + + + + + + + + + + + io_right.outpad io_right.inpad + + + + + + + + + + + + io_bottom.outpad io_bottom.inpad + + + + + + + + + + + + io_left.outpad io_left.inpad + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + clb.clk clb.reset + clb.reg_in clb.sc_in clb.cin clb.O[7:0] clb.I0 clb.I0i clb.I1 clb.I1i clb.I2 clb.I2i clb.I3 clb.I3i + clb.O[15:8] clb.I4 clb.I4i clb.I5 clb.I5i clb.I6 clb.I6i clb.I7 clb.I7i + clb.reg_out clb.sc_out clb.cout + + + + + + + + + + + + + + + + mult_8.a[0:5] mult_8.b[0:5] mult_8.out[0:10] + mult_8.a[6:7] mult_8.b[6:7] mult_8.out[11:15] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 1 + 1 + + + + 1 1 1 + 1 1 + + + + 1 1 1 1 1 + 1 1 1 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 235e-12 + 235e-12 + 235e-12 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 261e-12 + 261e-12 + 261e-12 + 261e-12 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 145a80de434ec3c4362290a456f85ffcef89fa9c Mon Sep 17 00:00:00 2001 From: tangxifan Date: Tue, 23 Mar 2021 15:35:34 -0600 Subject: [PATCH 35/38] [Script] Add an openfpga shell script for heterogeneous fpga verification --- ...terogeneous_device_example_script.openfpga | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 openfpga_flow/openfpga_shell_scripts/fix_heterogeneous_device_example_script.openfpga diff --git a/openfpga_flow/openfpga_shell_scripts/fix_heterogeneous_device_example_script.openfpga b/openfpga_flow/openfpga_shell_scripts/fix_heterogeneous_device_example_script.openfpga new file mode 100644 index 000000000..10ed5f17a --- /dev/null +++ b/openfpga_flow/openfpga_shell_scripts/fix_heterogeneous_device_example_script.openfpga @@ -0,0 +1,74 @@ +# Run VPR for the 'and' design +#--write_rr_graph example_rr_graph.xml +vpr ${VPR_ARCH_FILE} ${VPR_TESTBENCH_BLIF} --clock_modeling route --device 
${OPENFPGA_VPR_DEVICE_LAYOUT} + +# Read OpenFPGA architecture definition +read_openfpga_arch -f ${OPENFPGA_ARCH_FILE} + +# Read OpenFPGA simulation settings +read_openfpga_simulation_setting -f ${OPENFPGA_SIM_SETTING_FILE} + +# Annotate the OpenFPGA architecture to VPR data base +# to debug use --verbose options +link_openfpga_arch --sort_gsb_chan_node_in_edges + +# Check and correct any naming conflicts in the BLIF netlist +check_netlist_naming_conflict --fix --report ./netlist_renaming.xml + +# Apply fix-up to clustering nets based on routing results +pb_pin_fixup --verbose + +# Apply fix-up to Look-Up Table truth tables based on packing results +lut_truth_table_fixup + +# Build the module graph +# - Enable compression on routing architecture modules +# - Enable pin duplication on grid modules +build_fabric --compress_routing #--verbose + +# Write the fabric hierarchy of module graph to a file +# This is used by hierarchical PnR flows +write_fabric_hierarchy --file ./fabric_hierarchy.txt + +# Repack the netlist to physical pbs +# This must be done before bitstream generation and testbench generation +# Strongly recommend it is done after all the fix-ups have been applied +repack #--verbose + +# Build the bitstream +# - Output the fabric-independent bitstream to a file +build_architecture_bitstream --verbose --write_file fabric_independent_bitstream.xml + +# Build fabric-dependent bitstream +build_fabric_bitstream --verbose + +# Write fabric-dependent bitstream +write_fabric_bitstream --file fabric_bitstream.xml --format xml + +# Write the Verilog netlist for FPGA fabric +# - Enable the use of explicit port mapping in Verilog netlist +write_fabric_verilog --file ./SRC --explicit_port_mapping --include_timing --print_user_defined_template --verbose + +# Write the Verilog testbench for FPGA fabric +# - We suggest the use of the same output directory as fabric Verilog netlists +# - Must specify the reference benchmark file if you want to output any testbenches +# - Enable 
top-level testbench which is a full verification including programming circuit and core logic of FPGA +# - Enable pre-configured top-level testbench which is a fast verification skipping programming phase +# - Simulation ini file is optional and is needed only when you need to interface different HDL simulators using openfpga flow-run scripts +write_verilog_testbench --file ./SRC --reference_benchmark_file_path ${REFERENCE_VERILOG_TESTBENCH} --print_top_testbench --print_preconfig_top_testbench --print_simulation_ini ./SimulationDeck/simulation_deck.ini --include_signal_init --support_icarus_simulator #--explicit_port_mapping + +# Write the SDC files for PnR backend +# - Turn on every option here +write_pnr_sdc --file ./SDC + +# Write SDC to disable timing for configure ports +write_sdc_disable_timing_configure_ports --file ./SDC/disable_configure_ports.sdc + +# Write the SDC to run timing analysis for a mapped FPGA fabric +write_analysis_sdc --file ./SDC_analysis + +# Finish and exit OpenFPGA +exit + +# Note : +# To run verification at the end of the flow, maintain source in ./SRC directory From 108c84a022babebc649fc2d4d4c49b740d4074f3 Mon Sep 17 00:00:00 2001 From: tangxifan Date: Tue, 23 Mar 2021 15:36:09 -0600 Subject: [PATCH 36/38] [HDL] Add HDL for 8-bit single-mode multiplier --- .../openfpga_cell_library/verilog/mult_8x8.v | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 openfpga_flow/openfpga_cell_library/verilog/mult_8x8.v diff --git a/openfpga_flow/openfpga_cell_library/verilog/mult_8x8.v b/openfpga_flow/openfpga_cell_library/verilog/mult_8x8.v new file mode 100644 index 000000000..a8649488b --- /dev/null +++ b/openfpga_flow/openfpga_cell_library/verilog/mult_8x8.v @@ -0,0 +1,16 @@ +//----------------------------------------------------- +// Design Name : mult_8x8 +// File Name : mult_8x8.v +// Function : An 8-bit multiplier +// Coder : Xifan Tang +//----------------------------------------------------- + +module mult_8x8 ( + 
input [0:7] A, + input [0:7] B, + output [0:15] Y +); + + assign Y = A * B; + +endmodule From d82ffe0cbf36eec45e697df93cfa765fdea0c904 Mon Sep 17 00:00:00 2001 From: tangxifan Date: Tue, 23 Mar 2021 15:36:28 -0600 Subject: [PATCH 37/38] [Test] Deploy MAC_8 benchmark to regression test --- openfpga_flow/regression_test_scripts/fpga_verilog_reg_test.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openfpga_flow/regression_test_scripts/fpga_verilog_reg_test.sh b/openfpga_flow/regression_test_scripts/fpga_verilog_reg_test.sh index 450bc262c..0033ad8ac 100755 --- a/openfpga_flow/regression_test_scripts/fpga_verilog_reg_test.sh +++ b/openfpga_flow/regression_test_scripts/fpga_verilog_reg_test.sh @@ -44,6 +44,9 @@ run-task fpga_verilog/bram/dpram16k --debug --show_thread_logs echo -e "Testing Verilog generation with 16k block RAMs spanning two columns "; run-task fpga_verilog/bram/wide_dpram16k --debug --show_thread_logs +echo -e "Testing Verilog generation with heterogeneous fabric using 8-bit single-mode multipliers "; +run-task fpga_verilog/dsp/single_mode_mult_8x8 --debug --show_thread_logs + echo -e "Testing Verilog generation with different I/O capacities on each side of an FPGA "; run-task fpga_verilog/io/multi_io_capacity --debug --show_thread_logs From b00b4f0f5fd7382dd838ba958baa3bc223533d44 Mon Sep 17 00:00:00 2001 From: tangxifan Date: Tue, 23 Mar 2021 15:44:53 -0600 Subject: [PATCH 38/38] [HDL] Patch the yosys techlib for the heterogeneous FPGA by using little endian --- ..._adder_chain_dpram8K_dsp36_40nm_bram_map.v | 8 +++--- ..._adder_chain_dpram8K_dsp36_40nm_cell_sim.v | 26 +++++++++---------- ...e_adder_chain_dpram8K_dsp36_40nm_dsp_map.v | 6 ++--- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_bram_map.v b/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_bram_map.v index fdc8bef43..804077258 100644 --- 
a/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_bram_map.v +++ b/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_bram_map.v @@ -1,9 +1,9 @@ module $__MY_DPRAM_1024x8 ( - output [7:0] B1DATA, + output [0:7] B1DATA, input CLK1, - input [9:0] B1ADDR, - input [9:0] A1ADDR, - input [7:0] A1DATA, + input [0:9] B1ADDR, + input [0:9] A1ADDR, + input [0:7] A1DATA, input A1EN, input B1EN ); diff --git a/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_cell_sim.v b/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_cell_sim.v index 5e99dc55f..bc8f1206e 100644 --- a/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_cell_sim.v +++ b/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_cell_sim.v @@ -5,15 +5,15 @@ module dpram_1024x8_core ( input wclk, input wen, - input [9:0] waddr, - input [7:0] data_in, + input [0:9] waddr, + input [0:7] data_in, input rclk, input ren, - input [9:0] raddr, - output [7:0] data_out ); + input [0:9] raddr, + output [0:7] data_out ); - reg [7:0] ram[1023:0]; - reg [7:0] internal; + reg [0:7] ram[0:1023]; + reg [0:7] internal; assign data_out = internal; @@ -40,10 +40,10 @@ module dpram_1024x8 ( input clk, input wen, input ren, - input [9:0] waddr, - input [9:0] raddr, - input [7:0] data_in, - output [7:0] data_out ); + input [0:9] waddr, + input [0:9] raddr, + input [0:7] data_in, + output [0:7] data_out ); dpram_1024x8_core memory_0 ( .wclk (clk), @@ -61,9 +61,9 @@ endmodule // 36-bit multiplier //----------------------------- module mult_36( - input [35:0] A, - input [35:0] B, - output [71:0] Y + input [0:35] A, + input [0:35] B, + output [0:71] Y ); assign Y = A * B; diff --git a/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_dsp_map.v 
b/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_dsp_map.v index 4620af74d..977afdb13 100644 --- a/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_dsp_map.v +++ b/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_dsp_map.v @@ -1,7 +1,7 @@ module mult_36x36 ( - input [35:0] A, - input [35:0] B, - output [71:0] Y + input [0:35] A, + input [0:35] B, + output [0:71] Y ); parameter A_SIGNED = 0; parameter B_SIGNED = 0;