diff --git a/openfpga_flow/misc/ys_tmpl_yosys_vpr_adder_bram_dsp_dff_flow.ys b/openfpga_flow/misc/ys_tmpl_yosys_vpr_adder_bram_dsp_dff_flow.ys new file mode 100644 index 000000000..9a5d5c970 --- /dev/null +++ b/openfpga_flow/misc/ys_tmpl_yosys_vpr_adder_bram_dsp_dff_flow.ys @@ -0,0 +1,116 @@ +# Yosys synthesis script for ${TOP_MODULE} + +######################### +# Parse input files +######################### +# Read verilog files +${READ_VERILOG_FILE} +# Read technology library +read_verilog -lib -specify ${YOSYS_CELL_SIM_VERILOG} + +######################### +# Prepare for synthesis +######################### +# Identify top module from hierarchy +hierarchy -check -top ${TOP_MODULE} +# - Convert process blocks to AST +proc +# Flatten all the gates/primitives +flatten +# Identify tri-state buffers from 'z' signal in AST +# with follow-up optimizations to clean up AST +tribuf -logic +opt_expr +opt_clean +# demote inout ports to input or output port +# with follow-up optimizations to clean up AST +deminout +opt + +opt_expr +opt_clean +check +opt +wreduce -keepdc +peepopt +pmuxtree +opt_clean + +######################## +# Map multipliers +# Inspired from synth_xilinx.cc +######################### +# Avoid merging any registers into DSP, reserve memory port registers first +memory_dff +wreduce t:$mul +techmap -map +/mul2dsp.v -map ${YOSYS_DSP_MAP_VERILOG} ${YOSYS_DSP_MAP_PARAMETERS} +select a:mul2dsp +setattr -unset mul2dsp +opt_expr -fine +wreduce +select -clear +chtype -set $mul t:$__soft_mul# Extract arithmetic functions + +######################### +# Map $alu to carry chain +######################### +alumacc +techmap -map ${YOSYS_ADDER_MAP_VERILOG} + +######################### +# Run coarse synthesis +######################### +# Run a tech map with default library +techmap +share +opt +fsm +# Run a quick follow-up optimization to sweep out unused nets/signals +opt -fast +# Optimize any memory cells by merging share-able ports and collecting all the ports belonging to memorcy cells +memory -nomap +opt_clean + + +######################### +# Map logics to BRAMs +######################### +memory_bram -rules ${YOSYS_BRAM_MAP_RULES} +techmap -map ${YOSYS_BRAM_MAP_VERILOG} +opt -fast -mux_undef -undriven -fine +memory_map +opt -undriven -fine + +######################### +# Map muxes to pmuxes +######################### +techmap -map +/pmux2mux.v + +######################### +# Map flip-flops +######################### +techmap -map ${YOSYS_DFF_MAP_VERILOG} +opt_expr -mux_undef +simplemap +opt_expr +opt_merge +opt_rmdff +opt_clean +opt + +######################### +# Map LUTs +######################### +abc -lut ${LUT_SIZE} + +######################### +# Check and show statisitics +######################### +hierarchy -check +stat + +######################### +# Output netlists +######################### +opt_clean -purge +write_blif ${OUTPUT_BLIF} diff --git a/openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_dsp_dff_flow.ys b/openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_dsp_dff_flow.ys index 9a5d5c970..6412ecf8c 100644 --- a/openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_dsp_dff_flow.ys +++ b/openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_dsp_dff_flow.ys @@ -51,12 +51,6 @@ wreduce select -clear chtype -set $mul t:$__soft_mul# Extract arithmetic functions -######################### -# Map $alu to carry chain -######################### -alumacc -techmap -map ${YOSYS_ADDER_MAP_VERILOG} - ######################### # Run coarse synthesis ######################### diff --git a/openfpga_flow/openfpga_shell_scripts/write_full_testbench_no_clk_modeling_example_script.openfpga b/openfpga_flow/openfpga_shell_scripts/write_full_testbench_no_clk_modeling_example_script.openfpga index ffb290c12..506364e30 100644 --- a/openfpga_flow/openfpga_shell_scripts/write_full_testbench_no_clk_modeling_example_script.openfpga +++ b/openfpga_flow/openfpga_shell_scripts/write_full_testbench_no_clk_modeling_example_script.openfpga @@ -10,7 +10,7 @@ read_openfpga_simulation_setting -f ${OPENFPGA_SIM_SETTING_FILE} # Annotate the OpenFPGA architecture to VPR data base # to debug use --verbose options -link_openfpga_arch --activity_file and2_ace_out.act --sort_gsb_chan_node_in_edges +link_openfpga_arch --sort_gsb_chan_node_in_edges # Check and correct any naming conflicts in the BLIF netlist check_netlist_naming_conflict --fix --report ./netlist_renaming.xml diff --git a/openfpga_flow/openfpga_yosys_techlib/common/bram_dsp_dff_cell_sim.v b/openfpga_flow/openfpga_yosys_techlib/common/bram_dsp_dff_cell_sim.v new file mode 100644 index 000000000..478378413 --- /dev/null +++ b/openfpga_flow/openfpga_yosys_techlib/common/bram_dsp_dff_cell_sim.v @@ -0,0 +1,312 @@ +//----------------------------- +// Dual-port RAM 1024x8 bit (8Kbit) +// Core logic +//----------------------------- +module dpram_1024x8_core ( + input wclk, + input wen, + input [0:9] waddr, + input [0:7] data_in, + input rclk, + input ren, + input [0:9] raddr, + output [0:7] data_out ); + + reg [0:7] ram[0:1023]; + reg [0:7] internal; + + assign data_out = internal; + + always @(posedge wclk) begin + if(wen) begin + ram[waddr] <= data_in; + end + end + + always @(posedge rclk) begin + if(ren) begin + internal <= ram[raddr]; + end + end + +endmodule + +//----------------------------- +// Dual-port RAM 1024x8 bit (8Kbit) wrapper +// where the read clock and write clock +// are combined to a unified clock +//----------------------------- +module dpram_1024x8 ( + input clk, + input wen, + input ren, + input [0:9] waddr, + input [0:9] raddr, + input [0:7] data_in, + output [0:7] data_out ); + + dpram_1024x8_core memory_0 ( + .wclk (clk), + .wen (wen), + .waddr (waddr), + .data_in (data_in), + .rclk (clk), + .ren (ren), + .raddr (raddr), + .data_out (data_out) ); + +endmodule + + + + + +//----------------------------- +// Dual-port RAM 128x8 bit (1Kbit) +// Core logic +//----------------------------- +module dpram_128x8_core ( + input wclk, + input wen, + input [0:6] waddr, + input [0:7] data_in, + input rclk, + input ren, + input [0:6] raddr, + output [0:7] data_out ); + + reg [0:7] ram[0:127]; + reg [0:7] internal; + + assign data_out = internal; + + always @(posedge wclk) begin + if(wen) begin + ram[waddr] <= data_in; + end + end + + always @(posedge rclk) begin + if(ren) begin + internal <= ram[raddr]; + end + end + +endmodule + +//----------------------------- +// Dual-port RAM 128x8 bit (1Kbit) wrapper +// where the read clock and write clock +// are combined to a unified clock +//----------------------------- +module dpram_128x8 ( + input clk, + input wen, + input ren, + input [0:6] waddr, + input [0:6] raddr, + input [0:7] data_in, + output [0:7] data_out ); + + dpram_128x8_core memory_0 ( + .wclk (clk), + .wen (wen), + .waddr (waddr), + .data_in (data_in), + .rclk (clk), + .ren (ren), + .raddr (raddr), + .data_out (data_out) ); + +endmodule + + + +//----------------------------- +// 36-bit multiplier +//----------------------------- +module mult_36( + input [0:35] A, + input [0:35] B, + output [0:71] Y +); + +assign Y = A * B; + +endmodule + +//----------------------------- +// 18-bit multiplier +//----------------------------- +module mult_18( + input [0:17] A, + input [0:17] B, + output [0:35] Y +); + +assign Y = A * B; + +endmodule + + +//----------------------------- +// 8-bit multiplier +//----------------------------- +module mult_8( + input [0:7] A, + input [0:7] B, + output [0:15] Y +); + +assign Y = A * B; + +endmodule + + +//----------------------------- +// Native D-type flip-flop +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dff( + output reg Q, + input D, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + Q <= D; + 1'b1: + always @(negedge C) + Q <= D; + endcase +endmodule + +//----------------------------- +// D-type flip-flop with active-high asynchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffr( + output reg Q, + input D, + input R, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or posedge R) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C or posedge R) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// D-type flip-flop with active-high asynchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffs( + output reg Q, + input D, + input S, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or posedge S) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C or posedge S) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// D-type flip-flop with active-low asynchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffrn( + output reg Q, + input D, + input RN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or negedge RN) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C or negedge RN) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// D-type flip-flop with active-low asynchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffsn( + output reg Q, + input D, + input SN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or negedge SN) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C or negedge SN) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule diff --git a/openfpga_flow/openfpga_yosys_techlib/common/dpram_1K_bram.txt b/openfpga_flow/openfpga_yosys_techlib/common/dpram_1K_bram_rules.txt similarity index 100% rename from openfpga_flow/openfpga_yosys_techlib/common/dpram_1K_bram.txt rename to openfpga_flow/openfpga_yosys_techlib/common/dpram_1K_bram_rules.txt diff --git a/openfpga_flow/openfpga_yosys_techlib/common/dpram_8K_bram.txt b/openfpga_flow/openfpga_yosys_techlib/common/dpram_8K_bram_rules.txt similarity index 100% rename from openfpga_flow/openfpga_yosys_techlib/common/dpram_8K_bram.txt rename to openfpga_flow/openfpga_yosys_techlib/common/dpram_8K_bram_rules.txt diff --git a/openfpga_flow/tasks/benchmark_sweep/counter/config/task.conf b/openfpga_flow/tasks/benchmark_sweep/counter/config/task.conf index 6fefdb16d..a0d8a07b4 100644 --- a/openfpga_flow/tasks/benchmark_sweep/counter/config/task.conf +++ b/openfpga_flow/tasks/benchmark_sweep/counter/config/task.conf @@ -40,7 +40,7 @@ bench2=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/counters/c bench3=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/counters/counter_128bit_async_resetb/counter.v [SYNTHESIS_PARAM] -bench_yosys_common=${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_dsp_dff_flow.ys +bench_yosys_common=${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_yosys_vpr_dff_flow.ys #bench_yosys_rewrite_common=${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_yosys_vpr_flow_with_rewrite.ys;${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_rewrite_flow.ys bench0_top = counter diff --git a/openfpga_flow/tasks/benchmark_sweep/processor/config/task.conf b/openfpga_flow/tasks/benchmark_sweep/processor/config/task.conf index ea252027f..4f429b8d7 100644 --- a/openfpga_flow/tasks/benchmark_sweep/processor/config/task.conf +++ b/openfpga_flow/tasks/benchmark_sweep/processor/config/task.conf @@ -9,31 +9,39 @@ [GENERAL] run_engine=openfpga_shell power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml -power_analysis = true +power_analysis = false spice_output=false verilog_output=true timeout_each_job = 20*60 fpga_flow=yosys_vpr +arch_variable_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_timing_annotation/design_variables.yml [OpenFPGA_SHELL] -openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/generate_bitstream_global_tile_multiclock_example_script.openfpga -openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_frac_N4_adder_chain_40nm_cc_openfpga.xml -openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/fixed_sim_openfpga.xml +openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/write_full_testbench_no_clk_modeling_example_script.openfpga +openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_frac_N8_adder_chain_mem1K_130nm_cc_openfpga.xml +openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml openfpga_vpr_device_layout=auto openfpga_fast_configuration= +yosys_cell_sim_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k4_frac_N8_tileable_adder_chain_dpram1K_dsp18_fracff_skywater130nm/k4_frac_N8_tileable_adder_chain_dpram1K_dsp18_fracff_skywater130nm_cell_sim.v +yosys_bram_map_rules=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/common/dpram_1K_bram.txt +yosys_bram_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/common/dpram_1K_bram_map.v +yosys_dsp_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/common/dsp_map.v +yosys_dsp_map_parameters=-D DSP_A_MAXWIDTH=8 -D DSP_B_MAXWIDTH=8 -D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 -D DSP_NAME=mult_8x8 +yosys_dff_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k4_frac_N8_tileable_adder_chain_dpram1K_dsp18_fracff_skywater130nm/k4_frac_N8_tileable_adder_chain_dpram1K_dsp18_fracff_skywater130nm_dff_map.v + [ARCHITECTURES] -arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_frac_N4_tileable_adder_chain_40nm.xml +arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_frac_N8_tileable_adder_chain_mem1K_130nm.xml [BENCHMARKS] -bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/processor/picorv32/picorv32.v -bench1=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/processor/vexriscv/vexriscv_small.v +bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/signal_gen/clock_divider.v [SYNTHESIS_PARAM] -bench0_top = picorv32 +bench0_top = clock_divider bench0_chan_width = 300 -bench1_top = VexRiscv -bench1_chan_width = 300 + +bench_yosys_common=${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_dsp_dff_flow.ys [SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH] -end_flow_with_test= +#end_flow_with_test= +vpr_fpga_verilog_formal_verification_top_netlist= diff --git a/openfpga_flow/tasks/benchmark_sweep/signal_gen/config/task.conf b/openfpga_flow/tasks/benchmark_sweep/signal_gen/config/task.conf index 6661945a0..a6825d862 100644 --- a/openfpga_flow/tasks/benchmark_sweep/signal_gen/config/task.conf +++ b/openfpga_flow/tasks/benchmark_sweep/signal_gen/config/task.conf @@ -1,18 +1,11 @@ # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - # Configuration file for running experiments - # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - # timeout_each_job : FPGA Task script splits fpga flow into multiple jobs - # Each job execute fpga_flow script on combination of architecture & benchmark - # timeout_each_job is timeout for each job - # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - [GENERAL] run_engine=openfpga_shell power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml @@ -23,7 +16,7 @@ timeout_each_job = 20*60 fpga_flow=yosys_vpr [OpenFPGA_SHELL] -openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/write_full_testbench_example_script.openfpga +openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/example_script.openfpga openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_N4_40nm_cc_openfpga.xml openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml openfpga_vpr_device_layout= @@ -48,4 +41,5 @@ bench2_top = reset_generator bench2_chan_width = 300 [SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH] -end_flow_with_test= \ No newline at end of file +#end_flow_with_test= +vpr_fpga_verilog_formal_verification_top_netlist= diff --git a/openfpga_flow/tasks/benchmark_sweep/vtr_benchmarks/config/vtr_benchmark_golden_results.csv b/openfpga_flow/tasks/benchmark_sweep/vtr_benchmarks/config/vtr_benchmark_golden_results.csv index af8842eed..3b74e5d7f 100644 --- a/openfpga_flow/tasks/benchmark_sweep/vtr_benchmarks/config/vtr_benchmark_golden_results.csv +++ b/openfpga_flow/tasks/benchmark_sweep/vtr_benchmarks/config/vtr_benchmark_golden_results.csv @@ -10,7 +10,7 @@ name,mult_blocks,memory_blocks 00_bgm_MIN_ROUTE_CHAN_WIDTH,11,0 00_RLE_BlobMerging_MIN_ROUTE_CHAN_WIDTH,0,0 00_paj_boundtop_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,0,1 -00_memset_MIN_ROUTE_CHAN_WIDTH,0,1 +00_memset_MIN_ROUTE_CHAN_WIDTH,0,0 00_diffeq_paj_convert_MIN_ROUTE_CHAN_WIDTH,5,0 00_diffeq_f_systemC_MIN_ROUTE_CHAN_WIDTH,5,0 00_LU8PEEng_MIN_ROUTE_CHAN_WIDTH,8,9 @@ -20,7 +20,7 @@ name,mult_blocks,memory_blocks 00_mkPktMerge_MIN_ROUTE_CHAN_WIDTH,0,3 00_mkSMAdapter4B_MIN_ROUTE_CHAN_WIDTH,0,3 00_or1200_flat_MIN_ROUTE_CHAN_WIDTH,1,2 -00_paj_raygentop_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,18,1 +00_paj_raygentop_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,18,0 00_sha1_MIN_ROUTE_CHAN_WIDTH,0,0 00_sv_chip0_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,0,0 00_sv_chip1_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,152,0