added complete bram sizing files

This commit is contained in:
Andrew Pond 2022-02-07 12:19:31 -07:00
parent 8c38747d6c
commit 8f18a9ad9a
10 changed files with 454 additions and 30 deletions

View File

@ -0,0 +1,116 @@
# Yosys synthesis script for ${TOP_MODULE}
# NOTE: every ${...} token in this file is a template placeholder that has to
# be replaced with a concrete value before Yosys can execute the script.
#########################
# Parse input files
#########################
# Read verilog files
# (the placeholder below is expected to expand to the read command(s) for the
#  benchmark sources - TODO confirm against the flow script that fills it in)
${READ_VERILOG_FILE}
# Read technology library
# (-lib / -specify per the Yosys read_verilog help: load the cell models as
#  library cells and keep their specify blocks)
read_verilog -lib -specify ${YOSYS_CELL_SIM_VERILOG}
#########################
# Prepare for synthesis
#########################
# Identify top module from hierarchy
hierarchy -check -top ${TOP_MODULE}
# Convert behavioural processes (always blocks) into netlist elements
proc
# Flatten all the gates/primitives
flatten
# Identify tri-state buffers from 'z' signal in AST
# with follow-up optimizations to clean up AST
tribuf -logic
opt_expr
opt_clean
# demote inout ports to input or output port
# with follow-up optimizations to clean up AST
deminout
opt
opt_expr
opt_clean
# Sanity-check the design, then run generic word-level clean-up passes
# before the technology-specific mapping steps that follow
check
opt
wreduce -keepdc
peepopt
pmuxtree
opt_clean
########################
# Map multipliers
# Inspired from synth_xilinx.cc
#########################
# Avoid merging any registers into DSP, reserve memory port registers first
memory_dff
# Reduce the operand widths of $mul cells before they are mapped
wreduce t:$mul
# Map $mul cells through mul2dsp.v and the architecture DSP map file;
# ${YOSYS_DSP_MAP_PARAMETERS} supplies the extra techmap arguments (-D defines)
techmap -map +/mul2dsp.v -map ${YOSYS_DSP_MAP_VERILOG} ${YOSYS_DSP_MAP_PARAMETERS}
# Remove the temporary mul2dsp attribute from every object that carries it
select a:mul2dsp
setattr -unset mul2dsp
opt_expr -fine
wreduce
select -clear
# Turn cells of type $__soft_mul back into generic $mul cells.
# Keep this command on a line of its own: Yosys only starts a comment at a
# token that begins with '#', so text glued to the selection pattern would be
# parsed as part of the selection instead of being ignored.
chtype -set $mul t:$__soft_mul
# Extract arithmetic functions
#########################
# Map $alu to carry chain
#########################
# alumacc extracts $alu/$macc cells; the architecture map file given by
# ${YOSYS_ADDER_MAP_VERILOG} then rewrites them for the target
alumacc
techmap -map ${YOSYS_ADDER_MAP_VERILOG}
#########################
# Run coarse synthesis
#########################
# Run a tech map with default library
techmap
share
opt
fsm
# Run a quick follow-up optimization to sweep out unused nets/signals
opt -fast
# Optimize any memory cells by merging share-able ports and collecting all the ports belonging to memory cells
# (-nomap: memories are not yet broken down into logic, so the BRAM step below still sees them)
memory -nomap
opt_clean
#########################
# Map logics to BRAMs
#########################
# Match memories against the BRAM rules, then replace the matched cells using
# the architecture BRAM map file
memory_bram -rules ${YOSYS_BRAM_MAP_RULES}
techmap -map ${YOSYS_BRAM_MAP_VERILOG}
opt -fast -mux_undef -undriven -fine
# Whatever memory was not mapped by the BRAM step is now mapped by memory_map
memory_map
opt -undriven -fine
#########################
# Map pmuxes to muxes
#########################
techmap -map +/pmux2mux.v
#########################
# Map flip-flops
#########################
# Rewrite flip-flops with the architecture map file, then lower the remaining
# cells with simplemap and clean up
techmap -map ${YOSYS_DFF_MAP_VERILOG}
opt_expr -mux_undef
simplemap
opt_expr
opt_merge
# NOTE(review): opt_rmdff is not available in newer Yosys releases (its work
# appears to have moved into opt_dff) - confirm against the Yosys version in use
opt_rmdff
opt_clean
opt
#########################
# Map LUTs
#########################
# ${LUT_SIZE} is the LUT width passed to abc's -lut option
abc -lut ${LUT_SIZE}
#########################
# Check and show statistics
#########################
hierarchy -check
stat
#########################
# Output netlists
#########################
# Final clean-up (-purge) before the netlist is written in BLIF format
opt_clean -purge
write_blif ${OUTPUT_BLIF}

View File

@ -51,12 +51,6 @@ wreduce
select -clear select -clear
chtype -set $mul t:$__soft_mul# Extract arithmetic functions chtype -set $mul t:$__soft_mul# Extract arithmetic functions
#########################
# Map $alu to carry chain
#########################
alumacc
techmap -map ${YOSYS_ADDER_MAP_VERILOG}
######################### #########################
# Run coarse synthesis # Run coarse synthesis
######################### #########################

View File

@ -10,7 +10,7 @@ read_openfpga_simulation_setting -f ${OPENFPGA_SIM_SETTING_FILE}
# Annotate the OpenFPGA architecture to VPR data base # Annotate the OpenFPGA architecture to VPR data base
# to debug use --verbose options # to debug use --verbose options
link_openfpga_arch --activity_file and2_ace_out.act --sort_gsb_chan_node_in_edges link_openfpga_arch --sort_gsb_chan_node_in_edges
# Check and correct any naming conflicts in the BLIF netlist # Check and correct any naming conflicts in the BLIF netlist
check_netlist_naming_conflict --fix --report ./netlist_renaming.xml check_netlist_naming_conflict --fix --report ./netlist_renaming.xml

View File

@ -0,0 +1,312 @@
//-----------------------------
// dpram_1024x8_core
// Dual-port RAM core, 1024 words x 8 bits (8Kbit):
// one write port and one read port, each with its own clock.
//   wclk, wen, waddr, data_in : write port - data_in is stored at waddr
//                               on a rising wclk edge while wen is high
//   rclk, ren, raddr          : read port - the word at raddr is captured
//                               on a rising rclk edge while ren is high
//   data_out                  : the last captured read word; it keeps its
//                               value while ren is low
//-----------------------------
module dpram_1024x8_core (
  input        wclk,
  input        wen,
  input  [0:9] waddr,
  input  [0:7] data_in,
  input        rclk,
  input        ren,
  input  [0:9] raddr,
  output [0:7] data_out
);

  // Storage array and the registered read word
  reg [0:7] ram [0:1023];
  reg [0:7] read_word;

  // Read port: registered read, enabled by ren
  always @(posedge rclk)
    if (ren)
      read_word <= ram[raddr];

  // Write port: enabled by wen
  always @(posedge wclk)
    if (wen)
      ram[waddr] <= data_in;

  assign data_out = read_word;

endmodule
//-----------------------------
// dpram_1024x8
// Single-clock wrapper around dpram_1024x8_core (1024x8 bit, 8Kbit):
// the one clk input drives both the write clock and the read clock
// of the core; every other port is passed straight through.
//-----------------------------
module dpram_1024x8 (
  input        clk,
  input        wen,
  input        ren,
  input  [0:9] waddr,
  input  [0:9] raddr,
  input  [0:7] data_in,
  output [0:7] data_out
);

  dpram_1024x8_core memory_0 (
    // both core clocks come from the unified clock
    .wclk     (clk),
    .rclk     (clk),
    // write port
    .wen      (wen),
    .waddr    (waddr),
    .data_in  (data_in),
    // read port
    .ren      (ren),
    .raddr    (raddr),
    .data_out (data_out)
  );

endmodule
//-----------------------------
// dpram_128x8_core
// Dual-port RAM core, 128 words x 8 bits (1Kbit):
// one write port and one read port, each with its own clock.
//   wclk, wen, waddr, data_in : write port - data_in is stored at waddr
//                               on a rising wclk edge while wen is high
//   rclk, ren, raddr          : read port - the word at raddr is captured
//                               on a rising rclk edge while ren is high
//   data_out                  : the last captured read word; it keeps its
//                               value while ren is low
//-----------------------------
module dpram_128x8_core (
  input        wclk,
  input        wen,
  input  [0:6] waddr,
  input  [0:7] data_in,
  input        rclk,
  input        ren,
  input  [0:6] raddr,
  output [0:7] data_out
);

  // Storage array and the registered read word
  reg [0:7] ram [0:127];
  reg [0:7] read_word;

  // Read port: registered read, enabled by ren
  always @(posedge rclk)
    if (ren)
      read_word <= ram[raddr];

  // Write port: enabled by wen
  always @(posedge wclk)
    if (wen)
      ram[waddr] <= data_in;

  assign data_out = read_word;

endmodule
//-----------------------------
// dpram_128x8
// Single-clock wrapper around dpram_128x8_core (128x8 bit, 1Kbit):
// the one clk input drives both the write clock and the read clock
// of the core; every other port is passed straight through.
//-----------------------------
module dpram_128x8 (
  input        clk,
  input        wen,
  input        ren,
  input  [0:6] waddr,
  input  [0:6] raddr,
  input  [0:7] data_in,
  output [0:7] data_out
);

  dpram_128x8_core memory_0 (
    // both core clocks come from the unified clock
    .wclk     (clk),
    .rclk     (clk),
    // write port
    .wen      (wen),
    .waddr    (waddr),
    .data_in  (data_in),
    // read port
    .ren      (ren),
    .raddr    (raddr),
    .data_out (data_out)
  );

endmodule
//-----------------------------
// mult_36
// Combinational 36-bit x 36-bit multiplier.
//   A, B : 36-bit operands
//   Y    : 72-bit product of A and B
//-----------------------------
module mult_36 (
  input  [0:35] A,
  input  [0:35] B,
  output [0:71] Y
);

  // Product computed at the full output width, then driven onto Y
  wire [0:71] product = A * B;

  assign Y = product;

endmodule
//-----------------------------
// mult_18
// Combinational 18-bit x 18-bit multiplier.
//   A, B : 18-bit operands
//   Y    : 36-bit product of A and B
//-----------------------------
module mult_18 (
  input  [0:17] A,
  input  [0:17] B,
  output [0:35] Y
);

  // Product computed at the full output width, then driven onto Y
  wire [0:35] product = A * B;

  assign Y = product;

endmodule
//-----------------------------
// mult_8
// Combinational 8-bit x 8-bit multiplier.
//   A, B : 8-bit operands
//   Y    : 16-bit product of A and B
//-----------------------------
module mult_8 (
  input  [0:7]  A,
  input  [0:7]  B,
  output [0:15] Y
);

  // Product computed at the full output width, then driven onto Y
  wire [0:15] product = A * B;

  assign Y = product;

endmodule
//-----------------------------
// dff
// Plain D-type flip-flop.
//   Q : registered output, starts at INIT
//   D : data input
//   C : clock; the active edge is rising when IS_C_INVERTED is 0
//       and falling when IS_C_INVERTED is 1
//-----------------------------
(* abc9_flop, lib_whitebox *)
module dff (
  output reg Q,
  input      D,
  (* clkbuf_sink *)
  (* invertible_pin = "IS_C_INVERTED" *)
  input      C
);
  parameter [0:0] INIT          = 1'b0;
  parameter [0:0] IS_C_INVERTED = 1'b0;

  initial Q = INIT;

  // Exactly one of the two always blocks is elaborated, chosen by the
  // clock-polarity parameter.
  generate
    case (|IS_C_INVERTED)
      1'b1: begin : falling_edge
        always @(negedge C)
          Q <= D;
      end
      1'b0: begin : rising_edge
        always @(posedge C)
          Q <= D;
      end
    endcase
  endgenerate

endmodule
//-----------------------------
// dffr
// D-type flip-flop with active-high asynchronous reset.
//   Q : registered output, starts at INIT
//   D : data input
//   R : asynchronous reset; while high, Q is forced to 0
//   C : clock; the active edge is rising when IS_C_INVERTED is 0
//       and falling when IS_C_INVERTED is 1
//-----------------------------
(* abc9_flop, lib_whitebox *)
module dffr (
  output reg Q,
  input      D,
  input      R,
  (* clkbuf_sink *)
  (* invertible_pin = "IS_C_INVERTED" *)
  input      C
);
  parameter [0:0] INIT          = 1'b0;
  parameter [0:0] IS_C_INVERTED = 1'b0;

  initial Q = INIT;

  // Exactly one of the two always blocks is elaborated, chosen by the
  // clock-polarity parameter. Reset takes priority over the data input.
  generate
    case (|IS_C_INVERTED)
      1'b1: begin : falling_edge
        always @(negedge C or posedge R)
          if (R) Q <= 1'b0;
          else   Q <= D;
      end
      1'b0: begin : rising_edge
        always @(posedge C or posedge R)
          if (R) Q <= 1'b0;
          else   Q <= D;
      end
    endcase
  endgenerate

endmodule
//-----------------------------
// dffs
// D-type flip-flop with active-high asynchronous set.
//   Q : registered output, starts at INIT
//   D : data input
//   S : asynchronous set; while high, Q is forced to 1
//   C : clock; the active edge is rising when IS_C_INVERTED is 0
//       and falling when IS_C_INVERTED is 1
//-----------------------------
(* abc9_flop, lib_whitebox *)
module dffs (
  output reg Q,
  input      D,
  input      S,
  (* clkbuf_sink *)
  (* invertible_pin = "IS_C_INVERTED" *)
  input      C
);
  parameter [0:0] INIT          = 1'b0;
  parameter [0:0] IS_C_INVERTED = 1'b0;

  initial Q = INIT;

  // Exactly one of the two always blocks is elaborated, chosen by the
  // clock-polarity parameter. Set takes priority over the data input.
  generate
    case (|IS_C_INVERTED)
      1'b1: begin : falling_edge
        always @(negedge C or posedge S)
          if (S) Q <= 1'b1;
          else   Q <= D;
      end
      1'b0: begin : rising_edge
        always @(posedge C or posedge S)
          if (S) Q <= 1'b1;
          else   Q <= D;
      end
    endcase
  endgenerate

endmodule
//-----------------------------
// dffrn
// D-type flip-flop with active-low asynchronous reset.
//   Q  : registered output, starts at INIT
//   D  : data input
//   RN : asynchronous reset; while low, Q is forced to 0
//   C  : clock; the active edge is rising when IS_C_INVERTED is 0
//        and falling when IS_C_INVERTED is 1
//-----------------------------
(* abc9_flop, lib_whitebox *)
module dffrn (
  output reg Q,
  input      D,
  input      RN,
  (* clkbuf_sink *)
  (* invertible_pin = "IS_C_INVERTED" *)
  input      C
);
  parameter [0:0] INIT          = 1'b0;
  parameter [0:0] IS_C_INVERTED = 1'b0;

  initial Q = INIT;

  // Exactly one of the two always blocks is elaborated, chosen by the
  // clock-polarity parameter. Reset takes priority over the data input.
  generate
    case (|IS_C_INVERTED)
      1'b1: begin : falling_edge
        always @(negedge C or negedge RN)
          if (!RN) Q <= 1'b0;
          else     Q <= D;
      end
      1'b0: begin : rising_edge
        always @(posedge C or negedge RN)
          if (!RN) Q <= 1'b0;
          else     Q <= D;
      end
    endcase
  endgenerate

endmodule
//-----------------------------
// dffsn
// D-type flip-flop with active-low asynchronous set.
//   Q  : registered output, starts at INIT
//   D  : data input
//   SN : asynchronous set; while low, Q is forced to 1
//   C  : clock; the active edge is rising when IS_C_INVERTED is 0
//        and falling when IS_C_INVERTED is 1
//-----------------------------
(* abc9_flop, lib_whitebox *)
module dffsn (
  output reg Q,
  input      D,
  input      SN,
  (* clkbuf_sink *)
  (* invertible_pin = "IS_C_INVERTED" *)
  input      C
);
  parameter [0:0] INIT          = 1'b0;
  parameter [0:0] IS_C_INVERTED = 1'b0;

  initial Q = INIT;

  // Exactly one of the two always blocks is elaborated, chosen by the
  // clock-polarity parameter. Set takes priority over the data input.
  generate
    case (|IS_C_INVERTED)
      1'b1: begin : falling_edge
        always @(negedge C or negedge SN)
          if (!SN) Q <= 1'b1;
          else     Q <= D;
      end
      1'b0: begin : rising_edge
        always @(posedge C or negedge SN)
          if (!SN) Q <= 1'b1;
          else     Q <= D;
      end
    endcase
  endgenerate

endmodule

View File

@ -40,7 +40,7 @@ bench2=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/counters/c
bench3=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/counters/counter_128bit_async_resetb/counter.v bench3=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/counters/counter_128bit_async_resetb/counter.v
[SYNTHESIS_PARAM] [SYNTHESIS_PARAM]
bench_yosys_common=${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_dsp_dff_flow.ys bench_yosys_common=${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_yosys_vpr_dff_flow.ys
#bench_yosys_rewrite_common=${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_yosys_vpr_flow_with_rewrite.ys;${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_rewrite_flow.ys #bench_yosys_rewrite_common=${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_yosys_vpr_flow_with_rewrite.ys;${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_rewrite_flow.ys
bench0_top = counter bench0_top = counter

View File

@ -9,31 +9,39 @@
[GENERAL] [GENERAL]
run_engine=openfpga_shell run_engine=openfpga_shell
power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml
power_analysis = true power_analysis = false
spice_output=false spice_output=false
verilog_output=true verilog_output=true
timeout_each_job = 20*60 timeout_each_job = 20*60
fpga_flow=yosys_vpr fpga_flow=yosys_vpr
arch_variable_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_timing_annotation/design_variables.yml
[OpenFPGA_SHELL] [OpenFPGA_SHELL]
openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/generate_bitstream_global_tile_multiclock_example_script.openfpga openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/write_full_testbench_no_clk_modeling_example_script.openfpga
openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_frac_N4_adder_chain_40nm_cc_openfpga.xml openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_frac_N8_adder_chain_mem1K_130nm_cc_openfpga.xml
openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/fixed_sim_openfpga.xml openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml
openfpga_vpr_device_layout=auto openfpga_vpr_device_layout=auto
openfpga_fast_configuration= openfpga_fast_configuration=
yosys_cell_sim_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k4_frac_N8_tileable_adder_chain_dpram1K_dsp18_fracff_skywater130nm/k4_frac_N8_tileable_adder_chain_dpram1K_dsp18_fracff_skywater130nm_cell_sim.v
yosys_bram_map_rules=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/common/dpram_1K_bram.txt
yosys_bram_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/common/dpram_1K_bram_map.v
yosys_dsp_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/common/dsp_map.v
yosys_dsp_map_parameters=-D DSP_A_MAXWIDTH=8 -D DSP_B_MAXWIDTH=8 -D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 -D DSP_NAME=mult_8x8
yosys_dff_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k4_frac_N8_tileable_adder_chain_dpram1K_dsp18_fracff_skywater130nm/k4_frac_N8_tileable_adder_chain_dpram1K_dsp18_fracff_skywater130nm_dff_map.v
[ARCHITECTURES] [ARCHITECTURES]
arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_frac_N4_tileable_adder_chain_40nm.xml arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_frac_N8_tileable_adder_chain_mem1K_130nm.xml
[BENCHMARKS] [BENCHMARKS]
bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/processor/picorv32/picorv32.v bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/signal_gen/clock_divider.v
bench1=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/processor/vexriscv/vexriscv_small.v
[SYNTHESIS_PARAM] [SYNTHESIS_PARAM]
bench0_top = picorv32 bench0_top = clock_divider
bench0_chan_width = 300 bench0_chan_width = 300
bench1_top = VexRiscv
bench1_chan_width = 300 bench_yosys_common=${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_dsp_dff_flow.ys
[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH] [SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH]
end_flow_with_test= #end_flow_with_test=
vpr_fpga_verilog_formal_verification_top_netlist=

View File

@ -1,18 +1,11 @@
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Configuration file for running experiments # Configuration file for running experiments
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# timeout_each_job : FPGA Task script splits fpga flow into multiple jobs # timeout_each_job : FPGA Task script splits fpga flow into multiple jobs
# Each job execute fpga_flow script on combination of architecture & benchmark # Each job execute fpga_flow script on combination of architecture & benchmark
# timeout_each_job is timeout for each job # timeout_each_job is timeout for each job
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
[GENERAL] [GENERAL]
run_engine=openfpga_shell run_engine=openfpga_shell
power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml
@ -23,7 +16,7 @@ timeout_each_job = 20*60
fpga_flow=yosys_vpr fpga_flow=yosys_vpr
[OpenFPGA_SHELL] [OpenFPGA_SHELL]
openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/write_full_testbench_example_script.openfpga openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/example_script.openfpga
openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_N4_40nm_cc_openfpga.xml openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_N4_40nm_cc_openfpga.xml
openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml
openfpga_vpr_device_layout= openfpga_vpr_device_layout=
@ -48,4 +41,5 @@ bench2_top = reset_generator
bench2_chan_width = 300 bench2_chan_width = 300
[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH] [SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH]
end_flow_with_test= #end_flow_with_test=
vpr_fpga_verilog_formal_verification_top_netlist=

View File

@ -10,7 +10,7 @@ name,mult_blocks,memory_blocks
00_bgm_MIN_ROUTE_CHAN_WIDTH,11,0 00_bgm_MIN_ROUTE_CHAN_WIDTH,11,0
00_RLE_BlobMerging_MIN_ROUTE_CHAN_WIDTH,0,0 00_RLE_BlobMerging_MIN_ROUTE_CHAN_WIDTH,0,0
00_paj_boundtop_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,0,1 00_paj_boundtop_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,0,1
00_memset_MIN_ROUTE_CHAN_WIDTH,0,1 00_memset_MIN_ROUTE_CHAN_WIDTH,0,0
00_diffeq_paj_convert_MIN_ROUTE_CHAN_WIDTH,5,0 00_diffeq_paj_convert_MIN_ROUTE_CHAN_WIDTH,5,0
00_diffeq_f_systemC_MIN_ROUTE_CHAN_WIDTH,5,0 00_diffeq_f_systemC_MIN_ROUTE_CHAN_WIDTH,5,0
00_LU8PEEng_MIN_ROUTE_CHAN_WIDTH,8,9 00_LU8PEEng_MIN_ROUTE_CHAN_WIDTH,8,9
@ -20,7 +20,7 @@ name,mult_blocks,memory_blocks
00_mkPktMerge_MIN_ROUTE_CHAN_WIDTH,0,3 00_mkPktMerge_MIN_ROUTE_CHAN_WIDTH,0,3
00_mkSMAdapter4B_MIN_ROUTE_CHAN_WIDTH,0,3 00_mkSMAdapter4B_MIN_ROUTE_CHAN_WIDTH,0,3
00_or1200_flat_MIN_ROUTE_CHAN_WIDTH,1,2 00_or1200_flat_MIN_ROUTE_CHAN_WIDTH,1,2
00_paj_raygentop_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,18,1 00_paj_raygentop_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,18,0
00_sha1_MIN_ROUTE_CHAN_WIDTH,0,0 00_sha1_MIN_ROUTE_CHAN_WIDTH,0,0
00_sv_chip0_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,0,0 00_sv_chip0_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,0,0
00_sv_chip1_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,152,0 00_sv_chip1_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,152,0

1 #####################################################################
10 00_bgm_MIN_ROUTE_CHAN_WIDTH,11,0
11 00_RLE_BlobMerging_MIN_ROUTE_CHAN_WIDTH,0,0
12 00_paj_boundtop_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,0,1
13 00_memset_MIN_ROUTE_CHAN_WIDTH,0,1 00_memset_MIN_ROUTE_CHAN_WIDTH,0,0
14 00_diffeq_paj_convert_MIN_ROUTE_CHAN_WIDTH,5,0
15 00_diffeq_f_systemC_MIN_ROUTE_CHAN_WIDTH,5,0
16 00_LU8PEEng_MIN_ROUTE_CHAN_WIDTH,8,9
20 00_mkPktMerge_MIN_ROUTE_CHAN_WIDTH,0,3
21 00_mkSMAdapter4B_MIN_ROUTE_CHAN_WIDTH,0,3
22 00_or1200_flat_MIN_ROUTE_CHAN_WIDTH,1,2
23 00_paj_raygentop_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,18,1 00_paj_raygentop_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,18,0
24 00_sha1_MIN_ROUTE_CHAN_WIDTH,0,0
25 00_sv_chip0_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,0,0
26 00_sv_chip1_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,152,0