[test] fixed a few bugs

This commit is contained in:
tangxifan 2022-09-29 15:32:31 -07:00
parent 9bc9b61d35
commit ce0fbe1765
7 changed files with 296 additions and 40 deletions

View File

@ -1,7 +1,7 @@
# Use a FF with constant input to connect a clock signal (frequency divided by 2) from a global network to datapath
# Use an external signal to enable the clock signal
.model clk_gate
.inputs clk_i data_i
.inputs rst_i clk_i data_i
.outputs data_o
.names $true
@ -13,6 +13,6 @@
.names ff_o data_i data_o
11 1
.latch ff_i ff_o re clk_i 0
.subckt dffr D=ff_i Q=ff_o C=clk_i R=rst_i
.end

View File

@ -6,17 +6,22 @@
`timescale 1ns / 1ps
module clk_gate(
rst_i,
clk_i,
data_i,
data_o);
input wire rst_i;
input wire clk_i;
input wire data_i;
output wire data_o;
reg q;
always @(posedge clk_i) begin
if (rst_i) begin
q <= 0;
end else begin
q <= 1;
end
end
assign data_o = data_i & q;

View File

@ -0,0 +1,76 @@
# Run VPR for the 'and' design
#--write_rr_graph example_rr_graph.xml
vpr ${VPR_ARCH_FILE} ${VPR_TESTBENCH_BLIF} --clock_modeling ideal --skip_sync_clustering_and_routing_results on --absorb_buffer_luts off
# Read OpenFPGA architecture definition
read_openfpga_arch -f ${OPENFPGA_ARCH_FILE}
# Read OpenFPGA simulation settings
read_openfpga_simulation_setting -f ${OPENFPGA_SIM_SETTING_FILE}
# Annotate the OpenFPGA architecture to VPR data base
# to debug use --verbose options
link_openfpga_arch --sort_gsb_chan_node_in_edges
# Check and correct any naming conflicts in the BLIF netlist
check_netlist_naming_conflict --fix --report ./netlist_renaming.xml
# Apply fix-up to clustering nets based on routing results
pb_pin_fixup --verbose
# Apply fix-up to Look-Up Table truth tables based on packing results
lut_truth_table_fixup
# Build the module graph
# - Enabled compression on routing architecture modules
# - Enable pin duplication on grid modules
build_fabric --compress_routing #--verbose
# Write the fabric hierarchy of module graph to a file
# This is used by hierarchical PnR flows
write_fabric_hierarchy --file ./fabric_hierarchy.txt
# Repack the netlist to physical pbs
# This must be done before bitstream generator and testbench generation
# Strongly recommend it is done after all the fix-up have been applied
repack #--verbose
# Build the bitstream
# - Output the fabric-independent bitstream to a file
build_architecture_bitstream --verbose --write_file fabric_independent_bitstream.xml
# Build fabric-dependent bitstream
build_fabric_bitstream --verbose
# Write fabric-dependent bitstream
write_fabric_bitstream --file fabric_bitstream.bit --format plain_text
# Write the Verilog netlist for FPGA fabric
# - Enable the use of explicit port mapping in Verilog netlist
write_fabric_verilog --file ./SRC --explicit_port_mapping --include_timing --print_user_defined_template --verbose
# Write the Verilog testbench for FPGA fabric
# - We suggest the use of same output directory as fabric Verilog netlists
# - Must specify the reference benchmark file if you want to output any testbenches
# - Enable top-level testbench which is a full verification including programming circuit and core logic of FPGA
# - Enable pre-configured top-level testbench which is a fast verification skipping programming phase
# - Simulation ini file is optional and is needed only when you need to interface different HDL simulators using openfpga flow-run scripts
write_full_testbench --file ./SRC --reference_benchmark_file_path ${REFERENCE_VERILOG_TESTBENCH} --include_signal_init --pin_constraints_file ${OPENFPGA_PIN_CONSTRAINTS_FILE} --bitstream fabric_bitstream.bit
write_preconfigured_fabric_wrapper --embed_bitstream iverilog --file ./SRC --pin_constraints_file ${OPENFPGA_PIN_CONSTRAINTS_FILE}
write_preconfigured_testbench --file ./SRC --reference_benchmark_file_path ${REFERENCE_VERILOG_TESTBENCH} --pin_constraints_file ${OPENFPGA_PIN_CONSTRAINTS_FILE}
# Write the SDC files for PnR backend
# - Turn on every options here
write_pnr_sdc --file ./SDC
# Write SDC to disable timing for configure ports
write_sdc_disable_timing_configure_ports --file ./SDC/disable_configure_ports.sdc
# Write the SDC to run timing analysis for a mapped FPGA fabric
write_analysis_sdc --file ./SDC_analysis
# Finish and exit OpenFPGA
exit
# Note :
# To run verification at the end of the flow maintain source in ./SRC directory

View File

@ -25,6 +25,7 @@ run-task fpga_bitstream/load_external_architecture_bitstream $@
echo -e "Testing repacker capability in identifying wire LUTs";
run-task fpga_bitstream/repack_wire_lut $@
run-task fpga_bitstream/repack_wire_lut_strong $@
echo -e "Testing overloading default paths for programmable interconnect when generating bitstream";
run-task fpga_bitstream/overload_mux_default_path $@

View File

@ -1,4 +0,0 @@
<!-- A dummy pin constraint file, useful when no pin constraints are needed for a benchmark -->
<pin_constraints>
</pin_constraints>

View File

@ -16,13 +16,13 @@ timeout_each_job = 20*60
fpga_flow=vpr_blif
[OpenFPGA_SHELL]
openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/example_without_ace_script.openfpga
openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_frac_N4_40nm_cc_openfpga.xml
openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/wire_lut_example_script.openfpga
openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_frac_N4_fracff_40nm_cc_openfpga.xml
openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/fixed_sim_openfpga.xml
openfpga_pin_constraints_file=${PATH:TASK_DIR}/config/pin_constraints_dummy.xml
openfpga_pin_constraints_file=${PATH:TASK_DIR}/config/pin_constraints.xml
[ARCHITECTURES]
arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_frac_N1_tileable_40nm.xml
arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_frac_N1_tileable_fracff_40nm.xml
[BENCHMARKS]
#

View File

@ -7,11 +7,7 @@
with optionally registered outputs
- Routing architecture: L = 4, fc_in = 0.15, Fc_out = 0.1
Details on Modelling:
Based on flagship k4_frac_N4_mem32K_40nm.xml architecture.
Authors: Jason Luu, Jeff Goeders, Vaughn Betz
Authors: Xifan Tang
--><architecture>
<!--
ODIN II specific config begins
@ -42,6 +38,38 @@
<port name="lut4_out"/>
</output_ports>
</model>
<!-- A virtual model for scan-chain flip-flop to be used in the physical mode of FF -->
<model name="dff">
<input_ports>
<port name="D" clock="C"/>
<port name="C" is_clock="1"/>
</input_ports>
<output_ports>
<port name="Q" clock="C"/>
</output_ports>
</model>
<!-- A virtual model for scan-chain flip-flop to be used in the physical mode of FF -->
<model name="dffr">
<input_ports>
<port name="D" clock="C"/>
<port name="R" clock="C"/>
<port name="C" is_clock="1"/>
</input_ports>
<output_ports>
<port name="Q" clock="C"/>
</output_ports>
</model>
<!-- A virtual model for scan-chain flip-flop to be used in the physical mode of FF -->
<model name="dffrn">
<input_ports>
<port name="D" clock="C"/>
<port name="RN" clock="C"/>
<port name="C" is_clock="1"/>
</input_ports>
<output_ports>
<port name="Q" clock="C"/>
</output_ports>
</model>
</models>
<tiles>
<!-- Do NOT add clock pins to I/O here!!! VPR does not build clock network in the way that OpenFPGA can support
@ -67,9 +95,13 @@
<site pb_type="clb"/>
</equivalent_sites>
<input name="I" num_pins="4" equivalent="full"/>
<input name="reset" num_pins="1" is_non_clock_global="true"/>
<output name="O" num_pins="2" equivalent="none"/>
<clock name="clk" num_pins="1"/>
<fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
<fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
<fc_override port_name="clk" fc_type="frac" fc_val="0"/>
<fc_override port_name="reset" fc_type="frac" fc_val="0"/>
</fc>
<pinlocations pattern="spread"/>
</sub_tile> </tile>
</tiles>
@ -97,6 +129,13 @@
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</fixed_layout>
<fixed_layout name="48x48" width="50" height="50">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</fixed_layout>
</layout>
<device>
<!-- VB & JL: Using Ian Kuon's transistor sizing and drive strength data for routing, at 40 nm. Ian used BPTM
@ -230,6 +269,7 @@
-->
<pb_type name="clb">
<input name="I" num_pins="4" equivalent="full"/>
<input name="reset" num_pins="1"/>
<output name="O" num_pins="2" equivalent="none"/>
<clock name="clk" num_pins="1"/>
<!-- Describe fracturable logic element.
@ -238,12 +278,14 @@
-->
<pb_type name="fle" num_pb="1">
<input name="in" num_pins="4"/>
<input name="reset" num_pins="1"/>
<output name="out" num_pins="2"/>
<clock name="clk" num_pins="1"/>
<!-- Physical mode definition begin (physical implementation of the fle) -->
<mode name="physical" disable_packing="true">
<pb_type name="fabric" num_pb="1">
<input name="in" num_pins="4"/>
<input name="reset" num_pins="1"/>
<output name="out" num_pins="2"/>
<clock name="clk" num_pins="1"/>
<pb_type name="frac_logic" num_pb="1">
@ -263,17 +305,20 @@
</interconnect>
</pb_type>
<!-- Define flip-flop -->
<pb_type name="ff" blif_model=".latch" num_pb="2" class="flipflop">
<pb_type name="ff" blif_model=".subckt dffr" num_pb="2">
<input name="D" num_pins="1" port_class="D"/>
<input name="R" num_pins="1"/>
<output name="Q" num_pins="1" port_class="Q"/>
<clock name="clk" num_pins="1" port_class="clock"/>
<T_setup value="66e-12" port="ff.D" clock="clk"/>
<T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
<clock name="C" num_pins="1" port_class="clock"/>
<T_setup value="66e-12" port="ff.D" clock="C"/>
<T_setup value="66e-12" port="ff.R" clock="C"/>
<T_clock_to_Q max="124e-12" port="ff.Q" clock="C"/>
</pb_type>
<interconnect>
<direct name="direct1" input="fabric.in" output="frac_logic.in"/>
<direct name="direct2" input="frac_logic.out[1:0]" output="ff[1:0].D"/>
<complete name="direct3" input="fabric.clk" output="ff[1:0].clk"/>
<complete name="direct3" input="fabric.clk" output="ff[1:0].C"/>
<complete name="direct4" input="fabric.reset" output="ff[1:0].R"/>
<mux name="mux1" input="ff[0].Q frac_logic.out[0]" output="fabric.out[0]">
<!-- LUT to output is faster than FF to output on a Stratix IV -->
<delay_constant max="25e-12" in_port="frac_logic.out[0]" out_port="fabric.out[0]"/>
@ -290,6 +335,7 @@
<direct name="direct1" input="fle.in" output="fabric.in"/>
<direct name="direct2" input="fabric.out" output="fle.out"/>
<direct name="direct3" input="fle.clk" output="fabric.clk"/>
<direct name="direct4" input="fle.reset" output="fabric.reset"/>
</interconnect>
</mode>
<!-- Physical mode definition end (physical implementation of the fle) -->
@ -297,10 +343,12 @@
<mode name="n2_lut3">
<pb_type name="lut3inter" num_pb="1">
<input name="in" num_pins="3"/>
<input name="reset" num_pins="1"/>
<output name="out" num_pins="2"/>
<clock name="clk" num_pins="1"/>
<pb_type name="ble3" num_pb="2">
<input name="in" num_pins="3"/>
<input name="reset" num_pins="1"/>
<output name="out" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<!-- Define the LUT -->
@ -323,12 +371,73 @@
</delay_matrix>
</pb_type>
<!-- Define the flip-flop -->
<pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
<pb_type name="ff" num_pb="1">
<input name="D" num_pins="1"/>
<input name="R" num_pins="1"/>
<output name="Q" num_pins="1"/>
<clock name="C" num_pins="1"/>
<mode name="latch">
<pb_type name="latch" blif_model=".latch" num_pb="1">
<input name="D" num_pins="1" port_class="D"/>
<output name="Q" num_pins="1" port_class="Q"/>
<clock name="clk" num_pins="1" port_class="clock"/>
<T_setup value="66e-12" port="ff.D" clock="clk"/>
<T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
<T_setup value="66e-12" port="latch.D" clock="clk"/>
<T_clock_to_Q max="124e-12" port="latch.Q" clock="clk"/>
</pb_type>
<interconnect>
<direct name="direct1" input="ff.D" output="latch.D"/>
<direct name="direct2" input="ff.C" output="latch.clk"/>
<direct name="direct3" input="latch.Q" output="ff.Q"/>
</interconnect>
</mode>
<mode name="dff">
<pb_type name="dff" blif_model=".subckt dff" num_pb="1">
<input name="D" num_pins="1" port_class="D"/>
<output name="Q" num_pins="1" port_class="Q"/>
<clock name="C" num_pins="1" port_class="clock"/>
<T_setup value="66e-12" port="dff.D" clock="C"/>
<T_clock_to_Q max="124e-12" port="dff.Q" clock="C"/>
</pb_type>
<interconnect>
<direct name="direct1" input="ff.D" output="dff.D"/>
<direct name="direct2" input="ff.C" output="dff.C"/>
<direct name="direct3" input="dff.Q" output="ff.Q"/>
</interconnect>
</mode>
<mode name="dffr">
<pb_type name="dffr" blif_model=".subckt dffr" num_pb="1">
<input name="D" num_pins="1" port_class="D"/>
<input name="R" num_pins="1"/>
<output name="Q" num_pins="1" port_class="Q"/>
<clock name="C" num_pins="1" port_class="clock"/>
<T_setup value="66e-12" port="dffr.D" clock="C"/>
<T_setup value="66e-12" port="dffr.R" clock="C"/>
<T_clock_to_Q max="124e-12" port="dffr.Q" clock="C"/>
</pb_type>
<interconnect>
<direct name="direct1" input="ff.D" output="dffr.D"/>
<direct name="direct2" input="ff.C" output="dffr.C"/>
<direct name="direct3" input="ff.R" output="dffr.R"/>
<direct name="direct4" input="dffr.Q" output="ff.Q"/>
</interconnect>
</mode>
<mode name="dffrn">
<pb_type name="dffrn" blif_model=".subckt dffrn" num_pb="1">
<input name="D" num_pins="1" port_class="D"/>
<input name="RN" num_pins="1"/>
<output name="Q" num_pins="1" port_class="Q"/>
<clock name="C" num_pins="1" port_class="clock"/>
<T_setup value="66e-12" port="dffrn.D" clock="C"/>
<T_setup value="66e-12" port="dffrn.RN" clock="C"/>
<T_clock_to_Q max="124e-12" port="dffrn.Q" clock="C"/>
</pb_type>
<interconnect>
<direct name="direct1" input="ff.D" output="dffrn.D"/>
<direct name="direct2" input="ff.C" output="dffrn.C"/>
<direct name="direct3" input="ff.R" output="dffrn.RN"/>
<direct name="direct4" input="dffrn.Q" output="ff.Q"/>
</interconnect>
</mode>
</pb_type>
<interconnect>
<direct name="direct1" input="ble3.in[2:0]" output="lut3[0:0].in[2:0]"/>
@ -336,7 +445,8 @@
<!-- Advanced user option that tells CAD tool to find LUT+FF pairs in netlist -->
<pack_pattern name="ble3" in_port="lut3[0:0].out" out_port="ff[0:0].D"/>
</direct>
<direct name="direct3" input="ble3.clk" output="ff[0:0].clk"/>
<direct name="direct3" input="ble3.clk" output="ff[0:0].C"/>
<direct name="direct4" input="ble3.reset" output="ff[0:0].R"/>
<mux name="mux1" input="ff[0:0].Q lut3.out[0:0]" output="ble3.out[0:0]">
<!-- LUT to output is faster than FF to output on a Stratix IV -->
<delay_constant max="25e-12" in_port="lut3.out[0:0]" out_port="ble3.out[0:0]"/>
@ -349,12 +459,14 @@
<direct name="direct2" input="lut3inter.in" output="ble3[1:1].in"/>
<direct name="direct3" input="ble3[1:0].out" output="lut3inter.out"/>
<complete name="complete1" input="lut3inter.clk" output="ble3[1:0].clk"/>
<complete name="complete2" input="lut3inter.reset" output="ble3[1:0].reset"/>
</interconnect>
</pb_type>
<interconnect>
<direct name="direct1" input="fle.in[2:0]" output="lut3inter.in"/>
<direct name="direct2" input="lut3inter.out" output="fle.out"/>
<direct name="direct3" input="fle.clk" output="lut3inter.clk"/>
<direct name="direct4" input="fle.reset" output="lut3inter.reset"/>
</interconnect>
</mode>
<!-- Dual 3-LUT mode definition end -->
@ -363,6 +475,7 @@
<!-- Define 4-LUT mode -->
<pb_type name="ble4" num_pb="1">
<input name="in" num_pins="4"/>
<input name="reset" num_pins="1"/>
<output name="out" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<!-- Define LUT -->
@ -386,13 +499,74 @@
261e-12
</delay_matrix>
</pb_type>
<!-- Define flip-flop -->
<pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
<!-- Define the flip-flop -->
<pb_type name="ff" num_pb="1">
<input name="D" num_pins="1"/>
<input name="R" num_pins="1"/>
<output name="Q" num_pins="1"/>
<clock name="C" num_pins="1"/>
<mode name="latch">
<pb_type name="latch" blif_model=".latch" num_pb="1">
<input name="D" num_pins="1" port_class="D"/>
<output name="Q" num_pins="1" port_class="Q"/>
<clock name="clk" num_pins="1" port_class="clock"/>
<T_setup value="66e-12" port="ff.D" clock="clk"/>
<T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
<T_setup value="66e-12" port="latch.D" clock="clk"/>
<T_clock_to_Q max="124e-12" port="latch.Q" clock="clk"/>
</pb_type>
<interconnect>
<direct name="direct1" input="ff.D" output="latch.D"/>
<direct name="direct2" input="ff.C" output="latch.clk"/>
<direct name="direct3" input="latch.Q" output="ff.Q"/>
</interconnect>
</mode>
<mode name="dff">
<pb_type name="dff" blif_model=".subckt dff" num_pb="1">
<input name="D" num_pins="1" port_class="D"/>
<output name="Q" num_pins="1" port_class="Q"/>
<clock name="C" num_pins="1" port_class="clock"/>
<T_setup value="66e-12" port="dff.D" clock="C"/>
<T_clock_to_Q max="124e-12" port="dff.Q" clock="C"/>
</pb_type>
<interconnect>
<direct name="direct1" input="ff.D" output="dff.D"/>
<direct name="direct2" input="ff.C" output="dff.C"/>
<direct name="direct3" input="dff.Q" output="ff.Q"/>
</interconnect>
</mode>
<mode name="dffr">
<pb_type name="dffr" blif_model=".subckt dffr" num_pb="1">
<input name="D" num_pins="1" port_class="D"/>
<input name="R" num_pins="1"/>
<output name="Q" num_pins="1" port_class="Q"/>
<clock name="C" num_pins="1" port_class="clock"/>
<T_setup value="66e-12" port="dffr.D" clock="C"/>
<T_setup value="66e-12" port="dffr.R" clock="C"/>
<T_clock_to_Q max="124e-12" port="dffr.Q" clock="C"/>
</pb_type>
<interconnect>
<direct name="direct1" input="ff.D" output="dffr.D"/>
<direct name="direct2" input="ff.C" output="dffr.C"/>
<direct name="direct3" input="ff.R" output="dffr.R"/>
<direct name="direct4" input="dffr.Q" output="ff.Q"/>
</interconnect>
</mode>
<mode name="dffrn">
<pb_type name="dffrn" blif_model=".subckt dffrn" num_pb="1">
<input name="D" num_pins="1" port_class="D"/>
<input name="RN" num_pins="1"/>
<output name="Q" num_pins="1" port_class="Q"/>
<clock name="C" num_pins="1" port_class="clock"/>
<T_setup value="66e-12" port="dffrn.D" clock="C"/>
<T_setup value="66e-12" port="dffrn.RN" clock="C"/>
<T_clock_to_Q max="124e-12" port="dffrn.Q" clock="C"/>
</pb_type>
<interconnect>
<direct name="direct1" input="ff.D" output="dffrn.D"/>
<direct name="direct2" input="ff.C" output="dffrn.C"/>
<direct name="direct3" input="ff.R" output="dffrn.RN"/>
<direct name="direct4" input="dffrn.Q" output="ff.Q"/>
</interconnect>
</mode>
</pb_type>
<interconnect>
<direct name="direct1" input="ble4.in" output="lut4[0:0].in"/>
@ -400,7 +574,8 @@
<!-- Advanced user option that tells CAD tool to find LUT+FF pairs in netlist -->
<pack_pattern name="ble4" in_port="lut4.out" out_port="ff.D"/>
</direct>
<direct name="direct3" input="ble4.clk" output="ff.clk"/>
<direct name="direct3" input="ble4.clk" output="ff.C"/>
<direct name="direct4" input="ble4.reset" output="ff.R"/>
<mux name="mux1" input="ff.Q lut4.out" output="ble4.out">
<!-- LUT to output is faster than FF to output on a Stratix IV -->
<delay_constant max="25e-12" in_port="lut4.out" out_port="ble4.out"/>
@ -412,6 +587,7 @@
<direct name="direct1" input="fle.in" output="ble4.in"/>
<direct name="direct2" input="ble4.out" output="fle.out[0:0]"/>
<direct name="direct3" input="fle.clk" output="ble4.clk"/>
<direct name="direct4" input="fle.reset" output="ble4.reset"/>
</interconnect>
</mode>
<!-- 6-LUT mode definition end -->
@ -431,6 +607,8 @@
<delay_constant max="75e-12" in_port="fle[0:0].out" out_port="fle[0:0].in"/>
</complete>
<complete name="clks" input="clb.clk" output="fle[0:0].clk">
</complete>
<complete name="resets" input="clb.reset" output="fle[0:0].reset">
</complete>
<!-- This way of specifying direct connection to clb outputs is important because this architecture uses automatic spreading of opins.
By grouping to output pins in this fashion, if a logic block is completely filled by 6-LUTs,