Merge pull request #594 from lnis-uofu/multi_clock

Now fabric generator supports a global port from partial bits of physical tile ports
This commit is contained in:
tangxifan 2022-03-20 14:28:36 +08:00 committed by GitHub
commit 7e90294e0b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 329 additions and 5 deletions

View File

@ -134,6 +134,16 @@ When a global port, e.g., ``clk``, is defined in ``tile_annotation`` using the f
</global_port>
</tile_annotations>
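Note that a global port can also be defined to drive only some of the bits of a physical tile port. For example, the following definition connects the global port ``clk`` only to bit 3 of the ``clk`` port of each ``clb`` tile.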
.. code-block:: xml
<tile_annotations>
<global_port name="clk" is_clock="true">
<tile name="clb" port="clk[3:3]"/>
</global_port>
</tile_annotations>
Clock port ``clk`` of each ``clb`` tile will be connected to a common clock port of the top module, while the local clock network is customizable through VPR's architecture description language. For instance, the local clock network can be a programmable clock network.
.. _annotate_vpr_arch_pb_type_annotation:

View File

@ -782,11 +782,19 @@ int build_top_module_global_net_for_given_grid_module(ModuleManager& module_mana
/* Ensure port width is in range */
BasicPort src_port = module_manager.module_port(top_module, top_module_port);
VTR_ASSERT(src_port.get_width() >= size_t(physical_tile_port.num_pins));
VTR_ASSERT(src_port.get_width() == tile_port_to_connect.get_width());
/* Create a pin id mapping between the source port (top module) and the sink port (grid module) */
std::map<size_t, size_t> sink2src_pin_map;
for (size_t ipin = 0; ipin < tile_port_to_connect.get_width(); ++ipin) {
size_t sink_pin = tile_port_to_connect.pins()[ipin];
size_t src_pin = src_port.pins()[ipin];
sink2src_pin_map[sink_pin] = src_pin;
}
/* A tile may consist of multiple subtile, connect to all the pins from sub tiles */
for (int iz = 0; iz < physical_tile->capacity; ++iz) {
for (size_t pin_id = 0; pin_id < size_t(physical_tile_port.num_pins); ++pin_id) {
for (size_t pin_id = tile_port_to_connect.get_lsb(); pin_id < tile_port_to_connect.get_msb() + 1; ++pin_id) {
/* TODO: This should be replaced by using a pin mapping data structure from physical tile! */
int grid_pin_index = grid_pin_start_index + iz * physical_tile->equivalent_sites[0]->pb_type->num_pins + pin_id;
/* Find the module pin */
@ -811,7 +819,7 @@ int build_top_module_global_net_for_given_grid_module(ModuleManager& module_mana
ModuleNetId net = create_module_source_pin_net(module_manager, top_module,
top_module, 0,
top_module_port, src_port.pins()[pin_id]);
top_module_port, src_port.pins()[sink2src_pin_map[pin_id]]);
VTR_ASSERT(ModuleNetId::INVALID() != net);
/* Configure the net sink */

View File

@ -31,6 +31,8 @@ Note that an OpenFPGA architecture can be applied to multiple VPR architecture f
- tree\_mux: If routing multiplexers are built with a tree-like structure
- <feature_size>: The technology node which the delay numbers are extracted from.
- powergate : The FPGA has power-gating techniques applied. If not defined, there is no power-gating.
- GlobalTile<Int>Clk: How many clocks are defined through global ports from physical tiles. <Int> is the number of clocks
- GlobalTile<Int>Clk<Pin>: How many clocks are defined through global ports from physical tiles.
* <Int> is the number of clocks
* <Pin> When specified, each clock is defined as a separate pin with its own name
Other features used in naming should be listed here.

View File

@ -0,0 +1,207 @@
<!-- Architecture annotation for OpenFPGA framework
This annotation supports the k6_N10_40nm.xml
- General purpose logic block
- K = 6, N = 10, I = 40
- Single mode
- Routing architecture
- L = 4, fc_in = 0.15, fc_out = 0.1
- 4 operating clocks
-->
<openfpga_architecture>
<technology_library>
<device_library>
<device_model name="logic" type="transistor">
<lib type="industry" corner="TOP_TT" ref="M" path="${OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.pm"/>
<design vdd="0.9" pn_ratio="2"/>
<pmos name="pch" chan_length="40e-9" min_width="140e-9" variation="logic_transistor_var"/>
<nmos name="nch" chan_length="40e-9" min_width="140e-9" variation="logic_transistor_var"/>
</device_model>
<device_model name="io" type="transistor">
<lib type="academia" ref="M" path="${OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.pm"/>
<design vdd="2.5" pn_ratio="3"/>
<pmos name="pch_25" chan_length="270e-9" min_width="320e-9" variation="io_transistor_var"/>
<nmos name="nch_25" chan_length="270e-9" min_width="320e-9" variation="io_transistor_var"/>
</device_model>
</device_library>
<variation_library>
<variation name="logic_transistor_var" abs_deviation="0.1" num_sigma="3"/>
<variation name="io_transistor_var" abs_deviation="0.1" num_sigma="3"/>
</variation_library>
</technology_library>
<circuit_library>
<circuit_model type="inv_buf" name="INVTX1" prefix="INVTX1" is_default="true">
<design_technology type="cmos" topology="inverter" size="1"/>
<device_technology device_model_name="logic"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<delay_matrix type="rise" in_port="in" out_port="out">
10e-12
</delay_matrix>
<delay_matrix type="fall" in_port="in" out_port="out">
10e-12
</delay_matrix>
</circuit_model>
<circuit_model type="inv_buf" name="buf4" prefix="buf4" is_default="false">
<design_technology type="cmos" topology="buffer" size="1" num_level="2" f_per_stage="4"/>
<device_technology device_model_name="logic"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<delay_matrix type="rise" in_port="in" out_port="out">
10e-12
</delay_matrix>
<delay_matrix type="fall" in_port="in" out_port="out">
10e-12
</delay_matrix>
</circuit_model>
<circuit_model type="inv_buf" name="tap_buf4" prefix="tap_buf4" is_default="false">
<design_technology type="cmos" topology="buffer" size="1" num_level="3" f_per_stage="4"/>
<device_technology device_model_name="logic"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<delay_matrix type="rise" in_port="in" out_port="out">
10e-12
</delay_matrix>
<delay_matrix type="fall" in_port="in" out_port="out">
10e-12
</delay_matrix>
</circuit_model>
<circuit_model type="pass_gate" name="TGATE" prefix="TGATE" is_default="true">
<design_technology type="cmos" topology="transmission_gate" nmos_size="1" pmos_size="2"/>
<device_technology device_model_name="logic"/>
<input_buffer exist="false"/>
<output_buffer exist="false"/>
<port type="input" prefix="in" size="1"/>
<port type="input" prefix="sel" size="1"/>
<port type="input" prefix="selb" size="1"/>
<port type="output" prefix="out" size="1"/>
<delay_matrix type="rise" in_port="in sel selb" out_port="out">
10e-12 5e-12 5e-12
</delay_matrix>
<delay_matrix type="fall" in_port="in sel selb" out_port="out">
10e-12 5e-12 5e-12
</delay_matrix>
</circuit_model>
<circuit_model type="chan_wire" name="chan_segment" prefix="track_seg" is_default="true">
<design_technology type="cmos"/>
<input_buffer exist="false"/>
<output_buffer exist="false"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<wire_param model_type="pi" R="101" C="22.5e-15" num_level="1"/> <!-- model_type could be T, res_val and cap_val DON'T CARE -->
</circuit_model>
<circuit_model type="wire" name="direct_interc" prefix="direct_interc" is_default="true">
<design_technology type="cmos"/>
<input_buffer exist="false"/>
<output_buffer exist="false"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<wire_param model_type="pi" R="0" C="0" num_level="1"/> <!-- model_type could be T, res_val cap_val should be defined -->
</circuit_model>
<circuit_model type="mux" name="mux_tree" prefix="mux_tree" dump_structural_verilog="true">
<design_technology type="cmos" structure="tree" add_const_input="true" const_input_val="1"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="INVTX1"/>
<pass_gate_logic circuit_model_name="TGATE"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<port type="sram" prefix="sram" size="1"/>
</circuit_model>
<circuit_model type="mux" name="mux_tree_tapbuf" prefix="mux_tree_tapbuf" is_default="true" dump_structural_verilog="true">
<design_technology type="cmos" structure="tree" add_const_input="true" const_input_val="1"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="tap_buf4"/>
<pass_gate_logic circuit_model_name="TGATE"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<port type="sram" prefix="sram" size="1"/>
</circuit_model>
<!--DFF subckt ports should be defined as <D> <Q> <CLK> <RESET> <SET> -->
<circuit_model type="ff" name="DFFSRQ" prefix="DFFSRQ" spice_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/spice/dff.sp" verilog_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/verilog/dff.v">
<design_technology type="cmos"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="INVTX1"/>
<port type="input" prefix="D" size="1"/>
<port type="input" prefix="set" lib_name="SET" size="1" is_global="true" default_val="0" is_set="true"/>
<port type="input" prefix="reset" lib_name="RST" size="1" is_global="true" default_val="0" is_reset="true"/>
<port type="output" prefix="Q" size="1"/>
<port type="clock" prefix="clk" lib_name="CK" size="1" is_global="false" default_val="0"/>
</circuit_model>
<circuit_model type="lut" name="lut4" prefix="lut4" dump_structural_verilog="true">
<design_technology type="cmos"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="INVTX1"/>
<lut_input_inverter exist="true" circuit_model_name="INVTX1"/>
<lut_input_buffer exist="true" circuit_model_name="buf4"/>
<pass_gate_logic circuit_model_name="TGATE"/>
<port type="input" prefix="in" size="4"/>
<port type="output" prefix="out" size="1"/>
<port type="sram" prefix="sram" size="16"/>
</circuit_model>
<!--Scan-chain DFF subckt ports should be defined as <D> <Q> <Qb> <CLK> <RESET> <SET> -->
<circuit_model type="ccff" name="DFF" prefix="DFF" spice_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/spice/dff.sp" verilog_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/verilog/dff.v">
<design_technology type="cmos"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="INVTX1"/>
<port type="input" prefix="D" size="1"/>
<port type="output" prefix="Q" size="1"/>
<port type="output" prefix="QN" size="1"/>
<port type="clock" prefix="prog_clk" lib_name="CK" size="1" is_global="true" default_val="0" is_prog="true"/>
</circuit_model>
<circuit_model type="iopad" name="GPIO" prefix="GPIO" spice_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/spice/gpio.sp" verilog_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/verilog/gpio.v">
<design_technology type="cmos"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="INVTX1"/>
<port type="inout" prefix="PAD" size="1" is_global="true" is_io="true" is_data_io="true"/>
<port type="sram" prefix="DIR" size="1" mode_select="true" circuit_model_name="DFF" default_val="1"/>
<port type="input" prefix="outpad" lib_name="A" size="1"/>
<port type="output" prefix="inpad" lib_name="Y" size="1"/>
</circuit_model>
</circuit_library>
<configuration_protocol>
<organization type="scan_chain" circuit_model_name="DFF"/>
</configuration_protocol>
<connection_block>
<switch name="ipin_cblock" circuit_model_name="mux_tree_tapbuf"/>
</connection_block>
<switch_block>
<switch name="0" circuit_model_name="mux_tree_tapbuf"/>
</switch_block>
<routing_segment>
<segment name="L4" circuit_model_name="chan_segment"/>
</routing_segment>
<tile_annotations>
<!-- MUST explicitly define which clock bit each global port drives,
consistent with the physical tile port definition
-->
<global_port name="clk0" is_clock="true" default_val="0">
<tile name="clb" port="clk[0:0]" x="-1" y="-1"/>
</global_port>
<global_port name="clk1" is_clock="true" default_val="0">
<tile name="clb" port="clk[1:1]" x="-1" y="-1"/>
</global_port>
<global_port name="clk2" is_clock="true" default_val="0">
<tile name="clb" port="clk[2:2]" x="-1" y="-1"/>
</global_port>
<global_port name="clk3" is_clock="true" default_val="0">
<tile name="clb" port="clk[3:3]" x="-1" y="-1"/>
</global_port>
</tile_annotations>
<pb_type_annotations>
<!-- physical pb_type binding in complex block IO -->
<pb_type name="io" physical_mode_name="physical" idle_mode_name="inpad"/>
<pb_type name="io[physical].iopad" circuit_model_name="GPIO" mode_bits="1"/>
<pb_type name="io[inpad].inpad" physical_pb_type_name="io[physical].iopad" mode_bits="1"/>
<pb_type name="io[outpad].outpad" physical_pb_type_name="io[physical].iopad" mode_bits="0"/>
<!-- End physical pb_type binding in complex block IO -->
<!-- physical pb_type binding in complex block CLB -->
<!-- physical mode will be the default mode if not specified -->
<pb_type name="clb">
<!-- Binding interconnect to circuit models as their physical implementation, if not defined, we use the default model -->
<interconnect name="crossbar" circuit_model_name="mux_tree"/>
</pb_type>
<pb_type name="clb.fle[n1_lut4].ble4.lut4" circuit_model_name="lut4"/>
<pb_type name="clb.fle[n1_lut4].ble4.ff" circuit_model_name="DFFSRQ"/>
<!-- End physical pb_type binding in complex block CLB -->
</pb_type_annotations>
</openfpga_architecture>

View File

@ -72,7 +72,9 @@ write_pnr_sdc --file ./SDC
write_sdc_disable_timing_configure_ports --file ./SDC/disable_configure_ports.sdc
# Write the SDC to run timing analysis for a mapped FPGA fabric
write_analysis_sdc --file ./SDC_analysis
# TODO: Currently the SDC writer only supports 1 operating clock because
# - information about which I/O is constrained by which clock is missing
#write_analysis_sdc --file ./SDC_analysis
# Finish and exit OpenFPGA
exit

View File

@ -139,6 +139,7 @@ echo -e "Testing global port definition from tiles";
run-task basic_tests/global_tile_ports/global_tile_clock --debug --show_thread_logs
run-task basic_tests/global_tile_ports/global_tile_reset --debug --show_thread_logs
run-task basic_tests/global_tile_ports/global_tile_4clock --debug --show_thread_logs
run-task basic_tests/global_tile_ports/global_tile_4clock_pin --debug --show_thread_logs
echo -e "Testing configuration chain of a K4N4 FPGA using .blif generated by yosys+verific";
run-task basic_tests/verific_test --debug --show_thread_logs

View File

@ -0,0 +1,11 @@
<pin_constraints>
<!-- For a given .blif file, we want to assign
- the clk0 signal to the clk0 port of the FPGA fabric
- the clk1 signal to the clk1 port of the FPGA fabric
and leave the clk2 and clk3 ports of the FPGA fabric open
-->
<set_io pin="clk0" net="clk0"/>
<set_io pin="clk1" net="clk1"/>
<set_io pin="clk2" net="OPEN"/>
<set_io pin="clk3" net="OPEN"/>
</pin_constraints>

View File

@ -0,0 +1,14 @@
<repack_design_constraints>
<!-- For a given .blif file, we want to assign
- the clk0 signal to the clk[0] port of all the clb tiles available in the FPGA fabric
- the clk1 signal to the clk[1] port of all the clb tiles available in the FPGA fabric
and ensure no signals could be mapped to
- the clk[2] port of all the clb tiles available in the FPGA fabric
- the clk[3] port of all the clb tiles available in the FPGA fabric
-->
<pin_constraint pb_type="clb" pin="clk[0]" net="clk0"/>
<pin_constraint pb_type="clb" pin="clk[1]" net="clk1"/>
<pin_constraint pb_type="clb" pin="clk[2]" net="OPEN"/>
<pin_constraint pb_type="clb" pin="clk[3]" net="OPEN"/>
</repack_design_constraints>

View File

@ -0,0 +1,12 @@
<pin_constraints>
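<!-- For a given .blif file, we want to assign
- the clk0 signal to the clk2 port of the FPGA fabric
- the clk1 signal to the clk3 port of the FPGA fabric
- the rst signal to the reset[0] port of the FPGA fabric
and leave the clk0 and clk1 ports of the FPGA fabric open
-->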
<set_io pin="clk0" net="OPEN"/>
<set_io pin="clk1" net="OPEN"/>
<set_io pin="clk2" net="clk0"/>
<set_io pin="clk3" net="clk1"/>
<set_io pin="reset[0]" net="rst"/>
</pin_constraints>

View File

@ -0,0 +1,14 @@
<repack_design_constraints>
<!-- For a given .blif file, we want to assign
- the clk0 signal to the clk[2] port of all the clb tiles available in the FPGA fabric
- the clk1 signal to the clk[3] port of all the clb tiles available in the FPGA fabric
and ensure no signals could be mapped to
- the clk[0] port of all the clb tiles available in the FPGA fabric
- the clk[1] port of all the clb tiles available in the FPGA fabric
-->
<pin_constraint pb_type="clb" pin="clk[0]" net="OPEN"/>
<pin_constraint pb_type="clb" pin="clk[1]" net="OPEN"/>
<pin_constraint pb_type="clb" pin="clk[2]" net="clk0"/>
<pin_constraint pb_type="clb" pin="clk[3]" net="clk1"/>
</repack_design_constraints>

View File

@ -0,0 +1,43 @@
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Configuration file for running experiments
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# timeout_each_job : The FPGA task script splits the FPGA flow into multiple jobs
# Each job executes the fpga_flow script on a combination of architecture & benchmark
# timeout_each_job is the timeout for each job
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
[GENERAL]
run_engine=openfpga_shell
power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml
power_analysis = false
spice_output=false
verilog_output=true
timeout_each_job = 20*60
fpga_flow=yosys_vpr
[OpenFPGA_SHELL]
openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/global_tile_multiclock_example_script.openfpga
openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_N4_40nm_GlobalTile4ClkPin_cc_openfpga.xml
openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/fixed_4clock_sim_openfpga.xml
[ARCHITECTURES]
arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_N4_tileable_GlobalTile4Clk_40nm.xml
[BENCHMARKS]
bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/counters/counter_4bit_2clock/counter_4bit_2clock.v
bench1=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2_latch_2clock/and2_latch_2clock.v
[SYNTHESIS_PARAM]
bench_read_verilog_options_common = -nolatches
bench0_top = counter_4bit_2clock
bench0_openfpga_pin_constraints_file=${PATH:TASK_DIR}/config/counter_2clock_pin_constraints.xml
bench0_openfpga_repack_design_constraints_file=${PATH:TASK_DIR}/config/counter_2clock_repack_pin_constraints.xml
bench1_top = and2_latch_2clock
bench1_openfpga_pin_constraints_file=${PATH:TASK_DIR}/config/and2_latch_pin_constraints.xml
bench1_openfpga_repack_design_constraints_file=${PATH:TASK_DIR}/config/and2_latch_repack_pin_constraints.xml
[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH]
end_flow_with_test=
vpr_fpga_verilog_formal_verification_top_netlist=