Merge pull request #1148 from lnis-uofu/xt_subtile

Subtile Support
This commit is contained in:
tangxifan 2023-05-04 13:16:15 +08:00 committed by GitHub
commit fe1beb98a6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 563 additions and 82 deletions

View File

@ -80,21 +80,32 @@ IoLocationMap build_fabric_io_location_map(const ModuleManager& module_manager,
module_manager.io_children(child).size());
for (size_t isubchild = 0;
isubchild < module_manager.io_children(child).size(); ++isubchild) {
/* Note that we should use the subchild module when checking the GPIO
* ports. The child module is actually the grid-level I/O module, while
* the subchild module is the subtile inside the grid-level I/O module. Note
* that the grid-level I/O module contains all the GPIO ports, while a
* subtile may have only part of them. For example, a grid I/O module may have
* 24 GPINs and 12 GPOUTs, while the first subtile only has 4 GPINs, and the
* second subtile only has 3 GPOUTs. Therefore, to accurately build the
* I/O location map down to the subtile level, we need to check the subchild
* module here.
*/
ModuleId subchild = module_manager.io_children(child)[isubchild];
vtr::Point<int> subchild_coord =
module_manager.io_child_coordinates(child)[isubchild];
for (const ModuleManager::e_module_port_type& module_io_port_type :
MODULE_IO_PORT_TYPES) {
for (const ModulePortId& gpio_port_id :
module_manager.module_port_ids_by_type(child,
module_manager.module_port_ids_by_type(subchild,
module_io_port_type)) {
/* Only care about mappable I/Os */
if (false ==
module_manager.port_is_mappable_io(child, gpio_port_id)) {
module_manager.port_is_mappable_io(subchild, gpio_port_id)) {
continue;
}
const BasicPort& gpio_port =
module_manager.module_port(child, gpio_port_id);
module_manager.module_port(subchild, gpio_port_id);
auto curr_io_index = io_counter.find(gpio_port.get_name());
/* Index always starts from zero */

View File

@ -151,6 +151,7 @@ void add_grid_module_duplicated_pb_type_ports(
static void add_grid_module_net_connect_duplicated_pb_graph_pin(
ModuleManager& module_manager, const ModuleId& grid_module,
const ModuleId& child_module, const size_t& child_instance,
const size_t& child_inst_subtile_index,
const VprDeviceAnnotation& vpr_device_annotation,
t_physical_tile_type_ptr grid_type_descriptor, t_pb_graph_pin* pb_graph_pin,
const e_side& border_side, const e_pin2pin_interc_type& pin2pin_interc_type) {
@ -169,15 +170,18 @@ static void add_grid_module_net_connect_duplicated_pb_graph_pin(
grid_pin_sides = {TOP, RIGHT, BOTTOM, LEFT};
}
/* num_pins/capacity = the number of pins that each type_descriptor has.
* Capacity defines the number of type_descriptors in each grid
* so the pin index at grid level = pin_index_in_type_descriptor
* + type_descriptor_index_in_capacity *
* num_pins_per_type_descriptor
/* Note that each grid may contain a number of sub tiles, each type of which
* may have a different capacity and number of pins. We need to find the start
* pin index for a given z offset (instance id), which denotes the index of the
* first pin of the current instance. The variable 'pin_count_in_cluster'
* represents the pin index in the context of the current instance only. With
* the information above, we can then calculate the absolute pin index at
* grid-level (considering all the sub tiles).
*/
size_t grid_pin_index = pb_graph_pin->pin_count_in_cluster +
child_instance * grid_type_descriptor->num_pins /
grid_type_descriptor->capacity;
size_t grid_pin_index =
pb_graph_pin->pin_count_in_cluster +
vpr_device_annotation.physical_tile_z_to_start_pin_index(
grid_type_descriptor, child_inst_subtile_index);
int pin_width = grid_type_descriptor->pin_width_offset[grid_pin_index];
int pin_height = grid_type_descriptor->pin_height_offset[grid_pin_index];
@ -292,49 +296,48 @@ static void add_grid_module_net_connect_duplicated_pb_graph_pin(
void add_grid_module_nets_connect_duplicated_pb_type_ports(
ModuleManager& module_manager, const ModuleId& grid_module,
const ModuleId& child_module, const size_t& child_instance,
const VprDeviceAnnotation& vpr_device_annotation,
const t_sub_tile& sub_tile, const VprDeviceAnnotation& vpr_device_annotation,
t_physical_tile_type_ptr grid_type_descriptor, const e_side& border_side) {
/* Ensure that we have a valid grid_type_descriptor */
VTR_ASSERT(false == is_empty_type(grid_type_descriptor));
/* FIXME: Currently support only 1 equivalent site! Should clarify this
* limitation in documentation! */
for (const t_sub_tile& sub_tile : grid_type_descriptor->sub_tiles) {
t_logical_block_type_ptr lb_type = sub_tile.equivalent_sites[0];
t_pb_graph_node* top_pb_graph_node = lb_type->pb_graph_head;
VTR_ASSERT(nullptr != top_pb_graph_node);
t_logical_block_type_ptr lb_type = sub_tile.equivalent_sites[0];
t_pb_graph_node* top_pb_graph_node = lb_type->pb_graph_head;
VTR_ASSERT(nullptr != top_pb_graph_node);
size_t child_inst_subtile_index = sub_tile.capacity.low + child_instance;
for (int iport = 0; iport < top_pb_graph_node->num_input_ports; ++iport) {
for (int ipin = 0; ipin < top_pb_graph_node->num_input_pins[iport];
++ipin) {
add_grid_module_net_connect_pb_graph_pin(
module_manager, grid_module, child_module, child_instance,
vpr_device_annotation, grid_type_descriptor,
&(top_pb_graph_node->input_pins[iport][ipin]), border_side,
INPUT2INPUT_INTERC);
}
for (int iport = 0; iport < top_pb_graph_node->num_input_ports; ++iport) {
for (int ipin = 0; ipin < top_pb_graph_node->num_input_pins[iport];
++ipin) {
add_grid_module_net_connect_pb_graph_pin(
module_manager, grid_module, child_module, child_instance,
child_inst_subtile_index, vpr_device_annotation, grid_type_descriptor,
&(top_pb_graph_node->input_pins[iport][ipin]), border_side,
INPUT2INPUT_INTERC);
}
}
for (int iport = 0; iport < top_pb_graph_node->num_output_ports; ++iport) {
for (int ipin = 0; ipin < top_pb_graph_node->num_output_pins[iport];
++ipin) {
add_grid_module_net_connect_duplicated_pb_graph_pin(
module_manager, grid_module, child_module, child_instance,
vpr_device_annotation, grid_type_descriptor,
&(top_pb_graph_node->output_pins[iport][ipin]), border_side,
OUTPUT2OUTPUT_INTERC);
}
for (int iport = 0; iport < top_pb_graph_node->num_output_ports; ++iport) {
for (int ipin = 0; ipin < top_pb_graph_node->num_output_pins[iport];
++ipin) {
add_grid_module_net_connect_duplicated_pb_graph_pin(
module_manager, grid_module, child_module, child_instance,
child_inst_subtile_index, vpr_device_annotation, grid_type_descriptor,
&(top_pb_graph_node->output_pins[iport][ipin]), border_side,
OUTPUT2OUTPUT_INTERC);
}
}
for (int iport = 0; iport < top_pb_graph_node->num_clock_ports; ++iport) {
for (int ipin = 0; ipin < top_pb_graph_node->num_clock_pins[iport];
++ipin) {
add_grid_module_net_connect_pb_graph_pin(
module_manager, grid_module, child_module, child_instance,
vpr_device_annotation, grid_type_descriptor,
&(top_pb_graph_node->clock_pins[iport][ipin]), border_side,
INPUT2INPUT_INTERC);
}
for (int iport = 0; iport < top_pb_graph_node->num_clock_ports; ++iport) {
for (int ipin = 0; ipin < top_pb_graph_node->num_clock_pins[iport];
++ipin) {
add_grid_module_net_connect_pb_graph_pin(
module_manager, grid_module, child_module, child_instance,
child_inst_subtile_index, vpr_device_annotation, grid_type_descriptor,
&(top_pb_graph_node->clock_pins[iport][ipin]), border_side,
INPUT2INPUT_INTERC);
}
}
}

View File

@ -24,7 +24,7 @@ void add_grid_module_duplicated_pb_type_ports(
void add_grid_module_nets_connect_duplicated_pb_type_ports(
ModuleManager& module_manager, const ModuleId& grid_module,
const ModuleId& child_module, const size_t& child_instance,
const VprDeviceAnnotation& vpr_device_annotation,
const t_sub_tile& sub_tile, const VprDeviceAnnotation& vpr_device_annotation,
t_physical_tile_type_ptr grid_type_descriptor, const e_side& border_side);
} /* end namespace openfpga */

View File

@ -43,6 +43,7 @@ std::vector<e_side> find_grid_module_pin_sides(
void add_grid_module_net_connect_pb_graph_pin(
ModuleManager& module_manager, const ModuleId& grid_module,
const ModuleId& child_module, const size_t& child_instance,
const size_t& child_inst_subtile_index,
const VprDeviceAnnotation& vpr_device_annotation,
t_physical_tile_type_ptr grid_type_descriptor, t_pb_graph_pin* pb_graph_pin,
const e_side& border_side, const e_pin2pin_interc_type& pin2pin_interc_type) {
@ -69,7 +70,7 @@ void add_grid_module_net_connect_pb_graph_pin(
size_t grid_pin_index =
pb_graph_pin->pin_count_in_cluster +
vpr_device_annotation.physical_tile_z_to_start_pin_index(
grid_type_descriptor, child_instance);
grid_type_descriptor, child_inst_subtile_index);
int pin_height = grid_type_descriptor->pin_height_offset[grid_pin_index];
int pin_width = grid_type_descriptor->pin_width_offset[grid_pin_index];
for (const e_side& side : grid_pin_sides) {

View File

@ -23,6 +23,7 @@ std::vector<e_side> find_grid_module_pin_sides(
void add_grid_module_net_connect_pb_graph_pin(
ModuleManager& module_manager, const ModuleId& grid_module,
const ModuleId& child_module, const size_t& child_instance,
const size_t& child_inst_subtile_index,
const VprDeviceAnnotation& vpr_device_annotation,
t_physical_tile_type_ptr grid_type_descriptor, t_pb_graph_pin* pb_graph_pin,
const e_side& border_side,

View File

@ -108,50 +108,49 @@ static void add_grid_module_pb_type_ports(
static void add_grid_module_nets_connect_pb_type_ports(
ModuleManager& module_manager, const ModuleId& grid_module,
const ModuleId& child_module, const size_t& child_instance,
const VprDeviceAnnotation& vpr_device_annotation,
const t_sub_tile& sub_tile, const VprDeviceAnnotation& vpr_device_annotation,
t_physical_tile_type_ptr grid_type_descriptor, const e_side& border_side) {
/* Ensure that we have a valid grid_type_descriptor */
VTR_ASSERT(nullptr != grid_type_descriptor);
/* FIXME: Currently support only 1 equivalent site! Should clarify this
* limitation in documentation! */
for (const t_sub_tile& sub_tile : grid_type_descriptor->sub_tiles) {
VTR_ASSERT(sub_tile.equivalent_sites.size() == 1);
t_logical_block_type_ptr lb_type = sub_tile.equivalent_sites[0];
t_pb_graph_node* top_pb_graph_node = lb_type->pb_graph_head;
VTR_ASSERT(nullptr != top_pb_graph_node);
VTR_ASSERT(sub_tile.equivalent_sites.size() == 1);
t_logical_block_type_ptr lb_type = sub_tile.equivalent_sites[0];
t_pb_graph_node* top_pb_graph_node = lb_type->pb_graph_head;
VTR_ASSERT(nullptr != top_pb_graph_node);
size_t child_inst_subtile_index = sub_tile.capacity.low + child_instance;
for (int iport = 0; iport < top_pb_graph_node->num_input_ports; ++iport) {
for (int ipin = 0; ipin < top_pb_graph_node->num_input_pins[iport];
++ipin) {
add_grid_module_net_connect_pb_graph_pin(
module_manager, grid_module, child_module, child_instance,
vpr_device_annotation, grid_type_descriptor,
&(top_pb_graph_node->input_pins[iport][ipin]), border_side,
INPUT2INPUT_INTERC);
}
for (int iport = 0; iport < top_pb_graph_node->num_input_ports; ++iport) {
for (int ipin = 0; ipin < top_pb_graph_node->num_input_pins[iport];
++ipin) {
add_grid_module_net_connect_pb_graph_pin(
module_manager, grid_module, child_module, child_instance,
child_inst_subtile_index, vpr_device_annotation, grid_type_descriptor,
&(top_pb_graph_node->input_pins[iport][ipin]), border_side,
INPUT2INPUT_INTERC);
}
}
for (int iport = 0; iport < top_pb_graph_node->num_output_ports; ++iport) {
for (int ipin = 0; ipin < top_pb_graph_node->num_output_pins[iport];
++ipin) {
add_grid_module_net_connect_pb_graph_pin(
module_manager, grid_module, child_module, child_instance,
vpr_device_annotation, grid_type_descriptor,
&(top_pb_graph_node->output_pins[iport][ipin]), border_side,
OUTPUT2OUTPUT_INTERC);
}
for (int iport = 0; iport < top_pb_graph_node->num_output_ports; ++iport) {
for (int ipin = 0; ipin < top_pb_graph_node->num_output_pins[iport];
++ipin) {
add_grid_module_net_connect_pb_graph_pin(
module_manager, grid_module, child_module, child_instance,
child_inst_subtile_index, vpr_device_annotation, grid_type_descriptor,
&(top_pb_graph_node->output_pins[iport][ipin]), border_side,
OUTPUT2OUTPUT_INTERC);
}
}
for (int iport = 0; iport < top_pb_graph_node->num_clock_ports; ++iport) {
for (int ipin = 0; ipin < top_pb_graph_node->num_clock_pins[iport];
++ipin) {
add_grid_module_net_connect_pb_graph_pin(
module_manager, grid_module, child_module, child_instance,
vpr_device_annotation, grid_type_descriptor,
&(top_pb_graph_node->clock_pins[iport][ipin]), border_side,
INPUT2INPUT_INTERC);
}
for (int iport = 0; iport < top_pb_graph_node->num_clock_ports; ++iport) {
for (int ipin = 0; ipin < top_pb_graph_node->num_clock_pins[iport];
++ipin) {
add_grid_module_net_connect_pb_graph_pin(
module_manager, grid_module, child_module, child_instance,
child_inst_subtile_index, vpr_device_annotation, grid_type_descriptor,
&(top_pb_graph_node->clock_pins[iport][ipin]), border_side,
INPUT2INPUT_INTERC);
}
}
}
@ -1094,7 +1093,8 @@ static void build_physical_tile_module(
* it as a mode under a <pb_type>
*/
for (const t_sub_tile& sub_tile : phy_block_type->sub_tiles) {
for (int iz = 0; iz < sub_tile.capacity.total(); ++iz) {
for (int iz = sub_tile.capacity.low; iz < sub_tile.capacity.high + 1;
++iz) {
VTR_ASSERT(1 == sub_tile.equivalent_sites.size());
t_logical_block_type_ptr lb_type = sub_tile.equivalent_sites[0];
/* Bypass empty pb_graph */
@ -1154,7 +1154,7 @@ static void build_physical_tile_module(
for (const size_t& child_instance :
module_manager.child_module_instances(grid_module, pb_module)) {
add_grid_module_nets_connect_pb_type_ports(
module_manager, grid_module, pb_module, child_instance,
module_manager, grid_module, pb_module, child_instance, sub_tile,
vpr_device_annotation, phy_block_type, border_side);
}
}
@ -1180,7 +1180,7 @@ static void build_physical_tile_module(
for (const size_t& child_instance :
module_manager.child_module_instances(grid_module, pb_module)) {
add_grid_module_nets_connect_duplicated_pb_type_ports(
module_manager, grid_module, pb_module, child_instance,
module_manager, grid_module, pb_module, child_instance, sub_tile,
vpr_device_annotation, phy_block_type, border_side);
}
}

View File

@ -27,6 +27,7 @@ Note that an OpenFPGA architecture can be applied to multiple VPR architecture f
- local\_encoder: If local encoders are used in routing multiplexer design
- spyio/spypad: If spy I/Os are used
- registerable\_io: If I/Os are registerable (can be either combinational or sequential)
- IoSubtile: If I/O block contains sub tiles (more compact with a higher density of I/Os)
- stdcell: If circuit designs are built with standard cells only
- tree\_mux: If routing multiplexers are built with a tree-like structure
- localClkGen: The clock signal of CLB can be generated by internal programmable resources

View File

@ -0,0 +1,203 @@
<?xml version="1.0"?>
<!-- Architecture annotation for OpenFPGA framework
This annotation supports the k4_N4_tileable_IoSubtile_40nm.xml
- General purpose logic block
- K = 4, N = 4, I = 10
- Single mode
- Routing architecture
- L = 4, fc_in = 0.15, fc_out = 0.1
-->
<openfpga_architecture>
<technology_library>
<device_library>
<device_model name="logic" type="transistor">
<lib type="industry" corner="TOP_TT" ref="M" path="${OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.pm"/>
<design vdd="0.9" pn_ratio="2"/>
<pmos name="pch" chan_length="40e-9" min_width="140e-9" variation="logic_transistor_var"/>
<nmos name="nch" chan_length="40e-9" min_width="140e-9" variation="logic_transistor_var"/>
</device_model>
<device_model name="io" type="transistor">
<lib type="academia" ref="M" path="${OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.pm"/>
<design vdd="2.5" pn_ratio="3"/>
<pmos name="pch_25" chan_length="270e-9" min_width="320e-9" variation="io_transistor_var"/>
<nmos name="nch_25" chan_length="270e-9" min_width="320e-9" variation="io_transistor_var"/>
</device_model>
</device_library>
<variation_library>
<variation name="logic_transistor_var" abs_deviation="0.1" num_sigma="3"/>
<variation name="io_transistor_var" abs_deviation="0.1" num_sigma="3"/>
</variation_library>
</technology_library>
<circuit_library>
<circuit_model type="inv_buf" name="INVTX1" prefix="INVTX1" is_default="true">
<design_technology type="cmos" topology="inverter" size="1"/>
<device_technology device_model_name="logic"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<delay_matrix type="rise" in_port="in" out_port="out">
10e-12
</delay_matrix>
<delay_matrix type="fall" in_port="in" out_port="out">
10e-12
</delay_matrix>
</circuit_model>
<circuit_model type="inv_buf" name="buf4" prefix="buf4" is_default="false">
<design_technology type="cmos" topology="buffer" size="1" num_level="2" f_per_stage="4"/>
<device_technology device_model_name="logic"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<delay_matrix type="rise" in_port="in" out_port="out">
10e-12
</delay_matrix>
<delay_matrix type="fall" in_port="in" out_port="out">
10e-12
</delay_matrix>
</circuit_model>
<circuit_model type="inv_buf" name="tap_buf4" prefix="tap_buf4" is_default="false">
<design_technology type="cmos" topology="buffer" size="1" num_level="3" f_per_stage="4"/>
<device_technology device_model_name="logic"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<delay_matrix type="rise" in_port="in" out_port="out">
10e-12
</delay_matrix>
<delay_matrix type="fall" in_port="in" out_port="out">
10e-12
</delay_matrix>
</circuit_model>
<circuit_model type="pass_gate" name="TGATE" prefix="TGATE" is_default="true">
<design_technology type="cmos" topology="transmission_gate" nmos_size="1" pmos_size="2"/>
<device_technology device_model_name="logic"/>
<input_buffer exist="false"/>
<output_buffer exist="false"/>
<port type="input" prefix="in" size="1"/>
<port type="input" prefix="sel" size="1"/>
<port type="input" prefix="selb" size="1"/>
<port type="output" prefix="out" size="1"/>
<delay_matrix type="rise" in_port="in sel selb" out_port="out">
10e-12 5e-12 5e-12
</delay_matrix>
<delay_matrix type="fall" in_port="in sel selb" out_port="out">
10e-12 5e-12 5e-12
</delay_matrix>
</circuit_model>
<circuit_model type="chan_wire" name="chan_segment" prefix="track_seg" is_default="true">
<design_technology type="cmos"/>
<input_buffer exist="false"/>
<output_buffer exist="false"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<wire_param model_type="pi" R="101" C="22.5e-15" num_level="1"/>
<!-- model_type could be T, res_val and cap_val DON'T CARE -->
</circuit_model>
<circuit_model type="wire" name="direct_interc" prefix="direct_interc" is_default="true">
<design_technology type="cmos"/>
<input_buffer exist="false"/>
<output_buffer exist="false"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<wire_param model_type="pi" R="0" C="0" num_level="1"/>
<!-- model_type could be T, res_val cap_val should be defined -->
</circuit_model>
<circuit_model type="mux" name="mux_tree" prefix="mux_tree" dump_structural_verilog="true">
<design_technology type="cmos" structure="tree" add_const_input="true" const_input_val="1"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="INVTX1"/>
<pass_gate_logic circuit_model_name="TGATE"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<port type="sram" prefix="sram" size="1"/>
</circuit_model>
<circuit_model type="mux" name="mux_tree_tapbuf" prefix="mux_tree_tapbuf" is_default="true" dump_structural_verilog="true">
<design_technology type="cmos" structure="tree" add_const_input="true" const_input_val="1"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="tap_buf4"/>
<pass_gate_logic circuit_model_name="TGATE"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<port type="sram" prefix="sram" size="1"/>
</circuit_model>
<!--DFF subckt ports should be defined as <D> <Q> <CLK> <RESET> <SET> -->
<circuit_model type="ff" name="DFFSRQ" prefix="DFFSRQ" spice_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/spice/dff.sp" verilog_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/verilog/dff.v">
<design_technology type="cmos"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="INVTX1"/>
<port type="input" prefix="D" size="1"/>
<port type="input" prefix="set" lib_name="SET" size="1" is_global="true" default_val="0" is_set="true"/>
<port type="input" prefix="reset" lib_name="RST" size="1" is_global="true" default_val="0" is_reset="true"/>
<port type="output" prefix="Q" size="1"/>
<port type="clock" prefix="clk" lib_name="CK" size="1" is_global="false" default_val="0"/>
</circuit_model>
<circuit_model type="lut" name="lut4" prefix="lut4" dump_structural_verilog="true">
<design_technology type="cmos"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="INVTX1"/>
<lut_input_inverter exist="true" circuit_model_name="INVTX1"/>
<lut_input_buffer exist="true" circuit_model_name="buf4"/>
<pass_gate_logic circuit_model_name="TGATE"/>
<port type="input" prefix="in" size="4"/>
<port type="output" prefix="out" size="1"/>
<port type="sram" prefix="sram" size="16"/>
</circuit_model>
<!--Scan-chain DFF subckt ports should be defined as <D> <Q> <Qb> <CLK> <RESET> <SET> -->
<circuit_model type="ccff" name="DFF" prefix="DFF" spice_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/spice/dff.sp" verilog_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/verilog/dff.v">
<design_technology type="cmos"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="INVTX1"/>
<port type="input" prefix="D" size="1"/>
<port type="output" prefix="Q" size="1"/>
<port type="output" prefix="QN" size="1"/>
<port type="clock" prefix="prog_clk" lib_name="CK" size="1" is_global="true" default_val="0" is_prog="true"/>
</circuit_model>
<circuit_model type="iopad" name="GPIN" prefix="GPIN" is_default="true" verilog_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/verilog/gpio.v">
<design_technology type="cmos"/>
<input_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__inv_1"/>
<output_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__inv_1"/>
<port type="inout" prefix="PAD" lib_name="A" size="1" is_global="true" is_io="true" is_data_io="true"/>
<port type="output" prefix="inpad" lib_name="Y" size="1"/>
</circuit_model>
<circuit_model type="iopad" name="GPOUT" prefix="GPOUT" is_default="false" verilog_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/verilog/gpio.v">
<design_technology type="cmos"/>
<input_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__inv_1"/>
<output_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__inv_1"/>
<port type="inout" prefix="PAD" lib_name="Y" size="1" is_global="true" is_io="true" is_data_io="true"/>
<port type="input" prefix="outpad" lib_name="A" size="1"/>
</circuit_model>
</circuit_library>
<configuration_protocol>
<organization type="scan_chain" circuit_model_name="DFF"/>
</configuration_protocol>
<connection_block>
<switch name="ipin_cblock" circuit_model_name="mux_tree_tapbuf"/>
</connection_block>
<switch_block>
<switch name="0" circuit_model_name="mux_tree_tapbuf"/>
</switch_block>
<routing_segment>
<segment name="L4" circuit_model_name="chan_segment"/>
</routing_segment>
<tile_annotations>
<global_port name="clk" is_clock="true" default_val="0">
<tile name="clb" port="clk" x="-1" y="-1"/>
</global_port>
</tile_annotations>
<pb_type_annotations>
<!-- fpga_input and fpga_output pb_type annotations -->
<pb_type name="fpga_input" physical_mode_name="physical"/>
<pb_type name="fpga_output" physical_mode_name="physical"/>
<pb_type name="fpga_input[physical].iopad" circuit_model_name="GPIN"/>
<pb_type name="fpga_output[physical].iopad" circuit_model_name="GPOUT"/>
<pb_type name="fpga_input[inpad].inpad" physical_pb_type_name="fpga_input[physical].iopad"/>
<pb_type name="fpga_output[outpad].outpad" physical_pb_type_name="fpga_output[physical].iopad"/>
<!-- End physical pb_type binding in complex block IO -->
<!-- physical pb_type binding in complex block CLB -->
<!-- physical mode will be the default mode if not specified -->
<pb_type name="clb">
<!-- Binding interconnect to circuit models as their physical implementation, if not defined, we use the default model -->
<interconnect name="crossbar" circuit_model_name="mux_tree"/>
</pb_type>
<pb_type name="clb.fle[n1_lut4].ble4.lut4" circuit_model_name="lut4"/>
<pb_type name="clb.fle[n1_lut4].ble4.ff" circuit_model_name="DFFSRQ"/>
<!-- End physical pb_type binding in complex block CLB -->
</pb_type_annotations>
</openfpga_architecture>

View File

@ -158,6 +158,8 @@ echo -e "Testing tiles with pins only on bottom and right sides";
run-task basic_tests/tile_organization/bottom_right_custom_pins $@
echo -e "Testing tiles with I/O in center grid";
run-task basic_tests/tile_organization/tileable_io $@
echo -e "Testing tiles with I/O consisting of subtiles";
run-task basic_tests/tile_organization/io_subtile $@
echo -e "Testing global port definition from tiles";
run-task basic_tests/global_tile_ports/global_tile_clock $@

View File

@ -0,0 +1,37 @@
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Configuration file for running experiments
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# timeout_each_job : The FPGA task script splits the FPGA flow into multiple jobs.
# Each job executes the fpga_flow script on a combination of architecture & benchmark.
# timeout_each_job is the timeout for each job
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
[GENERAL]
run_engine=openfpga_shell
power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml
power_analysis = true
spice_output=false
verilog_output=true
timeout_each_job = 20*60
fpga_flow=yosys_vpr
[OpenFPGA_SHELL]
openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/fix_device_example_script.openfpga
openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_N4_40nm_IoSubtile_cc_openfpga.xml
openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml
openfpga_vpr_device_layout=2x2
[ARCHITECTURES]
arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_N4_tileable_IoSubtile_40nm.xml
[BENCHMARKS]
bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/or2/or2.v
[SYNTHESIS_PARAM]
bench_read_verilog_options_common = -nolatches
bench0_top = or2
bench0_chan_width = 300
[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH]
end_flow_with_test=
vpr_fpga_verilog_formal_verification_top_netlist=

View File

@ -30,7 +30,6 @@ bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/or2/or2.v
[SYNTHESIS_PARAM]
bench_read_verilog_options_common = -nolatches
bench0_top = or2
bench0_chan_width = 300
[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH]
end_flow_with_test=

View File

@ -21,6 +21,7 @@ Please reveal the following architecture features in the names to help quickly s
- multi\_io\_capacity: If I/O capacity is different on each side of FPGAs.
- reduced\_io: If I/Os only appear a certain or multiple sides of FPGAs
- registerable\_io: If I/Os are registerable (can be either combinational or sequential)
- IoSubtile: If I/O block contains sub tiles (more compact with a higher density of I/Os)
- CustomIoLoc: Use OpenFPGA's extended custom I/O location syntax
- rstOnLut: The reset signal of CLB can feed LUT inputs through a local routing architecture
- localClkGen: The clock signal of CLB can be generated by internal programmable resources

View File

@ -0,0 +1,222 @@
<?xml version="1.0"?>
<!--
Architecture with no fracturable LUTs
- 40 nm technology
- General purpose logic block:
K = 4, N = 4
- Routing architecture: L = 4, fc_in = 0.15, Fc_out = 0.1
Details on Modelling:
Based on flagship k6_frac_N10_mem32K_40nm.xml architecture. This architecture has no fracturable LUTs nor any heterogeneous blocks.
Authors: Jason Luu, Jeff Goeders, Vaughn Betz
-->
<architecture>
<models>
<!-- A virtual model for I/O to be used in the physical mode of io block -->
<model name="io_inpad">
<output_ports>
<port name="inpad"/>
</output_ports>
</model>
<model name="io_outpad">
<input_ports>
<port name="outpad"/>
</input_ports>
</model>
</models>
<tiles>
<tile name="hybrid_io_tile" area="0">
<sub_tile name="fpga_input" capacity="4">
<equivalent_sites>
<site pb_type="fpga_input"/>
</equivalent_sites>
<output name="inpad" num_pins="1"/>
<fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
<pinlocations pattern="custom">
<loc side="left"> fpga_input.inpad</loc>
<loc side="top"> fpga_input.inpad</loc>
<loc side="right"> fpga_input.inpad</loc>
<loc side="bottom"> fpga_input.inpad</loc>
</pinlocations>
</sub_tile>
<sub_tile name="fpga_output" capacity="2">
<equivalent_sites>
<site pb_type="fpga_output"/>
</equivalent_sites>
<input name="outpad" num_pins="1"/>
<fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
<pinlocations pattern="custom">
<loc side="left"> fpga_output.outpad</loc>
<loc side="top"> fpga_output.outpad</loc>
<loc side="right"> fpga_output.outpad</loc>
<loc side="bottom"> fpga_output.outpad</loc>
</pinlocations>
</sub_tile>
</tile>
<tile name="clb" area="53894">
<sub_tile name="clb">
<equivalent_sites>
<site pb_type="clb"/>
</equivalent_sites>
<input name="I" num_pins="10" equivalent="full"/>
<output name="O" num_pins="4" equivalent="none"/>
<clock name="clk" num_pins="1"/>
<fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
<fc_override port_name="clk" fc_type="frac" fc_val="0"/>
</fc>
<pinlocations pattern="spread"/>
</sub_tile>
</tile>
</tiles>
<!-- ODIN II specific config ends -->
<!-- Physical descriptions begin -->
<layout tileable="true">
<auto_layout aspect_ratio="1.0">
<!--Perimeter of 'hybrid_io_tile' blocks with 'EMPTY' blocks at corners.
The perimeter type must name a tile declared under <tiles>; this file
declares only 'hybrid_io_tile' and 'clb', so a generic 'io' type cannot be
resolved when the auto layout is selected.-->
<perimeter type="hybrid_io_tile" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</auto_layout>
<fixed_layout name="2x2" width="4" height="4">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="hybrid_io_tile" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</fixed_layout>
</layout>
<device>
<sizing R_minW_nmos="8926" R_minW_pmos="16067"/>
<area grid_logic_tile_area="0"/>
<chan_width_distr>
<x distr="uniform" peak="1.000000"/>
<y distr="uniform" peak="1.000000"/>
</chan_width_distr>
<switch_block type="wilton" fs="3"/>
<connection_block input_switch_name="ipin_cblock"/>
</device>
<switchlist>
<switch type="mux" name="0" R="551" Cin=".77e-15" Cout="4e-15" Tdel="58e-12" mux_trans_size="2.630740" buf_size="27.645901"/>
<switch type="mux" name="ipin_cblock" R="2231.5" Cout="0." Cin="1.47e-15" Tdel="7.247000e-11" mux_trans_size="1.222260" buf_size="auto"/>
</switchlist>
<segmentlist>
<segment name="L4" freq="1.000000" length="4" type="unidir" Rmetal="101" Cmetal="22.5e-15">
<mux name="0"/>
<sb type="pattern">1 1 1 1 1</sb>
<cb type="pattern">1 1 1 1</cb>
</segment>
</segmentlist>
<complexblocklist>
<pb_type name="fpga_input">
<output name="inpad" num_pins="1"/>
<mode name="physical" disable_packing="true">
<pb_type name="iopad" blif_model=".subckt io_inpad" num_pb="1">
<output name="inpad" num_pins="1"/>
</pb_type>
<interconnect>
<direct name="inpad" input="iopad.inpad" output="fpga_input.inpad"/>
</interconnect>
</mode>
<mode name="inpad">
<pb_type name="inpad" blif_model=".input" num_pb="1">
<output name="inpad" num_pins="1"/>
</pb_type>
<interconnect>
<direct name="inpad" input="inpad.inpad" output="fpga_input.inpad">
<delay_constant max="4.243e-11" in_port="inpad.inpad" out_port="fpga_input.inpad"/>
</direct>
</interconnect>
</mode>
</pb_type>
<pb_type name="fpga_output">
<input name="outpad" num_pins="1"/>
<mode name="physical" disable_packing="true">
<pb_type name="iopad" blif_model=".subckt io_outpad" num_pb="1">
<input name="outpad" num_pins="1"/>
</pb_type>
<interconnect>
<direct name="outpad" input="fpga_output.outpad" output="iopad.outpad"/>
</interconnect>
</mode>
<mode name="outpad">
<pb_type name="outpad" blif_model=".output" num_pb="1">
<input name="outpad" num_pins="1"/>
</pb_type>
<interconnect>
<direct name="outpad" input="fpga_output.outpad" output="outpad.outpad"/>
</interconnect>
</mode>
</pb_type>
<pb_type name="clb">
<input name="I" num_pins="10" equivalent="full"/>
<output name="O" num_pins="4" equivalent="none"/>
<clock name="clk" num_pins="1"/>
<pb_type name="fle" num_pb="4">
<input name="in" num_pins="4"/>
<output name="out" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<!-- 4-LUT mode definition begin -->
<mode name="n1_lut4">
<!-- Define 4-LUT mode -->
<pb_type name="ble4" num_pb="1">
<input name="in" num_pins="4"/>
<output name="out" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<!-- Define LUT -->
<pb_type name="lut4" blif_model=".names" num_pb="1" class="lut">
<input name="in" num_pins="4" port_class="lut_in"/>
<output name="out" num_pins="1" port_class="lut_out"/>
<!-- LUT timing using delay matrix -->
<delay_matrix type="max" in_port="lut4.in" out_port="lut4.out">
261e-12
261e-12
261e-12
261e-12
</delay_matrix>
</pb_type>
<!-- Define flip-flop -->
<pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
<input name="D" num_pins="1" port_class="D"/>
<output name="Q" num_pins="1" port_class="Q"/>
<clock name="clk" num_pins="1" port_class="clock"/>
<T_setup value="66e-12" port="ff.D" clock="clk"/>
<T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
</pb_type>
<interconnect>
<direct name="direct1" input="ble4.in" output="lut4[0:0].in"/>
<direct name="direct2" input="lut4.out" output="ff.D">
<!-- Advanced user option that tells CAD tool to find LUT+FF pairs in netlist -->
<pack_pattern name="ble4" in_port="lut4.out" out_port="ff.D"/>
</direct>
<direct name="direct3" input="ble4.clk" output="ff.clk"/>
<mux name="mux1" input="ff.Q lut4.out" output="ble4.out">
<!-- LUT to output is faster than FF to output on a Stratix IV -->
<delay_constant max="25e-12" in_port="lut4.out" out_port="ble4.out"/>
<delay_constant max="45e-12" in_port="ff.Q" out_port="ble4.out"/>
</mux>
</interconnect>
</pb_type>
<interconnect>
<direct name="direct1" input="fle.in" output="ble4.in"/>
<direct name="direct2" input="ble4.out" output="fle.out[0:0]"/>
<direct name="direct3" input="fle.clk" output="ble4.clk"/>
</interconnect>
</mode>
</pb_type>
<interconnect>
<complete name="crossbar" input="clb.I fle[3:0].out" output="fle[3:0].in">
<delay_constant max="95e-12" in_port="clb.I" out_port="fle[3:0].in"/>
<delay_constant max="75e-12" in_port="fle[3:0].out" out_port="fle[3:0].in"/>
</complete>
<complete name="clks" input="clb.clk" output="fle[3:0].clk">
</complete>
<direct name="clbouts1" input="fle[3:0].out" output="clb.O"/>
</interconnect>
</pb_type>
</complexblocklist>
</architecture>