Merge pull request #792 from lnis-uofu/io_indexing

Now I/O indexing follows a natural way (clockwise) throughout the fabric.
This commit is contained in:
tangxifan 2022-09-16 12:01:25 -07:00 committed by GitHub
commit b7b82804ff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 380 additions and 1 deletions

View File

@ -51,7 +51,6 @@ size_t add_top_module_grid_instance(ModuleManager& module_manager,
size_t grid_instance = module_manager.num_instance(top_module, grid_module);
/* Add the module to top_module */
module_manager.add_child_module(top_module, grid_module, false);
module_manager.add_io_child(top_module, grid_module, grid_instance, vtr::Point<int>(grid_coord.x(), grid_coord.y()));
/* Set an unique name to the instance
* Note: it is your risk to gurantee the name is unique!
*/
@ -276,6 +275,118 @@ vtr::Matrix<size_t> add_top_module_connection_block_instances(ModuleManager& mod
return cb_instance_ids;
}
/********************************************************************
 * Add the I/O children to the top-level module, which impacts the I/O indexing
 * This is the default function to build the I/O sequence/indexing
 * The I/O children are added in a maze (spiral) shape:
 * - First, the perimeter grids are visited side by side, following the
 *   order of FPGA_SIDES_CLOCKWISE
 * - Then, the center grids are visited ring by ring, starting from
 *   coordinate (1, 1) and ending at the center of the fabric.
 *   In each ring, the walk goes through
 *     x = xmin with y increasing, y = ymax with x increasing,
 *     x = xmax with y decreasing, y = ymin with x decreasing
 *
 *    +----------------------+
 *    |+--------------------+|
 *    ||+------------------+||
 *    |||+----------------+|||
 *    ||||+-------------->||||
 *    ||||+---------------+|||
 *    |||+-----------------+||
 *    ||+-------------------+|
 *    |+---------------------+
 *    ^
 *    io[0]
 *
 * EMPTY grids and grids with a non-zero width/height offset are skipped.
 * Each center coordinate is visited exactly once, including the single
 * center grid left when both the inner width and inner height are odd.
 *
 * @param module_manager    module database; the I/O children are added to it
 * @param top_module        the top-level module which receives the I/O children
 * @param grids             the device grid to walk through
 * @param grid_instance_ids instance id of each grid in the top module,
 *                          indexed by [x][y]
 *******************************************************************/
static
void add_top_module_io_children(ModuleManager& module_manager,
                                const ModuleId& top_module,
                                const DeviceGrid& grids,
                                const vtr::Matrix<size_t>& grid_instance_ids) {
  /* Register the grid at a coordinate as an I/O child of the top module.
   * The side is only used to generate the name of the grid module.
   */
  auto add_io_child_at = [&](const vtr::Point<size_t>& coord, const e_side& side) {
    /* Bypass EMPTY grid */
    if (true == is_empty_type(grids[coord.x()][coord.y()].type)) {
      return;
    }
    /* Skip width or height > 1 tiles (mostly heterogeneous blocks) */
    if ( (0 < grids[coord.x()][coord.y()].width_offset)
      || (0 < grids[coord.x()][coord.y()].height_offset)) {
      return;
    }
    /* Find the module name for this type of grid */
    t_physical_tile_type_ptr grid_type = grids[coord.x()][coord.y()].type;
    std::string grid_module_name_prefix(GRID_MODULE_NAME_PREFIX);
    std::string grid_module_name = generate_grid_block_module_name(grid_module_name_prefix, std::string(grid_type->name), is_io_type(grid_type), side);
    ModuleId grid_module = module_manager.find_module(grid_module_name);
    VTR_ASSERT(true == module_manager.valid_module_id(grid_module));
    /* Add a I/O children to top_module*/
    module_manager.add_io_child(top_module, grid_module, grid_instance_ids[coord.x()][coord.y()], vtr::Point<int>(coord.x(), coord.y()));
  };

  /* Create the coordinate range for the perimeter I/Os of FPGA fabric */
  std::map<e_side, std::vector<vtr::Point<size_t>>> io_coordinates = generate_perimeter_grid_coordinates(grids);
  for (const e_side& io_side : FPGA_SIDES_CLOCKWISE) {
    for (const vtr::Point<size_t>& io_coord : io_coordinates[io_side]) {
      add_io_child_at(io_coord, io_side);
    }
  }

  /* No center grid exists when the fabric is narrower than 3 grids.
   * Return early, otherwise the unsigned subtractions below would wrap around
   */
  if ((grids.width() < 3) || (grids.height() < 3)) {
    return;
  }

  /* Walk through the center grids, ring by ring */
  size_t xmin = 1;
  size_t xmax = grids.width() - 2;
  size_t ymin = 1;
  size_t ymax = grids.height() - 2;
  std::vector<vtr::Point<size_t>> coords;
  while (xmin < xmax && ymin < ymax) {
    for (size_t iy = ymin; iy < ymax + 1; iy++) {
      coords.push_back(vtr::Point<size_t>(xmin, iy));
    }
    for (size_t ix = xmin + 1; ix < xmax + 1; ix++) {
      coords.push_back(vtr::Point<size_t>(ix, ymax));
    }
    /* ymin < ymax holds here, so ymax - 1 cannot wrap around */
    for (size_t iy = ymax - 1; iy > ymin; iy--) {
      coords.push_back(vtr::Point<size_t>(xmax, iy));
    }
    for (size_t ix = xmax; ix > xmin; ix--) {
      coords.push_back(vtr::Point<size_t>(ix, ymin));
    }
    xmin++;
    ymin++;
    xmax--;
    ymax--;
  }

  /* The rings may leave a single line of grids in the middle.
   * ymin == ymax can only happen when the inner height is odd, and
   * xmin == xmax only when the inner width is odd, since (min + max) is
   * invariant in the loop above. The two cases are mutually exclusive here
   * on purpose: when both hold, the remainder is a single grid which must
   * be added only once.
   */
  if (ymin == ymax) {
    /* Add the missing horizontal line */
    for (size_t ix = xmin; ix < xmax + 1; ix++) {
      coords.push_back(vtr::Point<size_t>(ix, ymin));
    }
  } else if (xmin == xmax) {
    /* Add the missing vertical line */
    for (size_t iy = ymin; iy < ymax + 1; iy++) {
      coords.push_back(vtr::Point<size_t>(xmin, iy));
    }
  }

  /* Now walk through the coordinates.
   * Center grids have no side: NUM_SIDES is used when generating module names
   */
  for (const vtr::Point<size_t>& coord : coords) {
    add_io_child_at(coord, NUM_SIDES);
  }
}
/********************************************************************
* Print the top-level module for the FPGA fabric in Verilog format
* This function will
@ -328,6 +439,9 @@ int build_top_module(ModuleManager& module_manager,
cb_instance_ids[CHANX] = add_top_module_connection_block_instances(module_manager, top_module, device_rr_gsb, CHANX, compact_routing_hierarchy);
cb_instance_ids[CHANY] = add_top_module_connection_block_instances(module_manager, top_module, device_rr_gsb, CHANY, compact_routing_hierarchy);
/* Update I/O children list */
add_top_module_io_children(module_manager, top_module, grids, grid_instance_ids);
/* Add nets when we need a complete fabric modeling,
* which is required by downstream functions
*/

View File

@ -129,6 +129,9 @@ echo -e "Testing K4N5 with pattern based local routing";
run-task basic_tests/k4_series/k4n5_pattern_local_routing $@
echo -e "Testing K4N4 with custom I/O location syntax";
run-task basic_tests/k4_series/k4n4_custom_io_loc $@
# Variants of the custom I/O location test where I/Os sit in the center of the
# fabric; judging by the task names they cover a regular, an odd-height and an
# odd-width layout (NOTE(review): confirm against the task configurations)
run-task basic_tests/k4_series/k4n4_custom_io_loc_center $@
run-task basic_tests/k4_series/k4n4_custom_io_loc_center_height_odd $@
run-task basic_tests/k4_series/k4n4_custom_io_loc_center_width_odd $@
echo -e "Testing K4N4 with a local routing where reset can driven LUT inputs";
run-task basic_tests/k4_series/k4n4_rstOnLut $@

View File

@ -0,0 +1,109 @@
"""
=========================================
Represents IO Sequence in OpenFPGA Engine
=========================================
This example demonstrates the ``OpenFPGA_Arch`` class which parses the
`VPR` and `OpenFPGA` Architecture file and provides logical information.
.. image:: ../../../examples/OpenFPGA_basic/_sample_io_sequence.svg
:width: 60%
:align: center
Author: Ganesh Gore
"""
import math
import svgwrite
from svgwrite.container import Group
def draw_connections(width, height, connections):
    """
    Render the grid and the visiting sequence into an SVG file.

    A red grid of ``width`` x ``height`` cells is drawn first, then a blue
    path going through the center of every cell listed in ``connections``
    (an iterable of ``(x, y)`` cell indices), in the given order. Sizes are
    scaled by the module-level ``SCALE`` and the drawing is saved as
    ``_sample_io_sequence.svg`` in the current directory.
    """
    canvas = svgwrite.Drawing()
    canvas_width = (width + 2) * SCALE
    canvas_height = (height + 2) * SCALE

    # set user coordinate space; the main group mirrors the y axis
    canvas.viewbox(width=canvas_width, height=canvas_height, miny=-1 * canvas_height)
    layer = Group(id="Main", transform="scale(1,-1)")
    canvas.add(layer)

    # vertical grid lines
    y_top = (height + 1) * SCALE
    for col in range(1, width + 2):
        x_pos = col * SCALE
        layer.add(canvas.line((x_pos, SCALE), (x_pos, y_top), stroke="red"))

    # horizontal grid lines
    x_right = (width + 1) * SCALE
    for row in range(1, height + 2):
        y_pos = row * SCALE
        layer.add(canvas.line((SCALE, y_pos), (x_right, y_pos), stroke="red"))

    # one path visiting the center of each cell in sequence
    segments = [
        " %d %d " % ((cell[0] + 0.5) * SCALE, (cell[1] + 0.5) * SCALE)
        for cell in connections
    ]
    path_data = "M " + "".join(segments)
    layer.add(canvas.path(path_data, stroke="blue", fill="none", stroke_width="2px"))

    canvas.saveas("_sample_io_sequence.svg", pretty=True)
SCALE = 20
FPGA_WIDTH = 40
FPGA_HEIGHT = 15
W = max(FPGA_WIDTH, FPGA_HEIGHT)
W2 = math.floor(W / 2) + 1

# Sequence of (x, y) cells, built ring by ring from the outside to the center
connections = []
xmin, xmax = 1, FPGA_WIDTH
ymin, ymax = 1, FPGA_HEIGHT

while xmin < xmax and ymin < ymax:
    # report the bounding box of the ring being walked
    print(f"{xmin} {ymin} -> {xmax} {ymax}")
    # left column, bottom to top
    connections.extend((xmin, y) for y in range(ymin, ymax + 1))
    # top row, left to right
    connections.extend((x, ymax) for x in range(xmin, xmax + 1))
    # right column, top to bottom
    connections.extend((xmax, y) for y in range(ymax, ymin - 1, -1))
    # bottom row, right to left, stopping before the starting column
    connections.extend((x, ymin) for x in range(xmax, xmin, -1))
    # shrink to the next inner ring
    xmin, ymin = xmin + 1, ymin + 1
    xmax, ymax = xmax - 1, ymax - 1

# odd height: a single horizontal line may remain in the middle
if FPGA_HEIGHT % 2 == 1 and ymin == ymax:
    connections.extend((x, ymin) for x in range(xmin, xmax + 1))

# odd width: a single vertical line may remain in the middle
if FPGA_WIDTH % 2 == 1 and xmin == xmax:
    connections.extend((xmin, y) for y in range(ymin, ymax + 1))

# Dummy draw with a single point when the sequence is empty
draw_connections(
    FPGA_WIDTH, FPGA_HEIGHT, connections if connections else [(1, 1)]
)

View File

@ -0,0 +1,37 @@
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Configuration file for running experiments
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# timeout_each_job : FPGA Task script splits fpga flow into multiple jobs
# Each job executes the fpga_flow script on a combination of architecture & benchmark
# timeout_each_job is timeout for each job
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
[GENERAL]
run_engine=openfpga_shell
power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml
power_analysis = true
spice_output=false
verilog_output=true
timeout_each_job = 20*60
fpga_flow=vpr_blif
[OpenFPGA_SHELL]
openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/write_full_testbench_example_script.openfpga
openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_N4_40nm_cc_openfpga.xml
openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml
openfpga_vpr_device_layout=--device 4x4_io_center
openfpga_fast_configuration=
[ARCHITECTURES]
arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_N4_tileable_customIoLoc_40nm.xml
[BENCHMARKS]
bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.blif
[SYNTHESIS_PARAM]
bench0_top = and2
bench0_act = ${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.act
bench0_verilog = ${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.v
[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH]
end_flow_with_test=

View File

@ -0,0 +1,37 @@
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Configuration file for running experiments
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# timeout_each_job : FPGA Task script splits fpga flow into multiple jobs
# Each job executes the fpga_flow script on a combination of architecture & benchmark
# timeout_each_job is timeout for each job
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
[GENERAL]
run_engine=openfpga_shell
power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml
power_analysis = true
spice_output=false
verilog_output=true
timeout_each_job = 20*60
fpga_flow=vpr_blif
[OpenFPGA_SHELL]
openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/write_full_testbench_example_script.openfpga
openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_N4_40nm_cc_openfpga.xml
openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml
openfpga_vpr_device_layout=--device 4x3_io_center
openfpga_fast_configuration=
[ARCHITECTURES]
arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_N4_tileable_customIoLoc_40nm.xml
[BENCHMARKS]
bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.blif
[SYNTHESIS_PARAM]
bench0_top = and2
bench0_act = ${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.act
bench0_verilog = ${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.v
[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH]
end_flow_with_test=

View File

@ -0,0 +1,37 @@
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Configuration file for running experiments
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# timeout_each_job : FPGA Task script splits fpga flow into multiple jobs
# Each job executes the fpga_flow script on a combination of architecture & benchmark
# timeout_each_job is timeout for each job
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
[GENERAL]
run_engine=openfpga_shell
power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml
power_analysis = true
spice_output=false
verilog_output=true
timeout_each_job = 20*60
fpga_flow=vpr_blif
[OpenFPGA_SHELL]
openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/write_full_testbench_example_script.openfpga
openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_N4_40nm_cc_openfpga.xml
openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml
openfpga_vpr_device_layout=--device 3x4_io_center
openfpga_fast_configuration=
[ARCHITECTURES]
arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_N4_tileable_customIoLoc_40nm.xml
[BENCHMARKS]
bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.blif
[SYNTHESIS_PARAM]
bench0_top = and2
bench0_act = ${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.act
bench0_verilog = ${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.v
[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH]
end_flow_with_test=

View File

@ -132,6 +132,48 @@
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</fixed_layout>
<fixed_layout name="4x4_io_center" width="6" height="6">
<!--Perimeter of 'clb' blocks, I/Os are placed in the center-->
<row type="clb" starty="H-2" priority="90"/>
<row type="clb" starty="1" priority="91"/>
<col type="clb" startx="W-2" priority="93"/>
<col type="clb" startx="1" priority="93"/>
<row type="EMPTY" starty="H-1" priority="101"/>
<row type="EMPTY" starty="0" priority="102"/>
<col type="EMPTY" startx="0" priority="103"/>
<col type="EMPTY" startx="W-1" priority="104"/>
<corners type="EMPTY" priority="101"/>
<!--Fill the remaining (center) grids with 'io_top'-->
<fill type="io_top" priority="10"/>
</fixed_layout>
<fixed_layout name="4x3_io_center" width="6" height="5">
<!--Perimeter of 'clb' blocks, I/Os are placed in the center-->
<row type="clb" starty="H-2" priority="90"/>
<row type="clb" starty="1" priority="91"/>
<col type="clb" startx="W-2" priority="93"/>
<col type="clb" startx="1" priority="93"/>
<row type="EMPTY" starty="H-1" priority="101"/>
<row type="EMPTY" starty="0" priority="102"/>
<col type="EMPTY" startx="0" priority="103"/>
<col type="EMPTY" startx="W-1" priority="104"/>
<corners type="EMPTY" priority="101"/>
<!--Fill the remaining (center) grids with 'io_top'-->
<fill type="io_top" priority="10"/>
</fixed_layout>
<fixed_layout name="3x4_io_center" width="5" height="6">
<!--Perimeter of 'clb' blocks, I/Os are placed in the center-->
<row type="clb" starty="H-2" priority="90"/>
<row type="clb" starty="1" priority="91"/>
<col type="clb" startx="W-2" priority="93"/>
<col type="clb" startx="1" priority="93"/>
<row type="EMPTY" starty="H-1" priority="101"/>
<row type="EMPTY" starty="0" priority="102"/>
<col type="EMPTY" startx="0" priority="103"/>
<col type="EMPTY" startx="W-1" priority="104"/>
<corners type="EMPTY" priority="101"/>
<!--Fill the remaining (center) grids with 'io_top'-->
<fill type="io_top" priority="10"/>
</fixed_layout>
<fixed_layout name="48x48" width="50" height="50">
<!--Perimeter of 'EMPTY' blocks, I/Os are placed on the inner ring -->
<row type="io_top" starty="H-2" priority="90"/>