Merge pull request #1090 from lnis-uofu/xt_clk_arch

Programmable Clock Architecture Support
This commit is contained in:
tangxifan 2023-04-20 22:27:02 +08:00 committed by GitHub
commit 81428090b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
67 changed files with 5525 additions and 149 deletions

View File

@ -86,6 +86,7 @@ set(ODIN_YOSYS OFF CACHE BOOL "Enable building odin with yosys in Verilog-to-Rou
set(YOSYS_SV_UHDM_PLUGIN OFF CACHE BOOL "Enable building and installing Yosys SystemVerilog and UHDM plugins in Verilog-to-Routing")
set(YOSYS_F4PGA_PLUGINS OFF CACHE BOOL "Enable building and installing Yosys SystemVerilog and UHDM plugins")
set(VTR_ENABLE_VERSION ${OPENFPGA_WITH_VERSION} CACHE BOOL "Enable version always-up-to-date when building codebase. Disable only when you do not care an accurate version number")
set(WITH_PARMYS OFF CACHE BOOL "Enable Yosys as elaborator and parmys-plugin as partial mapper")
# TODO: OpenFPGA and VPR has different requirements on no-warning build, e.g., on OS and compiler versions
#set(VTR_ENABLE_STRICT_COMPILE ${OPENFPGA_ENABLE_STRICT_COMPILE} CACHE BOOL "Specifies whether compiler warnings should be treated as errors (e.g. -Werror)")

View File

@ -62,7 +62,7 @@ Here is an example:
.. code-block:: xml
<tile_annotations>
<global_port name="<string>" is_clock="<bool>" is_reset="<bool>" is_set="<bool>" default_val="<int>">
<global_port name="<string>" is_clock="<bool>" clock_arch_tree_name="<string>" is_reset="<bool>" is_set="<bool>" default_val="<int>">
<tile name="<string>" port="<string>" x="<int>" y="<int>"/>
...
</global_port>
@ -72,6 +72,8 @@ Here is an example:
- ``is_clock="<bool>"`` define if the global port is a clock port at the top-level FPGA fabric. An operating clock port will be driven by proper signals in auto-generated testbenches.
- ``clock_arch_tree_name="<string>"`` defines the name of the programmable clock network, which the global port will drive. The name of the programmable clock network must be a valid name (See details in :ref:`file_formats_clock_network`)
- ``is_reset="<bool>"`` define if the global port is a reset port at the top-level FPGA fabric. An operating reset port will be driven by proper signals in testbenches.
- ``is_set="<bool>"`` define if the global port is a set port at the top-level FPGA fabric. An operating set port will be driven by proper signals in testbenches.

View File

@ -0,0 +1,207 @@
.. _file_formats_clock_network:
Clock Network (.xml)
--------------------
The XML-based clock network description language is used to describe
- One or more programmable clock networks containing programmable switches for routing clock signals
- The routing for clock signals on the programmable clock network
Using the clock network description language, users can define multiple clock networks, each of which consists of:
- A number of clock spines which can propagate clock signals from one point to another. See details in :ref:`file_formats_clock_network_clock_spine`.
- A number of switch points which interconnect clock spines using programmable routing switches. See details in :ref:`file_formats_clock_network_switch_point`.
- A number of tap points which connect the clock spines to programmable blocks, e.g., CLBs. See details in :ref:`file_formats_clock_network_tap_point`.
.. note:: Please note that the levels of a clock network will be automatically inferred from the clock spines and switch points. Clock network will be **only** built based on the width and the number of levels, as well as the tap points.
.. note:: The switch points and clock spines will be used to route a clock network. The switch points will not impact the physical clock network but only impact the configuration of the programmable routing switches in the physical clock network.
.. warning:: Clock network is a feature for power-users. It requires additional EDA support to leverage the best performance of the clock network, as timing analysis and convergence is more challenging.
.. code-block:: xml
<clock_networks default_segment="<string>" default_switch="<string>">
<clock_network name="<string>" width="<int>">
<spine name="<string>" start_x="<int>" start_y="<int>" end_x="<int>" end_y="<int>">
<switch_point tap="<string>" x="<int>" y="<int>"/>
</spine>
<taps>
<tap tile_pin="<string>"/>
</taps>
</clock_network>
</clock_networks>
General Settings
^^^^^^^^^^^^^^^^
The following syntax are applicable to the XML definition under the root node ``clock_networks``
.. option:: default_segment="<string>"
Define the default routing segment to be used when building the routing tracks for the clock network. Must be a valid routing segment defined in the VPR architecture file. For example,
.. code-block:: xml
default_segment="L1"
where the segment is defined in the VPR architecture file:
.. code-block:: xml
<segmentlist>
<segment name="L1" freq="1" length="1" type="undir"/>
</segmentlist>
.. note:: Currently, clock network requires only length-1 wire segment to be used!
.. option:: default_switch="<string>"
Define the default routing switch to be used when interconnecting the routing tracks in the clock network. Must be a valid routing switch defined in the VPR architecture file. For example,
.. code-block:: xml
default_switch="clk_mux"
where the switch is defined in the VPR architecture file:
.. code-block:: xml
<switchlist>
<switch type="mux" name="clk_mux" R="551" Cin=".77e-15" Cout="4e-15" Tdel="58e-12" mux_trans_size="2.630740" buf_size="27.645901"/>
</switchlist>
.. note:: Currently, clock network only supports one type of routing switch, which means all the programmable routing switches in the clock network will be of the same type and circuit design topology.
Clock Network Settings
^^^^^^^^^^^^^^^^^^^^^^
The following syntax are applicable to the XML definition tagged by ``clock_network``.
Note that a number of clock networks can be defined under the root node ``clock_networks``.
.. option:: name="<string>"
The unique name of the clock network. It will be used to link the clock network to a specific global port in :ref:`annotate_vpr_arch_physical_tile_annotation`. For example,
.. code-block:: xml
name="clk_tree_0"
where the clock network is used to drive the global clock pin ``clk0`` in OpenFPGA's architecture description file:
.. code-block:: xml
<tile_annotations>
<global_port name="clk0" is_clock="true" clock_arch_tree_name="clk_tree_0" default_val="0">
<tile name="clb" port="clk[0:1]"/>
</global_port>
</tile_annotations>
.. option:: width="<int>"
The maximum number of clock pins that a clock network can drive.
.. _file_formats_clock_network_clock_spine:
Clock Spine Settings
^^^^^^^^^^^^^^^^^^^^
The following syntax are applicable to the XML definition tagged by ``spine``.
Note that a number of clock spines can be defined under the node ``clock_network``.
.. option:: name="<string>"
The unique name of the clock spine. It will be used to build switch points between other clock spines.
.. option:: start_x="<int>"
The coordinate X of the starting point of the clock spine.
.. option:: start_y="<int>"
The coordinate Y of the starting point of the clock spine.
.. option:: end_x="<int>"
The coordinate X of the ending point of the clock spine.
.. option:: end_y="<int>"
The coordinate Y of the ending point of the clock spine.
For example,
.. code-block:: xml
<spine name="spine0" start_x="1" start_y="1" end_x="2" end_y="1"/>
where a horizontal clock spine ``spine0`` is defined which spans from (1, 1) to (2, 1)
.. note:: We only support clock spines in horizontal and vertical directions. Diagonal clock spines are not supported!
.. _file_formats_clock_network_switch_point:
Switch Point Settings
^^^^^^^^^^^^^^^^^^^^^
The following syntax are applicable to the XML definition tagged by ``switch_point``.
Note that a number of switch points can be defined under each clock spine ``spine``.
.. option:: tap="<string>"
Define which clock spine will be tapped from the current clock spine.
.. option:: x="<int>"
The coordinate X of the switch point. Must be a valid coordinate within the range of the current clock spine and the clock spine to be tapped.
.. option:: y="<int>"
The coordinate Y of the switch point. Must be a valid coordinate within the range of the current clock spine and the clock spine to be tapped.
For example,
.. code-block:: xml
<spine name="spine0" start_x="1" start_y="1" end_x="2" end_y="1">
<switch_point tap="spine1" x="1" y="1"/>
</spine>
where clock spine ``spine0`` will drive another clock spine ``spine1`` at (1, 1).
.. _file_formats_clock_network_tap_point:
Tap Point Settings
^^^^^^^^^^^^^^^^^^
The following syntax are applicable to the XML definition tagged by ``tap``.
Note that a number of tap points can be defined under the node ``taps``.
.. option:: tile_pin="<string>"
Define the pin of a programmable block to be tapped by a clock network. The pin must be a valid pin defined in the VPR architecture description file.
.. note:: Only leaf clock spines (i.e., spines without switch points that drive other clock spines) can tap pins of programmable blocks.
For example,
.. code-block:: xml
<clock_network name="clk_tree_0" width="1">
<!-- Some clock spines -->
<taps>
<tap tile_pin="clb.clk"/>
</taps>
</clock_network>
where all the clock spines of the clock network ``clk_tree_0`` tap the clock pins ``clk`` of tile ``clb`` in a VPR architecture description file:
.. code-block:: xml
<tile name="clb">
<sub_tile name="clb">
<clock name="clk" num_pins="1"/>
</sub_tile>
</tile>

View File

@ -33,3 +33,5 @@ OpenFPGA widely uses XML format for interchangable files
pcf_file
pin_table_file
clock_network

View File

@ -83,6 +83,60 @@ write_openfpga_bitstream_setting
Show verbose log
.. _openfpga_setup_command_read_openfpga_clock_arch:
read_openfpga_clock_arch
~~~~~~~~~~~~~~~~~~~~~~~~
Read the XML file about programmable clock network (see details in :ref:`file_formats_clock_network`)
.. option:: --file <string> or -f <string>
Specify the file name. For example, ``--file clock_network.xml``
.. option:: --verbose
Show verbose log
write_openfpga_clock_arch
~~~~~~~~~~~~~~~~~~~~~~~~~
Write the OpenFPGA programmable clock network to an XML file
.. option:: --file <string> or -f <string>
Specify the file name. For example, ``--file clock_network_echo.xml``.
See details about file format at :ref:`file_formats_clock_network`.
.. option:: --verbose
Show verbose log
append_clock_rr_graph
~~~~~~~~~~~~~~~~~~~~~
Build the routing resource graph based on a defined programmable clock network, and append it to the existing routing resource graph built by VPR.
Use command :ref:`openfpga_setup_command_read_openfpga_clock_arch` to load the clock network.
.. option:: --verbose
Show verbose log
route_clock_rr_graph
~~~~~~~~~~~~~~~~~~~~
Route clock signals on the built routing resource graph which contains a programmable clock network.
Clock signals will be auto-detected and routed based on pin constraints which are provided by users.
.. option:: --pin_constraints_file <string> or -pcf <string>
Specify the *Pin Constraints File* (PCF) when the clock network contains multiple clock pins. For example, ``--pin_constraints_file pin_constraints.xml``
Strongly recommended for multi-clock networks. See details about the file format in :ref:`file_format_pin_constraints_file`.
.. option:: --verbose
Show verbose log
link_openfpga_arch
~~~~~~~~~~~~~~~~~~

View File

@ -1,6 +1,7 @@
# OpenFPGA-related libraries
add_subdirectory(libini)
add_subdirectory(libopenfpgashell)
add_subdirectory(libclkarchopenfpga)
add_subdirectory(libarchopenfpga)
add_subdirectory(libopenfpgautil)
add_subdirectory(libfabrickey)

View File

@ -89,6 +89,14 @@ static void read_xml_tile_global_port_annotation(
get_attribute(xml_tile, "is_clock", loc_data, pugiutil::ReqOpt::OPTIONAL)
.as_bool(false));
/* Get clock tree attributes if this is a clock */
if (tile_annotation.global_port_is_clock(tile_global_port_id)) {
tile_annotation.set_global_port_clock_arch_tree_name(
tile_global_port_id, get_attribute(xml_tile, "clock_arch_tree_name",
loc_data, pugiutil::ReqOpt::OPTIONAL)
.as_string());
}
/* Get is_set attributes */
tile_annotation.set_global_port_is_set(
tile_global_port_id,

View File

@ -71,6 +71,12 @@ size_t TileAnnotation::global_port_default_value(
return global_port_default_values_[global_port_id];
}
/* Return the name of the programmable clock network (clock tree) recorded
 * for the given global port. The id must be valid (asserted). The returned
 * string is whatever was last stored by
 * set_global_port_clock_arch_tree_name(); create_global_port() starts each
 * entry as a default-constructed (empty) string. */
std::string TileAnnotation::global_port_clock_arch_tree_name(
const TileGlobalPortId& global_port_id) const {
VTR_ASSERT(valid_global_port_id(global_port_id));
return global_port_clock_arch_tree_names_[global_port_id];
}
/************************************************************************
* Public Mutators
***********************************************************************/
@ -91,6 +97,7 @@ TileGlobalPortId TileAnnotation::create_global_port(
global_port_tile_ports_.emplace_back();
global_port_tile_coordinates_.emplace_back();
global_port_is_clock_.push_back(false);
global_port_clock_arch_tree_names_.emplace_back();
global_port_is_set_.push_back(false);
global_port_is_reset_.push_back(false);
global_port_default_values_.push_back(0);
@ -116,6 +123,12 @@ void TileAnnotation::set_global_port_is_clock(
global_port_is_clock_[global_port_id] = is_clock;
}
/* Record the name of the programmable clock network (clock tree) for the
 * given global port. The id must be valid (asserted). The name is stored
 * as-is; no check against any loaded clock network is done here. */
void TileAnnotation::set_global_port_clock_arch_tree_name(
const TileGlobalPortId& global_port_id, const std::string& clock_tree_name) {
VTR_ASSERT(valid_global_port_id(global_port_id));
global_port_clock_arch_tree_names_[global_port_id] = clock_tree_name;
}
void TileAnnotation::set_global_port_is_set(
const TileGlobalPortId& global_port_id, const bool& is_set) {
VTR_ASSERT(valid_global_port_id(global_port_id));

View File

@ -51,6 +51,8 @@ class TileAnnotation {
bool global_port_is_clock(const TileGlobalPortId& global_port_id) const;
bool global_port_is_set(const TileGlobalPortId& global_port_id) const;
bool global_port_is_reset(const TileGlobalPortId& global_port_id) const;
std::string global_port_clock_arch_tree_name(
const TileGlobalPortId& global_port_id) const;
size_t global_port_default_value(
const TileGlobalPortId& global_port_id) const;
@ -66,6 +68,8 @@ class TileAnnotation {
const vtr::Point<size_t>& tile_coord);
void set_global_port_is_clock(const TileGlobalPortId& global_port_id,
const bool& is_clock);
void set_global_port_clock_arch_tree_name(
const TileGlobalPortId& global_port_id, const std::string& clock_tree_name);
void set_global_port_is_set(const TileGlobalPortId& global_port_id,
const bool& is_set);
void set_global_port_is_reset(const TileGlobalPortId& global_port_id,
@ -91,6 +95,7 @@ class TileAnnotation {
global_port_tile_coordinates_;
vtr::vector<TileGlobalPortId, std::vector<BasicPort>> global_port_tile_ports_;
vtr::vector<TileGlobalPortId, bool> global_port_is_clock_;
vtr::vector<TileGlobalPortId, std::string> global_port_clock_arch_tree_names_;
vtr::vector<TileGlobalPortId, bool> global_port_is_reset_;
vtr::vector<TileGlobalPortId, bool> global_port_is_set_;
vtr::vector<TileGlobalPortId, size_t> global_port_default_values_;

View File

@ -36,6 +36,14 @@ static void write_xml_tile_annotation_global_port(
write_xml_attribute(fp, "is_clock",
tile_annotation.global_port_is_clock(global_port_id));
if (tile_annotation.global_port_is_clock(global_port_id) &&
!tile_annotation.global_port_clock_arch_tree_name(global_port_id)
.empty()) {
write_xml_attribute(
fp, "clock_arch_tree_name",
tile_annotation.global_port_clock_arch_tree_name(global_port_id).c_str());
}
write_xml_attribute(fp, "is_set",
tile_annotation.global_port_is_set(global_port_id));

View File

@ -0,0 +1,38 @@
# Build script for libclkarchopenfpga: a static library built from src/,
# plus one executable per source file found under test/
cmake_minimum_required(VERSION 3.9)
project("libclkarchopenfpga")
#Collect test sources, library sources and library headers
file(GLOB_RECURSE EXEC_SOURCES test/*.cpp)
file(GLOB_RECURSE LIB_SOURCES src/*.cpp)
file(GLOB_RECURSE LIB_HEADERS src/*.h)
files_to_dirs(LIB_HEADERS LIB_INCLUDE_DIRS)
#Remove test executable from library
list(REMOVE_ITEM LIB_SOURCES ${EXEC_SOURCES})
#Create the library
add_library(libclkarchopenfpga STATIC
${LIB_HEADERS}
${LIB_SOURCES})
target_include_directories(libclkarchopenfpga PUBLIC ${LIB_INCLUDE_DIRS})
set_target_properties(libclkarchopenfpga PROPERTIES PREFIX "") #Avoid extra 'lib' prefix
#Specify link-time dependencies
target_link_libraries(libclkarchopenfpga
libopenfpgautil
libopenfpgashell
libarchopenfpga
librrgraph
libvtrutil
libpugiutil)
#Create the test executable
foreach(testsourcefile ${EXEC_SOURCES})
# Derive the executable name from the source file name
# (NAME_WE strips the directory and the extension)
get_filename_component(testname ${testsourcefile} NAME_WE)
add_executable(${testname} ${testsourcefile})
# Make sure the library is linked to each test executable
target_link_libraries(${testname} libclkarchopenfpga)
endforeach(testsourcefile ${EXEC_SOURCES})
install(TARGETS libclkarchopenfpga DESTINATION bin)

View File

@ -0,0 +1,26 @@
<!-- Example clock network with three levels of spines:
     - spine_lvl3 runs horizontally from (0,2) to (2,2) and, at (2,2),
       switches into two vertical spines
     - spine_lvl2_upper runs from (2,2) to (2,3); spine_lvl2_lower runs from
       (2,2) to (2,1); each switches into a left and a right rib at its end
     - the four rib_lvl1_* spines have no switch points
     The taps connect the network to clk[0:7] of the io, clb, dsp and bram
     tiles
-->
<clock_networks default_segment="seg_len1" default_switch="fast_switch">
<clock_network name="example_network" width="8">
<spine name="spine_lvl3" start_x="0" start_y="2" end_x="2" end_y="2">
<switch_point tap="spine_lvl2_upper" x="2" y="2"/>
<switch_point tap="spine_lvl2_lower" x="2" y="2"/>
</spine>
<spine name="spine_lvl2_upper" start_x="2" start_y="2" end_x="2" end_y="3">
<switch_point tap="rib_lvl1_upper_left" x="2" y="3"/>
<switch_point tap="rib_lvl1_upper_right" x="2" y="3"/>
</spine>
<spine name="spine_lvl2_lower" start_x="2" start_y="2" end_x="2" end_y="1">
<switch_point tap="rib_lvl1_lower_left" x="2" y="1"/>
<switch_point tap="rib_lvl1_lower_right" x="2" y="1"/>
</spine>
<spine name="rib_lvl1_upper_left" start_x="2" start_y="3" end_x="1" end_y="3"/>
<spine name="rib_lvl1_upper_right" start_x="2" start_y="3" end_x="3" end_y="3"/>
<spine name="rib_lvl1_lower_left" start_x="2" start_y="1" end_x="1" end_y="1"/>
<spine name="rib_lvl1_lower_right" start_x="2" start_y="1" end_x="3" end_y="1"/>
<taps>
<tap tile_pin="io[0:23].clk[0:7]"/>
<tap tile_pin="clb[0:0].clk[0:7]"/>
<tap tile_pin="dsp[0:0].clk[0:7]"/>
<tap tile_pin="bram[0:0].clk[0:7]"/>
</taps>
</clock_network>
</clock_networks>

View File

@ -0,0 +1,745 @@
#include "clock_network.h"
#include <algorithm>
#include "openfpga_port_parser.h"
#include "openfpga_tokenizer.h"
#include "vtr_assert.h"
#include "vtr_log.h"
namespace openfpga { // Begin namespace openfpga
/************************************************************************
* Member functions for class ClockNetwork
***********************************************************************/
/************************************************************************
* Constructors
***********************************************************************/
ClockNetwork::ClockNetwork() {
  /* A fresh network starts dirty: tree_depth() and spine_level() refuse to
   * answer while this flag is set */
  is_dirty_ = true;
  /* No default routing switch / segment has been bound yet */
  default_switch_id_ = RRSwitchId::INVALID();
  default_segment_id_ = RRSegmentId::INVALID();
}
/************************************************************************
* Public Accessors : aggregates
***********************************************************************/
size_t ClockNetwork::num_trees() const { return trees().size(); }
/* Iterable range over the ids of all the clock trees in this network */
ClockNetwork::clock_tree_range ClockNetwork::trees() const {
  auto first_tree = tree_ids_.begin();
  auto last_tree = tree_ids_.end();
  return vtr::make_range(first_tree, last_tree);
}
std::vector<ClockLevelId> ClockNetwork::levels(
const ClockTreeId& tree_id) const {
std::vector<ClockLevelId> ret;
for (size_t ilvl = 0; ilvl < tree_depth(tree_id); ++ilvl) {
ret.push_back(ClockLevelId(ilvl));
}
return ret;
}
/* List the clock pin ids required at a given level of a tree, for a given
 * track type and direction.
 * Spines at different coordinates can share the same routing tracks, so the
 * pins are collected only once: from the first spine that matches the level,
 * track type and direction. An empty list is returned when no spine matches.
 */
std::vector<ClockTreePinId> ClockNetwork::pins(
  const ClockTreeId& tree_id, const ClockLevelId& level,
  const t_rr_type& track_type, const Direction& direction) const {
  std::vector<ClockTreePinId> pin_ids;
  bool already_collected = false;
  for (ClockSpineId spine : spines(tree_id)) {
    if (spine_levels_[spine] != size_t(level)) {
      continue;
    }
    if (spine_track_type(spine) != track_type) {
      continue;
    }
    /* Only the first spine in the requested direction contributes */
    if (already_collected || spine_direction(spine) != direction) {
      continue;
    }
    const size_t width = tree_width(spine_parent_trees_[spine]);
    pin_ids.reserve(pin_ids.size() + width);
    for (size_t ipin = 0; ipin < width; ++ipin) {
      pin_ids.push_back(ClockTreePinId(ipin));
    }
    already_collected = true;
  }
  return pin_ids;
}
std::vector<ClockTreePinId> ClockNetwork::pins(
const ClockTreeId& tree_id) const {
std::vector<ClockTreePinId> ret;
for (size_t i = 0; i < tree_width(tree_id); ++i) {
ret.push_back(ClockTreePinId(i));
}
return ret;
}
/************************************************************************
* Public Accessors : Basic data query
***********************************************************************/
/* Identify the routing track type of a spine from its end points:
 * - start and end points coincide: use the explicitly stored track type
 * - same Y only: horizontal track (CHANX)
 * - otherwise: vertical track (CHANY)
 */
t_rr_type ClockNetwork::spine_track_type(const ClockSpineId& spine_id) const {
  VTR_ASSERT(valid_spine_start_end_points(spine_id));
  const vtr::Point<int> from = spine_start_point(spine_id);
  const vtr::Point<int> to = spine_end_point(spine_id);
  const bool same_x = (from.x() == to.x());
  const bool same_y = (from.y() == to.y());
  if (same_x && same_y) {
    /* A single-point spine cannot be inferred from its coordinates */
    return spine_track_types_[spine_id];
  }
  if (same_y) {
    return CHANX;
  }
  return CHANY;
}
/* Identify the direction of a spine along its own axis (X for CHANX, Y for
 * CHANY):
 * - start equals end on that axis: use the explicitly stored direction
 * - start is smaller than end: INC
 * - otherwise: DEC
 */
Direction ClockNetwork::spine_direction(const ClockSpineId& spine_id) const {
  VTR_ASSERT(valid_spine_start_end_points(spine_id));
  const t_rr_type track_type = spine_track_type(spine_id);
  VTR_ASSERT(track_type == CHANX || track_type == CHANY);
  const bool along_x = (track_type == CHANX);
  /* Pick the coordinate that varies along the spine */
  const int from = along_x ? spine_start_point(spine_id).x()
                           : spine_start_point(spine_id).y();
  const int to =
    along_x ? spine_end_point(spine_id).x() : spine_end_point(spine_id).y();
  if (from == to) {
    return spine_directions_[spine_id];
  }
  if (from < to) {
    return Direction::INC;
  }
  return Direction::DEC;
}
/* Count the routing tracks required at a given level of a tree for a given
 * track type, covering both directions.
 * Spines at different coordinates can share the same routing tracks, so the
 * count does not scale with the number of spines: as soon as one spine of the
 * requested level and track type is found, the result is twice the tree width
 * (one set of tracks for INC and one for DEC).
 * Return 0 when no spine matches.
 */
size_t ClockNetwork::num_tracks(const ClockTreeId& tree_id,
                                const ClockLevelId& level,
                                const t_rr_type& track_type) const {
  for (ClockSpineId curr_spine : spines(tree_id)) {
    if (spine_levels_[curr_spine] != size_t(level)) {
      continue;
    }
    if (spine_track_type(curr_spine) == track_type) {
      /* TODO: Deposit routing tracks in both INC and DEC direction, currently
       * this is limited by the connection block build-up algorithm in fabric
       * generator */
      return 2 * tree_width(spine_parent_trees_[curr_spine]);
    }
  }
  return 0;
}
/* Count the routing tracks required at a given level of a tree for a given
 * track type and direction.
 * Spines at different coordinates can share the same routing tracks, so the
 * first spine matching level, track type and direction decides the result,
 * which is the width of its parent tree. Return 0 when no spine matches.
 */
size_t ClockNetwork::num_tracks(const ClockTreeId& tree_id,
                                const ClockLevelId& level,
                                const t_rr_type& track_type,
                                const Direction& direction) const {
  for (ClockSpineId spine : spines(tree_id)) {
    const bool level_matched = (spine_levels_[spine] == size_t(level));
    if (!level_matched) {
      continue;
    }
    if (spine_track_type(spine) != track_type) {
      continue;
    }
    if (spine_direction(spine) != direction) {
      continue;
    }
    /* TODO: Deposit routing tracks in both INC and DEC direction, currently
     * this is limited by the connection block build-up algorithm in fabric
     * generator */
    return tree_width(spine_parent_trees_[spine]);
  }
  return 0;
}
/* Return the name of the default routing segment, as stored by
 * set_default_segment_name() */
std::string ClockNetwork::default_segment_name() const {
return default_segment_name_;
}
/* Return the id of the default routing segment. It is
 * RRSegmentId::INVALID() until set_default_segment() is called */
RRSegmentId ClockNetwork::default_segment() const {
return default_segment_id_;
}
/* Return the name of the default routing switch, as stored by
 * set_default_switch_name() */
std::string ClockNetwork::default_switch_name() const {
return default_switch_name_;
}
/* Return the id of the default routing switch. It is RRSwitchId::INVALID()
 * until set_default_switch() is called */
RRSwitchId ClockNetwork::default_switch() const { return default_switch_id_; }
/* Return the name of a clock tree. The tree id must be valid (asserted) */
std::string ClockNetwork::tree_name(const ClockTreeId& tree_id) const {
VTR_ASSERT(valid_tree_id(tree_id));
return tree_names_[tree_id];
}
size_t ClockNetwork::max_tree_width() const {
size_t max_size = 0;
for (auto itree : trees()) {
max_size = std::max(tree_width(itree), max_size);
}
return max_size;
}
size_t ClockNetwork::max_tree_depth() const {
size_t max_size = 0;
for (auto itree : trees()) {
max_size = std::max(tree_depth(itree), max_size);
}
return max_size;
}
/* Return the width of a clock tree, as given to create_tree(). The tree id
 * must be valid (asserted) */
size_t ClockNetwork::tree_width(const ClockTreeId& tree_id) const {
VTR_ASSERT(valid_tree_id(tree_id));
return tree_widths_[tree_id];
}
/* Return the number of levels of a clock tree.
 * The tree id must be valid (asserted). The program exits with an error when
 * the data is still dirty.
 */
size_t ClockNetwork::tree_depth(const ClockTreeId& tree_id) const {
  VTR_ASSERT(valid_tree_id(tree_id));
  if (is_dirty_) {
    VTR_LOG_ERROR("Unable to identify tree depth when data is still dirty!\n");
    exit(1);
  }
  /* NOTE(review): the stored value looks like the largest level index, hence
   * the +1 to turn it into a count - confirm against the code that fills
   * tree_depths_ */
  const size_t stored_depth = tree_depths_[tree_id];
  return stored_depth + 1;
}
/* Return a copy of the top-spine list stored for a clock tree. The tree id
 * must be valid (asserted) */
std::vector<ClockSpineId> ClockNetwork::tree_top_spines(
const ClockTreeId& tree_id) const {
VTR_ASSERT(valid_tree_id(tree_id));
return tree_top_spines_[tree_id];
}
std::vector<ClockSpineId> ClockNetwork::spines(
const ClockTreeId& tree_id) const {
std::vector<ClockSpineId> ret;
for (ClockSpineId spine_id : spine_ids_) {
if (spine_parent_trees_[spine_id] == tree_id) {
ret.push_back(spine_id);
}
}
return ret;
}
/* Return the name of a spine. The spine id must be valid (asserted) */
std::string ClockNetwork::spine_name(const ClockSpineId& spine_id) const {
VTR_ASSERT(valid_spine_id(spine_id));
return spine_names_[spine_id];
}
/* Return the coordinate of the starting point of a spine. The spine id must
 * be valid (asserted) */
vtr::Point<int> ClockNetwork::spine_start_point(
const ClockSpineId& spine_id) const {
VTR_ASSERT(valid_spine_id(spine_id));
return spine_start_points_[spine_id];
}
/* Return the coordinate of the ending point of a spine. The spine id must
 * be valid (asserted) */
vtr::Point<int> ClockNetwork::spine_end_point(
const ClockSpineId& spine_id) const {
VTR_ASSERT(valid_spine_id(spine_id));
return spine_end_points_[spine_id];
}
/* Return the level of a spine in its clock tree.
 * The spine id must be valid (asserted). The program exits with an error
 * when the data is still dirty.
 */
ClockLevelId ClockNetwork::spine_level(const ClockSpineId& spine_id) const {
  VTR_ASSERT(valid_spine_id(spine_id));
  if (is_dirty_) {
    VTR_LOG_ERROR("Unable to identify spine level when data is still dirty!\n");
    exit(1);
  }
  const ClockLevelId level_id(spine_levels_[spine_id]);
  return level_id;
}
std::vector<vtr::Point<int>> ClockNetwork::spine_coordinates(
const ClockSpineId& spine_id) const {
vtr::Point<int> start_coord = spine_start_point(spine_id);
vtr::Point<int> end_coord = spine_end_point(spine_id);
std::vector<vtr::Point<int>> coords;
if (Direction::INC == spine_direction(spine_id)) {
if (CHANX == spine_track_type(spine_id)) {
for (int ix = start_coord.x(); ix <= end_coord.x(); ix++) {
coords.push_back(vtr::Point<int>(ix, start_coord.y()));
}
} else {
VTR_ASSERT(CHANY == spine_track_type(spine_id));
for (int iy = start_coord.y(); iy <= end_coord.y(); iy++) {
coords.push_back(vtr::Point<int>(start_coord.x(), iy));
}
}
} else {
VTR_ASSERT(Direction::DEC == spine_direction(spine_id));
if (CHANX == spine_track_type(spine_id)) {
for (int ix = start_coord.x(); ix >= end_coord.x(); ix--) {
coords.push_back(vtr::Point<int>(ix, start_coord.y()));
}
} else {
VTR_ASSERT(CHANY == spine_track_type(spine_id));
for (int iy = start_coord.y(); iy >= end_coord.y(); iy--) {
coords.push_back(vtr::Point<int>(start_coord.x(), iy));
}
}
}
return coords;
}
std::vector<ClockSwitchPointId> ClockNetwork::spine_switch_points(
const ClockSpineId& spine_id) const {
VTR_ASSERT(valid_spine_id(spine_id));
std::vector<ClockSwitchPointId> ret;
ret.reserve(spine_switch_points_[spine_id].size());
for (size_t i = 0; i < spine_switch_points_[spine_id].size(); ++i) {
ret.push_back(ClockSwitchPointId(i));
}
return ret;
}
/* Return the spine which is tapped at a given switch point of a spine.
 * The pair (spine id, switch point id) must be valid (asserted) */
ClockSpineId ClockNetwork::spine_switch_point_tap(
const ClockSpineId& spine_id,
const ClockSwitchPointId& switch_point_id) const {
VTR_ASSERT(valid_spine_switch_point_id(spine_id, switch_point_id));
return spine_switch_points_[spine_id][size_t(switch_point_id)];
}
/* Return the coordinate of a given switch point of a spine.
 * The pair (spine id, switch point id) must be valid (asserted) */
vtr::Point<int> ClockNetwork::spine_switch_point(
const ClockSpineId& spine_id,
const ClockSwitchPointId& switch_point_id) const {
VTR_ASSERT(valid_spine_switch_point_id(spine_id, switch_point_id));
return spine_switch_coords_[spine_id][size_t(switch_point_id)];
}
/* Return a copy of the tap names stored for a clock tree, without any
 * expansion. The tree id must be valid (asserted) */
std::vector<std::string> ClockNetwork::tree_taps(
const ClockTreeId& tree_id) const {
VTR_ASSERT(valid_tree_id(tree_id));
return tree_taps_[tree_id];
}
/* Expand the taps of a clock tree into flat pin names for one clock pin.
 * Each tap is expected in the form of <tile>.<port>, where both parts are
 * parsed as ports (a name plus a list of indices). For every tile index, a
 * name "<tile>[<tile_idx>].<port>[<pin_idx>]" is generated for each pin index
 * of the tap which equals clk_pin_id. Taps that do not contain the pin index
 * contribute nothing.
 * The tree id must be valid (asserted). The program exits with an error on a
 * malformed tap name or when the tile/pin part is not a valid port.
 */
std::vector<std::string> ClockNetwork::tree_flatten_taps(
  const ClockTreeId& tree_id, const ClockTreePinId& clk_pin_id) const {
  VTR_ASSERT(valid_tree_id(tree_id));
  std::vector<std::string> flatten_taps;
  for (const std::string& tap_name : tree_taps_[tree_id]) {
    StringToken tokenizer(tap_name);
    std::vector<std::string> pin_tokens = tokenizer.split(".");
    if (pin_tokens.size() != 2) {
      VTR_LOG_ERROR("Invalid pin name '%s'. Expect <tile>.<port>\n",
                    tap_name.c_str());
      exit(1);
    }
    PortParser tile_parser(pin_tokens[0]);
    BasicPort tile_info = tile_parser.port();
    PortParser pin_parser(pin_tokens[1]);
    BasicPort pin_info = pin_parser.port();
    if (!tile_info.is_valid()) {
      VTR_LOG_ERROR("Invalid pin name '%s' whose subtile index is not valid\n",
                    tap_name.c_str());
      exit(1);
    }
    if (!pin_info.is_valid()) {
      VTR_LOG_ERROR("Invalid pin name '%s' whose pin index is not valid\n",
                    tap_name.c_str());
      exit(1);
    }
    /* The matching pin names do not depend on the tile index: build them
     * once per tap rather than once per tile */
    std::vector<std::string> matched_pin_strs;
    for (const size_t pin_idx : pin_info.pins()) {
      if (pin_idx != size_t(clk_pin_id)) {
        continue;
      }
      matched_pin_strs.push_back(pin_info.get_name() + "[" +
                                 std::to_string(pin_idx) + "]");
    }
    if (matched_pin_strs.empty()) {
      continue;
    }
    for (const size_t tile_idx : tile_info.pins()) {
      const std::string flatten_tile_str =
        tile_info.get_name() + "[" + std::to_string(tile_idx) + "]";
      for (const std::string& flatten_pin_str : matched_pin_strs) {
        flatten_taps.push_back(flatten_tile_str + "." + flatten_pin_str);
      }
    }
  }
  return flatten_taps;
}
/* Look up a clock tree by name. Return ClockTreeId::INVALID() when no tree
 * is registered under that name */
ClockTreeId ClockNetwork::find_tree(const std::string& name) const {
  auto found = tree_name2id_map_.find(name);
  if (found != tree_name2id_map_.end()) {
    return found->second;
  }
  return ClockTreeId::INVALID();
}
/* Look up a spine by name. Return ClockSpineId::INVALID() when no spine is
 * registered under that name */
ClockSpineId ClockNetwork::find_spine(const std::string& name) const {
  auto found = spine_name2id_map_.find(name);
  if (found != spine_name2id_map_.end()) {
    return found->second;
  }
  return ClockSpineId::INVALID();
}
/* A clock network is empty when it contains no clock tree */
bool ClockNetwork::empty() const {
  return tree_ids_.empty();
}
/* The clock network is considered valid when its data is not dirty */
bool ClockNetwork::is_valid() const { return !is_dirty_; }
/* Return the id of the level following the given one (index + 1). No check
 * is done against the depth of any tree */
ClockLevelId ClockNetwork::next_level(const ClockLevelId& lvl) const {
return ClockLevelId(size_t(lvl) + 1);
}
/************************************************************************
* Public Mutators
***********************************************************************/
/* Reserve storage for a number of spines in every per-spine container, so
 * that the later create_spine() calls do not trigger reallocation */
void ClockNetwork::reserve_spines(const size_t& num_spines) {
spine_ids_.reserve(num_spines);
spine_names_.reserve(num_spines);
spine_levels_.reserve(num_spines);
spine_start_points_.reserve(num_spines);
spine_end_points_.reserve(num_spines);
spine_directions_.reserve(num_spines);
spine_track_types_.reserve(num_spines);
spine_switch_points_.reserve(num_spines);
spine_switch_coords_.reserve(num_spines);
spine_parents_.reserve(num_spines);
spine_children_.reserve(num_spines);
spine_parent_trees_.reserve(num_spines);
}
void ClockNetwork::reserve_trees(const size_t& num_trees) {
tree_ids_.reserve(num_trees);
tree_names_.reserve(num_trees);
tree_widths_.reserve(num_trees);
tree_top_spines_.reserve(num_trees);
tree_taps_.reserve(num_trees);
}
/* Store the id of the default routing segment; the id is not checked here */
void ClockNetwork::set_default_segment(const RRSegmentId& seg_id) {
default_segment_id_ = seg_id;
}
/* Store the id of the default routing switch; the id is not checked here */
void ClockNetwork::set_default_switch(const RRSwitchId& switch_id) {
default_switch_id_ = switch_id;
}
/* Store the name of the default routing segment; the name is not checked
 * here */
void ClockNetwork::set_default_segment_name(const std::string& name) {
default_segment_name_ = name;
}
/* Store the name of the default routing switch; the name is not checked
 * here */
void ClockNetwork::set_default_switch_name(const std::string& name) {
default_switch_name_ = name;
}
/* Create a new clock tree with a given name and width, and return its id.
 * Depth, taps and top spines start default-constructed.
 * The name must be unique: the program exits with an error when a tree with
 * the same name already exists.
 */
ClockTreeId ClockNetwork::create_tree(const std::string& name, size_t width) {
  /* The new id is the next free index */
  const ClockTreeId new_tree_id = ClockTreeId(tree_ids_.size());
  tree_ids_.push_back(new_tree_id);
  tree_names_.push_back(name);
  tree_widths_.push_back(width);
  tree_depths_.emplace_back();
  tree_taps_.emplace_back();
  tree_top_spines_.emplace_back();

  /* Register to fast look-up; an existing entry is left untouched */
  const bool is_new_name =
    tree_name2id_map_.emplace(name, new_tree_id).second;
  if (!is_new_name) {
    VTR_LOG_ERROR("Duplicated clock tree name '%s' in clock network\n",
                  name.c_str());
    exit(1);
  }
  return new_tree_id;
}
/* Create a new spine with a given name and return its id.
 * When the name is already used, a warning is logged, nothing is created and
 * ClockSpineId::INVALID() is returned.
 * A new spine starts at level 0, with default-constructed coordinates,
 * switch points, parents, children and parent tree; its direction and track
 * type are set to the out-of-range markers NUM_DIRECTIONS / NUM_RR_TYPES.
 */
ClockSpineId ClockNetwork::create_spine(const std::string& name) {
  /* Refuse duplicated names */
  const bool name_in_use =
    (spine_name2id_map_.find(name) != spine_name2id_map_.end());
  if (name_in_use) {
    VTR_LOG_WARN(
      "Unable to create a spine with duplicated name '%s' in clock "
      "network\nPlease use the existing spine or rename\n",
      name.c_str());
    return ClockSpineId::INVALID();
  }

  /* The new id is the next free index */
  const ClockSpineId new_spine_id = ClockSpineId(spine_ids_.size());

  /* Grow every per-spine container by one entry */
  spine_ids_.push_back(new_spine_id);
  spine_names_.push_back(name);
  spine_levels_.emplace_back(0);
  spine_start_points_.emplace_back();
  spine_end_points_.emplace_back();
  spine_directions_.emplace_back(Direction::NUM_DIRECTIONS);
  spine_track_types_.emplace_back(NUM_RR_TYPES);
  spine_switch_points_.emplace_back();
  spine_switch_coords_.emplace_back();
  spine_parents_.emplace_back();
  spine_children_.emplace_back();
  spine_parent_trees_.emplace_back();

  /* Register to the lookup */
  VTR_ASSERT(valid_spine_id(new_spine_id));
  spine_name2id_map_[name] = new_spine_id;
  return new_spine_id;
}
/* Return the id of the spine with the given name; create the spine first if
 * no spine with that name exists yet */
ClockSpineId ClockNetwork::try_create_spine(const std::string& name) {
  const ClockSpineId existing_id = find_spine(name);
  if (existing_id) {
    return existing_id;
  }
  return create_spine(name);
}
void ClockNetwork::set_spine_parent_tree(const ClockSpineId& spine_id,
const ClockTreeId& tree_id) {
VTR_ASSERT(valid_spine_id(spine_id));
VTR_ASSERT(valid_tree_id(tree_id));
spine_parent_trees_[spine_id] = tree_id;
}
/* Set the coordinate where a spine starts. The spine id must be valid */
void ClockNetwork::set_spine_start_point(const ClockSpineId& spine_id,
                                         const vtr::Point<int>& coord) {
  VTR_ASSERT(valid_spine_id(spine_id));
  this->spine_start_points_[spine_id] = coord;
}
/* Set the coordinate where a spine ends. The spine id must be valid */
void ClockNetwork::set_spine_end_point(const ClockSpineId& spine_id,
                                       const vtr::Point<int>& coord) {
  VTR_ASSERT(valid_spine_id(spine_id));
  this->spine_end_points_[spine_id] = coord;
}
void ClockNetwork::set_spine_direction(const ClockSpineId& spine_id,
const Direction& dir) {
VTR_ASSERT(valid_spine_id(spine_id));
spine_directions_[spine_id] = dir;
}
void ClockNetwork::set_spine_track_type(const ClockSpineId& spine_id,
const t_rr_type& type) {
VTR_ASSERT(valid_spine_id(spine_id));
spine_track_types_[spine_id] = type;
}
/* Add a switch point to a spine: at the given coordinate, spine 'spine_id'
 * drives spine 'drive_spine_id'. This also records the parent/child relation
 * between the two spines.
 * Both spine ids must be valid. The process is terminated with an error if
 * the driven spine already has a parent, because a spine may have only one
 * parent in a clock tree */
void ClockNetwork::add_spine_switch_point(const ClockSpineId& spine_id,
                                          const ClockSpineId& drive_spine_id,
                                          const vtr::Point<int>& coord) {
  VTR_ASSERT(valid_spine_id(spine_id));
  VTR_ASSERT(valid_spine_id(drive_spine_id));
  /* Do not allow any spine to have different parents. Check this before
   * touching the storage so that no partial update happens on failure */
  if (spine_parents_[drive_spine_id]) {
    VTR_LOG_ERROR(
      "Detect a spine '%s' has two parents '%s' and '%s'. Not allowed in a "
      "clock tree!\n",
      spine_name(drive_spine_id).c_str(),
      spine_name(spine_parents_[drive_spine_id]).c_str(),
      spine_name(spine_id).c_str());
    exit(1);
  }
  /* Register the switch point: tapped spine and its coordinate share an index
   */
  spine_switch_points_[spine_id].push_back(drive_spine_id);
  spine_switch_coords_[spine_id].push_back(coord);
  /* Link parent and child */
  spine_parents_[drive_spine_id] = spine_id;
  spine_children_[spine_id].push_back(drive_spine_id);
}
/* Append a tap pin name to the list of taps of a tree.
 * The tree id must be valid */
void ClockNetwork::add_tree_tap(const ClockTreeId& tree_id,
                                const std::string& pin_name) {
  VTR_ASSERT(valid_tree_id(tree_id));
  std::vector<std::string>& taps_of_tree = tree_taps_[tree_id];
  taps_of_tree.push_back(pin_name);
}
/* Build the internal links of every tree in the network.
 * Stops at the first tree that fails to link and returns false; returns true
 * when all the trees are linked */
bool ClockNetwork::link() {
  for (ClockTreeId curr_tree : trees()) {
    const bool linked = link_tree(curr_tree);
    if (!linked) {
      return false;
    }
  }
  return true;
}
/* Check the data of every spine of every tree. For each spine, ensure that
 * - all the switch point ids are valid
 * - starting and ending points are valid (see valid_spine_start_end_points())
 * - the inferred track type and direction match the stored ones
 * - the spine and its parent (if any) have different track types
 * Return false (after logging an error) at the first violation found,
 * otherwise return true */
bool ClockNetwork::validate_tree() const {
  for (ClockTreeId tree_id : trees()) {
    for (ClockSpineId spine_id : spines(tree_id)) {
      for (ClockSwitchPointId switch_point_id : spine_switch_points(spine_id)) {
        if (!valid_spine_switch_point_id(spine_id, switch_point_id)) {
          /* Coordinates are int (vtr::Point<int>): print them with %d */
          VTR_LOG_ERROR(
            "Spine '%s' contains invalid switching point (%d, %d)\n",
            spine_name(spine_id).c_str(),
            spine_switch_point(spine_id, switch_point_id).x(),
            spine_switch_point(spine_id, switch_point_id).y());
          return false;
        }
      }
      if (!valid_spine_start_end_points(spine_id)) {
        /* Coordinates are int (vtr::Point<int>): print them with %d */
        VTR_LOG_ERROR(
          "Spine '%s' contains invalid starting point (%d, %d) or ending "
          "point (%d, %d)\n",
          spine_name(spine_id).c_str(), spine_start_point(spine_id).x(),
          spine_start_point(spine_id).y(), spine_end_point(spine_id).x(),
          spine_end_point(spine_id).y());
        return false;
      }
      /* Ensure valid track types */
      if (spine_track_type(spine_id) != spine_track_types_[spine_id]) {
        VTR_LOG_ERROR(
          "Spine '%s' has a mismatch between inferred track type '%s' against "
          "user-defined track type '%s'\n",
          spine_name(spine_id).c_str(),
          rr_node_typename[spine_track_type(spine_id)],
          rr_node_typename[spine_track_types_[spine_id]]);
        return false;
      }
      /* Ensure valid directions */
      if (spine_direction(spine_id) != spine_directions_[spine_id]) {
        VTR_LOG_ERROR(
          "Spine '%s' has a mismatch between inferred direction '%s' against "
          "user-defined direction '%s'\n",
          spine_name(spine_id).c_str(),
          DIRECTION_STRING[size_t(spine_direction(spine_id))],
          DIRECTION_STRING[size_t(spine_directions_[spine_id])]);
        return false;
      }
      /* parent spine and child spine should be in different track type */
      ClockSpineId parent_spine = spine_parents_[spine_id];
      if (valid_spine_id(parent_spine)) {
        if (spine_track_type(spine_id) == spine_track_type(parent_spine)) {
          VTR_LOG_ERROR(
            "Spine '%s' and its parent '%s' are in the same track type (both "
            "horizental or vertical). Expect they are othorgonal (one "
            "horizental and one vertical)!\n",
            spine_name(spine_id).c_str(), spine_name(parent_spine).c_str());
          return false;
        }
      }
    }
  }
  return true;
}
/* Validate the internal data and update the dirty flag accordingly.
 * The data is considered clean only when the default segment id and the
 * default switch id are valid and validate_tree() succeeds.
 * Return true if the validation is successful, false otherwise */
bool ClockNetwork::validate() const {
  /* Assume dirty data until proven clean */
  is_dirty_ = true;
  if (default_segment_id_ && default_switch_id_ && validate_tree()) {
    is_dirty_ = false;
  }
  /* Report the actual outcome instead of an unconditional success */
  return !is_dirty_;
}
/* Build the internal links of a single tree. The steps run in a fixed order:
 * identify top-level spines, assign spine levels, compute the tree depth and
 * finally infer spine attributes. The chain stops at the first failing step
 * and false is returned; true is returned when every step succeeds */
bool ClockNetwork::link_tree(const ClockTreeId& tree_id) {
  /* Short-circuit evaluation keeps the steps in order and skips the remaining
   * ones once a step fails */
  return link_tree_top_spines(tree_id) && sort_tree_spines(tree_id) &&
         update_tree_depth(tree_id) && update_spine_attributes(tree_id);
}
bool ClockNetwork::link_tree_top_spines(const ClockTreeId& tree_id) {
tree_top_spines_[tree_id].clear();
/* Sort the spines under a tree; assign levels and identify top-level spines
*/
for (ClockSpineId spine_id : spines(tree_id)) {
/* Spines that have no parent are the top-level spines*/
if (!spine_parents_[spine_id]) {
tree_top_spines_[tree_id].push_back(spine_id);
}
}
return true;
}
bool ClockNetwork::sort_tree_spines(const ClockTreeId& tree_id) {
for (ClockSpineId spine_id : tree_top_spines_[tree_id]) {
spine_levels_[spine_id] = 0;
rec_update_spine_level(spine_id);
}
return true;
}
/* Recursively set the level of every descendant of a spine: each child is one
 * level deeper than its parent. Always returns true */
bool ClockNetwork::rec_update_spine_level(const ClockSpineId& spine_id) {
  for (ClockSpineId child : spine_children_[spine_id]) {
    const size_t parent_level = spine_levels_[spine_id];
    spine_levels_[child] = parent_level + 1;
    /* Go deeper into the sub-tree rooted at this child */
    rec_update_spine_level(child);
  }
  return true;
}
/* Store the largest spine level found among the spines of a tree as the depth
 * of the tree (0 when the tree has no spine). Always returns true */
bool ClockNetwork::update_tree_depth(const ClockTreeId& tree_id) {
  size_t deepest_level = 0;
  for (ClockSpineId curr_spine : spines(tree_id)) {
    const size_t curr_level = spine_levels_[curr_spine];
    if (curr_level > deepest_level) {
      deepest_level = curr_level;
    }
  }
  tree_depths_[tree_id] = deepest_level;
  return true;
}
/* For each spine of a tree, overwrite the stored track type and direction
 * with the values returned by spine_track_type() and spine_direction().
 * Always returns true */
bool ClockNetwork::update_spine_attributes(const ClockTreeId& tree_id) {
  for (ClockSpineId curr_spine : spines(tree_id)) {
    /* Keep this order: the track type is stored before the direction is
     * queried, exactly one after the other */
    const t_rr_type resolved_type = spine_track_type(curr_spine);
    spine_track_types_[curr_spine] = resolved_type;
    const Direction resolved_dir = spine_direction(curr_spine);
    spine_directions_[curr_spine] = resolved_dir;
  }
  return true;
}
/************************************************************************
* Internal invalidators/validators
***********************************************************************/
/* A tree id is valid when it is within the range of the tree storage and
 * matches the id stored at that position */
bool ClockNetwork::valid_tree_id(const ClockTreeId& tree_id) const {
  /* Out-of-range ids must be rejected before indexing the storage */
  if (size_t(tree_id) >= tree_ids_.size()) {
    return false;
  }
  return tree_id == tree_ids_[tree_id];
}
/* A level id is valid for a tree when the tree id is valid and the level is
 * strictly smaller than the value returned by tree_depth() */
bool ClockNetwork::valid_level_id(const ClockTreeId& tree_id,
                                  const ClockLevelId& lvl_id) const {
  if (!valid_tree_id(tree_id)) {
    return false;
  }
  return size_t(lvl_id) < tree_depth(tree_id);
}
/* Tell if a level is the last one of a tree, i.e., the tree id is valid and
 * the level equals tree_depth() - 1 */
bool ClockNetwork::is_last_level(const ClockTreeId& tree_id,
                                 const ClockLevelId& lvl_id) const {
  if (!valid_tree_id(tree_id)) {
    return false;
  }
  return size_t(lvl_id) == tree_depth(tree_id) - 1;
}
bool ClockNetwork::is_last_level(const ClockSpineId& spine_id) const {
return spine_level(spine_id) ==
ClockLevelId(tree_depth(spine_parent_trees_[spine_id]) - 1);
}
/* A spine id is valid when it is within the range of the spine storage and
 * matches the id stored at that position */
bool ClockNetwork::valid_spine_id(const ClockSpineId& spine_id) const {
  /* Out-of-range ids must be rejected before indexing the storage */
  if (size_t(spine_id) >= spine_ids_.size()) {
    return false;
  }
  return spine_id == spine_ids_[spine_id];
}
/* A switch point id is valid when the spine id is valid and the switch point
 * index is within the number of switch points stored for the spine */
bool ClockNetwork::valid_spine_switch_point_id(
  const ClockSpineId& spine_id,
  const ClockSwitchPointId& switch_point_id) const {
  /* Short-circuit: the storage is indexed only with a valid spine id */
  return valid_spine_id(spine_id) &&
         (size_t(switch_point_id) < spine_switch_points_[spine_id].size());
}
/* Starting and ending points of a spine are valid when they share the same x
 * or the same y coordinate; a spine whose x and y both differ is rejected.
 * The spine id must be valid */
bool ClockNetwork::valid_spine_start_end_points(
  const ClockSpineId& spine_id) const {
  VTR_ASSERT(valid_spine_id(spine_id));
  const bool same_x =
    spine_start_point(spine_id).x() == spine_end_point(spine_id).x();
  if (same_x) {
    return true;
  }
  /* x differs: the points are valid only if y is the same */
  return spine_start_point(spine_id).y() == spine_end_point(spine_id).y();
}
/* Coordinates of a spine are vague when the starting point and the ending
 * point have the same x and the same y */
bool ClockNetwork::is_vague_coordinate(const ClockSpineId& spine_id) const {
  if (spine_start_point(spine_id).x() != spine_end_point(spine_id).x()) {
    return false;
  }
  return spine_start_point(spine_id).y() == spine_end_point(spine_id).y();
}
} // End of namespace openfpga

View File

@ -0,0 +1,265 @@
#ifndef CLOCK_NETWORK_H
#define CLOCK_NETWORK_H
/********************************************************************
* This file includes the declaration of the clock network data structure
*******************************************************************/
#include <array>
#include <map>
#include <string>
/* Headers from vtrutil library */
#include "vtr_geometry.h"
#include "vtr_vector.h"
/* Headers from openfpgautil library */
#include "clock_network_fwd.h"
#include "rr_graph_fwd.h"
#include "rr_node_types.h"
namespace openfpga { // Begin namespace openfpga
/********************************************************************
* A data structure to describe a clock network
* A clock network consists of a number of clock trees
* each of which has:
* - a unique id
* - different entry point
*
* Typical usage:
* --------------
* // Create an object of clock network
* ClockNetwork clk_ntwk;
* // Create a new clock tree which contains 8 clock pins
* ClockTreeId clk_tree_id = clk_ntwk.create_tree("tree1", 8);
* // Add a spine to the clock tree
* ClockSpineId clk_spine_id = clk_ntwk.create_spine("tree1_spine0");
* clk_ntwk.set_spine_parent_tree(clk_spine_id, clk_tree_id);
*
*******************************************************************/
class ClockNetwork {
public: /* Types */
typedef vtr::vector<ClockTreeId, ClockTreeId>::const_iterator
clock_tree_iterator;
/* Create range */
typedef vtr::Range<clock_tree_iterator> clock_tree_range;
public: /* Constructors */
ClockNetwork();
public: /* Accessors: aggregates */
size_t num_trees() const;
clock_tree_range trees() const;
/* Return the range of clock levels */
std::vector<ClockLevelId> levels(const ClockTreeId& tree_id) const;
/* Return a list of spine id under a clock tree */
std::vector<ClockSpineId> spines(const ClockTreeId& tree_id) const;
/* Return a list of clock pins in a bus of clock tree at a given level and
* direction */
std::vector<ClockTreePinId> pins(const ClockTreeId& tree_id,
const ClockLevelId& level,
const t_rr_type& track_type,
const Direction& direction) const;
std::vector<ClockTreePinId> pins(const ClockTreeId& tree_id) const;
public: /* Public Accessors: Basic data query */
/* Return the number of routing tracks required by a selected clock tree at a
* given level and direction */
size_t num_tracks(const ClockTreeId& tree_id, const ClockLevelId& level,
const t_rr_type& track_type) const;
size_t num_tracks(const ClockTreeId& tree_id, const ClockLevelId& level,
const t_rr_type& track_type,
const Direction& direction) const;
/* Return the id of default routing segment, use this to find detailed segment
* information from RRGraph */
RRSegmentId default_segment() const;
std::string default_segment_name() const;
RRSwitchId default_switch() const;
std::string default_switch_name() const;
std::string tree_name(const ClockTreeId& tree_id) const;
size_t tree_width(const ClockTreeId& tree_id) const;
size_t tree_depth(const ClockTreeId& tree_id) const;
size_t max_tree_width() const;
size_t max_tree_depth() const;
std::vector<ClockSpineId> tree_top_spines(const ClockTreeId& tree_id) const;
std::string spine_name(const ClockSpineId& spine_id) const;
vtr::Point<int> spine_start_point(const ClockSpineId& spine_id) const;
vtr::Point<int> spine_end_point(const ClockSpineId& spine_id) const;
/* Return the level where the spine locates in the multi-layer clock tree
* structure */
ClockLevelId spine_level(const ClockSpineId& spine_id) const;
/* Return the list of coordinates that a spine will go across */
std::vector<vtr::Point<int>> spine_coordinates(
const ClockSpineId& spine_id) const;
/* Identify the track type of a spine, depending on its starting and ending
* points
* - CHANX represents a horizontal routing track
* - CHANY represents a vertical routing track
*/
t_rr_type spine_track_type(const ClockSpineId& spine_id) const;
/* Identify the direction of a spine, depending on its starting and ending
* points
* INC represents
* - a CHANX track goes from left to right, or
* - a CHANY track goes from bottom to top
* DEC represents
* - a CHANX track goes from right to left, or
* - a CHANY track goes from top to bottom
*/
Direction spine_direction(const ClockSpineId& spine_id) const;
/* Return the unique id of switch points under a clock spine */
std::vector<ClockSwitchPointId> spine_switch_points(
const ClockSpineId& spine_id) const;
/* Return the id of the spine which is tapped at a given switch point */
ClockSpineId spine_switch_point_tap(
const ClockSpineId& spine_id,
const ClockSwitchPointId& switch_point_id) const;
/* Return the coordinate of a given switch point */
vtr::Point<int> spine_switch_point(
const ClockSpineId& spine_id,
const ClockSwitchPointId& switch_point_id) const;
/* Return the original list of tap pins that is in storage; useful for parsers
*/
std::vector<std::string> tree_taps(const ClockTreeId& tree_id) const;
/* Return the list of flatten tap pins. For example: clb[0:1].clk[2:2] is
* flatten to { clb[0].clk[2], clb[1].clk[2] }. Useful to build clock routing
* resource graph. Note that the clk_pin_id limits only 1 clock to be accessed
*/
std::vector<std::string> tree_flatten_taps(
const ClockTreeId& tree_id, const ClockTreePinId& clk_pin_id) const;
/* Find a spine with a given name: if found, return a valid id, otherwise
* return an invalid one */
ClockSpineId find_spine(const std::string& name) const;
/* Find a tree with a given name: if found, return a valid id, otherwise
* return an invalid one (presumably mirrors find_spine(); to be confirmed
* against the implementation) */
ClockTreeId find_tree(const std::string& name) const;
/* Check if there are clock tree */
bool empty() const;
bool is_valid() const;
/* Get the level id which is next to the current level
* Note that this follows the same rule in computing levels in
* update_tree_depth(). If the rule has been changed, this API should be
* changed as well
*/
ClockLevelId next_level(const ClockLevelId& lvl) const;
public: /* Public Mutators */
/* Reserve a number of spines to be memory efficient */
void reserve_spines(const size_t& num_spines);
/* Reserve a number of trees to be memory efficient */
void reserve_trees(const size_t& num_trees);
void set_default_segment(const RRSegmentId& seg_id);
void set_default_switch(const RRSwitchId& switch_id);
void set_default_segment_name(const std::string& name);
void set_default_switch_name(const std::string& name);
/* Create a new tree, by default the tree can accommodate only 1 clock signal;
* use width to adjust the size */
ClockTreeId create_tree(const std::string& name, size_t width = 1);
/* Create a new spine, if the spine is already created, return an invalid id
*/
ClockSpineId create_spine(const std::string& name);
/* Try to create a new spine, if the spine is already existing, return the id.
* If not, create a new spine and return its id */
ClockSpineId try_create_spine(const std::string& name);
/* Set the parent tree for a given spine. It is illegal to have a spine which
* does not belong to any tree */
void set_spine_parent_tree(const ClockSpineId& spine_id,
const ClockTreeId& tree_id);
void set_spine_start_point(const ClockSpineId& spine_id,
const vtr::Point<int>& coord);
void set_spine_end_point(const ClockSpineId& spine_id,
const vtr::Point<int>& coord);
void set_spine_direction(const ClockSpineId& spine_id, const Direction& dir);
void set_spine_track_type(const ClockSpineId& spine_id,
const t_rr_type& type);
/* Add a switch point to a spine, where drive_spine_id is the spine tapped at
* the given coordinate. A spine can be tapped by only one parent spine */
void add_spine_switch_point(const ClockSpineId& spine_id,
const ClockSpineId& drive_spine_id,
const vtr::Point<int>& coord);
void add_tree_tap(const ClockTreeId& tree_id, const std::string& pin_name);
/* Build internal links between clock tree, spines etc. Must run before
* using the data! Use validate() to verify the correctness of the clock
* network */
bool link();
public: /* Public invalidators/validators */
/* Show if the tree id is a valid for data queries */
bool valid_tree_id(const ClockTreeId& tree_id) const;
/* Show if the level id is a valid for a given tree */
bool valid_level_id(const ClockTreeId& tree_id,
const ClockLevelId& lvl_id) const;
/* Identify if the level is the last level of the given tree */
bool is_last_level(const ClockTreeId& tree_id, const ClockLevelId& lvl) const;
/* Identify if the spine is at the last level of its tree */
bool is_last_level(const ClockSpineId& spine_id) const;
/* Show if the spine id is a valid for data queries */
bool valid_spine_id(const ClockSpineId& spine_id) const;
/* Show if the switch point id is valid for a given spine */
bool valid_spine_switch_point_id(
const ClockSpineId& spine_id,
const ClockSwitchPointId& switch_point_id) const;
/* Valid starting and ending point should indicate either this is a
* X-direction spine or a Y-direction spine. Diagonal spine is not supported!
*/
bool valid_spine_start_end_points(const ClockSpineId& spine_id) const;
/* Definition of a vague coordinate is that start_x == end_x && start_y ==
* end_y. In such situation, we need specific track type and direction to be
* provided by user
*/
bool is_vague_coordinate(const ClockSpineId& spine_id) const;
/* Validate the internal data. Required to ensure clean data before usage. If
* validation is successful, is_valid() will return true */
bool validate() const;
private: /* Private invalidators/validators */
/* Ensure tree data is clean. All the spines are valid, and switch points are
* valid */
bool validate_tree() const;
private: /* Private mutators */
/* Build internal links between spines under a given tree */
bool link_tree(const ClockTreeId& tree_id);
/* Identify the spines of a tree which have no parent spine */
bool link_tree_top_spines(const ClockTreeId& tree_id);
/* Require link_tree_top_spines() to be called before! */
bool sort_tree_spines(const ClockTreeId& tree_id);
bool rec_update_spine_level(const ClockSpineId& spine_id);
/* Require sort_tree_spines() to be called before! */
bool update_tree_depth(const ClockTreeId& tree_id);
/* Infer track type and directions for each spine by their coordinates */
bool update_spine_attributes(const ClockTreeId& tree_id);
private: /* Internal data */
/* Basic information of each tree */
vtr::vector<ClockTreeId, ClockTreeId> tree_ids_;
vtr::vector<ClockTreeId, std::string> tree_names_;
vtr::vector<ClockTreeId, size_t> tree_widths_;
vtr::vector<ClockTreeId, size_t> tree_depths_;
vtr::vector<ClockTreeId, std::vector<ClockSpineId>> tree_top_spines_;
vtr::vector<ClockTreeId, std::vector<std::string>> tree_taps_;
/* Basic information of each spine */
vtr::vector<ClockSpineId, ClockSpineId> spine_ids_;
vtr::vector<ClockSpineId, std::string> spine_names_;
vtr::vector<ClockSpineId, size_t> spine_levels_;
vtr::vector<ClockSpineId, vtr::Point<int>> spine_start_points_;
vtr::vector<ClockSpineId, vtr::Point<int>> spine_end_points_;
vtr::vector<ClockSpineId, Direction> spine_directions_;
vtr::vector<ClockSpineId, t_rr_type> spine_track_types_;
/* Switch points of each spine: the tapped spine and the coordinate of a
* switch point share the same index in the two vectors below */
vtr::vector<ClockSpineId, std::vector<ClockSpineId>> spine_switch_points_;
vtr::vector<ClockSpineId, std::vector<vtr::Point<int>>> spine_switch_coords_;
vtr::vector<ClockSpineId, ClockSpineId> spine_parents_;
vtr::vector<ClockSpineId, std::vector<ClockSpineId>> spine_children_;
vtr::vector<ClockSpineId, ClockTreeId> spine_parent_trees_;
/* Default routing resource */
std::string default_segment_name_; /* The routing segment representing the
clock wires */
RRSegmentId default_segment_id_;
std::string
default_switch_name_; /* The routing switch interconnecting clock wire */
RRSwitchId default_switch_id_;
/* Fast lookup */
std::map<std::string, ClockTreeId> tree_name2id_map_;
std::map<std::string, ClockSpineId> spine_name2id_map_;
/* Flags */
/* Mutable because validate() is a const method which updates the flag */
mutable bool is_dirty_;
};
} // End of namespace openfpga
#endif

View File

@ -0,0 +1,34 @@
/************************************************************************
* A header file for the ClockNetwork class, including critical data declarations
* Please include this file only when the ids or a forward declaration of the
* ClockNetwork data structure are needed
* Refer to clock_network.h for more details
***********************************************************************/
/************************************************************************
* Create strong id for ClockNetwork to avoid illegal type casting
***********************************************************************/
#ifndef CLOCK_NETWORK_FWD_H
#define CLOCK_NETWORK_FWD_H
#include "vtr_strong_id.h"
namespace openfpga {  // Begin namespace openfpga

/* Tags which make each strong id a distinct type; they are declared only and
 * never defined */
struct clock_level_id_tag;
struct clock_tree_id_tag;
struct clock_tree_pin_id_tag;
struct clock_spine_id_tag;
struct clock_switch_point_id_tag;

/* Strong ids used by the ClockNetwork data structure */
using ClockLevelId = vtr::StrongId<clock_level_id_tag>;
using ClockTreeId = vtr::StrongId<clock_tree_id_tag>;
using ClockTreePinId = vtr::StrongId<clock_tree_pin_id_tag>;
using ClockSpineId = vtr::StrongId<clock_spine_id_tag>;
using ClockSwitchPointId = vtr::StrongId<clock_switch_point_id_tag>;

/* Short declaration of class */
class ClockNetwork;

}  // End of namespace openfpga
#endif

View File

@ -0,0 +1,133 @@
#include "rr_clock_spatial_lookup.h"
#include "vtr_assert.h"
#include "vtr_log.h"
namespace openfpga { // begin namespace openfpga
/* Default constructor: the body is empty, no explicit initialization is done */
RRClockSpatialLookup::RRClockSpatialLookup() {}
/* Find the id of the clock node registered at grid location (x, y) for the
 * given clock tree, level, pin and direction.
 * Return an invalid id if
 * - x or y is negative, or the direction is neither INC nor DEC
 * - any of the indices is out of the range of the look-up
 * Note that the entry stored in the look-up is returned as it is, which may be
 * an invalid id when no node has been registered at that entry */
RRNodeId RRClockSpatialLookup::find_node(int x, int y, const ClockTreeId& tree,
                                         const ClockLevelId& lvl,
                                         const ClockTreePinId& pin,
                                         const Direction& direction) const {
  size_t dir = size_t(direction);
  /* Pre-check: x and y should be non negative numbers and the direction must
   * be either INC or DEC! Otherwise, return an invalid id */
  if ((x < 0) || (y < 0) ||
      (direction != Direction::INC && direction != Direction::DEC)) {
    return RRNodeId::INVALID();
  }

  /* Sanity check to ensure the direction, x, y, tree, level and pin are in
   * range
   * - Return an valid id by searching in look-up when all the parameters are in
   *   range
   * - Return an invalid id if any out-of-range is detected
   */
  if (size_t(dir) >= rr_node_indices_.size()) {
    VTR_LOG("Direction out of range");
    return RRNodeId::INVALID();
  }
  if (size_t(x) >= rr_node_indices_[dir].dim_size(0)) {
    VTR_LOG("X out of range");
    return RRNodeId::INVALID();
  }
  if (size_t(y) >= rr_node_indices_[dir].dim_size(1)) {
    VTR_LOG("Y out of range");
    return RRNodeId::INVALID();
  }
  if (size_t(tree) >= rr_node_indices_[dir][x][y].size()) {
    VTR_LOG("Tree id out of range");
    return RRNodeId::INVALID();
  }
  /* Use '>=' rather than '==': any index at or beyond the size is out of
   * range, including an invalid id which becomes a huge number once casted */
  if (size_t(lvl) >= rr_node_indices_[dir][x][y][size_t(tree)].size()) {
    VTR_LOG("Level id out of range");
    return RRNodeId::INVALID();
  }
  if (size_t(pin) >=
      rr_node_indices_[dir][x][y][size_t(tree)][size_t(lvl)].size()) {
    VTR_LOG("Pin id out of range");
    return RRNodeId::INVALID();
  }

  return rr_node_indices_[dir][x][y][size_t(tree)][size_t(lvl)][size_t(pin)];
}
/* Register a node in the look-up at grid location (x, y) for the given clock
 * tree, level, pin and direction. The node id must be valid. Each dimension of
 * the look-up is grown on demand so that the entry exists before it is written
 */
void RRClockSpatialLookup::add_node(RRNodeId node, int x, int y,
                                    const ClockTreeId& tree,
                                    const ClockLevelId& lvl,
                                    const ClockTreePinId& pin,
                                    const Direction& direction) {
  size_t dir = size_t(direction);
  VTR_ASSERT(node); /* Must have a valid node id to be added */
  VTR_ASSERT_SAFE(2 == rr_node_indices_[dir].ndims());

  /* Grow the (x, y) grid first; the references taken below stay valid because
   * the grid is not resized again */
  resize_nodes(x, y, direction);

  const size_t tree_idx = size_t(tree);
  const size_t lvl_idx = size_t(lvl);
  const size_t pin_idx = size_t(pin);

  auto& per_tree = rr_node_indices_[dir][x][y];
  if (tree_idx >= per_tree.size()) {
    per_tree.resize(tree_idx + 1);
  }
  auto& per_level = per_tree[tree_idx];
  if (lvl_idx >= per_level.size()) {
    per_level.resize(lvl_idx + 1);
  }
  auto& per_pin = per_level[lvl_idx];
  if (pin_idx >= per_pin.size()) {
    per_pin.resize(pin_idx + 1);
  }

  /* Resize on demand finished; Register the node */
  per_pin[pin_idx] = node;
}
/* Allocate the look-up for both directions (INC and DEC): for every grid
 * location in [0, x) x [0, y), size the storage to 'tree' trees, 'lvl' levels
 * per tree and 'pin' pins per level */
void RRClockSpatialLookup::reserve_nodes(int x, int y, int tree, int lvl,
                                         int pin) {
  for (Direction dir : {Direction::INC, Direction::DEC}) {
    resize_nodes(x, y, dir);
    auto& grid = rr_node_indices_[size_t(dir)];
    for (int ix = 0; ix < x; ++ix) {
      for (int iy = 0; iy < y; ++iy) {
        auto& per_tree = grid[ix][iy];
        per_tree.resize(tree);
        /* After the resize, each container holds exactly the requested number
         * of entries, so visiting all of them covers every index */
        for (auto& per_level : per_tree) {
          per_level.resize(lvl);
          for (auto& per_pin : per_level) {
            per_pin.resize(pin);
          }
        }
      }
    }
  }
}
/* Grow the (x, y) grid of the look-up for a direction so that location (x, y)
 * is inside the grid. The grid is never shrunk. The direction must be in the
 * range of the look-up and x, y must be non negative */
void RRClockSpatialLookup::resize_nodes(int x, int y,
                                        const Direction& direction) {
  size_t dir = size_t(direction);
  VTR_ASSERT(dir < rr_node_indices_.size());
  VTR_ASSERT(x >= 0);
  VTR_ASSERT(y >= 0);

  const size_t curr_width = rr_node_indices_[dir].dim_size(0);
  const size_t curr_height = rr_node_indices_[dir].dim_size(1);

  /* Nothing to do when the location is already covered. This is the common
   * case because the rr_graph building function should keep the fast look-up
   * well organized */
  const bool x_covered = x < int(curr_width);
  const bool y_covered = y < int(curr_height);
  if (x_covered && y_covered) {
    return;
  }

  /* Expand the fast look-up: keep the larger size on each dimension */
  rr_node_indices_[dir].resize({std::max(curr_width, size_t(x) + 1),
                                std::max(curr_height, size_t(y) + 1)});
}
/* Clear the look-up of every direction */
void RRClockSpatialLookup::clear() {
  for (size_t idir = 0; idir < rr_node_indices_.size(); ++idir) {
    rr_node_indices_[idir].clear();
  }
}
} // end namespace openfpga

View File

@ -0,0 +1,113 @@
#ifndef RR_CLOCK_SPATIAL_LOOKUP_H
#define RR_CLOCK_SPATIAL_LOOKUP_H
/**
* @file
* @brief This RRClockSpatialLookup class encapsulates
* the node-lookup for clock nodes in a routing resource graph
*
* A data structure built to find the id of a routing resource node
* (rr_node) given information about its physical position and type in a clock
* network. The data structure is mostly needed when building the clock part of
* a routing resource graph
*
* The data structure allows users to
*
* - Update the look-up with new nodes
* - Find the id of a node with given information, e.g., x, y, type etc.
*/
#include "clock_network_fwd.h"
#include "physical_types.h"
#include "rr_graph_fwd.h"
#include "rr_node_types.h"
#include "vtr_geometry.h"
#include "vtr_vector.h"
namespace openfpga { // begin namespace openfpga
class RRClockSpatialLookup {
/* -- Constructors -- */
public:
/* Explicitly define the only way to create an object */
explicit RRClockSpatialLookup();
/* Disable copy constructors and copy assignment operator
* This is to avoid accidental copy because it could be an expensive operation
* considering that the memory footprint of the data structure could be large
* (in the order of gigabytes)
* Using the following syntax, we prohibit accidental 'pass-by-value' which
* can be immediately caught by compiler
*/
RRClockSpatialLookup(const RRClockSpatialLookup&) = delete;
void operator=(const RRClockSpatialLookup&) = delete;
/* -- Accessors -- */
public:
/**
* @brief Returns the index of the specified routing resource node.
*
* @param (x, y) are the grid location within the FPGA
* @param tree specifies the id of the clock tree in a clock network,
* @param lvl specifies the level of the clock node in a clock network
* (typically multi-level),
* @param pin specifies the pin id of the clock node in a bus of clock
* tree (consider multiple clock in a tree)
* @param direction specifies the direction of the clock node; only INC and
* DEC are accepted
*
* @note An invalid id will be returned if the node does not exist
*/
RRNodeId find_node(int x, int y, const ClockTreeId& tree,
const ClockLevelId& lvl, const ClockTreePinId& pin,
const Direction& direction) const;
/* -- Mutators -- */
public:
/**
* @brief Register a node in the fast look-up
*
* @note You must have a valid node id to register the node in the lookup
*
* @param node is the id of the node to be registered
* @param (x, y) are the grid location within the FPGA
* @param clk_tree specifies the id of the clock tree in a clock network,
* @param clk_lvl specifies the level of the clock node in a clock network
(typically multi-level),
* @param clk_pin specifies the pin id of the clock node in a bus of clock
tree (consider multiple clock in a tree)
* @param direction specifies the direction of the clock node; the look-up
stores INC and DEC nodes separately
*
* @note a node added with this call will not create a node in the rr_graph
node list
* You MUST add the node in the rr_graph so that the node is valid
*/
void add_node(RRNodeId node, int x, int y, const ClockTreeId& clk_tree,
const ClockLevelId& clk_lvl, const ClockTreePinId& clk_pin,
const Direction& direction);
/**
* @brief Allocate memory for the lookup with maximum sizes on each dimension
* .. note:: Must run before any other API!
*/
void reserve_nodes(int x, int y, int tree, int lvl, int pin);
/** @brief Clear all the data inside */
void clear();
private: /* Private mutators */
/** @brief Resize the nodes upon needs */
void resize_nodes(int x, int y, const Direction& direction);
/* -- Internal data storage -- */
private:
/* Fast look-up:
* [INC|DEC][0..grid_width][0..grid_height][tree_id][level_id][clock_pin_id]
*/
std::array<vtr::NdMatrix<std::vector<std::vector<std::vector<RRNodeId>>>, 2>,
2>
rr_node_indices_;
};
} // end namespace openfpga
#endif

View File

@ -0,0 +1,30 @@
#ifndef CLOCK_NETWORK_XML_CONSTANTS_H
#define CLOCK_NETWORK_XML_CONSTANTS_H
/* Constants required by XML parser */
/* Root node and its attributes */
constexpr const char* XML_CLOCK_NETWORK_ROOT_NAME = "clock_networks";
constexpr const char* XML_CLOCK_NETWORK_ATTRIBUTE_DEFAULT_SEGMENT =
"default_segment";
constexpr const char* XML_CLOCK_NETWORK_ATTRIBUTE_DEFAULT_SWITCH =
"default_switch";
/* Clock tree node and its attributes */
constexpr const char* XML_CLOCK_TREE_NODE_NAME = "clock_network";
constexpr const char* XML_CLOCK_TREE_ATTRIBUTE_NAME = "name";
constexpr const char* XML_CLOCK_TREE_ATTRIBUTE_WIDTH = "width";
/* Spine node and its attributes */
constexpr const char* XML_CLOCK_SPINE_NODE_NAME = "spine";
constexpr const char* XML_CLOCK_SPINE_ATTRIBUTE_NAME = "name";
constexpr const char* XML_CLOCK_SPINE_ATTRIBUTE_START_X = "start_x";
constexpr const char* XML_CLOCK_SPINE_ATTRIBUTE_START_Y = "start_y";
constexpr const char* XML_CLOCK_SPINE_ATTRIBUTE_END_X = "end_x";
constexpr const char* XML_CLOCK_SPINE_ATTRIBUTE_END_Y = "end_y";
constexpr const char* XML_CLOCK_SPINE_ATTRIBUTE_TYPE = "type";
constexpr const char* XML_CLOCK_SPINE_ATTRIBUTE_DIRECTION = "direction";
/* Switch point node and its attributes */
constexpr const char* XML_CLOCK_SPINE_SWITCH_POINT_NODE_NAME = "switch_point";
constexpr const char* XML_CLOCK_SPINE_SWITCH_POINT_ATTRIBUTE_TAP = "tap";
constexpr const char* XML_CLOCK_SPINE_SWITCH_POINT_ATTRIBUTE_X = "x";
constexpr const char* XML_CLOCK_SPINE_SWITCH_POINT_ATTRIBUTE_Y = "y";
/* Tap nodes and their attributes */
constexpr const char* XML_CLOCK_TREE_TAPS_NODE_NAME = "taps";
constexpr const char* XML_CLOCK_TREE_TAP_NODE_NAME = "tap";
constexpr const char* XML_CLOCK_TREE_TAP_ATTRIBUTE_TILE_PIN = "tile_pin";
#endif

View File

@ -0,0 +1,300 @@
/********************************************************************
* This file includes the top-level function of this library
* which reads an XML of clock network file to the associated
* data structures
*******************************************************************/
#include <string>
/* Headers from pugi XML library */
#include "pugixml.hpp"
#include "pugixml_util.hpp"
/* Headers from vtr util library */
#include "vtr_assert.h"
#include "vtr_time.h"
/* Headers from libopenfpga util library */
#include "openfpga_port_parser.h"
/* Headers from libarchfpga */
#include "arch_error.h"
#include "clock_network_xml_constants.h"
#include "read_xml_clock_network.h"
#include "read_xml_util.h"
namespace openfpga { // Begin namespace openfpga
/********************************************************************
* Parse XML codes of a <tap> to an object of ClockNetwork
*******************************************************************/
/* Read the tile pin attribute of a <tap> node and add it as a tap of the
 * given clock tree. An error is thrown if the tree id is invalid */
static void read_xml_clock_tree_tap(pugi::xml_node& xml_tap,
                                    const pugiutil::loc_data& loc_data,
                                    ClockNetwork& clk_ntwk,
                                    const ClockTreeId& tree_id) {
  const bool tree_exists = clk_ntwk.valid_tree_id(tree_id);
  if (!tree_exists) {
    archfpga_throw(loc_data.filename_c_str(), loc_data.line(xml_tap),
                   "Invalid id of a clock tree!\n");
  }

  const pugi::xml_attribute tile_pin_attr =
    get_attribute(xml_tap, XML_CLOCK_TREE_TAP_ATTRIBUTE_TILE_PIN, loc_data);
  const std::string tile_pin_name = tile_pin_attr.as_string();
  clk_ntwk.add_tree_tap(tree_id, tile_pin_name);
}
/* Walk through the children of a <taps> node: parse every <tap> child and
 * report any child with another name as a bad tag */
static void read_xml_clock_tree_taps(pugi::xml_node& xml_taps,
                                     const pugiutil::loc_data& loc_data,
                                     ClockNetwork& clk_ntwk,
                                     const ClockTreeId& tree_id) {
  const std::string expected_name(XML_CLOCK_TREE_TAP_NODE_NAME);
  for (pugi::xml_node xml_child : xml_taps.children()) {
    /* Error out if the XML child has an invalid name! */
    if (expected_name != xml_child.name()) {
      bad_tag(xml_taps, loc_data, xml_child, {XML_CLOCK_TREE_TAP_NODE_NAME});
      continue;
    }
    read_xml_clock_tree_tap(xml_child, loc_data, clk_ntwk, tree_id);
  }
}
/********************************************************************
 * Parse a <switch_point> XML node and attach the resulting switch
 * point to the given spine of a ClockNetwork
 *******************************************************************/
static void read_xml_clock_spine_switch_point(
  pugi::xml_node& xml_switch_point, const pugiutil::loc_data& loc_data,
  ClockNetwork& clk_ntwk, const ClockSpineId& spine_id) {
  /* The spine which owns this switch point must be valid */
  if (false == clk_ntwk.valid_spine_id(spine_id)) {
    archfpga_throw(loc_data.filename_c_str(), loc_data.line(xml_switch_point),
                   "Invalid id of a clock spine!\n");
  }

  /* Resolve the tapped spine by name. When no spine with this name is found,
   * a new one is created on the fly
   */
  const pugi::xml_attribute tap_attr = get_attribute(
    xml_switch_point, XML_CLOCK_SPINE_SWITCH_POINT_ATTRIBUTE_TAP, loc_data);
  const std::string tapped_name = tap_attr.as_string();
  ClockSpineId tapped_spine = clk_ntwk.find_spine(tapped_name);
  if (!tapped_spine) {
    tapped_spine = clk_ntwk.create_spine(tapped_name);
  }
  if (!clk_ntwk.valid_spine_id(tapped_spine)) {
    archfpga_throw(loc_data.filename_c_str(), loc_data.line(xml_switch_point),
                   "Fail to create a clock spine!\n");
  }

  /* Coordinate of the switch point: x first, then y */
  const int switch_x =
    get_attribute(xml_switch_point, XML_CLOCK_SPINE_SWITCH_POINT_ATTRIBUTE_X,
                  loc_data)
      .as_int();
  const int switch_y =
    get_attribute(xml_switch_point, XML_CLOCK_SPINE_SWITCH_POINT_ATTRIBUTE_Y,
                  loc_data)
      .as_int();
  const vtr::Point<int> switch_coord(switch_x, switch_y);
  clk_ntwk.add_spine_switch_point(spine_id, tapped_spine, switch_coord);
}
/********************************************************************
 * Convert a string to the enumerate of routing resource node type
 * Return NUM_RR_TYPES when the string matches none of the names
 * in rr_node_typename
 *******************************************************************/
static t_rr_type string_to_track_type(const std::string& type_string) {
  for (size_t idx = 0; idx < NUM_RR_TYPES; ++idx) {
    const std::string candidate(rr_node_typename[idx]);
    if (candidate != type_string) {
      continue;
    }
    return static_cast<t_rr_type>(idx);
  }
  /* No match: return the invalid value and let the caller error out */
  return NUM_RR_TYPES;
}
/********************************************************************
 * Convert a string to the enumerate of track direction
 * Return Direction::NUM_DIRECTIONS when the string matches none of
 * the names in DIRECTION_STRING
 *******************************************************************/
static Direction string_to_direction(const std::string& type_string) {
  const size_t num_directions = size_t(Direction::NUM_DIRECTIONS);
  for (size_t idx = 0; idx < num_directions; ++idx) {
    const std::string candidate(DIRECTION_STRING[idx]);
    if (candidate != type_string) {
      continue;
    }
    return static_cast<Direction>(idx);
  }
  /* No match: return the invalid value and let the caller error out */
  return Direction::NUM_DIRECTIONS;
}
/********************************************************************
 * Parse XML codes of a <spine> to an object of ClockNetwork
 *
 * Steps, in order:
 * - find the spine by name in the clock network, or create it
 * - attach the spine to the given clock tree
 * - read the start and end coordinates
 * - only when ClockNetwork::is_vague_coordinate() is true for the spine,
 *   read the track type (CHANX or CHANY) and the direction (INC or DEC)
 * - parse every child node as a switch point
 *
 * An error is thrown through archfpga_throw() when the tree id is invalid,
 * the spine cannot be created, or the track type / direction is not one of
 * the accepted values
 *******************************************************************/
static void read_xml_clock_spine(pugi::xml_node& xml_spine,
const pugiutil::loc_data& loc_data,
ClockNetwork& clk_ntwk,
const ClockTreeId& tree_id) {
/* The parent tree must be valid before any spine is attached to it */
if (!clk_ntwk.valid_tree_id(tree_id)) {
archfpga_throw(loc_data.filename_c_str(), loc_data.line(xml_spine),
"Invalid id of a clock tree!\n");
}
std::string clk_spine_name =
get_attribute(xml_spine, XML_CLOCK_SPINE_ATTRIBUTE_NAME, loc_data)
.as_string();
/* Try to find an existing spine, if not, create one */
/* NOTE(review): a spine may presumably exist already because a switch point
 * parsed earlier referred to it by name; confirm against the file format */
ClockSpineId spine_id = clk_ntwk.find_spine(clk_spine_name);
if (!spine_id) {
spine_id = clk_ntwk.create_spine(clk_spine_name);
}
if (false == clk_ntwk.valid_spine_id(spine_id)) {
archfpga_throw(loc_data.filename_c_str(), loc_data.line(xml_spine),
"Fail to create a clock spine!\n");
}
clk_ntwk.set_spine_parent_tree(spine_id, tree_id);
/* Start point of the spine */
int start_x =
get_attribute(xml_spine, XML_CLOCK_SPINE_ATTRIBUTE_START_X, loc_data)
.as_int();
int start_y =
get_attribute(xml_spine, XML_CLOCK_SPINE_ATTRIBUTE_START_Y, loc_data)
.as_int();
clk_ntwk.set_spine_start_point(spine_id, vtr::Point<int>(start_x, start_y));
/* End point of the spine */
int end_x =
get_attribute(xml_spine, XML_CLOCK_SPINE_ATTRIBUTE_END_X, loc_data)
.as_int();
int end_y =
get_attribute(xml_spine, XML_CLOCK_SPINE_ATTRIBUTE_END_Y, loc_data)
.as_int();
clk_ntwk.set_spine_end_point(spine_id, vtr::Point<int>(end_x, end_y));
/* Track type and direction are read from XML only for a vague coordinate.
 * NOTE(review): presumably 'vague' means the start and end points alone do
 * not determine the orientation of the spine; confirm in ClockNetwork */
if (clk_ntwk.is_vague_coordinate(spine_id)) {
std::string track_type_name =
get_attribute(xml_spine, XML_CLOCK_SPINE_ATTRIBUTE_TYPE, loc_data)
.as_string();
t_rr_type track_type = string_to_track_type(track_type_name);
/* Only routing channel types are accepted for a spine */
if (CHANX != track_type && CHANY != track_type) {
archfpga_throw(loc_data.filename_c_str(), loc_data.line(xml_spine),
"Invalid track type! Expect '%s' or '%s'!\n",
rr_node_typename[CHANX], rr_node_typename[CHANY]);
}
clk_ntwk.set_spine_track_type(spine_id, track_type);
std::string direction_name =
get_attribute(xml_spine, XML_CLOCK_SPINE_ATTRIBUTE_DIRECTION, loc_data)
.as_string();
Direction direction_type = string_to_direction(direction_name);
/* Only INC and DEC are accepted for a spine */
if (Direction::INC != direction_type && Direction::DEC != direction_type) {
archfpga_throw(loc_data.filename_c_str(), loc_data.line(xml_spine),
"Invalid direction type! Expect '%s' or '%s'!\n",
DIRECTION_STRING[size_t(Direction::INC)],
DIRECTION_STRING[size_t(Direction::DEC)]);
}
clk_ntwk.set_spine_direction(spine_id, direction_type);
}
/* Every child of a spine is expected to be a switch point */
for (pugi::xml_node xml_switch_point : xml_spine.children()) {
/* Error out if the XML child has an invalid name! */
if (xml_switch_point.name() ==
std::string(XML_CLOCK_SPINE_SWITCH_POINT_NODE_NAME)) {
read_xml_clock_spine_switch_point(xml_switch_point, loc_data, clk_ntwk,
spine_id);
} else {
bad_tag(xml_switch_point, loc_data, xml_spine,
{XML_CLOCK_SPINE_SWITCH_POINT_NODE_NAME});
}
}
}
/********************************************************************
 * Parse XML codes of a clock tree node to an object of ClockNetwork
 * A new tree is created with the name and width given in XML,
 * then each child node is parsed as either a spine or a list of taps
 *******************************************************************/
static void read_xml_clock_tree(pugi::xml_node& xml_clk_tree,
                                const pugiutil::loc_data& loc_data,
                                ClockNetwork& clk_ntwk) {
  /* Name first, then width */
  const std::string tree_name =
    get_attribute(xml_clk_tree, XML_CLOCK_TREE_ATTRIBUTE_NAME, loc_data)
      .as_string();
  const int tree_width =
    get_attribute(xml_clk_tree, XML_CLOCK_TREE_ATTRIBUTE_WIDTH, loc_data)
      .as_int();

  /* Create a new tree in the storage */
  const ClockTreeId tree_id = clk_ntwk.create_tree(tree_name, tree_width);
  if (!clk_ntwk.valid_tree_id(tree_id)) {
    archfpga_throw(loc_data.filename_c_str(), loc_data.line(xml_clk_tree),
                   "Fail to create a clock tree!\n");
  }

  /* Dispatch each child by its tag name */
  const std::string spine_tag(XML_CLOCK_SPINE_NODE_NAME);
  const std::string taps_tag(XML_CLOCK_TREE_TAPS_NODE_NAME);
  for (pugi::xml_node xml_child : xml_clk_tree.children()) {
    const std::string child_tag(xml_child.name());
    if (child_tag == spine_tag) {
      read_xml_clock_spine(xml_child, loc_data, clk_ntwk, tree_id);
    } else if (child_tag == taps_tag) {
      read_xml_clock_tree_taps(xml_child, loc_data, clk_ntwk, tree_id);
    } else {
      /* Error out if the XML child has an invalid name! */
      bad_tag(xml_child, loc_data, xml_clk_tree,
              {XML_CLOCK_SPINE_NODE_NAME, XML_CLOCK_TREE_TAPS_NODE_NAME});
    }
  }
}
/********************************************************************
 * Top-level parser: read a clock network XML file and return the
 * ClockNetwork object built from it
 * Any pugiutil::XmlError raised while parsing is re-thrown through
 * archfpga_throw() with the file name and line number
 *******************************************************************/
ClockNetwork read_xml_clock_network(const char* fname) {
  vtr::ScopedStartFinishTimer timer("Read clock network");

  ClockNetwork clk_ntwk;

  /* Parse the file */
  pugi::xml_document doc;
  pugiutil::loc_data loc_data;

  try {
    loc_data = pugiutil::load_xml(doc, fname);

    pugi::xml_node xml_root =
      get_single_child(doc, XML_CLOCK_NETWORK_ROOT_NAME, loc_data);

    /* Names of the default segment and the default switch */
    const std::string segment_name =
      get_attribute(xml_root, XML_CLOCK_NETWORK_ATTRIBUTE_DEFAULT_SEGMENT,
                    loc_data)
        .as_string();
    clk_ntwk.set_default_segment_name(segment_name);

    const std::string switch_name =
      get_attribute(xml_root, XML_CLOCK_NETWORK_ATTRIBUTE_DEFAULT_SWITCH,
                    loc_data)
        .as_string();
    clk_ntwk.set_default_switch_name(switch_name);

    /* Count the children of the root and, in total, the children of each
     * of them. These numbers are only used to reserve memory space
     */
    size_t tree_count = 0;
    size_t spine_count = 0;
    for (pugi::xml_node xml_tree : xml_root.children()) {
      ++tree_count;
      spine_count +=
        std::distance(xml_tree.children().begin(), xml_tree.children().end());
    }
    clk_ntwk.reserve_trees(tree_count);
    clk_ntwk.reserve_spines(spine_count);

    /* Parse the trees one by one */
    const std::string tree_tag(XML_CLOCK_TREE_NODE_NAME);
    for (pugi::xml_node xml_tree : xml_root.children()) {
      /* Error out if the XML child has an invalid name! */
      const bool is_tree_node = (tree_tag == xml_tree.name());
      if (!is_tree_node) {
        bad_tag(xml_tree, loc_data, xml_root, {XML_CLOCK_TREE_NODE_NAME});
      }
      read_xml_clock_tree(xml_tree, loc_data, clk_ntwk);
    }
  } catch (pugiutil::XmlError& e) {
    archfpga_throw(fname, e.line(), "%s", e.what());
  }

  return clk_ntwk;
}
} // End of namespace openfpga

View File

@ -0,0 +1,21 @@
#ifndef READ_XML_CLOCK_NETWORK_H
#define READ_XML_CLOCK_NETWORK_H
/********************************************************************
* Include header files that are required by function declaration
*******************************************************************/
#include "clock_network.h"
#include "pugixml.hpp"
#include "pugixml_util.hpp"
/********************************************************************
* Function declaration
*******************************************************************/
namespace openfpga { // Begin namespace openfpga
/* Read the clock network XML file 'fname' and return the parsed
 * ClockNetwork object */
ClockNetwork read_xml_clock_network(const char* fname);
} // End of namespace openfpga
#endif

View File

@ -0,0 +1,197 @@
/********************************************************************
* This file includes functions that output a clock network object to XML
* format
*******************************************************************/
/* Headers from system goes first */
#include <algorithm>
#include <string>
/* Headers from vtr util library */
#include "vtr_assert.h"
#include "vtr_log.h"
#include "vtr_time.h"
/* Headers from openfpga util library */
#include "openfpga_digest.h"
/* Headers from arch openfpga library */
#include "write_xml_utils.h"
/* Headers from clock architecture library */
#include "clock_network_xml_constants.h"
#include "write_xml_clock_network.h"
namespace openfpga { // Begin namespace openfpga
/********************************************************************
 * Output the taps of a clock tree to XML format: one enclosing node
 * which contains one child node per tile pin
 *
 * Always return 0
 *******************************************************************/
static int write_xml_clock_tree_taps(std::fstream& fp,
                                     const ClockNetwork& clk_ntwk,
                                     const ClockTreeId& tree_id) {
  /* Opening tag of the list */
  openfpga::write_tab_to_file(fp, 3);
  fp << "<" << XML_CLOCK_TREE_TAPS_NODE_NAME << ">"
     << "\n";

  /* One self-closing node per tap */
  for (const std::string& tap_pin : clk_ntwk.tree_taps(tree_id)) {
    openfpga::write_tab_to_file(fp, 4);
    fp << "<" << XML_CLOCK_TREE_TAP_NODE_NAME;
    write_xml_attribute(fp, XML_CLOCK_TREE_TAP_ATTRIBUTE_TILE_PIN,
                        tap_pin.c_str());
    fp << "/>\n";
  }

  /* Closing tag of the list */
  openfpga::write_tab_to_file(fp, 3);
  fp << "</" << XML_CLOCK_TREE_TAPS_NODE_NAME << ">"
     << "\n";

  return 0;
}
static int write_xml_clock_spine_switch_point(
std::fstream& fp, const ClockNetwork& clk_ntwk, const ClockSpineId& spine_id,
const ClockSwitchPointId& switch_point_id) {
openfpga::write_tab_to_file(fp, 3);
fp << "<" << XML_CLOCK_SPINE_SWITCH_POINT_NODE_NAME << "";
write_xml_attribute(
fp, XML_CLOCK_SPINE_SWITCH_POINT_ATTRIBUTE_TAP,
clk_ntwk
.spine_name(clk_ntwk.spine_switch_point_tap(spine_id, switch_point_id))
.c_str());
vtr::Point<int> coord =
clk_ntwk.spine_switch_point(spine_id, switch_point_id);
write_xml_attribute(fp, XML_CLOCK_SPINE_SWITCH_POINT_ATTRIBUTE_X, coord.x());
write_xml_attribute(fp, XML_CLOCK_SPINE_SWITCH_POINT_ATTRIBUTE_Y, coord.y());
fp << "/>"
<< "\n";
return 0;
}
static int write_xml_clock_spine(std::fstream& fp, const ClockNetwork& clk_ntwk,
const ClockSpineId& spine_id) {
openfpga::write_tab_to_file(fp, 2);
fp << "<" << XML_CLOCK_SPINE_NODE_NAME << "";
write_xml_attribute(fp, XML_CLOCK_SPINE_ATTRIBUTE_NAME,
clk_ntwk.spine_name(spine_id).c_str());
vtr::Point<int> start_coord = clk_ntwk.spine_start_point(spine_id);
write_xml_attribute(fp, XML_CLOCK_SPINE_ATTRIBUTE_START_X, start_coord.x());
write_xml_attribute(fp, XML_CLOCK_SPINE_ATTRIBUTE_START_Y, start_coord.y());
vtr::Point<int> end_coord = clk_ntwk.spine_end_point(spine_id);
write_xml_attribute(fp, XML_CLOCK_SPINE_ATTRIBUTE_END_X, end_coord.x());
write_xml_attribute(fp, XML_CLOCK_SPINE_ATTRIBUTE_END_Y, end_coord.y());
if (clk_ntwk.is_vague_coordinate(spine_id)) {
write_xml_attribute(fp, XML_CLOCK_SPINE_ATTRIBUTE_TYPE,
rr_node_typename[clk_ntwk.spine_track_type(spine_id)]);
write_xml_attribute(
fp, XML_CLOCK_SPINE_ATTRIBUTE_DIRECTION,
DIRECTION_STRING[size_t(clk_ntwk.spine_direction(spine_id))]);
}
fp << ">"
<< "\n";
for (const ClockSwitchPointId& switch_point_id :
clk_ntwk.spine_switch_points(spine_id)) {
write_xml_clock_spine_switch_point(fp, clk_ntwk, spine_id, switch_point_id);
}
openfpga::write_tab_to_file(fp, 2);
fp << "</" << XML_CLOCK_SPINE_NODE_NAME << "";
fp << ">"
<< "\n";
return 0;
}
/********************************************************************
 * A writer to output a clock tree to XML format
 * The tree node carries the name and width of the tree, and contains
 * all the spines of the tree followed by the list of taps
 *
 * Return 0 if successful
 * Return 1 if the tree id is invalid in the clock network
 * Return 2 if the file stream is invalid
 *******************************************************************/
static int write_xml_clock_tree(std::fstream& fp, const ClockNetwork& clk_ntwk,
                                const ClockTreeId& tree_id) {
  /* Validate the file stream */
  if (false == openfpga::valid_file_stream(fp)) {
    return 2;
  }

  /* Validate the tree id BEFORE anything is written, so that an invalid id
   * does not leave a dangling, unterminated opening tag in the output file
   */
  if (false == clk_ntwk.valid_tree_id(tree_id)) {
    return 1;
  }

  /* Opening tag with the attributes of the tree */
  openfpga::write_tab_to_file(fp, 1);
  fp << "<" << XML_CLOCK_TREE_NODE_NAME << "";
  write_xml_attribute(fp, XML_CLOCK_TREE_ATTRIBUTE_NAME,
                      clk_ntwk.tree_name(tree_id).c_str());
  write_xml_attribute(fp, XML_CLOCK_TREE_ATTRIBUTE_WIDTH,
                      clk_ntwk.tree_width(tree_id));
  fp << ">"
     << "\n";

  /* Output all the spines under this tree */
  for (const ClockSpineId& spine_id : clk_ntwk.spines(tree_id)) {
    write_xml_clock_spine(fp, clk_ntwk, spine_id);
  }

  /* Output all the taps of this tree */
  write_xml_clock_tree_taps(fp, clk_ntwk, tree_id);

  /* Closing tag */
  openfpga::write_tab_to_file(fp, 1);
  fp << "</" << XML_CLOCK_TREE_NODE_NAME << "";
  fp << ">"
     << "\n";

  return 0;
}
/********************************************************************
 * A writer to output a clock network object to XML format
 * The root node carries the names of the default segment and the
 * default switch, and contains one child node per clock tree
 *
 * Return 0 if successful
 * Otherwise, return the error code of the first clock tree that
 * fails to be written. Nothing more is written after the failure
 *******************************************************************/
int write_xml_clock_network(const char* fname, const ClockNetwork& clk_ntwk) {
  vtr::ScopedStartFinishTimer timer("Write Clock Network");

  /* Create the file stream and open it in write mode */
  std::fstream fp;
  const std::string file_path(fname);
  fp.open(file_path, std::fstream::out | std::fstream::trunc);

  /* Validate the file stream */
  openfpga::check_file_stream(fname, fp);

  /* Start the root node */
  fp << "<" << XML_CLOCK_NETWORK_ROOT_NAME;
  write_xml_attribute(fp, XML_CLOCK_NETWORK_ATTRIBUTE_DEFAULT_SEGMENT,
                      clk_ntwk.default_segment_name().c_str());
  write_xml_attribute(fp, XML_CLOCK_NETWORK_ATTRIBUTE_DEFAULT_SWITCH,
                      clk_ntwk.default_switch_name().c_str());
  fp << ">\n";

  /* Write each clock tree; stop at the first error */
  for (const ClockTreeId& tree_id : clk_ntwk.trees()) {
    const int tree_status = write_xml_clock_tree(fp, clk_ntwk, tree_id);
    if (0 != tree_status) {
      return tree_status;
    }
  }

  /* Finish writing the root node */
  fp << "</" << XML_CLOCK_NETWORK_ROOT_NAME << ">\n";

  /* Close the file stream */
  fp.close();

  return 0;
}
} // End of namespace openfpga

View File

@ -0,0 +1,20 @@
#ifndef WRITE_XML_CLOCK_NETWORK_H
#define WRITE_XML_CLOCK_NETWORK_H
/********************************************************************
* Include header files that are required by function declaration
*******************************************************************/
#include <fstream>
#include "clock_network.h"
/********************************************************************
* Function declaration
*******************************************************************/
namespace openfpga { // Begin namespace openfpga
/* Write the clock network 'clk_ntwk' to the XML file 'fname'.
 * Return 0 if successful, or the non-zero error code of the first
 * clock tree that fails to be written */
int write_xml_clock_network(const char* fname, const ClockNetwork& clk_ntwk);
} // End of namespace openfpga
#endif

View File

@ -0,0 +1,65 @@
#include "clock_network_utils.h"
#include "command_exit_codes.h"
#include "vtr_assert.h"
#include "vtr_time.h"
namespace openfpga { // Begin namespace openfpga
/********************************************************************
 * Find the segment of a routing resource graph whose name matches
 * the default segment name of a given clock network, and bind its id
 * to the clock network
 *
 * Return CMD_EXEC_SUCCESS when a segment is found and bound
 * Return CMD_EXEC_FATAL_ERROR when no segment has the expected name
 *******************************************************************/
static int link_clock_network_rr_segments(ClockNetwork& clk_ntwk,
                                          const RRGraphView& rr_graph) {
  const std::string target_name = clk_ntwk.default_segment_name();

  for (size_t iseg = 0; iseg < rr_graph.num_rr_segments(); ++iseg) {
    const RRSegmentId seg_id(iseg);
    if (rr_graph.rr_segments(seg_id).name != target_name) {
      continue;
    }
    /* The first match wins */
    clk_ntwk.set_default_segment(seg_id);
    return CMD_EXEC_SUCCESS;
  }

  return CMD_EXEC_FATAL_ERROR;
}
/********************************************************************
 * Find the switch of a routing resource graph whose name matches
 * the default switch name of a given clock network, and bind its id
 * to the clock network
 *
 * Return CMD_EXEC_SUCCESS when a switch is found and bound
 * Return CMD_EXEC_FATAL_ERROR when no switch has the expected name
 *******************************************************************/
static int link_clock_network_rr_switches(ClockNetwork& clk_ntwk,
                                          const RRGraphView& rr_graph) {
  const std::string target_name = clk_ntwk.default_switch_name();

  for (size_t isw = 0; isw < rr_graph.num_rr_switches(); ++isw) {
    const RRSwitchId switch_id(isw);
    const std::string candidate_name(rr_graph.rr_switch_inf(switch_id).name);
    if (candidate_name != target_name) {
      continue;
    }
    /* The first match wins */
    clk_ntwk.set_default_switch(switch_id);
    return CMD_EXEC_SUCCESS;
  }

  return CMD_EXEC_FATAL_ERROR;
}
/********************************************************************
 * Link a clock network to a routing resource graph: bind the default
 * segment first, then the default switch
 *
 * Return CMD_EXEC_FATAL_ERROR as soon as one of the two steps fails
 * Otherwise, return the status of the switch binding
 *******************************************************************/
int link_clock_network_rr_graph(ClockNetwork& clk_ntwk,
                                const RRGraphView& rr_graph) {
  /* Switches are not touched when segments cannot be linked */
  const int segment_status =
    link_clock_network_rr_segments(clk_ntwk, rr_graph);
  if (CMD_EXEC_FATAL_ERROR == segment_status) {
    return segment_status;
  }

  return link_clock_network_rr_switches(clk_ntwk, rr_graph);
}
} // End of namespace openfpga

View File

@ -0,0 +1,21 @@
#ifndef CLOCK_NETWORK_UTILS_H
#define CLOCK_NETWORK_UTILS_H
/********************************************************************
* Include header files that are required by function declaration
*******************************************************************/
#include "clock_network.h"
#include "rr_graph_view.h"
/********************************************************************
* Function declaration
*******************************************************************/
namespace openfpga { // Begin namespace openfpga
/* Bind the default segment and the default switch of 'clk_ntwk' to the
 * segment and switch of 'rr_graph' which have the same names.
 * Return CMD_EXEC_SUCCESS if both are found, otherwise CMD_EXEC_FATAL_ERROR */
int link_clock_network_rr_graph(ClockNetwork& clk_ntwk,
const RRGraphView& rr_graph);
} // End of namespace openfpga
#endif

View File

@ -0,0 +1,43 @@
/********************************************************************
* Unit test functions to validate the correctness of
* 1. parser of data structures
* 2. writer of data structures
*******************************************************************/
/* Headers from vtrutils */
#include "vtr_assert.h"
#include "vtr_log.h"
/* Headers from readarchopenfpga */
#include "read_xml_clock_network.h"
#include "write_xml_clock_network.h"
/********************************************************************
 * Entry point of the unit test
 * Usage: <program> <input_clock_network_xml> [<output_clock_network_xml>]
 *
 * Return 0 if the clock network is parsed, linked and (optionally)
 * written successfully
 * Return 1 if the clock network is invalid or cannot be written
 *******************************************************************/
int main(int argc, const char** argv) {
  /* Ensure we have only one or two argument */
  VTR_ASSERT((2 == argc) || (3 == argc));

  /* Parse the clock network from an XML file */
  openfpga::ClockNetwork clk_ntwk = openfpga::read_xml_clock_network(argv[1]);
  VTR_LOG("Parsed %lu clock tree(s) from XML into clock network.\n",
          clk_ntwk.trees().size());

  /* Validate before write out */
  if (!clk_ntwk.link()) {
    VTR_LOG_ERROR("Invalid clock network.\n");
    return 1;
  }
  VTR_ASSERT(clk_ntwk.is_valid());

  for (auto tree_id : clk_ntwk.trees()) {
    VTR_LOG("Max. depth of the clock tree '%lu' is %d\n", size_t(tree_id),
            clk_ntwk.tree_depth(tree_id));
  }

  /* Output the clock network to an XML file
   * This is optional only used when there is a second argument
   */
  if (3 <= argc) {
    /* Do not report success when the writer returns an error code */
    if (0 != openfpga::write_xml_clock_network(argv[2], clk_ntwk)) {
      VTR_LOG_ERROR("Fail to write the clock network to an XML file: %s.\n",
                    argv[2]);
      return 1;
    }
    VTR_LOG("Write the clock network to an XML file: %s.\n", argv[2]);
  }

  return 0;
}

View File

@ -31,6 +31,7 @@ set_target_properties(libopenfpga PROPERTIES PREFIX "") #Avoid extra 'lib' prefi
#Specify link-time dependancies
target_link_libraries(libopenfpga
libclkarchopenfpga
libarchopenfpga
libopenfpgashell
libopenfpgautil

View File

@ -96,7 +96,8 @@ static RRChan build_one_rr_chan(const DeviceContext& vpr_device_ctx,
*/
static RRGSB build_rr_gsb(const DeviceContext& vpr_device_ctx,
const vtr::Point<size_t>& gsb_range,
const vtr::Point<size_t>& gsb_coord) {
const vtr::Point<size_t>& gsb_coord,
const bool& include_clock) {
/* Create an object to return */
RRGSB rr_gsb;
@ -370,7 +371,7 @@ static RRGSB build_rr_gsb(const DeviceContext& vpr_device_ctx,
/* Collect IPIN rr_nodes*/
temp_ipin_rr_nodes =
find_rr_graph_grid_nodes(vpr_device_ctx.rr_graph, vpr_device_ctx.grid, ix,
iy, IPIN, ipin_rr_node_grid_side);
iy, IPIN, ipin_rr_node_grid_side, include_clock);
/* Fill the ipin nodes of RRGSB */
for (const RRNodeId& inode : temp_ipin_rr_nodes) {
/* Skip those has no configurable outgoing, they should NOT appear in the
@ -405,6 +406,7 @@ static RRGSB build_rr_gsb(const DeviceContext& vpr_device_ctx,
*******************************************************************/
void annotate_device_rr_gsb(const DeviceContext& vpr_device_ctx,
DeviceRRGSB& device_rr_gsb,
const bool& include_clock,
const bool& verbose_output) {
vtr::ScopedStartFinishTimer timer(
"Build General Switch Block(GSB) annotation on top of routing resource "
@ -431,7 +433,7 @@ void annotate_device_rr_gsb(const DeviceContext& vpr_device_ctx,
build_rr_gsb(vpr_device_ctx,
vtr::Point<size_t>(vpr_device_ctx.grid.width() - 2,
vpr_device_ctx.grid.height() - 2),
vtr::Point<size_t>(ix, iy));
vtr::Point<size_t>(ix, iy), include_clock);
/* Add to device_rr_gsb */
vtr::Point<size_t> gsb_coordinate = rr_gsb.get_sb_coordinate();

View File

@ -17,6 +17,7 @@ namespace openfpga {
void annotate_device_rr_gsb(const DeviceContext& vpr_device_ctx,
DeviceRRGSB& device_rr_gsb,
const bool& include_clock,
const bool& verbose_output);
void sort_device_rr_gsb_chan_node_in_edges(const RRGraphView& rr_graph,

View File

@ -0,0 +1,701 @@
#include "append_clock_rr_graph.h"
#include "command_exit_codes.h"
#include "openfpga_physical_tile_utils.h"
#include "rr_graph_builder_utils.h"
#include "rr_graph_cost.h"
#include "vtr_assert.h"
#include "vtr_geometry.h"
#include "vtr_log.h"
#include "vtr_time.h"
/* begin namespace openfpga */
namespace openfpga {
/********************************************************************
 * Estimate the number of clock nodes to be added to one routing
 * channel of a given type, for a given clock network
 * The estimation sums up the number of tracks reported by the clock
 * network for each level of each clock tree
 * - the clock nodes are paired in INC and DEC directions
 * - the number of clock nodes depend on the width of clock tree (number of
 *   clock signals)
 * - Note that some layer only need CHANX or CHANY clock nodes since clock
 *   nodes cannot make turns in the same layer. For instance
 *   - Layer 0: CHANX
 *   - Layer 1: CHANY
 *   - Layer 2: CHANX
 *******************************************************************/
static size_t estimate_clock_rr_graph_num_chan_nodes(
  const ClockNetwork& clk_ntwk, const t_rr_type& chan_type) {
  size_t total_tracks = 0;

  for (auto tree_id : clk_ntwk.trees()) {
    /* Accumulate over all the levels of this tree */
    size_t tree_tracks = 0;
    for (auto level_id : clk_ntwk.levels(tree_id)) {
      tree_tracks += clk_ntwk.num_tracks(tree_id, level_id, chan_type);
    }
    total_tracks += tree_tracks;
  }

  return total_tracks;
}
/********************************************************************
* Estimate the number of clock nodes to be added.
* Clock nodes are required by X-direction and Y-direction connection blocks
* which are in the type of CHANX and CHANY
* Note that switch blocks do not require any new nodes but new edges
*******************************************************************/
static size_t estimate_clock_rr_graph_num_nodes(const DeviceGrid& grids,
const bool& through_channel,
const ClockNetwork& clk_ntwk) {
size_t num_nodes = 0;
/* Check the number of CHANX nodes required */
for (size_t iy = 0; iy < grids.height() - 1; ++iy) {
for (size_t ix = 1; ix < grids.width() - 1; ++ix) {
vtr::Point<size_t> chanx_coord(ix, iy);
/* Bypass if the routing channel does not exist when through channels are
* not allowed */
if ((false == through_channel) &&
(false == is_chanx_exist(grids, chanx_coord))) {
continue;
}
/* Estimate the routing tracks required by clock routing only */
num_nodes += estimate_clock_rr_graph_num_chan_nodes(clk_ntwk, CHANX);
}
}
for (size_t ix = 0; ix < grids.width() - 1; ++ix) {
for (size_t iy = 1; iy < grids.height() - 1; ++iy) {
vtr::Point<size_t> chany_coord(ix, iy);
/* Bypass if the routing channel does not exist when through channel are
* not allowed */
if ((false == through_channel) &&
(false == is_chany_exist(grids, chany_coord))) {
continue;
}
/* Estimate the routing tracks required by clock routing only */
num_nodes += estimate_clock_rr_graph_num_chan_nodes(clk_ntwk, CHANY);
}
}
return num_nodes;
}
/********************************************************************
 * Add clock nodes to a routing resource graph
 * For each tree and level of the tree, add a number of clock nodes
 * with direction, ptc and coordinates etc.
 *
 * This function works on a single routing channel, located at 'chan_coord'
 * and of type 'chan_type'
 * - ptc numbers of the new nodes start from the number of channel nodes
 *   that the node lookup of 'rr_graph_view' already finds at the coordinate
 * - nodes are created in pairs: one in INC direction, one in DEC direction
 * - each node has a capacity of 1, and a cost index which is
 *   'cost_index_offset' plus the id of the default segment of the clock
 *   network
 * - each node is registered in 'clk_rr_lookup'
 *******************************************************************/
static void add_rr_graph_block_clock_nodes(
RRGraphBuilder& rr_graph_builder, RRClockSpatialLookup& clk_rr_lookup,
const RRGraphView& rr_graph_view, const ClockNetwork& clk_ntwk,
const vtr::Point<size_t> chan_coord, const t_rr_type& chan_type,
const int& cost_index_offset, const bool& verbose) {
/* Number of nodes which already exist in this channel */
size_t orig_chan_width =
rr_graph_view.node_lookup()
.find_channel_nodes(chan_coord.x(), chan_coord.y(), chan_type)
.size();
/* Clock nodes are appended after the existing nodes. The counter keeps
 * increasing across trees and levels */
size_t curr_node_ptc = orig_chan_width;
for (auto itree : clk_ntwk.trees()) {
for (auto ilvl : clk_ntwk.levels(itree)) {
/* As we want to keep uni-directional wires, clock routing tracks have to
* be in pairs. Therefore, always add clock routing tracks in pair, even
* one of them is not required
*/
/* Total number of pins over both directions */
size_t num_pins = 0;
/* Set when at least one direction has no pin at all */
bool require_complementary_pins = false;
for (auto node_dir : {Direction::INC, Direction::DEC}) {
if (0 == clk_ntwk.pins(itree, ilvl, chan_type, node_dir).size()) {
require_complementary_pins = true;
}
num_pins += clk_ntwk.pins(itree, ilvl, chan_type, node_dir).size();
}
/* Double the count so that 'num_pins / 2' below is the pin count of the
 * only direction which has pins. It stays 0 if no direction has pins */
if (require_complementary_pins) {
num_pins = 2 * num_pins;
}
/* NOTE(review): 'num_pins / 2' pairs cover every pin only if both
 * directions have the same pin count, or one of them has none. Presumably
 * ClockNetwork::pins() guarantees this; confirm */
for (size_t ipin = 0; ipin < num_pins / 2; ++ipin) {
for (auto node_dir : {Direction::INC, Direction::DEC}) {
RRNodeId clk_node = rr_graph_builder.create_node(
chan_coord.x(), chan_coord.y(), chan_type, curr_node_ptc);
rr_graph_builder.set_node_direction(clk_node, node_dir);
rr_graph_builder.set_node_capacity(clk_node, 1);
/* set cost_index using segment id */
rr_graph_builder.set_node_cost_index(
clk_node, RRIndexedDataId(cost_index_offset +
size_t(clk_ntwk.default_segment())));
/* FIXME: need to set rc_index and cost_index when building the graph
* in VTR */
/* register the node to a dedicated lookup */
clk_rr_lookup.add_node(clk_node, chan_coord.x(), chan_coord.y(),
itree, ilvl, ClockTreePinId(ipin), node_dir);
VTR_LOGV(verbose,
"Added node '%lu' to clock node lookup (x='%lu' y='%lu' "
"tree='%lu' level='%lu' pin='%lu' direction='%s')\n",
size_t(clk_node), chan_coord.x(), chan_coord.y(),
size_t(itree), size_t(ilvl), ipin,
DIRECTION_STRING[size_t(node_dir)]);
/* Update ptc count and go to next */
curr_node_ptc++;
}
}
}
}
}
/********************************************************************
 * Add clock nodes one by one to the routing resource graph.
 * Assign node-level attributes properly and register in dedicated lookup
 *
 * The routing channels are walked through in the same order and with the
 * same bypass condition as in estimate_clock_rr_graph_num_nodes():
 * X-direction channels first, then Y-direction channels. A channel is
 * bypassed only when 'through_channel' is false and the channel does not
 * exist in the device grid
 *******************************************************************/
static void add_rr_graph_clock_nodes(RRGraphBuilder& rr_graph_builder,
RRClockSpatialLookup& clk_rr_lookup,
const RRGraphView& rr_graph_view,
const DeviceGrid& grids,
const bool& through_channel,
const ClockNetwork& clk_ntwk,
const bool& verbose) {
/* Pre-allocate memory: Must do otherwise data will be messed up! */
clk_rr_lookup.reserve_nodes(grids.width(), grids.height(),
clk_ntwk.num_trees(), clk_ntwk.max_tree_depth(),
clk_ntwk.max_tree_width());
/* Add X-direction clock nodes */
for (size_t iy = 0; iy < grids.height() - 1; ++iy) {
for (size_t ix = 1; ix < grids.width() - 1; ++ix) {
vtr::Point<size_t> chanx_coord(ix, iy);
/* Bypass if the routing channel does not exist when through channels are
* not allowed */
if ((false == through_channel) &&
(false == is_chanx_exist(grids, chanx_coord))) {
continue;
}
/* CHANX nodes use cost indices starting from CHANX_COST_INDEX_START */
add_rr_graph_block_clock_nodes(rr_graph_builder, clk_rr_lookup,
rr_graph_view, clk_ntwk, chanx_coord,
CHANX, CHANX_COST_INDEX_START, verbose);
/* NOTE(review): sanity check hard-coded to the node at (1, 0) of tree 0,
 * level 0, pin 0 in INC direction. It is repeated at every iteration and
 * requires such a node to exist; it looks like a leftover from debugging -
 * confirm that it is still wanted */
VTR_ASSERT(rr_graph_view.valid_node(
clk_rr_lookup.find_node(1, 0, ClockTreeId(0), ClockLevelId(0),
ClockTreePinId(0), Direction::INC)));
}
}
VTR_ASSERT(rr_graph_view.valid_node(clk_rr_lookup.find_node(
1, 0, ClockTreeId(0), ClockLevelId(0), ClockTreePinId(0), Direction::INC)));
/* Add Y-direction clock nodes */
for (size_t ix = 0; ix < grids.width() - 1; ++ix) {
for (size_t iy = 1; iy < grids.height() - 1; ++iy) {
vtr::Point<size_t> chany_coord(ix, iy);
/* Bypass if the routing channel does not exist when through channel are
* not allowed */
if ((false == through_channel) &&
(false == is_chany_exist(grids, chany_coord))) {
continue;
}
/* The cost index offset of CHANY nodes is shifted by the number of
 * segments in the routing resource graph */
add_rr_graph_block_clock_nodes(
rr_graph_builder, clk_rr_lookup, rr_graph_view, clk_ntwk, chany_coord,
CHANY, CHANX_COST_INDEX_START + rr_graph_view.num_rr_segments(),
verbose);
/* Same hard-coded sanity check as in the X-direction loop: it verifies
 * that adding CHANY nodes does not invalidate the node at (1, 0) */
VTR_ASSERT(rr_graph_view.valid_node(
clk_rr_lookup.find_node(1, 0, ClockTreeId(0), ClockLevelId(0),
ClockTreePinId(0), Direction::INC)));
}
}
VTR_ASSERT(rr_graph_view.valid_node(clk_rr_lookup.find_node(
1, 0, ClockTreeId(0), ClockLevelId(0), ClockTreePinId(0), Direction::INC)));
}
/********************************************************************
* Find the destination CHANX|CHANY nodes for a driver clock node in a given
*connection block There are two types of destination nodes:
* - Straight connection where the driver clock node connects to another clock
*node in the same direction and at the same level as well as clock index For
*example
*
* clk0_lvl0_chanx[1][1] -->------------->---> clk0_lvl0_chanx[2][1]
*
* - Turning connections where the driver clock node makes turns to connect
*other clock nodes at 1-level up and in the same clock index
*
*
* clk0_lvl1_chany[1][2]
* ^
* |
* clk0_lvl0_chanx[1][1] -->---------+
* |
* v
* clk0_lvl1_chany[1][1]
*
* Coordinate system:
*
* +----------+----------+------------+
* | Grid | CBy | Grid |
* | [x][y+1] | [x][y+1] | [x+1][y+1] |
* +----------+----------+------------+
* | CBx | SB | CBx |
* | [x][y] | [x][y] | [x+1][y] |
* +----------+----------+------------+
* | Grid | CBy | Grid |
* | [x][y] | [x][y] | [x+1][y] |
* +----------+----------+------------+
*
*******************************************************************/
/* Collect the clock track nodes that a clock track at chan_coord may drive:
 *  - the neighbouring track straight ahead (same channel type, same level,
 *    same direction)
 *  - when the tree has a level after clk_lvl, the tracks reached by a left and
 *    by a right turn (other channel type, next level)
 * A candidate is returned only if the clock lookup yields a node which is
 * valid in the routing resource graph. All candidates share clk_tree and
 * clk_pin with the driver.
 */
static std::vector<RRNodeId> find_clock_track2track_node(
  const RRGraphView& rr_graph_view, const ClockNetwork& clk_ntwk,
  const RRClockSpatialLookup& clk_rr_lookup, const t_rr_type& chan_type,
  const vtr::Point<size_t>& chan_coord, const ClockTreeId& clk_tree,
  const ClockLevelId& clk_lvl, const ClockTreePinId& clk_pin,
  const Direction& direction) {
  const bool on_chanx = (chan_type == CHANX);
  const bool heading_inc = (direction == Direction::INC);
  /* Only CHANX/CHANY tracks heading INC/DEC are expected */
  if (!on_chanx) {
    VTR_ASSERT(chan_type == CHANY);
  }
  if (!heading_inc) {
    VTR_ASSERT(direction == Direction::DEC);
  }

  std::vector<RRNodeId> fanout_nodes;

  /* Query the clock lookup and keep the node when it exists in the graph */
  auto keep_if_valid = [&](const vtr::Point<size_t>& coord,
                           const ClockLevelId& lvl, const Direction& dir,
                           const t_rr_type& expected_type) {
    RRNodeId candidate = clk_rr_lookup.find_node(coord.x(), coord.y(),
                                                 clk_tree, lvl, clk_pin, dir);
    if (rr_graph_view.valid_node(candidate)) {
      VTR_ASSERT(expected_type == rr_graph_view.node_type(candidate));
      fanout_nodes.push_back(candidate);
    }
  };

  /* Straight ahead: one step along the channel axis, same level */
  vtr::Point<size_t> straight_coord = chan_coord;
  if (on_chanx) {
    straight_coord.set_x(heading_inc ? chan_coord.x() + 1
                                     : chan_coord.x() - 1);
  } else {
    straight_coord.set_y(heading_inc ? chan_coord.y() + 1
                                     : chan_coord.y() - 1);
  }
  keep_if_valid(straight_coord, clk_lvl, direction, chan_type);

  /* Turns lead to the next level; without one there is nothing more to add */
  const ClockLevelId next_clk_lvl = clk_ntwk.next_level(clk_lvl);
  if (!clk_ntwk.valid_level_id(clk_tree, next_clk_lvl)) {
    return fanout_nodes;
  }
  /* A turn always lands on the other type of channel */
  const t_rr_type turn_chan_type = on_chanx ? CHANY : CHANX;

  /* Left turn
   *   CHANX INC         CHANX DEC        CHANY INC        CHANY DEC
   *       ^               +<--             <--+               |
   *       |               |                   ^               v
   *    -->+               v                   |               +-->
   */
  vtr::Point<size_t> left_coord = chan_coord;
  Direction left_dir = direction;
  if (on_chanx) {
    if (heading_inc) {
      left_coord.set_y(chan_coord.y() + 1);
    } else {
      left_coord.set_x(chan_coord.x() - 1);
    }
  } else if (heading_inc) {
    left_dir = Direction::DEC;
  } else {
    left_dir = Direction::INC;
    left_coord.set_x(chan_coord.x() + 1);
    left_coord.set_y(chan_coord.y() - 1);
  }
  keep_if_valid(left_coord, next_clk_lvl, left_dir, turn_chan_type);

  /* Right turn
   *   CHANX INC         CHANX DEC        CHANY INC        CHANY DEC
   *    -->+               ^                +-->               |
   *       |               |                ^                  v
   *       v               +<--             |               <--+
   */
  vtr::Point<size_t> right_coord = chan_coord;
  Direction right_dir = direction;
  if (on_chanx) {
    if (heading_inc) {
      right_dir = Direction::DEC;
    } else {
      right_dir = Direction::INC;
      right_coord.set_x(chan_coord.x() - 1);
      right_coord.set_y(chan_coord.y() + 1);
    }
  } else if (heading_inc) {
    right_coord.set_x(chan_coord.x() + 1);
  } else {
    right_coord.set_y(chan_coord.y() - 1);
  }
  keep_if_valid(right_coord, next_clk_lvl, right_dir, turn_chan_type);

  return fanout_nodes;
}
/********************************************************************
* Try to find an IPIN of a grid which satisfy the requirement of clock pins
* that has been defined in clock network. If the IPIN does exist in a
* routing resource graph, add it to the node list
*******************************************************************/
/* For every flattened tap pin name of (clk_tree, clk_pin), resolve the name to
 * a pin of the tile located at grid_coord and, when the matching IPIN node on
 * pin_side exists in the routing resource graph, append it to des_nodes.
 */
static void try_find_and_add_clock_track2ipin_node(
  std::vector<RRNodeId>& des_nodes, const DeviceGrid& grids,
  const RRGraphView& rr_graph_view, const vtr::Point<size_t>& grid_coord,
  const e_side& pin_side, const ClockNetwork& clk_ntwk,
  const ClockTreeId& clk_tree, const ClockTreePinId& clk_pin) {
  t_physical_tile_type_ptr tile_type =
    grids[grid_coord.x()][grid_coord.y()].type;

  for (const std::string& tap_name :
       clk_ntwk.tree_flatten_taps(clk_tree, clk_pin)) {
    /* A tap name may look like 'io[5:5].a2f[0]' */
    int tile_pin_idx = find_physical_tile_pin_index(tile_type, tap_name);
    /* An index equal to the pin count of the tile is skipped
     * (presumably the 'not found' value of the helper - to be confirmed) */
    if (tile_pin_idx != tile_type->num_pins) {
      RRNodeId ipin_node = rr_graph_view.node_lookup().find_node(
        grid_coord.x(), grid_coord.y(), IPIN, tile_pin_idx, pin_side);
      if (rr_graph_view.valid_node(ipin_node)) {
        des_nodes.push_back(ipin_node);
      }
    }
  }
}
/********************************************************************
* Find the destination IPIN nodes for a driver clock node in a given connection
* block.
* For CHANX, the IPIN nodes are typically on the BOTTOM and TOP sides of
* adjacent grids. For CHANY, the IPIN nodes are typically on the LEFT and RIGHT
* sides of adjacent grids. For example
*
* Grid[1][2]
* ^
* |
* clk0_lvl2_chanx[1][1] -->---------+
* |
* v
* Grid[1][1]
*
* Coordinate system:
*
* +----------+----------+------------+
* | Grid | CBy | Grid |
* | [x][y+1] | [x][y+1] | [x+1][y+1] |
* +----------+----------+------------+
* | CBx | SB | CBx |
* | [x][y] | [x][y] | [x+1][y] |
* +----------+----------+------------+
* | Grid | CBy | Grid |
* | [x][y] | [x][y] | [x+1][y] |
* +----------+----------+------------+
*******************************************************************/
/* Collect the clock IPIN nodes of the two grids adjacent to a clock track:
 *  - CHANX[x][y]: BOTTOM side of grid [x][y+1], then TOP side of grid [x][y]
 *  - CHANY[x][y]: LEFT side of grid [x+1][y], then RIGHT side of grid [x][y]
 * The nodes are returned in that order.
 */
static std::vector<RRNodeId> find_clock_track2ipin_node(
  const DeviceGrid& grids, const RRGraphView& rr_graph_view,
  const t_rr_type& chan_type, const vtr::Point<size_t>& chan_coord,
  const ClockNetwork& clk_ntwk, const ClockTreeId& clk_tree,
  const ClockTreePinId& clk_pin) {
  const bool on_chanx = (chan_type == CHANX);
  if (!on_chanx) {
    VTR_ASSERT(chan_type == CHANY);
  }

  /* Grid on the upper (CHANX) or right (CHANY) side of the channel */
  const vtr::Point<size_t> far_grid_coord =
    on_chanx ? vtr::Point<size_t>(chan_coord.x(), chan_coord.y() + 1)
             : vtr::Point<size_t>(chan_coord.x() + 1, chan_coord.y());
  const e_side far_pin_side = on_chanx ? BOTTOM : LEFT;

  /* Grid sharing the coordinate of the channel */
  const vtr::Point<size_t> near_grid_coord(chan_coord.x(), chan_coord.y());
  const e_side near_pin_side = on_chanx ? TOP : RIGHT;

  std::vector<RRNodeId> ipin_nodes;
  try_find_and_add_clock_track2ipin_node(ipin_nodes, grids, rr_graph_view,
                                         far_grid_coord, far_pin_side,
                                         clk_ntwk, clk_tree, clk_pin);
  try_find_and_add_clock_track2ipin_node(ipin_nodes, grids, rr_graph_view,
                                         near_grid_coord, near_pin_side,
                                         clk_ntwk, clk_tree, clk_pin);
  return ipin_nodes;
}
/********************************************************************
* Add edges for the clock nodes in a given connection block
*******************************************************************/
/* Create the outgoing edges of every clock track located in one routing
 * channel (chan_type at chan_coord):
 *  - edges to other clock tracks (straight connections and turns)
 *  - for tracks at the last level of a tree, edges to clock IPINs of grids
 * All edges use the default switch of the clock network. The number of edges
 * created here is added to num_edges_to_create.
 */
static void add_rr_graph_block_clock_edges(
  RRGraphBuilder& rr_graph_builder, size_t& num_edges_to_create,
  const RRClockSpatialLookup& clk_rr_lookup, const RRGraphView& rr_graph_view,
  const DeviceGrid& grids, const ClockNetwork& clk_ntwk,
  const vtr::Point<size_t>& chan_coord, const t_rr_type& chan_type,
  const bool& verbose) {
  size_t num_new_edges = 0;

  for (auto itree : clk_ntwk.trees()) {
    for (auto ilvl : clk_ntwk.levels(itree)) {
      /* Clock tracks are uni-directional and always come in INC/DEC pairs.
       * When only one direction is requested by the clock network, the
       * complementary tracks still exist, so the track count is doubled.
       */
      const size_t num_inc_pins =
        clk_ntwk.pins(itree, ilvl, chan_type, Direction::INC).size();
      const size_t num_dec_pins =
        clk_ntwk.pins(itree, ilvl, chan_type, Direction::DEC).size();
      size_t num_tracks = num_inc_pins + num_dec_pins;
      if ((0 == num_inc_pins) || (0 == num_dec_pins)) {
        num_tracks *= 2;
      }
      const size_t num_track_pairs = num_tracks / 2;

      for (size_t ipin = 0; ipin < num_track_pairs; ++ipin) {
        const ClockTreePinId pin_id(ipin);
        for (auto node_dir : {Direction::INC, Direction::DEC}) {
          /* The driver track must have been registered in the lookup */
          RRNodeId src_node = clk_rr_lookup.find_node(
            chan_coord.x(), chan_coord.y(), itree, ilvl, pin_id, node_dir);
          VTR_LOGV(verbose,
                   "Try to find node '%lu' from clock node lookup (x='%lu' "
                   "y='%lu' tree='%lu' level='%lu' pin='%lu' direction='%s')\n",
                   size_t(src_node), chan_coord.x(), chan_coord.y(),
                   size_t(itree), size_t(ilvl), size_t(ipin),
                   DIRECTION_STRING[size_t(node_dir)]);
          VTR_ASSERT(rr_graph_view.valid_node(src_node));

          /* Fan-out to other clock tracks */
          const std::vector<RRNodeId> track_fanout =
            find_clock_track2track_node(rr_graph_view, clk_ntwk, clk_rr_lookup,
                                        chan_type, chan_coord, itree, ilvl,
                                        pin_id, node_dir);
          for (const RRNodeId& des_node : track_fanout) {
            VTR_ASSERT(rr_graph_view.valid_node(des_node));
            rr_graph_builder.create_edge(src_node, des_node,
                                         clk_ntwk.default_switch());
          }
          num_new_edges += track_fanout.size();
          VTR_LOGV(verbose, "\tWill add %lu edges to other clock nodes\n",
                   track_fanout.size());

          /* Only tracks at the last level of the tree drive clock IPINs */
          if (clk_ntwk.is_last_level(itree, ilvl)) {
            const std::vector<RRNodeId> ipin_fanout =
              find_clock_track2ipin_node(grids, rr_graph_view, chan_type,
                                         chan_coord, clk_ntwk, itree, pin_id);
            for (const RRNodeId& des_node : ipin_fanout) {
              VTR_ASSERT(rr_graph_view.valid_node(des_node));
              rr_graph_builder.create_edge(src_node, des_node,
                                           clk_ntwk.default_switch());
            }
            num_new_edges += ipin_fanout.size();
            VTR_LOGV(verbose, "\tWill add %lu edges to other IPIN\n",
                     ipin_fanout.size());
          }
        }
      }
    }
  }

  /* Commit the edges created above to the builder */
  rr_graph_builder.build_edges(true);
  num_edges_to_create += num_new_edges;
}
/********************************************************************
* Add edges to interconnect clock nodes
* Walk through the routing tracks in each connection block (driver nodes)
* and add edge to their fan-out clock nodes
* Note that
* - clock nodes at the same level of a clock tree can only be connected
*   straight (same direction, same level)
* - clock nodes can only drive clock nodes that belong to the same clock index
*   (a clock tree may contain multiple clocks)
* - clock nodes can drive clock nodes at the next level of the same clock
*   tree only by making a turn (a straight connection across levels is not
*   allowed); the clock index is kept
* For example
*
* clk0_lvl1_chany[1][2]
* ^
* |
* clk0_lvl0_chanx[1][1] -->---------+--->---> clk0_lvl0_chanx[2][1]
* |
* v
* clk0_lvl1_chany[1][1]
*******************************************************************/
/* Visit every routing channel of the device and create the outgoing edges of
 * the clock tracks located there. X-direction channels are visited first
 * (row by row), then Y-direction channels (column by column). When through
 * channels are not allowed, locations without a routing channel are skipped.
 */
static void add_rr_graph_clock_edges(
  RRGraphBuilder& rr_graph_builder, size_t& num_edges_to_create,
  const RRClockSpatialLookup& clk_rr_lookup, const RRGraphView& rr_graph_view,
  const DeviceGrid& grids, const bool& through_channel,
  const ClockNetwork& clk_ntwk, const bool& verbose) {
  const size_t x_bound = grids.width() - 1;
  const size_t y_bound = grids.height() - 1;

  /* Edges driven by X-direction clock routing tracks */
  for (size_t iy = 0; iy < y_bound; ++iy) {
    for (size_t ix = 1; ix < x_bound; ++ix) {
      const vtr::Point<size_t> chanx_coord(ix, iy);
      if (through_channel || is_chanx_exist(grids, chanx_coord)) {
        add_rr_graph_block_clock_edges(rr_graph_builder, num_edges_to_create,
                                       clk_rr_lookup, rr_graph_view, grids,
                                       clk_ntwk, chanx_coord, CHANX, verbose);
      }
    }
  }

  /* Edges driven by Y-direction clock routing tracks */
  for (size_t ix = 0; ix < x_bound; ++ix) {
    for (size_t iy = 1; iy < y_bound; ++iy) {
      const vtr::Point<size_t> chany_coord(ix, iy);
      if (through_channel || is_chany_exist(grids, chany_coord)) {
        add_rr_graph_block_clock_edges(rr_graph_builder, num_edges_to_create,
                                       clk_rr_lookup, rr_graph_view, grids,
                                       clk_ntwk, chany_coord, CHANY, verbose);
      }
    }
  }
}
/********************************************************************
* Append a programmable clock network to an existing routing resource graph
* This function will do the following jobs:
* - Estimate the number of clock nodes and pre-allocate memory
* - Add clock nodes
* - Build edges between clock nodes
* - Sanity checks
*******************************************************************/
/* Append the programmable clock network clk_ntwk to the routing resource
 * graph held by vpr_device_ctx.
 *
 * Steps:
 *  - estimate the number of clock nodes and reserve storage for them
 *  - add the clock nodes (clk_rr_lookup is handed to the node-creation step)
 *  - add the edges driven by the clock nodes
 *  - rebuild fan-in, edge partitioning and incoming edges of the graph
 *
 * Returns CMD_EXEC_SUCCESS when the network was appended or when there is no
 * clock tree to append, CMD_EXEC_FATAL_ERROR when the clock network contains
 * more than one clock tree (not supported yet).
 */
int append_clock_rr_graph(DeviceContext& vpr_device_ctx,
                          RRClockSpatialLookup& clk_rr_lookup,
                          const ClockNetwork& clk_ntwk, const bool& verbose) {
  vtr::ScopedStartFinishTimer timer(
    "Appending programmable clock network to routing resource graph");

  /* Skip if there is no clock tree */
  if (clk_ntwk.num_trees() == 0) {
    VTR_LOG(
      "Skip due to 0 clock trees.\nDouble check your clock architecture "
      "definition if this is unexpected\n");
    return CMD_EXEC_SUCCESS;
  }

  /* Report any clock structure we do not support yet! */
  if (clk_ntwk.num_trees() > 1) {
    VTR_LOG(
      "Currently only support 1 clock tree in programmable clock "
      "architecture\nPlease update your clock architecture definition\n");
    return CMD_EXEC_FATAL_ERROR;
  }

  /* Estimate the number of nodes and pre-allocate */
  size_t orig_num_nodes = vpr_device_ctx.rr_graph.num_nodes();
  size_t num_clock_nodes = estimate_clock_rr_graph_num_nodes(
    vpr_device_ctx.grid, vpr_device_ctx.arch->through_channel, clk_ntwk);

  /* Growth of the graph in percent. The ratio has to be computed in floating
   * point: an integer division would truncate it to 0 whenever fewer clock
   * nodes than original nodes are added. An empty original graph is reported
   * as 0% rather than dividing by zero.
   */
  const double clock_node_percent =
    (0 == orig_num_nodes) ? 0.
                          : (100. * static_cast<double>(num_clock_nodes)) /
                              static_cast<double>(orig_num_nodes);

  vpr_device_ctx.rr_graph_builder.unlock_storage();
  vpr_device_ctx.rr_graph_builder.reserve_nodes(num_clock_nodes +
                                                orig_num_nodes);
  /* '%%' prints a literal percent sign */
  VTR_LOGV(verbose,
           "Estimate %lu clock nodes (+%.5f%%) to be added to routing "
           "resource graph.\n",
           num_clock_nodes, clock_node_percent);

  /* Add clock nodes */
  add_rr_graph_clock_nodes(vpr_device_ctx.rr_graph_builder, clk_rr_lookup,
                           vpr_device_ctx.rr_graph, vpr_device_ctx.grid,
                           vpr_device_ctx.arch->through_channel, clk_ntwk,
                           verbose);
  VTR_LOGV(verbose,
           "Added %lu clock nodes to routing "
           "resource graph.\n",
           vpr_device_ctx.rr_graph.num_nodes() - orig_num_nodes);
  /* The estimation must match the number of nodes actually created */
  VTR_ASSERT(num_clock_nodes + orig_num_nodes ==
             vpr_device_ctx.rr_graph.num_nodes());

  /* Add edges between clock nodes. The lookup is only read from here on */
  size_t num_clock_edges = 0;
  add_rr_graph_clock_edges(
    vpr_device_ctx.rr_graph_builder, num_clock_edges,
    static_cast<const RRClockSpatialLookup&>(clk_rr_lookup),
    vpr_device_ctx.rr_graph, vpr_device_ctx.grid,
    vpr_device_ctx.arch->through_channel, clk_ntwk, verbose);
  VTR_LOGV(verbose,
           "Added %lu clock edges to routing "
           "resource graph.\n",
           num_clock_edges);

  /* TODO: Sanity checks */

  /* Refresh the data derived from the edges now that new ones exist */
  VTR_LOGV(verbose, "Initializing fan-in of nodes\n");
  vpr_device_ctx.rr_graph_builder.init_fan_in();
  VTR_LOGV(verbose, "Apply edge partitioning\n");
  vpr_device_ctx.rr_graph_builder.partition_edges();
  VTR_LOGV(verbose, "Building incoming edges\n");
  vpr_device_ctx.rr_graph_builder.build_in_edges();

  /* Report number of added clock nodes and edges */
  VTR_LOG(
    "Appended %lu clock nodes (+%.2f%%) and %lu clock edges to routing "
    "resource graph.\n",
    num_clock_nodes, clock_node_percent, num_clock_edges);

  return CMD_EXEC_SUCCESS;
}
} /* end namespace openfpga */

View File

@ -0,0 +1,24 @@
#ifndef APPEND_CLOCK_RR_GRAPH_H
#define APPEND_CLOCK_RR_GRAPH_H
/********************************************************************
* Include header files that are required by function declaration
*******************************************************************/
#include "clock_network.h"
#include "rr_clock_spatial_lookup.h"
#include "vpr_context.h"
/********************************************************************
* Function declaration
*******************************************************************/
/* begin namespace openfpga */
namespace openfpga {
/* Append the programmable clock network described by clk_ntwk to the routing
 * resource graph held by vpr_device_ctx. clk_rr_lookup is handed to the
 * node-creation step and then read when the clock edges are built.
 * Returns a command exit code: CMD_EXEC_SUCCESS, or CMD_EXEC_FATAL_ERROR when
 * the clock network is not supported. */
int append_clock_rr_graph(DeviceContext& vpr_device_ctx,
RRClockSpatialLookup& clk_rr_lookup,
const ClockNetwork& clk_ntwk, const bool& verbose);
} /* end namespace openfpga */
#endif

View File

@ -43,6 +43,26 @@ const RRGSB& DeviceRRGSB::get_gsb(const size_t& x, const size_t& y) const {
return get_gsb(coordinate);
}
/* Get the GSB which contains a connection block, given the type and the
 * coordinate of the connection block */
const RRGSB& DeviceRRGSB::get_gsb_by_cb_coordinate(
  const t_rr_type& cb_type, const vtr::Point<size_t>& coordinate) const {
  /* Only X- and Y-direction connection blocks are accepted */
  if ((CHANX != cb_type) && (CHANY != cb_type)) {
    VTR_LOG("Invalid type of connection block!\n");
    exit(1);
  }

  /* TODO move the coordinate conversion to RRGSB */
  /* A CHANX connection block shares the coordinate of its GSB, while a CHANY
   * connection block maps to the GSB one row below */
  const size_t gsb_y =
    (CHANY == cb_type) ? coordinate.y() - 1 : coordinate.y();
  const vtr::Point<size_t> gsb_coord(coordinate.x(), gsb_y);

  VTR_ASSERT(validate_coordinate(gsb_coord));
  return rr_gsb_[gsb_coord.x()][gsb_coord.y()];
}
/* get the number of unique mirrors of switch blocks */
size_t DeviceRRGSB::get_num_cb_unique_module(const t_rr_type& cb_type) const {
VTR_ASSERT(validate_cb_type(cb_type));

View File

@ -38,6 +38,9 @@ class DeviceRRGSB {
const; /* Get a rr switch block in the array with a coordinate */
const RRGSB& get_gsb(const size_t& x, const size_t& y)
const; /* Get a rr switch block in the array with a coordinate */
/* Get a gsb using its connection block coordinate */
const RRGSB& get_gsb_by_cb_coordinate(
const t_rr_type& cb_type, const vtr::Point<size_t>& coordinate) const;
size_t get_num_gsb_unique_module()
const; /* get the number of unique mirrors of GSB */
size_t get_num_sb_unique_module()

View File

@ -0,0 +1,268 @@
#include "route_clock_rr_graph.h"
#include "command_exit_codes.h"
#include "openfpga_atom_netlist_utils.h"
#include "vtr_assert.h"
#include "vtr_geometry.h"
#include "vtr_log.h"
#include "vtr_time.h"
/* begin namespace openfpga */
namespace openfpga {
/********************************************************************
* Build the lookup between clock name and pins and clock tree pins
* This is required for routing clock nets in each clock tree
* Special: when there is only 1 clock and 1 clock tree (width = 1), the mapping
* is straightforward
* FIXME: This part works only for a clock network which contains only 1 clock
* tree!
*******************************************************************/
static int build_clock_tree_net_map(
std::map<ClockTreePinId, ClusterNetId>& tree2clk_pin_map,
const ClusteredNetlist& cluster_nlist, const PinConstraints& pin_constraints,
const std::vector<std::string>& clk_names, const ClockNetwork& clk_ntwk,
const ClockTreeId clk_tree, const bool& verbose) {
/* Find the pin id for each clock name, error out if there is any mismatch */
if (clk_names.size() == 1 && clk_ntwk.tree_width(clk_tree) == 1) {
/* Find cluster net id */
ClusterNetId clk_net = cluster_nlist.find_net(clk_names[0]);
if (!cluster_nlist.valid_net_id(clk_net)) {
VTR_LOG_ERROR("Invalid clock name '%s'! Cannot found from netlists!\n",
clk_names[0].c_str());
return CMD_EXEC_FATAL_ERROR;
}
tree2clk_pin_map[ClockTreePinId(0)] = clk_net;
} else {
for (std::string clk_name : clk_names) {
/* Find the pin information that the net should be mapped to */
BasicPort tree_pin = pin_constraints.net_pin(clk_name);
if (!tree_pin.is_valid()) {
VTR_LOG_ERROR(
"Invalid tree pin for clock '%s'! Clock name may not be valid "
"(mismatched with netlists)!\n",
clk_name.c_str());
return CMD_EXEC_FATAL_ERROR;
}
if (tree_pin.get_width() != 1) {
VTR_LOG_ERROR(
"Invalid tree pin %s[%lu:%lu] for clock '%s'! Clock pin must have "
"only a width of 1!\n",
tree_pin.get_name().c_str(), tree_pin.get_lsb(), tree_pin.get_msb(),
clk_name.c_str());
return CMD_EXEC_FATAL_ERROR;
}
if (tree_pin.get_lsb() >= clk_ntwk.tree_width(clk_tree)) {
VTR_LOG_ERROR(
"Invalid tree pin %s[%lu] is out of range of clock tree size '%lu'\n",
tree_pin.get_name().c_str(), tree_pin.get_lsb(),
clk_ntwk.tree_width(clk_tree));
return CMD_EXEC_FATAL_ERROR;
}
/* Find cluster net id */
ClusterNetId clk_net = cluster_nlist.find_net(clk_name);
if (!cluster_nlist.valid_net_id(clk_net)) {
VTR_LOG_ERROR("Invalid clock name '%s'! Cannot found from netlists!\n",
clk_name.c_str());
return CMD_EXEC_FATAL_ERROR;
}
/* Register the pin mapping */
tree2clk_pin_map[ClockTreePinId(tree_pin.get_lsb())] = clk_net;
}
}
VTR_LOGV(verbose, "Build a pin map for %lu clock nets and pins.\n",
tree2clk_pin_map.size());
return CMD_EXEC_SUCCESS;
}
/********************************************************************
* Route a clock tree on an existing routing resource graph
* The strategy is to route spine one by one
* - route the spine from the starting point to the ending point
* - route the spine-to-spine switching points
* - route the spine-to-IPIN connections (only for the last level)
*******************************************************************/
/* Annotate the routing of one clock tree on an existing routing resource
 * graph. For each spine of the tree and each pin of the tree:
 *  - backbone: every node of the spine is driven by the previous node of the
 *    same spine
 *  - switch points: the start node of each tapped spine is driven by the node
 *    of this spine located at the switch point
 *  - taps (last level only): every IPIN in the fan-out of a spine node is
 *    driven by that spine node
 * Whenever tree2clk_pin_map holds a net for the pin, both ends of an annotated
 * connection are also mapped to that net; pins without a net only get the
 * previous-node annotation.
 *
 * Always returns CMD_EXEC_SUCCESS; missing clock nodes trigger assertions.
 */
static int route_clock_tree_rr_graph(
  VprRoutingAnnotation& vpr_routing_annotation, const RRGraphView& rr_graph,
  const RRClockSpatialLookup& clk_rr_lookup,
  const std::map<ClockTreePinId, ClusterNetId>& tree2clk_pin_map,
  const ClockNetwork& clk_ntwk, const ClockTreeId& clk_tree,
  const bool& verbose) {
  for (auto ispine : clk_ntwk.spines(clk_tree)) {
    VTR_LOGV(verbose, "Routing spine '%s'...\n",
             clk_ntwk.spine_name(ispine).c_str());

    /* These only depend on the spine: query them once, not once per pin */
    const std::vector<vtr::Point<int>> spine_coords =
      clk_ntwk.spine_coordinates(ispine);
    const Direction spine_direction = clk_ntwk.spine_direction(ispine);
    const ClockLevelId spine_level = clk_ntwk.spine_level(ispine);

    for (auto ipin : clk_ntwk.pins(clk_tree)) {
      /* It could happen that there is no net mapped to some clock pin; in
       * that case the net mapping is skipped. Look the net up only once */
      const auto net_iter = tree2clk_pin_map.find(ipin);
      const bool has_net = (net_iter != tree2clk_pin_map.end());

      /* Find the node of this spine at a coordinate, for the current pin */
      auto find_spine_node = [&](const vtr::Point<int>& coord) {
        return clk_rr_lookup.find_node(coord.x(), coord.y(), clk_tree,
                                       spine_level, ipin, spine_direction);
      };

      /* Record that des_node is driven by src_node and, when a net is mapped
       * to the pin, that both nodes carry this net */
      auto annotate_connection = [&](const RRNodeId& src_node,
                                     const RRNodeId& des_node) {
        VTR_ASSERT(rr_graph.valid_node(src_node));
        VTR_ASSERT(rr_graph.valid_node(des_node));
        vpr_routing_annotation.set_rr_node_prev_node(rr_graph, des_node,
                                                     src_node);
        if (has_net) {
          vpr_routing_annotation.set_rr_node_net(src_node, net_iter->second);
          vpr_routing_annotation.set_rr_node_net(des_node, net_iter->second);
        }
      };

      /* Route the spine from starting point to ending point.
       * 'icoord + 1 < size' keeps the loop empty for a spine without
       * coordinates, where 'size - 1' would wrap around. The backbone nodes
       * get the same net annotation as switch points and taps, so that the
       * interior nodes of a spine are not left without a net.
       */
      VTR_LOGV(verbose, "Routing backbone of spine '%s'...\n",
               clk_ntwk.spine_name(ispine).c_str());
      for (size_t icoord = 0; icoord + 1 < spine_coords.size(); ++icoord) {
        annotate_connection(find_spine_node(spine_coords[icoord]),
                            find_spine_node(spine_coords[icoord + 1]));
      }

      /* Route the spine-to-spine switching points */
      VTR_LOGV(verbose, "Routing switch points of spine '%s'...\n",
               clk_ntwk.spine_name(ispine).c_str());
      for (ClockSwitchPointId switch_point_id :
           clk_ntwk.spine_switch_points(ispine)) {
        const vtr::Point<int> src_coord =
          clk_ntwk.spine_switch_point(ispine, switch_point_id);
        const ClockSpineId des_spine =
          clk_ntwk.spine_switch_point_tap(ispine, switch_point_id);
        const vtr::Point<int> des_coord =
          clk_ntwk.spine_start_point(des_spine);
        /* The destination is the start node of the tapped spine, which has
         * its own level and direction */
        const RRNodeId des_node = clk_rr_lookup.find_node(
          des_coord.x(), des_coord.y(), clk_tree,
          clk_ntwk.spine_level(des_spine), ipin,
          clk_ntwk.spine_direction(des_spine));
        annotate_connection(find_spine_node(src_coord), des_node);
      }

      /* Route the spine-to-IPIN connections (only for the last level) */
      if (clk_ntwk.is_last_level(ispine)) {
        VTR_LOGV(verbose, "Routing clock taps of spine '%s'...\n",
                 clk_ntwk.spine_name(ispine).c_str());
        /* Connect to any fan-out node which is IPIN */
        for (const vtr::Point<int>& src_coord : spine_coords) {
          const RRNodeId src_node = find_spine_node(src_coord);
          /* Check the node before its edges are walked */
          VTR_ASSERT(rr_graph.valid_node(src_node));
          for (RREdgeId edge : rr_graph.edge_range(src_node)) {
            const RRNodeId des_node = rr_graph.edge_sink_node(edge);
            if (rr_graph.node_type(des_node) == IPIN) {
              annotate_connection(src_node, des_node);
            }
          }
        }
      }
    }
  }
  return CMD_EXEC_SUCCESS;
}
/********************************************************************
* Route a clock network based on an existing routing resource graph
* This function will do the following jobs:
* - configure the routing annotation w.r.t. the clock node connections
* - quick check to ensure routing is valid
*******************************************************************/
/* Route the programmable clock network on an existing routing resource graph
 * by configuring the routing annotation of the clock nodes.
 *
 * Nothing is done (CMD_EXEC_SUCCESS) when the clock network has no tree or
 * when the netlist has no clock. CMD_EXEC_FATAL_ERROR is returned when
 *  - the clock network has more than one tree (not supported yet)
 *  - the netlist has several clocks but no pin constraints are given
 *  - building the pin map or routing a tree fails
 */
int route_clock_rr_graph(VprRoutingAnnotation& vpr_routing_annotation,
                         const DeviceContext& vpr_device_ctx,
                         const AtomContext& atom_ctx,
                         const ClusteredNetlist& cluster_nlist,
                         const VprNetlistAnnotation& netlist_annotation,
                         const RRClockSpatialLookup& clk_rr_lookup,
                         const ClockNetwork& clk_ntwk,
                         const PinConstraints& pin_constraints,
                         const bool& verbose) {
  vtr::ScopedStartFinishTimer timer(
    "Route programmable clock network based on routing resource graph");

  /* Nothing to route without a clock tree */
  if (clk_ntwk.num_trees() == 0) {
    VTR_LOG(
      "Skip due to 0 clock trees.\nDouble check your clock architecture "
      "definition if this is unexpected\n");
    return CMD_EXEC_SUCCESS;
  }
  /* Multiple clock trees are not supported yet */
  if (1 < clk_ntwk.num_trees()) {
    VTR_LOG(
      "Currently only support 1 clock tree in programmable clock "
      "architecture\nPlease update your clock architecture definition\n");
    return CMD_EXEC_FATAL_ERROR;
  }

  /* Clocks of the design: several clocks can only be told apart through pin
   * constraints */
  const std::vector<std::string> clock_net_names =
    find_atom_netlist_clock_port_names(atom_ctx.nlist, netlist_annotation);
  if (clock_net_names.empty()) {
    VTR_LOG(
      "Skip due to 0 clocks found from netlist\nDouble check your HDL design "
      "if this is unexpected\n");
    return CMD_EXEC_SUCCESS;
  }
  if (pin_constraints.empty() && 1 < clock_net_names.size()) {
    VTR_LOG(
      "There is %lu clock nets (more than 1). Require pin constraints to be "
      "specified\n",
      clock_net_names.size());
    return CMD_EXEC_FATAL_ERROR;
  }

  /* Route the clock trees one by one */
  for (auto itree : clk_ntwk.trees()) {
    VTR_LOGV(verbose, "Build clock name to clock tree '%s' pin mapping...\n",
             clk_ntwk.tree_name(itree).c_str());
    std::map<ClockTreePinId, ClusterNetId> tree2clk_pin_map;
    if (CMD_EXEC_FATAL_ERROR ==
        build_clock_tree_net_map(tree2clk_pin_map, cluster_nlist,
                                 pin_constraints, clock_net_names, clk_ntwk,
                                 itree, verbose)) {
      return CMD_EXEC_FATAL_ERROR;
    }

    VTR_LOGV(verbose, "Routing clock tree '%s'...\n",
             clk_ntwk.tree_name(itree).c_str());
    if (CMD_EXEC_FATAL_ERROR ==
        route_clock_tree_rr_graph(vpr_routing_annotation,
                                  vpr_device_ctx.rr_graph, clk_rr_lookup,
                                  tree2clk_pin_map, clk_ntwk, itree,
                                  verbose)) {
      return CMD_EXEC_FATAL_ERROR;
    }
    VTR_LOGV(verbose, "Done\n");
  }

  /* TODO: Sanity checks */
  return CMD_EXEC_SUCCESS;
}
} /* end namespace openfpga */

View File

@ -0,0 +1,33 @@
#ifndef ROUTE_CLOCK_RR_GRAPH_H
#define ROUTE_CLOCK_RR_GRAPH_H
/********************************************************************
* Include header files that are required by function declaration
*******************************************************************/
#include "clock_network.h"
#include "pin_constraints.h"
#include "rr_clock_spatial_lookup.h"
#include "vpr_context.h"
#include "vpr_netlist_annotation.h"
#include "vpr_routing_annotation.h"
/********************************************************************
* Function declaration
*******************************************************************/
/* begin namespace openfpga */
namespace openfpga {
/* Route the clock network clk_ntwk on the routing resource graph of
 * vpr_device_ctx by configuring vpr_routing_annotation for the clock nodes
 * found through clk_rr_lookup. The clocks of the design come from the atom
 * netlist; pin_constraints are required when there is more than one clock.
 * Returns a command exit code: CMD_EXEC_SUCCESS (including the cases where
 * there is nothing to route) or CMD_EXEC_FATAL_ERROR. */
int route_clock_rr_graph(VprRoutingAnnotation& vpr_routing_annotation,
const DeviceContext& vpr_device_ctx,
const AtomContext& atom_ctx,
const ClusteredNetlist& cluster_nlist,
const VprNetlistAnnotation& netlist_annotation,
const RRClockSpatialLookup& clk_rr_lookup,
const ClockNetwork& clk_ntwk,
const PinConstraints& pin_constraints,
const bool& verbose);
} /* end namespace openfpga */
#endif

View File

@ -5,6 +5,7 @@
#include "bitstream_manager.h"
#include "bitstream_setting.h"
#include "clock_network.h"
#include "decoder_library.h"
#include "device_rr_gsb.h"
#include "fabric_bitstream.h"
@ -16,6 +17,7 @@
#include "netlist_manager.h"
#include "openfpga_arch.h"
#include "openfpga_flow_manager.h"
#include "rr_clock_spatial_lookup.h"
#include "simulation_setting.h"
#include "tile_direct.h"
#include "vpr_bitstream_annotation.h"
@ -61,6 +63,10 @@ class OpenfpgaContext : public Context {
const openfpga::BitstreamSetting& bitstream_setting() const {
return bitstream_setting_;
}
/* Read-only access to the clock network (clock_arch_) kept in this context */
const openfpga::ClockNetwork& clock_arch() const { return clock_arch_; }
/* Read-only access to the clock node lookup (clock_rr_lookup_) kept in this
 * context */
const openfpga::RRClockSpatialLookup& clock_rr_lookup() const {
return clock_rr_lookup_;
}
const openfpga::VprDeviceAnnotation& vpr_device_annotation() const {
return vpr_device_annotation_;
}
@ -116,6 +122,10 @@ class OpenfpgaContext : public Context {
openfpga::BitstreamSetting& mutable_bitstream_setting() {
return bitstream_setting_;
}
/* Writable access to the clock network (clock_arch_) kept in this context */
openfpga::ClockNetwork& mutable_clock_arch() { return clock_arch_; }
/* Writable access to the clock node lookup (clock_rr_lookup_) kept in this
 * context */
openfpga::RRClockSpatialLookup& mutable_clock_rr_lookup() {
return clock_rr_lookup_;
}
openfpga::VprDeviceAnnotation& mutable_vpr_device_annotation() {
return vpr_device_annotation_;
}
@ -165,6 +175,8 @@ class OpenfpgaContext : public Context {
openfpga::Arch arch_;
openfpga::SimulationSetting sim_setting_;
openfpga::BitstreamSetting bitstream_setting_;
openfpga::ClockNetwork clock_arch_;
openfpga::RRClockSpatialLookup clock_rr_lookup_;
/* Annotation to pb_type of VPR */
openfpga::VprDeviceAnnotation vpr_device_annotation_;

View File

@ -12,6 +12,7 @@
#include "annotate_placement.h"
#include "annotate_rr_graph.h"
#include "annotate_simulation_setting.h"
#include "append_clock_rr_graph.h"
#include "build_tile_direct.h"
#include "command.h"
#include "command_context.h"
@ -22,6 +23,8 @@
#include "openfpga_rr_graph_support.h"
#include "pb_type_utils.h"
#include "read_activity.h"
#include "read_xml_pin_constraints.h"
#include "route_clock_rr_graph.h"
#include "vpr_device_annotation.h"
#include "vtr_assert.h"
#include "vtr_log.h"
@ -106,9 +109,10 @@ int link_arch_template(T& openfpga_ctx, const Command& cmd,
VTR_LOG("Built %ld incoming edges for routing resource graph\n",
g_vpr_ctx.device().rr_graph.in_edges_count());
VTR_ASSERT(g_vpr_ctx.device().rr_graph.validate_in_edges());
annotate_device_rr_gsb(g_vpr_ctx.device(),
openfpga_ctx.mutable_device_rr_gsb(),
cmd_context.option_enable(cmd, opt_verbose));
annotate_device_rr_gsb(
g_vpr_ctx.device(), openfpga_ctx.mutable_device_rr_gsb(),
!openfpga_ctx.clock_arch().empty(), /* FIXME: consider to be more robust! */
cmd_context.option_enable(cmd, opt_verbose));
if (true == cmd_context.option_enable(cmd, opt_sort_edge)) {
sort_device_rr_gsb_chan_node_in_edges(
@ -182,6 +186,54 @@ int link_arch_template(T& openfpga_ctx, const Command& cmd,
return CMD_EXEC_SUCCESS;
}
/********************************************************************
* Top-level function to append a clock network to VPR's routing resource graph,
*including:
* - Routing tracks dedicated to clock network
* - Programmable switches to enable reconfigurability of clock network
* - Adding virtual sources for clock signals
*******************************************************************/
template <class T>
int append_clock_rr_graph_template(T& openfpga_ctx, const Command& cmd,
const CommandContext& cmd_context) {
vtr::ScopedStartFinishTimer timer(
"Append clock network to routing resource graph");
CommandOptionId opt_verbose = cmd.option("verbose");
return append_clock_rr_graph(
g_vpr_ctx.mutable_device(), openfpga_ctx.mutable_clock_rr_lookup(),
openfpga_ctx.clock_arch(), cmd_context.option_enable(cmd, opt_verbose));
}
/********************************************************************
* Top-level function to route a clock network based on the clock spines
* defined in clock architecture
*******************************************************************/
template <class T>
int route_clock_rr_graph_template(T& openfpga_ctx, const Command& cmd,
const CommandContext& cmd_context) {
vtr::ScopedStartFinishTimer timer("Route clock routing resource graph");
/* add an option '--pin_constraints_file in short '-pcf' */
CommandOptionId opt_pcf = cmd.option("pin_constraints_file");
CommandOptionId opt_verbose = cmd.option("verbose");
/* If pin constraints are enabled by command options, read the file */
PinConstraints pin_constraints;
if (true == cmd_context.option_enable(cmd, opt_pcf)) {
pin_constraints =
read_xml_pin_constraints(cmd_context.option_value(cmd, opt_pcf).c_str());
}
return route_clock_rr_graph(
openfpga_ctx.mutable_vpr_routing_annotation(), g_vpr_ctx.device(),
g_vpr_ctx.atom(), g_vpr_ctx.clustering().clb_nlist,
openfpga_ctx.vpr_netlist_annotation(), openfpga_ctx.clock_rr_lookup(),
openfpga_ctx.clock_arch(), pin_constraints,
cmd_context.option_enable(cmd, opt_verbose));
}
} /* end namespace openfpga */
#endif

View File

@ -7,12 +7,15 @@
#include "check_circuit_library.h"
#include "check_tile_annotation.h"
#include "circuit_library_utils.h"
#include "clock_network_utils.h"
#include "command.h"
#include "command_context.h"
#include "command_exit_codes.h"
#include "globals.h"
#include "read_xml_clock_network.h"
#include "read_xml_openfpga_arch.h"
#include "vtr_log.h"
#include "write_xml_clock_network.h"
#include "write_xml_openfpga_arch.h"
/* begin namespace openfpga */
@ -209,6 +212,74 @@ int write_bitstream_setting_template(const T& openfpga_context,
return CMD_EXEC_SUCCESS;
}
/********************************************************************
 * Top-level function to read an OpenFPGA clock architecture file
 * The XML file is parsed by read_xml_clock_network() and the result is
 * stored in the clock architecture of the OpenFPGA context
 *
 * After parsing, the function
 * - builds the internal links of the clock network
 * - links the clock network to the routing resource graph of VPR
 * - validates the clock network, and errors out if it is not valid
 *
 * The command will accept an option '--file' which is the clock
 * architecture file provided by users
 *
 * Return CMD_EXEC_SUCCESS on success, or CMD_EXEC_FATAL_ERROR when the
 * clock architecture fails the validity check
 *******************************************************************/
template <class T>
int read_openfpga_clock_arch_template(T& openfpga_context, const Command& cmd,
                                      const CommandContext& cmd_context) {
  /* Check the option '--file' is enabled or not
   * Actually, it must be enabled as the shell interface will check
   * before reaching this function
   */
  CommandOptionId opt_file = cmd.option("file");
  VTR_ASSERT(true == cmd_context.option_enable(cmd, opt_file));
  VTR_ASSERT(false == cmd_context.option_value(cmd, opt_file).empty());

  std::string arch_file_name = cmd_context.option_value(cmd, opt_file);

  VTR_LOG("Reading XML clock architecture '%s'...\n", arch_file_name.c_str());
  openfpga_context.mutable_clock_arch() =
    read_xml_clock_network(arch_file_name.c_str());

  /* Build internal links */
  openfpga_context.mutable_clock_arch().link();
  link_clock_network_rr_graph(openfpga_context.mutable_clock_arch(),
                              g_vpr_ctx.device().rr_graph);

  /* Ensure clean data */
  openfpga_context.clock_arch().validate();
  if (!openfpga_context.clock_arch().is_valid()) {
    /* The logger does not append a line break: terminate the message here */
    VTR_LOG_ERROR("Pre-checking clock architecture failed!\n");
    return CMD_EXEC_FATAL_ERROR;
  }

  /* TODO: should identify the error code from internal function execution */
  return CMD_EXEC_SUCCESS;
}
/********************************************************************
* A function to write an OpenFPGA bitstream setting file
* we use the APIs from the libarchopenfpga library
*
* The command will accept an option '--file' which is the simulation setting
* file provided by users
*******************************************************************/
template <class T>
int write_openfpga_clock_arch_template(const T& openfpga_context,
const Command& cmd,
const CommandContext& cmd_context) {
/* Check the option '--file' is enabled or not
* Actually, it must be enabled as the shell interface will check
* before reaching this fuction
*/
CommandOptionId opt_file = cmd.option("file");
VTR_ASSERT(true == cmd_context.option_enable(cmd, opt_file));
VTR_ASSERT(false == cmd_context.option_value(cmd, opt_file).empty());
std::string arch_file_name = cmd_context.option_value(cmd, opt_file);
VTR_LOG("Writing XML clock architecture to '%s'...\n",
arch_file_name.c_str());
write_xml_clock_network(arch_file_name.c_str(),
openfpga_context.clock_arch());
/* TODO: should identify the error code from internal function execution */
return CMD_EXEC_SUCCESS;
}
} /* end namespace openfpga */
#endif

View File

@ -570,6 +570,128 @@ ShellCommandId add_pcf2place_command_template(
return shell_cmd_id;
}
/********************************************************************
 * Register the command 'read_openfpga_clock_arch' in the Shell environment
 * - Declare the mandatory option '--file' (short name '-f'), which requires
 *   a string value
 * - Bind the command to its class and its execution function
 * - Declare the commands which must be executed beforehand
 * Return the id of the command in the shell
 *******************************************************************/
template <class T>
ShellCommandId add_read_openfpga_clock_arch_command_template(
  openfpga::Shell<T>& shell, const ShellCommandClassId& cmd_class_id,
  const std::vector<ShellCommandId>& dependent_cmds, const bool& hidden) {
  Command cmd_def("read_openfpga_clock_arch");

  /* Mandatory option '--file', in short '-f' */
  const CommandOptionId file_opt =
    cmd_def.add_option("file", true, "file path to the clock architecture XML");
  cmd_def.set_option_short_name(file_opt, "f");
  cmd_def.set_option_require_value(file_opt, openfpga::OPT_STRING);

  /* Register the fully-defined command 'read_openfpga_clock_arch' */
  const ShellCommandId cmd_id =
    shell.add_command(cmd_def, "read OpenFPGA clock architecture file", hidden);
  shell.set_command_class(cmd_id, cmd_class_id);
  shell.set_command_execute_function(cmd_id,
                                     read_openfpga_clock_arch_template<T>);

  /* Commands which must be executed before this one */
  shell.set_command_dependency(cmd_id, dependent_cmds);

  return cmd_id;
}
/********************************************************************
 * Register the command 'write_openfpga_clock_arch' in the Shell environment
 * - Declare the mandatory option '--file' (short name '-f'), which requires
 *   a string value
 * - Bind the command to its class and its (const) execution function
 * - Declare the commands which must be executed beforehand
 * Return the id of the command in the shell
 *******************************************************************/
template <class T>
ShellCommandId add_write_openfpga_clock_arch_command_template(
  openfpga::Shell<T>& shell, const ShellCommandClassId& cmd_class_id,
  const std::vector<ShellCommandId>& dependent_cmds, const bool& hidden) {
  Command cmd_def("write_openfpga_clock_arch");

  /* Mandatory option '--file', in short '-f' */
  const CommandOptionId file_opt =
    cmd_def.add_option("file", true, "file path to the clock architecture XML");
  cmd_def.set_option_short_name(file_opt, "f");
  cmd_def.set_option_require_value(file_opt, openfpga::OPT_STRING);

  /* Register the fully-defined command 'write_openfpga_clock_arch' */
  const ShellCommandId cmd_id = shell.add_command(
    cmd_def, "write OpenFPGA clock architecture file", hidden);
  shell.set_command_class(cmd_id, cmd_class_id);
  /* Writing does not modify the context: use the const execution function */
  shell.set_command_const_execute_function(
    cmd_id, write_openfpga_clock_arch_template<T>);

  /* Commands which must be executed before this one */
  shell.set_command_dependency(cmd_id, dependent_cmds);

  return cmd_id;
}
/********************************************************************
 * Register the command 'append_clock_rr_graph' in the Shell environment
 * - Declare the optional switch '--verbose'
 * - Bind the command to its class and its execution function
 * - Declare the commands which must be executed beforehand
 * Return the id of the command in the shell
 *******************************************************************/
template <class T>
ShellCommandId add_append_clock_rr_graph_command_template(
  openfpga::Shell<T>& shell, const ShellCommandClassId& cmd_class_id,
  const std::vector<ShellCommandId>& dependent_cmds, const bool& hidden) {
  Command cmd_def("append_clock_rr_graph");

  /* Optional switch '--verbose' */
  cmd_def.add_option("verbose", false, "Show verbose outputs");

  /* Register the fully-defined command 'append_clock_rr_graph' */
  const ShellCommandId cmd_id = shell.add_command(
    cmd_def,
    "Append clock network to the routing resource graph built by VPR.", hidden);
  shell.set_command_class(cmd_id, cmd_class_id);
  shell.set_command_execute_function(cmd_id,
                                     append_clock_rr_graph_template<T>);

  /* Commands which must be executed before this one */
  shell.set_command_dependency(cmd_id, dependent_cmds);

  return cmd_id;
}
/********************************************************************
 * Register the command 'route_clock_rr_graph' in the Shell environment
 * - Declare the optional option '--pin_constraints_file' (short name
 *   '-pcf'), which requires a string value
 * - Declare the optional switch '--verbose'
 * - Bind the command to its class and its execution function
 * - Declare the commands which must be executed beforehand
 * Return the id of the command in the shell
 *******************************************************************/
template <class T>
ShellCommandId add_route_clock_rr_graph_command_template(
  openfpga::Shell<T>& shell, const ShellCommandClassId& cmd_class_id,
  const std::vector<ShellCommandId>& dependent_cmds, const bool& hidden) {
  Command cmd_def("route_clock_rr_graph");

  /* Optional option '--pin_constraints_file', in short '-pcf' */
  const CommandOptionId pcf_opt =
    cmd_def.add_option("pin_constraints_file", false,
                       "specify the file path to the pin constraints");
  cmd_def.set_option_short_name(pcf_opt, "pcf");
  cmd_def.set_option_require_value(pcf_opt, openfpga::OPT_STRING);

  /* Optional switch '--verbose' */
  cmd_def.add_option("verbose", false, "Show verbose outputs");

  /* Register the fully-defined command 'route_clock_rr_graph' */
  const ShellCommandId cmd_id = shell.add_command(
    cmd_def,
    "Route clock network based on routing resource graph built by VPR.",
    hidden);
  shell.set_command_class(cmd_id, cmd_class_id);
  shell.set_command_execute_function(cmd_id,
                                     route_clock_rr_graph_template<T>);

  /* Commands which must be executed before this one */
  shell.set_command_dependency(cmd_id, dependent_cmds);

  return cmd_id;
}
template <class T>
void add_setup_command_templates(openfpga::Shell<T>& shell,
const bool& hidden = false) {
@ -636,6 +758,28 @@ void add_setup_command_templates(openfpga::Shell<T>& shell,
shell, openfpga_setup_cmd_class, write_bitstream_setting_dependent_cmds,
hidden);
/********************************
* Command 'read_openfpga_clock_arch'
*/
std::vector<ShellCommandId> read_openfpga_clock_arch_dependent_cmds;
read_openfpga_clock_arch_dependent_cmds.push_back(vpr_cmd_id);
read_openfpga_clock_arch_dependent_cmds.push_back(read_arch_cmd_id);
ShellCommandId read_openfpga_clock_arch_cmd_id =
add_read_openfpga_clock_arch_command_template<T>(
shell, openfpga_setup_cmd_class, read_openfpga_clock_arch_dependent_cmds,
hidden);
/********************************
* Command 'write_openfpga_clock_arch'
*/
/* The 'write_openfpga_clock_arch' command should NOT be executed
* before 'read_openfpga_clock_arch' */
std::vector<ShellCommandId> write_openfpga_clock_arch_dependent_cmds(
1, read_openfpga_clock_arch_cmd_id);
add_write_openfpga_clock_arch_command_template<T>(
shell, openfpga_setup_cmd_class, write_openfpga_clock_arch_dependent_cmds,
hidden);
/********************************
* Command 'link_openfpga_arch'
*/
@ -649,6 +793,31 @@ void add_setup_command_templates(openfpga::Shell<T>& shell,
ShellCommandId link_arch_cmd_id = add_link_arch_command_template<T>(
shell, openfpga_setup_cmd_class, link_arch_dependent_cmds, hidden);
/********************************
* Command 'append_clock_rr_graph'
*/
/* The 'append_clock_rr_graph' command should NOT be executed before
* 'read_openfpga_clock_arch' */
std::vector<ShellCommandId> append_clock_rr_graph_dependent_cmds;
append_clock_rr_graph_dependent_cmds.push_back(
read_openfpga_clock_arch_cmd_id);
add_append_clock_rr_graph_command_template<T>(
shell, openfpga_setup_cmd_class, append_clock_rr_graph_dependent_cmds,
hidden);
/********************************
* Command 'route_clock_rr_graph'
*/
/* The 'route_clock_rr_graph' command should NOT be executed before
* 'read_openfpga_clock_arch' and 'link_arch' */
std::vector<ShellCommandId> route_clock_rr_graph_dependent_cmds;
route_clock_rr_graph_dependent_cmds.push_back(
read_openfpga_clock_arch_cmd_id);
route_clock_rr_graph_dependent_cmds.push_back(link_arch_cmd_id);
add_route_clock_rr_graph_command_template<T>(
shell, openfpga_setup_cmd_class, route_clock_rr_graph_dependent_cmds,
hidden);
/********************************
* Command 'write_gsb'
*/

View File

@ -101,6 +101,7 @@ int build_device_module_graph(
/* Build FPGA fabric top-level module */
status = build_top_module(
module_manager, decoder_lib, blwl_sr_banks, openfpga_ctx.arch().circuit_lib,
openfpga_ctx.clock_arch(), openfpga_ctx.clock_rr_lookup(),
openfpga_ctx.vpr_device_annotation(), vpr_device_ctx.grid,
openfpga_ctx.arch().tile_annotations, vpr_device_ctx.rr_graph,
openfpga_ctx.device_rr_gsb(), openfpga_ctx.tile_direct(),

View File

@ -431,7 +431,8 @@ static void add_top_module_io_children(
int build_top_module(
ModuleManager& module_manager, DecoderLibrary& decoder_lib,
MemoryBankShiftRegisterBanks& blwl_sr_banks,
const CircuitLibrary& circuit_lib,
const CircuitLibrary& circuit_lib, const ClockNetwork& clk_ntwk,
const RRClockSpatialLookup& rr_clock_lookup,
const VprDeviceAnnotation& vpr_device_annotation, const DeviceGrid& grids,
const TileAnnotation& tile_annotation, const RRGraphView& rr_graph,
const DeviceRRGSB& device_rr_gsb, const TileDirect& tile_direct,
@ -494,7 +495,8 @@ int build_top_module(
* annotation */
status = add_top_module_global_ports_from_grid_modules(
module_manager, top_module, tile_annotation, vpr_device_annotation, grids,
grid_instance_ids);
rr_graph, device_rr_gsb, cb_instance_ids, grid_instance_ids, clk_ntwk,
rr_clock_lookup);
if (CMD_EXEC_FATAL_ERROR == status) {
return status;
}

View File

@ -9,6 +9,7 @@
#include "arch_direct.h"
#include "circuit_library.h"
#include "clock_network.h"
#include "config_protocol.h"
#include "decoder_library.h"
#include "device_grid.h"
@ -16,6 +17,7 @@
#include "fabric_key.h"
#include "memory_bank_shift_register_banks.h"
#include "module_manager.h"
#include "rr_clock_spatial_lookup.h"
#include "rr_graph_view.h"
#include "tile_annotation.h"
#include "tile_direct.h"
@ -32,7 +34,8 @@ namespace openfpga {
int build_top_module(
ModuleManager& module_manager, DecoderLibrary& decoder_lib,
MemoryBankShiftRegisterBanks& blwl_sr_banks,
const CircuitLibrary& circuit_lib,
const CircuitLibrary& circuit_lib, const ClockNetwork& clk_ntwk,
const RRClockSpatialLookup& rr_clock_lookup,
const VprDeviceAnnotation& vpr_device_annotation, const DeviceGrid& grids,
const TileAnnotation& tile_annotation, const RRGraphView& rr_graph,
const DeviceRRGSB& device_rr_gsb, const TileDirect& tile_direct,

View File

@ -974,6 +974,224 @@ static int build_top_module_global_net_for_given_grid_module(
return CMD_EXEC_SUCCESS;
}
/********************************************************************
 * Add nets between a global port and its sinks at each grid modules
 *
 * For each tile entry annotated on the global port, the function walks
 * through
 * - the core grids, i.e., [1, width - 1) x [1, height - 1)
 * - the perimeter grids, which are I/O grids
 * and builds a net to every tile whose name matches the annotation and whose
 * coordinate satisfies the (x, y) constraint of the annotation.
 * A coordinate of -1 means that all the x (or y) should be considered.
 *
 * Return CMD_EXEC_FATAL_ERROR if an annotated coordinate is out of range or
 * if any net cannot be built. Otherwise, return the status of the last net
 * built (CMD_EXEC_SUCCESS when no net is built)
 *******************************************************************/
static int build_top_module_global_net_from_grid_modules(
  ModuleManager& module_manager, const ModuleId& top_module,
  const ModulePortId& top_module_port, const TileAnnotation& tile_annotation,
  const TileGlobalPortId& tile_global_port,
  const VprDeviceAnnotation& vpr_device_annotation, const DeviceGrid& grids,
  const vtr::Matrix<size_t>& grid_instance_ids) {
  int status = CMD_EXEC_SUCCESS;

  std::map<e_side, std::vector<vtr::Point<size_t>>> io_coordinates =
    generate_perimeter_grid_coordinates(grids);

  /* Bounds of the core grids: start is inclusive while end is exclusive.
   * These bounds are never narrowed: a specific coordinate is applied as a
   * filter when walking through the grids. Narrowing both bounds to the same
   * value would make the exclusive-end loops below empty, and tiles in the
   * core with a specific coordinate would never be connected.
   */
  const vtr::Point<size_t> start_coord(1, 1);
  const vtr::Point<size_t> end_coord(grids.width() - 1, grids.height() - 1);

  for (size_t tile_info_id = 0;
       tile_info_id <
       tile_annotation.global_port_tile_names(tile_global_port).size();
       ++tile_info_id) {
    std::string tile_name =
      tile_annotation.global_port_tile_names(tile_global_port)[tile_info_id];
    BasicPort tile_port =
      tile_annotation.global_port_tile_ports(tile_global_port)[tile_info_id];

    /* Find the coordinates for the wanted tiles */
    vtr::Point<size_t> range = tile_annotation.global_port_tile_coordinates(
      tile_global_port)[tile_info_id];

    /* -1 means all the x (or y) should be considered */
    const bool any_x = (size_t(-1) == range.x());
    const bool any_y = (size_t(-1) == range.y());

    bool out_of_range = false;
    if ((false == any_x) &&
        ((range.x() < start_coord.x()) || (range.x() > end_coord.x()))) {
      out_of_range = true;
    }
    if ((false == any_y) &&
        ((range.y() < start_coord.y()) || (range.y() > end_coord.y()))) {
      out_of_range = true;
    }

    /* Error out immediately if the coordinate is not valid! */
    if (true == out_of_range) {
      VTR_LOG_ERROR(
        "Coordinate (%lu, %lu) in tile annotation for tile '%s' is out of "
        "range (%lu:%lu, %lu:%lu)!\n",
        range.x(), range.y(), tile_name.c_str(), start_coord.x(), end_coord.x(),
        start_coord.y(), end_coord.y());
      return CMD_EXEC_FATAL_ERROR;
    }

    /* Spot the port from child modules from core grids */
    for (size_t ix = start_coord.x(); ix < end_coord.x(); ++ix) {
      /* Bypass if the x is a specific number (!= -1), and ix is different */
      if ((false == any_x) && (range.x() != ix)) {
        continue;
      }
      for (size_t iy = start_coord.y(); iy < end_coord.y(); ++iy) {
        /* Bypass if the y is a specific number (!= -1), and iy is different */
        if ((false == any_y) && (range.y() != iy)) {
          continue;
        }
        /* Bypass EMPTY tiles */
        if (true == is_empty_type(grids[ix][iy].type)) {
          continue;
        }
        /* Skip width or height > 1 tiles (mostly heterogeneous blocks) */
        if ((0 < grids[ix][iy].width_offset) ||
            (0 < grids[ix][iy].height_offset)) {
          continue;
        }

        /* Bypass the tiles whose names do not match */
        if (std::string(grids[ix][iy].type->name) != tile_name) {
          continue;
        }

        /* Create nets and finish connection build-up */
        status = build_top_module_global_net_for_given_grid_module(
          module_manager, top_module, top_module_port, tile_annotation,
          tile_global_port, tile_port, vpr_device_annotation, grids,
          vtr::Point<size_t>(ix, iy), NUM_SIDES, grid_instance_ids);
        if (CMD_EXEC_FATAL_ERROR == status) {
          return status;
        }
      }
    }

    /* Walk through all the grids on the perimeter, which are I/O grids */
    for (const e_side& io_side : FPGA_SIDES_CLOCKWISE) {
      for (const vtr::Point<size_t>& io_coordinate : io_coordinates[io_side]) {
        /* Bypass EMPTY grid */
        if (true ==
            is_empty_type(grids[io_coordinate.x()][io_coordinate.y()].type)) {
          continue;
        }

        /* Skip width or height > 1 tiles (mostly heterogeneous blocks) */
        if ((0 < grids[io_coordinate.x()][io_coordinate.y()].width_offset) ||
            (0 < grids[io_coordinate.x()][io_coordinate.y()].height_offset)) {
          continue;
        }

        /* Bypass the tiles whose names do not match */
        if (std::string(
              grids[io_coordinate.x()][io_coordinate.y()].type->name) !=
            tile_name) {
          continue;
        }

        /* Check if the coordinate satisfy the tile coordinate definition
         * - Bypass if the x is a specific number (!= -1), and io_coordinate
         * is different
         * - Bypass if the y is a specific number (!= -1), and io_coordinate
         * is different
         */
        if ((false == any_x) && (range.x() != io_coordinate.x())) {
          continue;
        }
        if ((false == any_y) && (range.y() != io_coordinate.y())) {
          continue;
        }

        /* Create nets and finish connection build-up */
        status = build_top_module_global_net_for_given_grid_module(
          module_manager, top_module, top_module_port, tile_annotation,
          tile_global_port, tile_port, vpr_device_annotation, grids,
          io_coordinate, io_side, grid_instance_ids);
        if (CMD_EXEC_FATAL_ERROR == status) {
          return status;
        }
      }
    }
  }

  return status;
}
/********************************************************************
 * Add nets between a global port and its sinks at an entry point of clock tree
 *
 * For each pin of the clock tree, a net is created with the corresponding pin
 * of the top-level port as its source. Then, for each top-level spine of the
 * clock tree, the routing track at the starting point of the spine is found
 * in the connection block module, and added as a sink of the net.
 *
 * Return CMD_EXEC_FATAL_ERROR if
 * - the clock tree name cannot be found in the clock architecture
 * - the width of the clock tree does not match the width of the global port
 * Otherwise, return CMD_EXEC_SUCCESS
 *******************************************************************/
static int build_top_module_global_net_from_clock_arch_tree(
  ModuleManager& module_manager, const ModuleId& top_module,
  const ModulePortId& top_module_port, const RRGraphView& rr_graph,
  const DeviceRRGSB& device_rr_gsb,
  const std::map<t_rr_type, vtr::Matrix<size_t>>& cb_instance_ids,
  const ClockNetwork& clk_ntwk, const std::string& clk_tree_name,
  const RRClockSpatialLookup& rr_clock_lookup) {
  /* Ensure the clock arch tree name is valid */
  ClockTreeId clk_tree = clk_ntwk.find_tree(clk_tree_name);
  if (!clk_ntwk.valid_tree_id(clk_tree)) {
    VTR_LOG_ERROR(
      "Fail to find a matched clock tree '%s' in the clock architecture "
      "definition\n",
      clk_tree_name.c_str());
    return CMD_EXEC_FATAL_ERROR;
  }

  /* The source port is the same for all the pins: look it up only once */
  BasicPort src_port = module_manager.module_port(top_module, top_module_port);

  /* Ensure the clock tree width matches the global port size */
  if (clk_ntwk.tree_width(clk_tree) != src_port.get_width()) {
    VTR_LOG_ERROR(
      "Clock tree '%s' does not have the same width '%lu' as the port '%s' of "
      "FPGA top module\n",
      clk_tree_name.c_str(), clk_ntwk.tree_width(clk_tree),
      src_port.get_name().c_str());
    return CMD_EXEC_FATAL_ERROR;
  }

  for (ClockTreePinId pin : clk_ntwk.pins(clk_tree)) {
    /* Add the module net, which is driven by a pin of the top-level port */
    ModuleNetId net = create_module_source_pin_net(
      module_manager, top_module, top_module, 0, top_module_port,
      src_port.pins()[size_t(pin)]);
    VTR_ASSERT(ModuleNetId::INVALID() != net);

    for (ClockSpineId spine : clk_ntwk.tree_top_spines(clk_tree)) {
      vtr::Point<int> entry_point = clk_ntwk.spine_start_point(spine);
      Direction entry_dir = clk_ntwk.spine_direction(spine);
      t_rr_type entry_track_type = clk_ntwk.spine_track_type(spine);
      /* Find the routing resource node of the entry point */
      RRNodeId entry_rr_node =
        rr_clock_lookup.find_node(entry_point.x(), entry_point.y(), clk_tree,
                                  clk_ntwk.spine_level(spine), pin, entry_dir);

      /* Get the connection block module and instance at the entry point
       * NOTE(review): the coordinate is converted from int to size_t, which
       * assumes that the entry point is non-negative - confirm against the
       * validation of the clock network
       */
      const RRGSB& rr_gsb = device_rr_gsb.get_gsb_by_cb_coordinate(
        entry_track_type, vtr::Point<size_t>(entry_point.x(), entry_point.y()));
      ModuleId cb_module =
        module_manager.find_module(generate_connection_block_module_name(
          entry_track_type,
          vtr::Point<size_t>(entry_point.x(), entry_point.y())));
      size_t cb_instance =
        cb_instance_ids.at(entry_track_type)[entry_point.x()][entry_point.y()];
      ModulePinInfo des_pin_info = find_connection_block_module_chan_port(
        module_manager, cb_module, rr_graph, rr_gsb, entry_track_type,
        entry_rr_node);

      /* Configure the net sink */
      BasicPort sink_port =
        module_manager.module_port(cb_module, des_pin_info.first);
      module_manager.add_module_net_sink(top_module, net, cb_module,
                                         cb_instance, des_pin_info.first,
                                         sink_port.pins()[des_pin_info.second]);
    }
  }

  return CMD_EXEC_SUCCESS;
}
/********************************************************************
* Add global ports from grid ports that are defined as global in tile
*annotation
@ -982,7 +1200,10 @@ int add_top_module_global_ports_from_grid_modules(
ModuleManager& module_manager, const ModuleId& top_module,
const TileAnnotation& tile_annotation,
const VprDeviceAnnotation& vpr_device_annotation, const DeviceGrid& grids,
const vtr::Matrix<size_t>& grid_instance_ids) {
const RRGraphView& rr_graph, const DeviceRRGSB& device_rr_gsb,
const std::map<t_rr_type, vtr::Matrix<size_t>>& cb_instance_ids,
const vtr::Matrix<size_t>& grid_instance_ids, const ClockNetwork& clk_ntwk,
const RRClockSpatialLookup& rr_clock_lookup) {
int status = CMD_EXEC_SUCCESS;
/* Add the global ports which are NOT yet added to the top-level module
@ -1015,9 +1236,6 @@ int add_top_module_global_ports_from_grid_modules(
}
/* Add module nets */
std::map<e_side, std::vector<vtr::Point<size_t>>> io_coordinates =
generate_perimeter_grid_coordinates(grids);
for (const TileGlobalPortId& tile_global_port :
tile_annotation.global_ports()) {
/* Must found one valid port! */
@ -1025,128 +1243,26 @@ int add_top_module_global_ports_from_grid_modules(
top_module, tile_annotation.global_port_name(tile_global_port));
VTR_ASSERT(ModulePortId::INVALID() != top_module_port);
for (size_t tile_info_id = 0;
tile_info_id <
tile_annotation.global_port_tile_names(tile_global_port).size();
++tile_info_id) {
std::string tile_name =
tile_annotation.global_port_tile_names(tile_global_port)[tile_info_id];
BasicPort tile_port =
tile_annotation.global_port_tile_ports(tile_global_port)[tile_info_id];
/* Find the coordinates for the wanted tiles */
vtr::Point<size_t> start_coord(1, 1);
vtr::Point<size_t> end_coord(grids.width() - 1, grids.height() - 1);
vtr::Point<size_t> range = tile_annotation.global_port_tile_coordinates(
tile_global_port)[tile_info_id];
bool out_of_range = false;
/* -1 means all the x should be considered */
if (size_t(-1) != range.x()) {
if ((range.x() < start_coord.x()) || (range.x() > end_coord.x())) {
out_of_range = true;
} else {
/* Set the range */
start_coord.set_x(range.x());
end_coord.set_x(range.x());
}
}
/* -1 means all the y should be considered */
if (size_t(-1) != range.y()) {
if ((range.y() < start_coord.y()) || (range.y() > end_coord.y())) {
out_of_range = true;
} else {
/* Set the range */
start_coord.set_y(range.y());
end_coord.set_y(range.y());
}
}
/* Error out immediately if the coordinate is not valid! */
if (true == out_of_range) {
VTR_LOG_ERROR(
"Coordinate (%lu, %lu) in tile annotation for tile '%s' is out of "
"range (%lu:%lu, %lu:%lu)!",
range.x(), range.y(), tile_name.c_str(), start_coord.x(),
end_coord.x(), start_coord.y(), end_coord.y());
return CMD_EXEC_FATAL_ERROR;
}
/* Spot the port from child modules from core grids */
for (size_t ix = start_coord.x(); ix < end_coord.x(); ++ix) {
for (size_t iy = start_coord.y(); iy < end_coord.y(); ++iy) {
/* Bypass EMPTY tiles */
if (true == is_empty_type(grids[ix][iy].type)) {
continue;
}
/* Skip width or height > 1 tiles (mostly heterogeneous blocks) */
if ((0 < grids[ix][iy].width_offset) ||
(0 < grids[ix][iy].height_offset)) {
continue;
}
/* Bypass the tiles whose names do not match */
if (std::string(grids[ix][iy].type->name) != tile_name) {
continue;
}
/* Create nets and finish connection build-up */
status = build_top_module_global_net_for_given_grid_module(
module_manager, top_module, top_module_port, tile_annotation,
tile_global_port, tile_port, vpr_device_annotation, grids,
vtr::Point<size_t>(ix, iy), NUM_SIDES, grid_instance_ids);
if (CMD_EXEC_FATAL_ERROR == status) {
return status;
}
}
}
/* Walk through all the grids on the perimeter, which are I/O grids */
for (const e_side& io_side : FPGA_SIDES_CLOCKWISE) {
for (const vtr::Point<size_t>& io_coordinate :
io_coordinates[io_side]) {
/* Bypass EMPTY grid */
if (true ==
is_empty_type(grids[io_coordinate.x()][io_coordinate.y()].type)) {
continue;
}
/* Skip width or height > 1 tiles (mostly heterogeneous blocks) */
if ((0 < grids[io_coordinate.x()][io_coordinate.y()].width_offset) ||
(0 < grids[io_coordinate.x()][io_coordinate.y()].height_offset)) {
continue;
}
/* Bypass the tiles whose names do not match */
if (std::string(
grids[io_coordinate.x()][io_coordinate.y()].type->name) !=
tile_name) {
continue;
}
/* Check if the coordinate satisfy the tile coordinate defintion
* - Bypass if the x is a specific number (!= -1), and io_coordinate
* is different
* - Bypass if the y is a specific number (!= -1), and io_coordinate
* is different
*/
if ((size_t(-1) != range.x()) && (range.x() != io_coordinate.x())) {
continue;
}
if ((size_t(-1) != range.y()) && (range.y() != io_coordinate.y())) {
continue;
}
/* Create nets and finish connection build-up */
status = build_top_module_global_net_for_given_grid_module(
module_manager, top_module, top_module_port, tile_annotation,
tile_global_port, tile_port, vpr_device_annotation, grids,
io_coordinate, io_side, grid_instance_ids);
if (CMD_EXEC_FATAL_ERROR == status) {
return status;
}
}
}
/* There are two cases when building the nets:
* - If the net will go through a dedicated clock tree network, the net will
* drive an input of a routing block
* - If the net will be directly wired to tiles, the net will drive an input
* of a tile
*/
if (!tile_annotation.global_port_clock_arch_tree_name(tile_global_port)
.empty()) {
status = build_top_module_global_net_from_clock_arch_tree(
module_manager, top_module, top_module_port, rr_graph, device_rr_gsb,
cb_instance_ids, clk_ntwk,
tile_annotation.global_port_clock_arch_tree_name(tile_global_port),
rr_clock_lookup);
} else {
status = build_top_module_global_net_from_grid_modules(
module_manager, top_module, top_module_port, tile_annotation,
tile_global_port, vpr_device_annotation, grids, grid_instance_ids);
}
if (status == CMD_EXEC_FATAL_ERROR) {
return status;
}
}

View File

@ -6,9 +6,11 @@
*******************************************************************/
#include <vector>
#include "clock_network.h"
#include "device_grid.h"
#include "device_rr_gsb.h"
#include "module_manager.h"
#include "rr_clock_spatial_lookup.h"
#include "rr_graph_view.h"
#include "tile_annotation.h"
#include "vpr_device_annotation.h"
@ -34,7 +36,10 @@ int add_top_module_global_ports_from_grid_modules(
ModuleManager& module_manager, const ModuleId& top_module,
const TileAnnotation& tile_annotation,
const VprDeviceAnnotation& vpr_device_annotation, const DeviceGrid& grids,
const vtr::Matrix<size_t>& grid_instance_ids);
const RRGraphView& rr_graph, const DeviceRRGSB& device_rr_gsb,
const std::map<t_rr_type, vtr::Matrix<size_t>>& cb_instance_ids,
const vtr::Matrix<size_t>& grid_instance_ids, const ClockNetwork& clk_ntwk,
const RRClockSpatialLookup& rr_clock_lookup);
} /* end namespace openfpga */

View File

@ -196,7 +196,7 @@ BitstreamManager build_device_bitstream(const VprContext& vpr_ctx,
openfpga_ctx.arch().circuit_lib, openfpga_ctx.mux_lib(), vpr_ctx.atom(),
openfpga_ctx.vpr_device_annotation(), openfpga_ctx.vpr_routing_annotation(),
vpr_ctx.device().rr_graph, openfpga_ctx.device_rr_gsb(),
openfpga_ctx.flow_manager().compress_routing());
openfpga_ctx.flow_manager().compress_routing(), verbose);
VTR_LOGV(verbose, "Done\n");
VTR_LOGV(verbose, "Decoded %lu configuration bits into %lu blocks\n",

View File

@ -68,6 +68,9 @@ static std::vector<bool> build_cmos_mux_bitstream(
find_mux_implementation_num_inputs(circuit_lib, mux_model, mux_size);
/* Note that the mux graph is indexed using datapath MUX size!!!! */
MuxId mux_graph_id = mux_lib.mux_graph(mux_model, mux_size);
if (!mux_lib.valid_mux_id(mux_graph_id)) {
VTR_ASSERT(mux_lib.valid_mux_id(mux_graph_id));
}
const MuxGraph mux_graph = mux_lib.mux_graph(mux_graph_id);
size_t datapath_id = path_id;

View File

@ -243,10 +243,9 @@ static void build_connection_block_mux_bitstream(
const RRGSB& rr_gsb, const e_side& cb_ipin_side, const size_t& ipin_index) {
RRNodeId src_rr_node = rr_gsb.get_ipin_node(cb_ipin_side, ipin_index);
/* Find drive_rr_nodes*/
size_t datapath_mux_size = rr_graph.node_fan_in(src_rr_node);
std::vector<RREdgeId> driver_rr_edges =
rr_gsb.get_ipin_node_in_edges(rr_graph, cb_ipin_side, ipin_index);
size_t datapath_mux_size = driver_rr_edges.size();
/* Cache input and output nets */
std::vector<ClusterNetId> input_nets;
@ -357,9 +356,12 @@ static void build_connection_block_interc_bitstream(
const MuxLibrary& mux_lib, const AtomContext& atom_ctx,
const VprDeviceAnnotation& device_annotation,
const VprRoutingAnnotation& routing_annotation, const RRGraphView& rr_graph,
const RRGSB& rr_gsb, const e_side& cb_ipin_side, const size_t& ipin_index) {
const RRGSB& rr_gsb, const e_side& cb_ipin_side, const size_t& ipin_index,
const bool& verbose) {
RRNodeId src_rr_node = rr_gsb.get_ipin_node(cb_ipin_side, ipin_index);
VTR_LOGV(verbose, "\tGenerating bitstream for IPIN '%lu'\n", ipin_index);
/* Consider configurable edges only */
std::vector<RREdgeId> driver_rr_edges =
rr_gsb.get_ipin_node_in_edges(rr_graph, cb_ipin_side, ipin_index);
@ -405,7 +407,7 @@ static void build_connection_block_bitstream(
const MuxLibrary& mux_lib, const AtomContext& atom_ctx,
const VprDeviceAnnotation& device_annotation,
const VprRoutingAnnotation& routing_annotation, const RRGraphView& rr_graph,
const RRGSB& rr_gsb, const t_rr_type& cb_type) {
const RRGSB& rr_gsb, const t_rr_type& cb_type, const bool& verbose) {
/* Find routing multiplexers on the sides of a Connection block where IPIN
* nodes locate */
std::vector<enum e_side> cb_sides = rr_gsb.get_cb_ipin_sides(cb_type);
@ -415,10 +417,12 @@ static void build_connection_block_bitstream(
SideManager side_manager(cb_ipin_side);
for (size_t inode = 0; inode < rr_gsb.get_num_ipin_nodes(cb_ipin_side);
++inode) {
VTR_LOGV(verbose, "\tGenerating bitstream for IPIN at '%s' side\n",
side_manager.to_string().c_str());
build_connection_block_interc_bitstream(
bitstream_manager, cb_configurable_block, module_manager, circuit_lib,
mux_lib, atom_ctx, device_annotation, routing_annotation, rr_graph,
rr_gsb, cb_ipin_side, inode);
rr_gsb, cb_ipin_side, inode, verbose);
}
}
}
@ -434,7 +438,7 @@ static void build_connection_block_bitstreams(
const VprDeviceAnnotation& device_annotation,
const VprRoutingAnnotation& routing_annotation, const RRGraphView& rr_graph,
const DeviceRRGSB& device_rr_gsb, const bool& compact_routing_hierarchy,
const t_rr_type& cb_type) {
const t_rr_type& cb_type, const bool& verbose) {
vtr::Point<size_t> cb_range = device_rr_gsb.get_gsb_range();
for (size_t ix = 0; ix < cb_range.x(); ++ix) {
@ -450,9 +454,19 @@ static void build_connection_block_bitstreams(
/* Skip if the cb does not contain any configuration bits! */
if (true ==
connection_block_contain_only_routing_tracks(rr_gsb, cb_type)) {
VTR_LOGV(verbose,
"\n\tSkipped %s Connection Block [%lu][%lu] as it contains "
"only routing tracks\n",
cb_type == CHANX ? "X-direction" : "Y-direction",
rr_gsb.get_cb_x(cb_type), rr_gsb.get_cb_y(cb_type));
continue;
}
VTR_LOGV(verbose,
"\n\tGenerating bitstream for %s Connection Block [%lu][%lu]\n",
cb_type == CHANX ? "X-direction" : "Y-direction",
rr_gsb.get_cb_x(cb_type), rr_gsb.get_cb_y(cb_type));
/* Find the cb module so that we can precisely reserve child blocks */
vtr::Point<size_t> cb_coord(rr_gsb.get_cb_x(cb_type),
rr_gsb.get_cb_y(cb_type));
@ -494,7 +508,9 @@ static void build_connection_block_bitstreams(
build_connection_block_bitstream(
bitstream_manager, cb_configurable_block, module_manager, circuit_lib,
mux_lib, atom_ctx, device_annotation, routing_annotation, rr_graph,
rr_gsb, cb_type);
rr_gsb, cb_type, verbose);
VTR_LOGV(verbose, "\tDone\n");
}
}
}
@ -512,7 +528,8 @@ void build_routing_bitstream(
const MuxLibrary& mux_lib, const AtomContext& atom_ctx,
const VprDeviceAnnotation& device_annotation,
const VprRoutingAnnotation& routing_annotation, const RRGraphView& rr_graph,
const DeviceRRGSB& device_rr_gsb, const bool& compact_routing_hierarchy) {
const DeviceRRGSB& device_rr_gsb, const bool& compact_routing_hierarchy,
const bool& verbose) {
/* Generate bitstream for each switch blocks
* To organize the bitstream in blocks, we create a block for each switch
* block and give names which are same as they are in top-level module
@ -531,6 +548,10 @@ void build_routing_bitstream(
continue;
}
VTR_LOGV(verbose,
"\n\tGenerating bitstream for Switch blocks[%lu][%lu]...\n", ix,
iy);
vtr::Point<size_t> sb_coord(rr_gsb.get_sb_x(), rr_gsb.get_sb_y());
/* Find the sb module so that we can precisely reserve child blocks */
@ -570,6 +591,8 @@ void build_routing_bitstream(
module_manager, circuit_lib, mux_lib,
atom_ctx, device_annotation,
routing_annotation, rr_graph, rr_gsb);
VTR_LOGV(verbose, "\tDone\n");
}
}
VTR_LOG("Done\n");
@ -584,7 +607,7 @@ void build_routing_bitstream(
build_connection_block_bitstreams(
bitstream_manager, top_configurable_block, module_manager, circuit_lib,
mux_lib, atom_ctx, device_annotation, routing_annotation, rr_graph,
device_rr_gsb, compact_routing_hierarchy, CHANX);
device_rr_gsb, compact_routing_hierarchy, CHANX, verbose);
VTR_LOG("Done\n");
VTR_LOG("Generating bitstream for Y-direction Connection blocks ...");
@ -592,7 +615,7 @@ void build_routing_bitstream(
build_connection_block_bitstreams(
bitstream_manager, top_configurable_block, module_manager, circuit_lib,
mux_lib, atom_ctx, device_annotation, routing_annotation, rr_graph,
device_rr_gsb, compact_routing_hierarchy, CHANY);
device_rr_gsb, compact_routing_hierarchy, CHANY, verbose);
VTR_LOG("Done\n");
}

View File

@ -32,7 +32,8 @@ void build_routing_bitstream(
const MuxLibrary& mux_lib, const AtomContext& atom_ctx,
const VprDeviceAnnotation& device_annotation,
const VprRoutingAnnotation& routing_annotation, const RRGraphView& rr_graph,
const DeviceRRGSB& device_rr_gsb, const bool& compact_routing_hierarchy);
const DeviceRRGSB& device_rr_gsb, const bool& compact_routing_hierarchy,
const bool& verbose);
} /* end namespace openfpga */

View File

@ -231,6 +231,13 @@ MuxLibrary build_device_mux_library(const DeviceContext& vpr_device_ctx,
VTR_LOG("Built a multiplexer library of %lu physical multiplexers.\n",
mux_lib.muxes().size());
VTR_LOG("Maximum multiplexer size is %lu.\n", mux_lib.max_mux_size());
for (auto mux_id : mux_lib.muxes()) {
VTR_LOG("\tmodel '%s', input_size='%lu'\n",
openfpga_ctx.arch()
.circuit_lib.model_name(mux_lib.mux_circuit_model(mux_id))
.c_str(),
mux_lib.mux_graph(mux_id).num_inputs());
}
return mux_lib;
}

View File

@ -11,7 +11,9 @@
/* Headers from openfpgautil library */
#include "openfpga_device_grid_utils.h"
#include "openfpga_physical_tile_utils.h"
#include "openfpga_port_parser.h"
#include "openfpga_side_manager.h"
#include "openfpga_tokenizer.h"
/* begin namespace openfpga */
namespace openfpga {
@ -116,4 +118,97 @@ std::set<e_side> find_physical_io_tile_located_sides(
return io_sides;
}
/********************************************************************
 * Find the pin index of a physical tile which matches the given name.
 * The name is expected in the form <tile>[<subtile>].<port>[<pin>].
 * For example,
 *   io[5:5].a2f[1]
 * which corresponds to the pin 'a2f[1]' of the subtile 'io' with index 5
 * in the physical tile
 *
 * Return
 * - the index of the pin, when tile name, subtile index and port name
 *   are all matched
 * - physical_tile->num_pins (used as the 'not found' value), when the tile
 *   name does not match, or no subtile/port of the tile matches
 * The function exits with an error code when the name is malformed or
 * when a subtile/pin index is invalid or out of range
 *******************************************************************/
int find_physical_tile_pin_index(t_physical_tile_type_ptr physical_tile,
                                 std::string pin_name) {
  /* Value returned when no pin is found */
  const int invalid_pin_idx = physical_tile->num_pins;

  /* The name must consist of exactly two parts: <tile>.<port> */
  StringToken tokenizer(pin_name);
  std::vector<std::string> pin_tokens = tokenizer.split(".");
  if (pin_tokens.size() != 2) {
    VTR_LOG_ERROR("Invalid pin name '%s'. Expect <tile>.<port>\n",
                  pin_name.c_str());
    exit(1);
  }

  /* precheck: return unfound pin if the tile name does not match */
  PortParser tile_parser(pin_tokens[0]);
  BasicPort tile_info = tile_parser.port();
  if (tile_info.get_name() != std::string(physical_tile->name)) {
    return invalid_pin_idx;
  }

  /* From here on the tile name matches, so any inconsistency is an error.
   * Note: the values printed with %lu are cast explicitly so that the
   * argument type agrees with the conversion specifier */
  if (!tile_info.is_valid()) {
    VTR_LOG_ERROR(
      "Invalid pin name '%s' whose subtile index is not valid, expect [0, "
      "%lu]\n",
      pin_name.c_str(),
      static_cast<unsigned long>(physical_tile->capacity - 1));
    exit(1);
  }
  /* The subtile must be a single index, not a range */
  if (tile_info.get_width() != 1) {
    VTR_LOG_ERROR(
      "Invalid pin name '%s' whose subtile index range should be 1. For "
      "example, clb[1:1]\n",
      pin_name.c_str());
    exit(1);
  }
  /* The subtile index must be within the capacity of the tile.
   * Use '>=' rather than '> capacity - 1' to avoid an unsigned wrap-around
   * when the capacity is zero */
  if (tile_info.get_msb() >= size_t(physical_tile->capacity)) {
    VTR_LOG_ERROR(
      "Invalid pin name '%s' whose subtile index is out of range, expect [0, "
      "%lu]\n",
      pin_name.c_str(),
      static_cast<unsigned long>(physical_tile->capacity - 1));
    exit(1);
  }

  /* The pin must be a single index, not a range */
  PortParser pin_parser(pin_tokens[1]);
  BasicPort pin_info = pin_parser.port();
  if (pin_info.get_width() != 1) {
    VTR_LOG_ERROR(
      "Invalid pin name '%s' whose pin index range should be 1. For example, "
      "clb[1:1].I[2:2]\n",
      pin_name.c_str());
    exit(1);
  }

  /* Spot the subtile by using the index */
  for (const t_sub_tile& sub_tile : physical_tile->sub_tiles) {
    /* Bypass subtiles whose capacity range does not cover the index */
    if (!sub_tile.capacity.is_in_range(tile_info.get_lsb())) {
      continue;
    }
    for (const t_physical_tile_port& sub_tile_port : sub_tile.ports) {
      /* Bypass ports whose name does not match */
      if (std::string(sub_tile_port.name) != pin_info.get_name()) {
        continue;
      }
      if (!pin_info.is_valid()) {
        VTR_LOG_ERROR(
          "Invalid pin name '%s' whose pin index is not valid, expect [0, "
          "%lu]\n",
          pin_name.c_str(),
          static_cast<unsigned long>(sub_tile_port.num_pins - 1));
        exit(1);
      }
      /* Same wrap-around-safe form as the subtile range check above */
      if (pin_info.get_msb() >= size_t(sub_tile_port.num_pins)) {
        VTR_LOG_ERROR(
          "Invalid pin name '%s' whose pin index is out of range, expect [0, "
          "%lu]\n",
          pin_name.c_str(),
          static_cast<unsigned long>(sub_tile_port.num_pins - 1));
        exit(1);
      }
      /* Reach here, we get the port we want, return the accumulated index:
       *   first pin of the port
       *   + offset of the selected subtile instance
       *   + pin offset inside the port
       * NOTE(review): the instance offset assumes sub_tile.num_phy_pins is
       * the number of pins of ONE subtile instance. If it is the total over
       * the whole capacity range, the stride is too large for subtiles with
       * capacity > 1 - TODO confirm against the definition of t_sub_tile */
      size_t accumulated_pin_idx =
        sub_tile_port.absolute_first_pin_index +
        sub_tile.num_phy_pins * (tile_info.get_lsb() - sub_tile.capacity.low) +
        pin_info.get_lsb();
      /* Make the narrowing to the return type explicit */
      return static_cast<int>(accumulated_pin_idx);
    }
  }

  /* No subtile/port matches the given name */
  return invalid_pin_idx;
}
} /* end namespace openfpga */

View File

@ -27,6 +27,9 @@ float find_physical_tile_pin_Fc(t_physical_tile_type_ptr type, const int& pin);
std::set<e_side> find_physical_io_tile_located_sides(
const DeviceGrid& grids, t_physical_tile_type_ptr physical_tile);
int find_physical_tile_pin_index(t_physical_tile_type_ptr physical_tile,
std::string pin_name);
} /* end namespace openfpga */
#endif

View File

@ -0,0 +1,197 @@
<?xml version="1.0"?>
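<!-- Architecture annotation for OpenFPGA framework
This annotation supports the k6_N10_40nm.xml
- General purpose logic block
- K = 6, N = 10, I = 40
- Single mode
- Routing architecture
- L = 4, fc_in = 0.15, fc_out = 0.1
NOTE(review): the pb_type annotations in this file bind 4-input LUTs
(clb.fle[n1_lut4].ble4.lut4 to circuit model 'lut4'), so the K/N/I values
above look inherited from a k6_N10 template. Please confirm which VPR
architecture this file is paired with.
-->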
<openfpga_architecture>
<technology_library>
<device_library>
<device_model name="logic" type="transistor">
<lib type="industry" corner="TOP_TT" ref="M" path="${OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.pm"/>
<design vdd="0.9" pn_ratio="2"/>
<pmos name="pch" chan_length="40e-9" min_width="140e-9" variation="logic_transistor_var"/>
<nmos name="nch" chan_length="40e-9" min_width="140e-9" variation="logic_transistor_var"/>
</device_model>
<device_model name="io" type="transistor">
<lib type="academia" ref="M" path="${OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.pm"/>
<design vdd="2.5" pn_ratio="3"/>
<pmos name="pch_25" chan_length="270e-9" min_width="320e-9" variation="io_transistor_var"/>
<nmos name="nch_25" chan_length="270e-9" min_width="320e-9" variation="io_transistor_var"/>
</device_model>
</device_library>
<variation_library>
<variation name="logic_transistor_var" abs_deviation="0.1" num_sigma="3"/>
<variation name="io_transistor_var" abs_deviation="0.1" num_sigma="3"/>
</variation_library>
</technology_library>
<circuit_library>
<circuit_model type="inv_buf" name="INVTX1" prefix="INVTX1" is_default="true">
<design_technology type="cmos" topology="inverter" size="1"/>
<device_technology device_model_name="logic"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<delay_matrix type="rise" in_port="in" out_port="out">
10e-12
</delay_matrix>
<delay_matrix type="fall" in_port="in" out_port="out">
10e-12
</delay_matrix>
</circuit_model>
<circuit_model type="inv_buf" name="buf4" prefix="buf4" is_default="false">
<design_technology type="cmos" topology="buffer" size="1" num_level="2" f_per_stage="4"/>
<device_technology device_model_name="logic"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<delay_matrix type="rise" in_port="in" out_port="out">
10e-12
</delay_matrix>
<delay_matrix type="fall" in_port="in" out_port="out">
10e-12
</delay_matrix>
</circuit_model>
<circuit_model type="inv_buf" name="tap_buf4" prefix="tap_buf4" is_default="false">
<design_technology type="cmos" topology="buffer" size="1" num_level="3" f_per_stage="4"/>
<device_technology device_model_name="logic"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<delay_matrix type="rise" in_port="in" out_port="out">
10e-12
</delay_matrix>
<delay_matrix type="fall" in_port="in" out_port="out">
10e-12
</delay_matrix>
</circuit_model>
<circuit_model type="pass_gate" name="TGATE" prefix="TGATE" is_default="true">
<design_technology type="cmos" topology="transmission_gate" nmos_size="1" pmos_size="2"/>
<device_technology device_model_name="logic"/>
<input_buffer exist="false"/>
<output_buffer exist="false"/>
<port type="input" prefix="in" size="1"/>
<port type="input" prefix="sel" size="1"/>
<port type="input" prefix="selb" size="1"/>
<port type="output" prefix="out" size="1"/>
<delay_matrix type="rise" in_port="in sel selb" out_port="out">
10e-12 5e-12 5e-12
</delay_matrix>
<delay_matrix type="fall" in_port="in sel selb" out_port="out">
10e-12 5e-12 5e-12
</delay_matrix>
</circuit_model>
<circuit_model type="chan_wire" name="chan_segment" prefix="track_seg" is_default="true">
<design_technology type="cmos"/>
<input_buffer exist="false"/>
<output_buffer exist="false"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<wire_param model_type="pi" R="101" C="22.5e-15" num_level="1"/>
<!-- model_type could be T, res_val and cap_val DON'T CARE -->
</circuit_model>
<circuit_model type="wire" name="direct_interc" prefix="direct_interc" is_default="true">
<design_technology type="cmos"/>
<input_buffer exist="false"/>
<output_buffer exist="false"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<wire_param model_type="pi" R="0" C="0" num_level="1"/>
<!-- model_type could be T, res_val and cap_val should be defined -->
</circuit_model>
<circuit_model type="mux" name="mux_tree" prefix="mux_tree" dump_structural_verilog="true">
<design_technology type="cmos" structure="tree" add_const_input="true" const_input_val="1"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="INVTX1"/>
<pass_gate_logic circuit_model_name="TGATE"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<port type="sram" prefix="sram" size="1"/>
</circuit_model>
<circuit_model type="mux" name="mux_tree_tapbuf" prefix="mux_tree_tapbuf" is_default="true" dump_structural_verilog="true">
<design_technology type="cmos" structure="tree" add_const_input="true" const_input_val="1"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="tap_buf4"/>
<pass_gate_logic circuit_model_name="TGATE"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<port type="sram" prefix="sram" size="1"/>
</circuit_model>
<!--DFF subckt ports should be defined as <D> <Q> <CLK> <RESET> <SET> -->
<circuit_model type="ff" name="DFFSRQ" prefix="DFFSRQ" spice_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/spice/dff.sp" verilog_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/verilog/dff.v">
<design_technology type="cmos"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="INVTX1"/>
<port type="input" prefix="D" size="1"/>
<port type="input" prefix="set" lib_name="SET" size="1" is_global="true" default_val="0" is_set="true"/>
<port type="input" prefix="reset" lib_name="RST" size="1" is_global="true" default_val="0" is_reset="true"/>
<port type="output" prefix="Q" size="1"/>
<port type="clock" prefix="clk" lib_name="CK" size="1" is_global="false" default_val="0"/>
</circuit_model>
<circuit_model type="lut" name="lut4" prefix="lut4" dump_structural_verilog="true">
<design_technology type="cmos"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="INVTX1"/>
<lut_input_inverter exist="true" circuit_model_name="INVTX1"/>
<lut_input_buffer exist="true" circuit_model_name="buf4"/>
<pass_gate_logic circuit_model_name="TGATE"/>
<port type="input" prefix="in" size="4"/>
<port type="output" prefix="out" size="1"/>
<port type="sram" prefix="sram" size="16"/>
</circuit_model>
<!--Scan-chain DFF subckt ports should be defined as <D> <Q> <Qb> <CLK> <RESET> <SET> -->
<circuit_model type="ccff" name="DFF" prefix="DFF" spice_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/spice/dff.sp" verilog_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/verilog/dff.v">
<design_technology type="cmos"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="INVTX1"/>
<port type="input" prefix="D" size="1"/>
<port type="output" prefix="Q" size="1"/>
<port type="output" prefix="QN" size="1"/>
<port type="clock" prefix="prog_clk" lib_name="CK" size="1" is_global="true" default_val="0" is_prog="true"/>
</circuit_model>
<circuit_model type="iopad" name="GPIO" prefix="GPIO" spice_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/spice/gpio.sp" verilog_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/verilog/gpio.v">
<design_technology type="cmos"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="INVTX1"/>
<port type="inout" prefix="PAD" size="1" is_global="true" is_io="true" is_data_io="true"/>
<port type="sram" prefix="DIR" size="1" mode_select="true" circuit_model_name="DFF" default_val="1"/>
<port type="input" prefix="outpad" lib_name="A" size="1"/>
<port type="output" prefix="inpad" lib_name="Y" size="1"/>
</circuit_model>
</circuit_library>
<configuration_protocol>
<organization type="scan_chain" circuit_model_name="DFF"/>
</configuration_protocol>
<connection_block>
<switch name="ipin_cblock" circuit_model_name="mux_tree_tapbuf"/>
</connection_block>
<switch_block>
<switch name="0" circuit_model_name="mux_tree_tapbuf"/>
</switch_block>
<routing_segment>
<segment name="L1" circuit_model_name="chan_segment"/>
<segment name="L4" circuit_model_name="chan_segment"/>
</routing_segment>
<tile_annotations>
<global_port name="clk" is_clock="true" clock_arch_tree_name="clk_tree_2lvl" default_val="0">
<tile name="clb" port="clk" x="-1" y="-1"/>
</global_port>
</tile_annotations>
<pb_type_annotations>
<!-- physical pb_type binding in complex block IO -->
<pb_type name="io" physical_mode_name="physical" idle_mode_name="inpad"/>
<pb_type name="io[physical].iopad" circuit_model_name="GPIO" mode_bits="1"/>
<pb_type name="io[inpad].inpad" physical_pb_type_name="io[physical].iopad" mode_bits="1"/>
<pb_type name="io[outpad].outpad" physical_pb_type_name="io[physical].iopad" mode_bits="0"/>
<!-- End physical pb_type binding in complex block IO -->
<!-- physical pb_type binding in complex block CLB -->
<!-- physical mode will be the default mode if not specified -->
<pb_type name="clb">
<!-- Binding interconnect to circuit models as their physical implementation, if not defined, we use the default model -->
<interconnect name="crossbar" circuit_model_name="mux_tree"/>
</pb_type>
<pb_type name="clb.fle[n1_lut4].ble4.lut4" circuit_model_name="lut4"/>
<pb_type name="clb.fle[n1_lut4].ble4.ff" circuit_model_name="DFFSRQ"/>
<!-- End physical pb_type binding in complex block CLB -->
</pb_type_annotations>
</openfpga_architecture>

View File

@ -0,0 +1,197 @@
<?xml version="1.0"?>
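<!-- Architecture annotation for OpenFPGA framework
This annotation supports the k6_N10_40nm.xml
- General purpose logic block
- K = 6, N = 10, I = 40
- Single mode
- Routing architecture
- L = 4, fc_in = 0.15, fc_out = 0.1
NOTE(review): the pb_type annotations in this file bind 4-input LUTs
(clb.fle[n1_lut4].ble4.lut4 to circuit model 'lut4'), so the K/N/I values
above look inherited from a k6_N10 template. Please confirm which VPR
architecture this file is paired with.
-->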
<openfpga_architecture>
<technology_library>
<device_library>
<device_model name="logic" type="transistor">
<lib type="industry" corner="TOP_TT" ref="M" path="${OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.pm"/>
<design vdd="0.9" pn_ratio="2"/>
<pmos name="pch" chan_length="40e-9" min_width="140e-9" variation="logic_transistor_var"/>
<nmos name="nch" chan_length="40e-9" min_width="140e-9" variation="logic_transistor_var"/>
</device_model>
<device_model name="io" type="transistor">
<lib type="academia" ref="M" path="${OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.pm"/>
<design vdd="2.5" pn_ratio="3"/>
<pmos name="pch_25" chan_length="270e-9" min_width="320e-9" variation="io_transistor_var"/>
<nmos name="nch_25" chan_length="270e-9" min_width="320e-9" variation="io_transistor_var"/>
</device_model>
</device_library>
<variation_library>
<variation name="logic_transistor_var" abs_deviation="0.1" num_sigma="3"/>
<variation name="io_transistor_var" abs_deviation="0.1" num_sigma="3"/>
</variation_library>
</technology_library>
<circuit_library>
<circuit_model type="inv_buf" name="INVTX1" prefix="INVTX1" is_default="true">
<design_technology type="cmos" topology="inverter" size="1"/>
<device_technology device_model_name="logic"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<delay_matrix type="rise" in_port="in" out_port="out">
10e-12
</delay_matrix>
<delay_matrix type="fall" in_port="in" out_port="out">
10e-12
</delay_matrix>
</circuit_model>
<circuit_model type="inv_buf" name="buf4" prefix="buf4" is_default="false">
<design_technology type="cmos" topology="buffer" size="1" num_level="2" f_per_stage="4"/>
<device_technology device_model_name="logic"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<delay_matrix type="rise" in_port="in" out_port="out">
10e-12
</delay_matrix>
<delay_matrix type="fall" in_port="in" out_port="out">
10e-12
</delay_matrix>
</circuit_model>
<circuit_model type="inv_buf" name="tap_buf4" prefix="tap_buf4" is_default="false">
<design_technology type="cmos" topology="buffer" size="1" num_level="3" f_per_stage="4"/>
<device_technology device_model_name="logic"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<delay_matrix type="rise" in_port="in" out_port="out">
10e-12
</delay_matrix>
<delay_matrix type="fall" in_port="in" out_port="out">
10e-12
</delay_matrix>
</circuit_model>
<circuit_model type="pass_gate" name="TGATE" prefix="TGATE" is_default="true">
<design_technology type="cmos" topology="transmission_gate" nmos_size="1" pmos_size="2"/>
<device_technology device_model_name="logic"/>
<input_buffer exist="false"/>
<output_buffer exist="false"/>
<port type="input" prefix="in" size="1"/>
<port type="input" prefix="sel" size="1"/>
<port type="input" prefix="selb" size="1"/>
<port type="output" prefix="out" size="1"/>
<delay_matrix type="rise" in_port="in sel selb" out_port="out">
10e-12 5e-12 5e-12
</delay_matrix>
<delay_matrix type="fall" in_port="in sel selb" out_port="out">
10e-12 5e-12 5e-12
</delay_matrix>
</circuit_model>
<circuit_model type="chan_wire" name="chan_segment" prefix="track_seg" is_default="true">
<design_technology type="cmos"/>
<input_buffer exist="false"/>
<output_buffer exist="false"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<wire_param model_type="pi" R="101" C="22.5e-15" num_level="1"/>
<!-- model_type could be T, res_val and cap_val DON'T CARE -->
</circuit_model>
<circuit_model type="wire" name="direct_interc" prefix="direct_interc" is_default="true">
<design_technology type="cmos"/>
<input_buffer exist="false"/>
<output_buffer exist="false"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<wire_param model_type="pi" R="0" C="0" num_level="1"/>
<!-- model_type could be T, res_val and cap_val should be defined -->
</circuit_model>
<circuit_model type="mux" name="mux_tree" prefix="mux_tree" dump_structural_verilog="true">
<design_technology type="cmos" structure="tree" add_const_input="true" const_input_val="1"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="INVTX1"/>
<pass_gate_logic circuit_model_name="TGATE"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<port type="sram" prefix="sram" size="1"/>
</circuit_model>
<circuit_model type="mux" name="mux_tree_tapbuf" prefix="mux_tree_tapbuf" is_default="true" dump_structural_verilog="true">
<design_technology type="cmos" structure="tree" add_const_input="true" const_input_val="1"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="tap_buf4"/>
<pass_gate_logic circuit_model_name="TGATE"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<port type="sram" prefix="sram" size="1"/>
</circuit_model>
<!--DFF subckt ports should be defined as <D> <Q> <CLK> <RESET> <SET> -->
<circuit_model type="ff" name="DFFSRQ" prefix="DFFSRQ" spice_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/spice/dff.sp" verilog_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/verilog/dff.v">
<design_technology type="cmos"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="INVTX1"/>
<port type="input" prefix="D" size="1"/>
<port type="input" prefix="set" lib_name="SET" size="1" is_global="true" default_val="0" is_set="true"/>
<port type="input" prefix="reset" lib_name="RST" size="1" is_global="true" default_val="0" is_reset="true"/>
<port type="output" prefix="Q" size="1"/>
<port type="clock" prefix="clk" lib_name="CK" size="1" is_global="false" default_val="0"/>
</circuit_model>
<circuit_model type="lut" name="lut4" prefix="lut4" dump_structural_verilog="true">
<design_technology type="cmos"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="INVTX1"/>
<lut_input_inverter exist="true" circuit_model_name="INVTX1"/>
<lut_input_buffer exist="true" circuit_model_name="buf4"/>
<pass_gate_logic circuit_model_name="TGATE"/>
<port type="input" prefix="in" size="4"/>
<port type="output" prefix="out" size="1"/>
<port type="sram" prefix="sram" size="16"/>
</circuit_model>
<!--Scan-chain DFF subckt ports should be defined as <D> <Q> <Qb> <CLK> <RESET> <SET> -->
<circuit_model type="ccff" name="DFF" prefix="DFF" spice_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/spice/dff.sp" verilog_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/verilog/dff.v">
<design_technology type="cmos"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="INVTX1"/>
<port type="input" prefix="D" size="1"/>
<port type="output" prefix="Q" size="1"/>
<port type="output" prefix="QN" size="1"/>
<port type="clock" prefix="prog_clk" lib_name="CK" size="1" is_global="true" default_val="0" is_prog="true"/>
</circuit_model>
<circuit_model type="iopad" name="GPIO" prefix="GPIO" spice_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/spice/gpio.sp" verilog_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/verilog/gpio.v">
<design_technology type="cmos"/>
<input_buffer exist="true" circuit_model_name="INVTX1"/>
<output_buffer exist="true" circuit_model_name="INVTX1"/>
<port type="inout" prefix="PAD" size="1" is_global="true" is_io="true" is_data_io="true"/>
<port type="sram" prefix="DIR" size="1" mode_select="true" circuit_model_name="DFF" default_val="1"/>
<port type="input" prefix="outpad" lib_name="A" size="1"/>
<port type="output" prefix="inpad" lib_name="Y" size="1"/>
</circuit_model>
</circuit_library>
<configuration_protocol>
<organization type="scan_chain" circuit_model_name="DFF"/>
</configuration_protocol>
<connection_block>
<switch name="ipin_cblock" circuit_model_name="mux_tree_tapbuf"/>
</connection_block>
<switch_block>
<switch name="0" circuit_model_name="mux_tree_tapbuf"/>
</switch_block>
<routing_segment>
<segment name="L1" circuit_model_name="chan_segment"/>
<segment name="L4" circuit_model_name="chan_segment"/>
</routing_segment>
<tile_annotations>
<global_port name="clk" is_clock="true" clock_arch_tree_name="clk_tree_2lvl" default_val="0">
<tile name="clb" port="clk[0:1]"/>
</global_port>
</tile_annotations>
<pb_type_annotations>
<!-- physical pb_type binding in complex block IO -->
<pb_type name="io" physical_mode_name="physical" idle_mode_name="inpad"/>
<pb_type name="io[physical].iopad" circuit_model_name="GPIO" mode_bits="1"/>
<pb_type name="io[inpad].inpad" physical_pb_type_name="io[physical].iopad" mode_bits="1"/>
<pb_type name="io[outpad].outpad" physical_pb_type_name="io[physical].iopad" mode_bits="0"/>
<!-- End physical pb_type binding in complex block IO -->
<!-- physical pb_type binding in complex block CLB -->
<!-- physical mode will be the default mode if not specified -->
<pb_type name="clb">
<!-- Binding interconnect to circuit models as their physical implementation, if not defined, we use the default model -->
<interconnect name="crossbar" circuit_model_name="mux_tree"/>
</pb_type>
<pb_type name="clb.fle[n1_lut4].ble4.lut4" circuit_model_name="lut4"/>
<pb_type name="clb.fle[n1_lut4].ble4.ff" circuit_model_name="DFFSRQ"/>
<!-- End physical pb_type binding in complex block CLB -->
</pb_type_annotations>
</openfpga_architecture>

View File

@ -0,0 +1,73 @@
# Run VPR for the 'and' design
#--write_rr_graph example_rr_graph.xml
vpr ${VPR_ARCH_FILE} ${VPR_TESTBENCH_BLIF} \
--clock_modeling ideal \
--device ${OPENFPGA_VPR_DEVICE_LAYOUT} \
--route_chan_width ${OPENFPGA_VPR_ROUTE_CHAN_WIDTH}
# Read OpenFPGA architecture definition
read_openfpga_arch -f ${OPENFPGA_ARCH_FILE}
# Read OpenFPGA simulation settings
read_openfpga_simulation_setting -f ${OPENFPGA_SIM_SETTING_FILE}
# Read OpenFPGA clock architecture
read_openfpga_clock_arch -f ${OPENFPGA_CLOCK_ARCH_FILE}
# Append clock network to vpr's routing resource graph
append_clock_rr_graph
# Annotate the OpenFPGA architecture to VPR data base
# to debug use --verbose options
link_openfpga_arch --activity_file ${ACTIVITY_FILE} --sort_gsb_chan_node_in_edges
# Route clock based on clock network definition
route_clock_rr_graph
# Check and correct any naming conflicts in the BLIF netlist
check_netlist_naming_conflict --fix --report ./netlist_renaming.xml
# Apply fix-up to Look-Up Table truth tables based on packing results
lut_truth_table_fixup
# Build the module graph
# - Enable compression on routing architecture modules
# - Pin duplication on grid modules is not requested here (no --duplicate_grid_pin option is passed)
build_fabric --compress_routing #--verbose
# Write the fabric hierarchy of module graph to a file
# This is used by hierarchical PnR flows
write_fabric_hierarchy --file ./fabric_hierarchy.txt
# Repack the netlist to physical pbs
# This must be done before bitstream generator and testbench generation
# It is strongly recommended to do this after all the fix-ups have been applied
repack #--verbose
# Build the bitstream
# - Output the fabric-independent bitstream to a file
build_architecture_bitstream --verbose --write_file fabric_independent_bitstream.xml
# Build fabric-dependent bitstream
build_fabric_bitstream --verbose
# Write fabric-dependent bitstream
write_fabric_bitstream --file fabric_bitstream.bit --format plain_text
# Write the Verilog netlist for FPGA fabric
# - Enable the use of explicit port mapping in Verilog netlist
write_fabric_verilog --file ./SRC --explicit_port_mapping --include_timing --print_user_defined_template --verbose
# Write the Verilog testbench for FPGA fabric
# - We suggest the use of same output directory as fabric Verilog netlists
# - Must specify the reference benchmark file if you want to output any testbenches
# - Enable top-level testbench which is a full verification including programming circuit and core logic of FPGA
# - Enable pre-configured top-level testbench which is a fast verification skipping programming phase
# - Simulation ini file is optional and is needed only when you need to interface different HDL simulators using openfpga flow-run scripts
write_full_testbench --file ./SRC --reference_benchmark_file_path ${REFERENCE_VERILOG_TESTBENCH} --explicit_port_mapping --include_signal_init --bitstream fabric_bitstream.bit
# Finish and exit OpenFPGA
exit
# Note :
# To run verification at the end of the flow maintain source in ./SRC directory

View File

@ -0,0 +1,75 @@
# OpenFPGA shell script template for designs using a programmable clock network.
# Flow: VPR -> read architecture/simulation/clock files -> append and route the clock network
#       -> build fabric -> repack -> bitstream -> Verilog netlists and testbenches
#
# Run VPR on the benchmark netlist
# - Ideal clock modeling is selected for VPR; routing on the programmable clock network
#   is done by route_clock_rr_graph further below
# - Optional: dump the routing resource graph with the option below
#--write_rr_graph example_rr_graph.xml
vpr ${VPR_ARCH_FILE} ${VPR_TESTBENCH_BLIF} \
--clock_modeling ideal \
--device ${OPENFPGA_VPR_DEVICE_LAYOUT} \
--route_chan_width ${OPENFPGA_VPR_ROUTE_CHAN_WIDTH}
# Read OpenFPGA architecture definition
read_openfpga_arch -f ${OPENFPGA_ARCH_FILE}
# Read OpenFPGA simulation settings
read_openfpga_simulation_setting -f ${OPENFPGA_SIM_SETTING_FILE}
# Read OpenFPGA clock architecture (definition of the programmable clock network)
read_openfpga_clock_arch -f ${OPENFPGA_CLOCK_ARCH_FILE}
# Append the clock network to VPR's routing resource graph
append_clock_rr_graph
# Annotate the OpenFPGA architecture to the VPR database
# To debug, add the --verbose option
link_openfpga_arch --activity_file ${ACTIVITY_FILE} --sort_gsb_chan_node_in_edges
# Route clocks based on the clock network definition
# - The pin constraints file is passed to this command
route_clock_rr_graph --pin_constraints_file ${OPENFPGA_PIN_CONSTRAINTS_FILE}
# Check and correct any naming conflicts in the BLIF netlist
# - The renaming report is written to ./netlist_renaming.xml
check_netlist_naming_conflict --fix --report ./netlist_renaming.xml
# Apply fix-up to Look-Up Table truth tables based on packing results
lut_truth_table_fixup
# Build the module graph
# - Compression on routing architecture modules is enabled (--compress_routing)
# - Pin duplication on grid modules is NOT enabled here (no such option is passed to build_fabric)
build_fabric --compress_routing #--verbose
# Write the fabric hierarchy of the module graph to a file
# This is used by hierarchical PnR flows
write_fabric_hierarchy --file ./fabric_hierarchy.txt
# Repack the netlist to physical pbs
# - A repack design constraints file is passed to this command
# This must be done before bitstream generation and testbench generation
# It is strongly recommended to do it after all the fix-ups have been applied
repack --design_constraints ${OPENFPGA_REPACK_CONSTRAINTS_FILE} #--verbose
# Build the bitstream
# - Output the fabric-independent bitstream to a file
build_architecture_bitstream --verbose --write_file fabric_independent_bitstream.xml
# Build fabric-dependent bitstream
build_fabric_bitstream --verbose
# Write fabric-dependent bitstream (plain-text format)
write_fabric_bitstream --file fabric_bitstream.bit --format plain_text
# Write the Verilog netlist for FPGA fabric
# - Enable the use of explicit port mapping in Verilog netlist
write_fabric_verilog --file ./SRC --explicit_port_mapping --include_timing --print_user_defined_template --verbose
# Write the Verilog testbenches for FPGA fabric
# - We suggest the use of same output directory as fabric Verilog netlists
# - Must specify the reference benchmark file if you want to output any testbenches
# - All three commands below are given the same pin constraints file
# - A simulation ini file is optional and is needed only when you need to interface different HDL simulators
#   using openfpga flow-run scripts (no simulation ini file is written by this script)
# Full testbench: a full verification including programming circuit and core logic of FPGA
write_full_testbench --file ./SRC --reference_benchmark_file_path ${REFERENCE_VERILOG_TESTBENCH} --explicit_port_mapping --include_signal_init --bitstream fabric_bitstream.bit --pin_constraints_file ${OPENFPGA_PIN_CONSTRAINTS_FILE}
# Pre-configured fabric wrapper and testbench: a fast verification skipping the programming phase
write_preconfigured_fabric_wrapper --embed_bitstream iverilog --file ./SRC --explicit_port_mapping --pin_constraints_file ${OPENFPGA_PIN_CONSTRAINTS_FILE}
write_preconfigured_testbench --file ./SRC --reference_benchmark_file_path ${REFERENCE_VERILOG_TESTBENCH} --explicit_port_mapping --pin_constraints_file ${OPENFPGA_PIN_CONSTRAINTS_FILE}
# Finish and exit OpenFPGA
exit
# Note :
# To run verification at the end of the flow, keep the generated sources in the ./SRC directory

View File

@ -163,6 +163,11 @@ run-task basic_tests/global_tile_ports/global_tile_reset $@
run-task basic_tests/global_tile_ports/global_tile_4clock $@
run-task basic_tests/global_tile_ports/global_tile_4clock_pin $@
echo -e "Testing programmable clock architecture";
run-task basic_tests/clock_network/homo_1clock_2layer $@
run-task basic_tests/clock_network/homo_1clock_2layer_full_tb $@
run-task basic_tests/clock_network/homo_2clock_2layer $@
echo -e "Testing configuration chain of a K4N4 FPGA using .blif generated by yosys+verific";
run-task basic_tests/verific_test $@

View File

@ -0,0 +1,17 @@
<!-- Programmable clock network with 1 clock and 2 levels.
     - Default routing segment is "L1" and default switch is "ipin_cblock".
     - One clock tree "clk_tree_2lvl" with width="1".
     - Level 0: spine "spine_lvl0" from (1,1) to (2,1), with two switch points at (1,1) and two at (2,1).
     - Level 1: each switch point taps a single-point CHANY spine:
       the "upper" ones are at y=2 in INC_DIRECTION, the "lower" ones are at y=1 in DEC_DIRECTION.
     - The tree taps tile pin clb[0:0].clk[0:0]. -->
<clock_networks default_segment="L1" default_switch="ipin_cblock">
<clock_network name="clk_tree_2lvl" width="1">
<spine name="spine_lvl0" start_x="1" start_y="1" end_x="2" end_y="1">
<switch_point tap="rib_lvl1_sw0_upper" x="1" y="1"/>
<switch_point tap="rib_lvl1_sw0_lower" x="1" y="1"/>
<switch_point tap="rib_lvl1_sw1_upper" x="2" y="1"/>
<switch_point tap="rib_lvl1_sw1_lower" x="2" y="1"/>
</spine>
<spine name="rib_lvl1_sw0_upper" start_x="1" start_y="2" end_x="1" end_y="2" type="CHANY" direction="INC_DIRECTION"/>
<spine name="rib_lvl1_sw0_lower" start_x="1" start_y="1" end_x="1" end_y="1" type="CHANY" direction="DEC_DIRECTION"/>
<spine name="rib_lvl1_sw1_upper" start_x="2" start_y="2" end_x="2" end_y="2" type="CHANY" direction="INC_DIRECTION"/>
<spine name="rib_lvl1_sw1_lower" start_x="2" start_y="1" end_x="2" end_y="1" type="CHANY" direction="DEC_DIRECTION"/>
<taps>
<tap tile_pin="clb[0:0].clk[0:0]"/>
</taps>
</clock_network>
</clock_networks>

View File

@ -0,0 +1,3 @@
<!-- A dummy pin constraint file, useful when no pin constraints are needed for a benchmark -->
<pin_constraints>
</pin_constraints>

View File

@ -0,0 +1,4 @@
<!-- A dummy repack design constraint file, useful when no repack design constraints are needed for a benchmark -->
<repack_design_constraints>
</repack_design_constraints>

View File

@ -0,0 +1,40 @@
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Configuration file for running experiments
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# timeout_each_job : The FPGA task script splits the FPGA flow into multiple jobs.
# Each job executes the fpga_flow script on one combination of architecture & benchmark.
# timeout_each_job is the timeout applied to each job.
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
[GENERAL]
run_engine=openfpga_shell
power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml
power_analysis = true
spice_output=false
verilog_output=true
timeout_each_job = 20*60
fpga_flow=yosys_vpr
[OpenFPGA_SHELL]
openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/example_clkntwk_script.openfpga
openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_N4_40nm_Ntwk1clk2lvl_cc_openfpga.xml
openfpga_clock_arch_file=${PATH:TASK_DIR}/config/clk_arch_1clk_2layer.xml
openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml
openfpga_vpr_device_layout=2x2
openfpga_vpr_route_chan_width=24
openfpga_repack_constraints_file=${PATH:TASK_DIR}/config/dummy_repack_constraints.xml
openfpga_pin_constraints_file=${PATH:TASK_DIR}/config/dummy_pin_constraints.xml
[ARCHITECTURES]
arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_N4_tileable_Ntwk1clk2lvl_40nm.xml
[BENCHMARKS]
bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2_latch/and2_latch.v
[SYNTHESIS_PARAM]
bench_read_verilog_options_common = -nolatches
bench0_top = and2_latch
[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH]
end_flow_with_test=
vpr_fpga_verilog_formal_verification_top_netlist=

View File

@ -0,0 +1,17 @@
<!-- Programmable clock network with 1 clock and 2 levels.
     - Default routing segment is "L1" and default switch is "ipin_cblock".
     - One clock tree "clk_tree_2lvl" with width="1".
     - Level 0: spine "spine_lvl0" from (1,1) to (2,1), with two switch points at (1,1) and two at (2,1).
     - Level 1: each switch point taps a single-point CHANY spine:
       the "upper" ones are at y=2 in INC_DIRECTION, the "lower" ones are at y=1 in DEC_DIRECTION.
     - The tree taps tile pin clb[0:0].clk[0:0]. -->
<clock_networks default_segment="L1" default_switch="ipin_cblock">
<clock_network name="clk_tree_2lvl" width="1">
<spine name="spine_lvl0" start_x="1" start_y="1" end_x="2" end_y="1">
<switch_point tap="rib_lvl1_sw0_upper" x="1" y="1"/>
<switch_point tap="rib_lvl1_sw0_lower" x="1" y="1"/>
<switch_point tap="rib_lvl1_sw1_upper" x="2" y="1"/>
<switch_point tap="rib_lvl1_sw1_lower" x="2" y="1"/>
</spine>
<spine name="rib_lvl1_sw0_upper" start_x="1" start_y="2" end_x="1" end_y="2" type="CHANY" direction="INC_DIRECTION"/>
<spine name="rib_lvl1_sw0_lower" start_x="1" start_y="1" end_x="1" end_y="1" type="CHANY" direction="DEC_DIRECTION"/>
<spine name="rib_lvl1_sw1_upper" start_x="2" start_y="2" end_x="2" end_y="2" type="CHANY" direction="INC_DIRECTION"/>
<spine name="rib_lvl1_sw1_lower" start_x="2" start_y="1" end_x="2" end_y="1" type="CHANY" direction="DEC_DIRECTION"/>
<taps>
<tap tile_pin="clb[0:0].clk[0:0]"/>
</taps>
</clock_network>
</clock_networks>

View File

@ -0,0 +1,37 @@
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Configuration file for running experiments
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# timeout_each_job : The FPGA task script splits the FPGA flow into multiple jobs.
# Each job executes the fpga_flow script on one combination of architecture & benchmark.
# timeout_each_job is the timeout applied to each job.
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
[GENERAL]
run_engine=openfpga_shell
power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml
power_analysis = true
spice_output=false
verilog_output=true
timeout_each_job = 20*60
fpga_flow=yosys_vpr
[OpenFPGA_SHELL]
openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/example_clkntwk_full_tb_script.openfpga
openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_N4_40nm_Ntwk1clk2lvl_cc_openfpga.xml
openfpga_clock_arch_file=${PATH:TASK_DIR}/config/clk_arch_1clk_2layer.xml
openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml
openfpga_vpr_device_layout=2x2
openfpga_vpr_route_chan_width=24
[ARCHITECTURES]
arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_N4_tileable_Ntwk1clk2lvl_40nm.xml
[BENCHMARKS]
bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2_latch/and2_latch.v
[SYNTHESIS_PARAM]
bench_read_verilog_options_common = -nolatches
bench0_top = and2_latch
[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH]
end_flow_with_test=

View File

@ -0,0 +1,18 @@
<!-- Programmable clock network with 2 clocks and 2 levels.
     - Default routing segment is "L1" and default switch is "ipin_cblock".
     - One clock tree "clk_tree_2lvl" with width="2".
     - Level 0: spine "spine_lvl0" from (1,1) to (2,1), with two switch points at (1,1) and two at (2,1).
     - Level 1: each switch point taps a single-point CHANY spine:
       the "upper" ones are at y=2 in INC_DIRECTION, the "lower" ones are at y=1 in DEC_DIRECTION.
     - The tree taps tile pins clb[0:0].clk[0:0] and clb[0:0].clk[1:1]. -->
<clock_networks default_segment="L1" default_switch="ipin_cblock">
<clock_network name="clk_tree_2lvl" width="2">
<spine name="spine_lvl0" start_x="1" start_y="1" end_x="2" end_y="1">
<switch_point tap="rib_lvl1_sw0_upper" x="1" y="1"/>
<switch_point tap="rib_lvl1_sw0_lower" x="1" y="1"/>
<switch_point tap="rib_lvl1_sw1_upper" x="2" y="1"/>
<switch_point tap="rib_lvl1_sw1_lower" x="2" y="1"/>
</spine>
<spine name="rib_lvl1_sw0_upper" start_x="1" start_y="2" end_x="1" end_y="2" type="CHANY" direction="INC_DIRECTION"/>
<spine name="rib_lvl1_sw0_lower" start_x="1" start_y="1" end_x="1" end_y="1" type="CHANY" direction="DEC_DIRECTION"/>
<spine name="rib_lvl1_sw1_upper" start_x="2" start_y="2" end_x="2" end_y="2" type="CHANY" direction="INC_DIRECTION"/>
<spine name="rib_lvl1_sw1_lower" start_x="2" start_y="1" end_x="2" end_y="1" type="CHANY" direction="DEC_DIRECTION"/>
<taps>
<tap tile_pin="clb[0:0].clk[0:0]"/>
<tap tile_pin="clb[0:0].clk[1:1]"/>
</taps>
</clock_network>
</clock_networks>

View File

@ -0,0 +1,4 @@
<!-- Pin constraints for the 2-clock test case:
     pin clk[0] is bound to net "clk", while pin clk[1] is marked as OPEN -->
<pin_constraints>
<set_io pin="clk[0]" net="clk"/>
<set_io pin="clk[1]" net="OPEN"/>
</pin_constraints>

View File

@ -0,0 +1,5 @@
<!-- Repack design constraints for the 2-clock test case:
     on pb_type "clb", pin clk[0:0] is bound to net "clk", while pin clk[1:1] is marked as OPEN -->
<repack_design_constraints>
<pin_constraint pb_type="clb" pin="clk[0:0]" net="clk"/>
<pin_constraint pb_type="clb" pin="clk[1:1]" net="OPEN"/>
</repack_design_constraints>

View File

@ -0,0 +1,40 @@
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Configuration file for running experiments
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# timeout_each_job : The FPGA task script splits the FPGA flow into multiple jobs.
# Each job executes the fpga_flow script on one combination of architecture & benchmark.
# timeout_each_job is the timeout applied to each job.
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
[GENERAL]
run_engine=openfpga_shell
power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml
power_analysis = true
spice_output=false
verilog_output=true
timeout_each_job = 20*60
fpga_flow=yosys_vpr
[OpenFPGA_SHELL]
openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/example_clkntwk_script.openfpga
openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_N4_40nm_Ntwk2clk2lvl_cc_openfpga.xml
openfpga_clock_arch_file=${PATH:TASK_DIR}/config/clk_arch_2clk_2layer.xml
openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml
openfpga_vpr_device_layout=2x2
openfpga_vpr_route_chan_width=24
openfpga_repack_constraints_file=${PATH:TASK_DIR}/config/repack_constraints.xml
openfpga_pin_constraints_file=${PATH:TASK_DIR}/config/pin_constraints.xml
[ARCHITECTURES]
arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_N4_tileable_Ntwk2clk2lvl_40nm.xml
[BENCHMARKS]
bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2_latch/and2_latch.v
[SYNTHESIS_PARAM]
bench_read_verilog_options_common = -nolatches
bench0_top = and2_latch
[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH]
end_flow_with_test=
vpr_fpga_verilog_formal_verification_top_netlist=

View File

@ -0,0 +1,338 @@
<?xml version="1.0"?>
<!--
Architecture with no fracturable LUTs
- 40 nm technology
- General purpose logic block:
K = 4, N = 4
- Routing architecture: L = 1 (freq 0.2) and L = 4 (freq 0.8), fc_in = 0.15, Fc_out = 0.1
Details on Modelling:
Based on flagship k6_frac_N10_mem32K_40nm.xml architecture. This architecture has no fracturable LUTs nor any heterogeneous blocks.
Authors: Jason Luu, Jeff Goeders, Vaughn Betz
-->
<architecture>
<!--
ODIN II specific config begins
Describes the types of user-specified netlist blocks (in blif, this corresponds to
".model [type_of_block]") that this architecture supports.
Note: Basic LUTs, I/Os, and flip-flops are not included here as there are
already special structures in blif (.names, .input, .output, and .latch)
that describe them.
-->
<models>
<!-- A virtual model for I/O to be used in the physical mode of io block -->
<model name="io">
<input_ports>
<port name="outpad"/>
</input_ports>
<output_ports>
<port name="inpad"/>
</output_ports>
</model>
</models>
<tiles>
<tile name="io" area="0">
<sub_tile name="io" capacity="8">
<equivalent_sites>
<site pb_type="io"/>
</equivalent_sites>
<input name="outpad" num_pins="1"/>
<output name="inpad" num_pins="1"/>
<fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
<pinlocations pattern="custom">
<loc side="left">io.outpad io.inpad</loc>
<loc side="top">io.outpad io.inpad</loc>
<loc side="right">io.outpad io.inpad</loc>
<loc side="bottom">io.outpad io.inpad</loc>
</pinlocations>
</sub_tile>
</tile>
<tile name="clb" area="53894">
<sub_tile name="clb">
<equivalent_sites>
<site pb_type="clb"/>
</equivalent_sites>
<input name="I" num_pins="10" equivalent="full"/>
<output name="O" num_pins="4" equivalent="none"/>
<clock name="clk" num_pins="1"/>
<fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
<fc_override port_name="clk" fc_type="frac" fc_val="0"/>
</fc>
<pinlocations pattern="custom">
<loc side="left"/>
<loc side="top">clb.I[0:4] clb.O[0:1]</loc>
<loc side="right">clb.I[5:9] clb.O[2:3] clb.clk</loc>
<loc side="bottom"/>
</pinlocations>
</sub_tile>
</tile>
</tiles>
<!-- ODIN II specific config ends -->
<!-- Physical descriptions begin -->
<layout tileable="true">
<auto_layout aspect_ratio="1.0">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</auto_layout>
<fixed_layout name="2x2" width="4" height="4">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</fixed_layout>
<fixed_layout name="4x4" width="6" height="6">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</fixed_layout>
<fixed_layout name="48x48" width="50" height="50">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</fixed_layout>
<fixed_layout name="72x72" width="74" height="74">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</fixed_layout>
<fixed_layout name="96x96" width="98" height="98">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</fixed_layout>
</layout>
<device>
<!-- VB & JL: Using Ian Kuon's transistor sizing and drive strength data for routing, at 40 nm. Ian used BPTM
models. We are modifying the delay values however, to include metal C and R, which allows more architecture
experimentation. We are also modifying the relative resistance of PMOS to be 1.8x that of NMOS
(vs. Ian's 3x) as 1.8x lines up with Jeff G's data from a 45 nm process (and is more typical of
45 nm in general). I'm upping the Rmin_nmos from Ian's just over 6k to nearly 9k, and dropping
RminW_pmos from 18k to 16k to hit this 1.8x ratio, while keeping the delays of buffers approximately
lined up with Stratix IV.
We are using Jeff G.'s capacitance data for 45 nm (in tech/ptm_45nm).
Jeff's tables list C in for transistors with widths in multiples of the minimum feature size (45 nm).
The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply drive strength sizes in this file
by 2.5x when looking up in Jeff's tables.
The delay values are lined up with Stratix IV, which has an architecture similar to this
proposed FPGA, and which is also 40 nm
C_ipin_cblock: input capacitance of a track buffer, which VPR assumes is a single-stage
4x minimum drive strength buffer. -->
<sizing R_minW_nmos="8926" R_minW_pmos="16067"/>
<!-- The grid_logic_tile_area below will be used for all blocks that do not explicitly set their own (non-routing)
area; set to 0 since we explicitly set the area of all blocks currently in this architecture file.
-->
<area grid_logic_tile_area="0"/>
<chan_width_distr>
<x distr="uniform" peak="1.000000"/>
<y distr="uniform" peak="1.000000"/>
</chan_width_distr>
<switch_block type="wilton" fs="3"/>
<connection_block input_switch_name="ipin_cblock"/>
</device>
<switchlist>
<!-- VB: the mux_trans_size and buf_size data below is in minimum width transistor *areas*, assuming the purple
book area formula. This means the mux transistors are about 5x minimum drive strength.
We assume the first stage of the buffer is 3x min drive strength to be reasonable given the large
mux transistors, and this gives a reasonable stage ratio of a bit over 5x to the second stage. We assume
the n and p transistors in the first stage are equal-sized to lower the buffer trip point, since it's fed
by a pass transistor mux. We can then reverse engineer the buffer second stage to hit the specified
buf_size (really buffer area) - 16.2x minimum drive nmos and 1.8*16.2 = 29.2x minimum drive pmos.
I then took the data from Jeff G.'s PTM modeling of 45 nm to get the Cin (gate of first stage) and Cout
(diff of second stage) listed below. Jeff's models are in tech/ptm_45nm, and are in min feature multiples.
The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply the drive strength sizes above by
2.5x when looking up in Jeff's tables.
Finally, we choose a switch delay (58 ps) that leads to length 4 wires having a delay equal to that of SIV of 126 ps.
This also leads to the switch being 46% of the total wire delay, which is reasonable. -->
<switch type="mux" name="0" R="551" Cin=".77e-15" Cout="4e-15" Tdel="58e-12" mux_trans_size="2.630740" buf_size="27.645901"/>
<!--switch ipin_cblock resistance set to yield for 4x minimum drive strength buffer-->
<switch type="mux" name="ipin_cblock" R="2231.5" Cout="0." Cin="1.47e-15" Tdel="7.247000e-11" mux_trans_size="1.222260" buf_size="auto"/>
</switchlist>
<segmentlist>
<!--- VB & JL: using ITRS metal stack data, 96 nm half pitch wires, which are intermediate metal width/space.
With the 96 nm half pitch, such wires would take 60 um of height, vs. a 90 um high (approximated as square) Stratix IV tile so this seems
reasonable. Using a tile length of 90 um, corresponding to the length of a Stratix IV tile if it were square. -->
<segment name="L1" freq="0.2" length="1" type="unidir" Rmetal="101" Cmetal="22.5e-15">
<mux name="0"/>
<sb type="pattern">1 1</sb>
<cb type="pattern">1</cb>
</segment>
<segment name="L4" freq="0.8" length="4" type="unidir" Rmetal="101" Cmetal="22.5e-15">
<mux name="0"/>
<sb type="pattern">1 1 1 1 1</sb>
<cb type="pattern">1 1 1 1</cb>
</segment>
</segmentlist>
<complexblocklist>
<!-- Define I/O pads begin -->
<!-- Capacity is a unique property of I/Os, it is the maximum number of I/Os that can be placed at the same (X,Y) location on the FPGA -->
<!-- Not sure of the area of an I/O (varies widely), and it's not relevant to the design of the FPGA core, so we're setting it to 0. -->
<pb_type name="io">
<input name="outpad" num_pins="1"/>
<output name="inpad" num_pins="1"/>
<!-- A mode denotes the physical implementation of an I/O
This mode will be not packable but is mainly used for fabric verilog generation
-->
<mode name="physical" disable_packing="true">
<pb_type name="iopad" blif_model=".subckt io" num_pb="1">
<input name="outpad" num_pins="1"/>
<output name="inpad" num_pins="1"/>
</pb_type>
<interconnect>
<direct name="outpad" input="io.outpad" output="iopad.outpad">
<delay_constant max="1.394e-11" in_port="io.outpad" out_port="iopad.outpad"/>
</direct>
<direct name="inpad" input="iopad.inpad" output="io.inpad">
<delay_constant max="4.243e-11" in_port="iopad.inpad" out_port="io.inpad"/>
</direct>
</interconnect>
</mode>
<!-- IOs can operate as either inputs or outputs.
Delays below come from Ian Kuon. They are small, so they should be interpreted as
the delays to and from registers in the I/O (and generally I/Os are registered
today and that is when you timing analyze them).
-->
<mode name="inpad">
<pb_type name="inpad" blif_model=".input" num_pb="1">
<output name="inpad" num_pins="1"/>
</pb_type>
<interconnect>
<direct name="inpad" input="inpad.inpad" output="io.inpad">
<delay_constant max="4.243e-11" in_port="inpad.inpad" out_port="io.inpad"/>
</direct>
</interconnect>
</mode>
<mode name="outpad">
<pb_type name="outpad" blif_model=".output" num_pb="1">
<input name="outpad" num_pins="1"/>
</pb_type>
<interconnect>
<direct name="outpad" input="io.outpad" output="outpad.outpad">
<delay_constant max="1.394e-11" in_port="io.outpad" out_port="outpad.outpad"/>
</direct>
</interconnect>
</mode>
<!-- Every input pin is driven by 15% of the tracks in a channel, every output pin is driven by 10% of the tracks in a channel -->
<!-- IOs go on the periphery of the FPGA, for consistency,
make it physically equivalent on all sides so that only one definition of I/Os is needed.
If I do not make a physically equivalent definition, then I need to define 4 different I/Os, one for each side of the FPGA
-->
<!-- Place I/Os on the sides of the FPGA -->
<power method="ignore"/>
</pb_type>
<!-- Define I/O pads ends -->
<!-- Define general purpose logic block (CLB) begin -->
<!--- Area calculation: Total Stratix IV tile area is about 8100 um^2, and a minimum width transistor
area is 60 L^2 yields a tile area of 84375 MWTAs.
Routing at W=300 is 30481 MWTAs, leaving us with a total of 53000 MWTAs for logic block area
This means that only 37% of our area is in the general routing, and 63% is inside the logic
block. Note that the crossbar / local interconnect is considered part of the logic block
area in this analysis. That is a lower proportion of routing area than most academics
assume, but note that the total routing area really includes the crossbar, which would push
routing area up significantly, we estimate into the ~70% range.
-->
<pb_type name="clb">
<input name="I" num_pins="10" equivalent="full"/>
<output name="O" num_pins="4" equivalent="none"/>
<clock name="clk" num_pins="1"/>
<!-- Describe basic logic element.
Each basic logic element has a 4-LUT that can be optionally registered
-->
<pb_type name="fle" num_pb="4">
<input name="in" num_pins="4"/>
<output name="out" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<!-- 4-LUT mode definition begin -->
<mode name="n1_lut4">
<!-- Define 4-LUT mode -->
<pb_type name="ble4" num_pb="1">
<input name="in" num_pins="4"/>
<output name="out" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<!-- Define LUT -->
<pb_type name="lut4" blif_model=".names" num_pb="1" class="lut">
<input name="in" num_pins="4" port_class="lut_in"/>
<output name="out" num_pins="1" port_class="lut_out"/>
<!-- LUT timing using delay matrix -->
<delay_matrix type="max" in_port="lut4.in" out_port="lut4.out">
261e-12
261e-12
261e-12
261e-12
</delay_matrix>
</pb_type>
<!-- Define flip-flop -->
<pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
<input name="D" num_pins="1" port_class="D"/>
<output name="Q" num_pins="1" port_class="Q"/>
<clock name="clk" num_pins="1" port_class="clock"/>
<T_setup value="66e-12" port="ff.D" clock="clk"/>
<T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
</pb_type>
<interconnect>
<direct name="direct1" input="ble4.in" output="lut4[0:0].in"/>
<direct name="direct2" input="lut4.out" output="ff.D">
<!-- Advanced user option that tells CAD tool to find LUT+FF pairs in netlist -->
<pack_pattern name="ble4" in_port="lut4.out" out_port="ff.D"/>
</direct>
<direct name="direct3" input="ble4.clk" output="ff.clk"/>
<mux name="mux1" input="ff.Q lut4.out" output="ble4.out">
<!-- LUT to output is faster than FF to output on a Stratix IV -->
<delay_constant max="25e-12" in_port="lut4.out" out_port="ble4.out"/>
<delay_constant max="45e-12" in_port="ff.Q" out_port="ble4.out"/>
</mux>
</interconnect>
</pb_type>
<interconnect>
<direct name="direct1" input="fle.in" output="ble4.in"/>
<direct name="direct2" input="ble4.out" output="fle.out[0:0]"/>
<direct name="direct3" input="fle.clk" output="ble4.clk"/>
</interconnect>
</mode>
<!-- 4-LUT mode definition end -->
</pb_type>
<interconnect>
<!-- We use a full crossbar to get logical equivalence at inputs of CLB
The delays below come from Stratix IV. the delay through a connection block
input mux + the crossbar in Stratix IV is 167 ps. We already have a 72 ps
delay on the connection block input mux (modeled by Ian Kuon), so the remaining
delay within the crossbar is 95 ps.
The delays of cluster feedbacks in Stratix IV is 100 ps, when driven by a LUT.
Since all our LUT outputs go to a BLE output, and have a delay of
25 ps to do so, we subtract 25 ps from the 100 ps delay of a feedback
to get the part that should be marked on the crossbar. -->
<complete name="crossbar" input="clb.I fle[3:0].out" output="fle[3:0].in">
<delay_constant max="95e-12" in_port="clb.I" out_port="fle[3:0].in"/>
<delay_constant max="75e-12" in_port="fle[3:0].out" out_port="fle[3:0].in"/>
</complete>
<complete name="clks" input="clb.clk" output="fle[3:0].clk">
</complete>
<!-- This way of specifying direct connection to clb outputs is important because this architecture uses automatic spreading of opins.
By grouping to output pins in this fashion, if a logic block is completely filled by 6-LUTs,
then the outputs those 6-LUTs take get evenly distributed across all four sides of the CLB instead of clumped on two sides (which is what happens with a more
naive specification).
-->
<direct name="clbouts1" input="fle[3:0].out" output="clb.O"/>
</interconnect>
<!-- Every input pin is driven by 15% of the tracks in a channel, every output pin is driven by 10% of the tracks in a channel -->
<!-- Place this general purpose logic block in any unspecified column -->
</pb_type>
<!-- Define general purpose logic block (CLB) ends -->
</complexblocklist>
</architecture>

View File

@ -0,0 +1,338 @@
<?xml version="1.0"?>
<!--
Architecture with no fracturable LUTs
- 40 nm technology
- General purpose logic block:
K = 4, N = 4
- Routing architecture: L = 1 (freq 0.2) and L = 4 (freq 0.8), Fc_in = 0.15, Fc_out = 0.1
Details on Modelling:
Based on flagship k6_frac_N10_mem32K_40nm.xml architecture. This architecture has no fracturable LUTs nor any heterogeneous blocks.
Authors: Jason Luu, Jeff Goeders, Vaughn Betz
-->
<architecture>
<!--
ODIN II specific config begins
Describes the types of user-specified netlist blocks (in blif, this corresponds to
".model [type_of_block]") that this architecture supports.
Note: Basic LUTs, I/Os, and flip-flops are not included here as there are
already special structures in blif (.names, .input, .output, and .latch)
that describe them.
-->
<models>
<!-- User-defined netlist models supported by this architecture. Only one is declared here:
the virtual "io" model, which is referenced by the 'iopad' pb_type through blif_model=".subckt io". -->
<!-- A virtual model for I/O to be used in the physical mode of io block -->
<model name="io">
<!-- The model input is named 'outpad' and the model output is named 'inpad',
matching the port names of the 'iopad' pb_type. -->
<input_ports>
<port name="outpad"/>
</input_ports>
<output_ports>
<port name="inpad"/>
</output_ports>
</model>
</models>
<tiles>
<!-- Physical tile types. Each tile names the pb_type it can host (equivalent_sites), declares its pins,
sets the connection block fractions (fc) and assigns every pin to a side of the tile. -->
<tile name="io" area="0">
<!-- capacity="8": up to 8 I/Os can be placed at the same grid location -->
<sub_tile name="io" capacity="8">
<equivalent_sites>
<site pb_type="io"/>
</equivalent_sites>
<input name="outpad" num_pins="1"/>
<output name="inpad" num_pins="1"/>
<fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
<!-- Both I/O pins are available on all four sides, so the same io tile can be used on any edge of the device -->
<pinlocations pattern="custom">
<loc side="left">io.outpad io.inpad</loc>
<loc side="top">io.outpad io.inpad</loc>
<loc side="right">io.outpad io.inpad</loc>
<loc side="bottom">io.outpad io.inpad</loc>
</pinlocations>
</sub_tile>
</tile>
<tile name="clb" area="53894">
<sub_tile name="clb">
<equivalent_sites>
<site pb_type="clb"/>
</equivalent_sites>
<!-- Pin counts mirror the 'clb' pb_type: 10 inputs, 4 outputs and 2 clock pins -->
<input name="I" num_pins="10" equivalent="full"/>
<output name="O" num_pins="4" equivalent="none"/>
<clock name="clk" num_pins="2"/>
<!-- The clk port overrides the default fractions with fc_val="0".
NOTE(review): presumably the clock pins are reached through the programmable clock network
rather than the regular routing tracks; confirm against the clock network definition. -->
<fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
<fc_override port_name="clk" fc_type="frac" fc_val="0"/>
</fc>
<!-- All CLB pins sit on the top and right sides; the left and bottom sides carry no pins -->
<pinlocations pattern="custom">
<loc side="left"/>
<loc side="top">clb.I[0:4] clb.O[0:1]</loc>
<loc side="right">clb.I[5:9] clb.O[2:3] clb.clk</loc>
<loc side="bottom"/>
</pinlocations>
</sub_tile>
</tile>
</tiles>
<!-- ODIN II specific config ends -->
<!-- Physical descriptions begin -->
<layout tileable="true">
<!-- Device grids: one auto-sized layout plus five fixed layouts. Every grid uses the same recipe:
'io' on the perimeter (priority 100), 'EMPTY' at the corners (priority 101) and 'clb' as fill (priority 10).
Each fixed layout is named after its CLB core size, so its width and height are the core size plus 2
for the surrounding io ring (e.g. "2x2" has width="4" and height="4"). -->
<auto_layout aspect_ratio="1.0">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</auto_layout>
<fixed_layout name="2x2" width="4" height="4">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</fixed_layout>
<fixed_layout name="4x4" width="6" height="6">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</fixed_layout>
<fixed_layout name="48x48" width="50" height="50">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</fixed_layout>
<fixed_layout name="72x72" width="74" height="74">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</fixed_layout>
<fixed_layout name="96x96" width="98" height="98">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</fixed_layout>
</layout>
<device>
<!-- Device-wide parameters: transistor sizing, default logic tile area, channel width distribution,
switch block style and the switch used for connection block inputs. -->
<!-- VB & JL: Using Ian Kuon's transistor sizing and drive strength data for routing, at 40 nm. Ian used BPTM
models. We are modifying the delay values however, to include metal C and R, which allows more architecture
experimentation. We are also modifying the relative resistance of PMOS to be 1.8x that of NMOS
(vs. Ian's 3x) as 1.8x lines up with Jeff G's data from a 45 nm process (and is more typical of
45 nm in general). I'm upping the Rmin_nmos from Ian's just over 6k to nearly 9k, and dropping
RminW_pmos from 18k to 16k to hit this 1.8x ratio, while keeping the delays of buffers approximately
lined up with Stratix IV.
We are using Jeff G.'s capacitance data for 45 nm (in tech/ptm_45nm).
Jeff's tables list C in for transistors with widths in multiples of the minimum feature size (45 nm).
The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply drive strength sizes in this file
by 2.5x when looking up in Jeff's tables.
The delay values are lined up with Stratix IV, which has an architecture similar to this
proposed FPGA, and which is also 40 nm
C_ipin_cblock: input capacitance of a track buffer, which VPR assumes is a single-stage
4x minimum drive strength buffer. -->
<!-- 16067 / 8926 is the 1.8x PMOS to NMOS resistance ratio described above -->
<sizing R_minW_nmos="8926" R_minW_pmos="16067"/>
<!-- The grid_logic_tile_area below will be used for all blocks that do not explicitly set their own (non-routing)
area; set to 0 since we explicitly set the area of all blocks currently in this architecture file.
-->
<area grid_logic_tile_area="0"/>
<!-- Uniform channel width distribution in both the x and y directions -->
<chan_width_distr>
<x distr="uniform" peak="1.000000"/>
<y distr="uniform" peak="1.000000"/>
</chan_width_distr>
<switch_block type="wilton" fs="3"/>
<!-- Connection block inputs use the "ipin_cblock" switch defined in the switchlist -->
<connection_block input_switch_name="ipin_cblock"/>
</device>
<switchlist>
<!-- Two switches are defined: "0" is the routing mux named by both segments in the segmentlist, and
"ipin_cblock" is the connection block input switch named by the device connection_block element. -->
<!-- VB: the mux_trans_size and buf_size data below is in minimum width transistor *areas*, assuming the purple
book area formula. This means the mux transistors are about 5x minimum drive strength.
We assume the first stage of the buffer is 3x min drive strength to be reasonable given the large
mux transistors, and this gives a reasonable stage ratio of a bit over 5x to the second stage. We assume
the n and p transistors in the first stage are equal-sized to lower the buffer trip point, since it's fed
by a pass transistor mux. We can then reverse engineer the buffer second stage to hit the specified
buf_size (really buffer area) - 16.2x minimum drive nmos and 1.8*16.2 = 29.2x minimum drive pmos.
I then took the data from Jeff G.'s PTM modeling of 45 nm to get the Cin (gate of first stage) and Cout
(diff of second stage) listed below. Jeff's models are in tech/ptm_45nm, and are in min feature multiples.
The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply the drive strength sizes above by
2.5x when looking up in Jeff's tables.
Finally, we choose a switch delay (58 ps) that leads to length 4 wires having a delay equal to that of SIV of 126 ps.
This also leads to the switch being 46% of the total wire delay, which is reasonable. -->
<switch type="mux" name="0" R="551" Cin=".77e-15" Cout="4e-15" Tdel="58e-12" mux_trans_size="2.630740" buf_size="27.645901"/>
<!-- Switch ipin_cblock: resistance set to yield a 4x minimum drive strength buffer -->
<switch type="mux" name="ipin_cblock" R="2231.5" Cout="0." Cin="1.47e-15" Tdel="7.247000e-11" mux_trans_size="1.222260" buf_size="auto"/>
</switchlist>
<segmentlist>
<!-- Two unidirectional wire types share the same metal R/C and are both driven by switch "0":
L1 (length 1, freq 0.2) and L4 (length 4, freq 0.8). Every entry of the sb and cb patterns is 1. -->
<!--- VB & JL: using ITRS metal stack data, 96 nm half pitch wires, which are intermediate metal width/space.
With the 96 nm half pitch, such wires would take 60 um of height, vs. a 90 um high (approximated as square) Stratix IV tile so this seems
reasonable. Using a tile length of 90 um, corresponding to the length of a Stratix IV tile if it were square. -->
<segment name="L1" freq="0.2" length="1" type="unidir" Rmetal="101" Cmetal="22.5e-15">
<mux name="0"/>
<sb type="pattern">1 1</sb>
<cb type="pattern">1</cb>
</segment>
<segment name="L4" freq="0.8" length="4" type="unidir" Rmetal="101" Cmetal="22.5e-15">
<mux name="0"/>
<sb type="pattern">1 1 1 1 1</sb>
<cb type="pattern">1 1 1 1</cb>
</segment>
</segmentlist>
<complexblocklist>
<!-- Define I/O pads begin -->
<!-- Capacity is a unique property of I/Os, it is the maximum number of I/Os that can be placed at the same (X,Y) location on the FPGA -->
<!-- Not sure of the area of an I/O (varies widely), and it's not relevant to the design of the FPGA core, so we're setting it to 0. -->
<pb_type name="io">
<!-- I/O block with one 'outpad' input and one 'inpad' output. It has three modes:
'physical' (packing disabled, built on the virtual 'io' model), 'inpad' (.input primitive)
and 'outpad' (.output primitive). -->
<input name="outpad" num_pins="1"/>
<output name="inpad" num_pins="1"/>
<!-- This mode denotes the physical implementation of an I/O.
It is not packable (disable_packing is set) and is mainly used for fabric Verilog generation.
-->
<mode name="physical" disable_packing="true">
<pb_type name="iopad" blif_model=".subckt io" num_pb="1">
<input name="outpad" num_pins="1"/>
<output name="inpad" num_pins="1"/>
</pb_type>
<!-- Same delay values as the 'outpad' and 'inpad' operating modes below -->
<interconnect>
<direct name="outpad" input="io.outpad" output="iopad.outpad">
<delay_constant max="1.394e-11" in_port="io.outpad" out_port="iopad.outpad"/>
</direct>
<direct name="inpad" input="iopad.inpad" output="io.inpad">
<delay_constant max="4.243e-11" in_port="iopad.inpad" out_port="io.inpad"/>
</direct>
</interconnect>
</mode>
<!-- IOs can operate as either inputs or outputs.
Delays below come from Ian Kuon. They are small, so they should be interpreted as
the delays to and from registers in the I/O (and generally I/Os are registered
today and that is when you timing analyze them).
-->
<mode name="inpad">
<pb_type name="inpad" blif_model=".input" num_pb="1">
<output name="inpad" num_pins="1"/>
</pb_type>
<interconnect>
<direct name="inpad" input="inpad.inpad" output="io.inpad">
<delay_constant max="4.243e-11" in_port="inpad.inpad" out_port="io.inpad"/>
</direct>
</interconnect>
</mode>
<mode name="outpad">
<pb_type name="outpad" blif_model=".output" num_pb="1">
<input name="outpad" num_pins="1"/>
</pb_type>
<interconnect>
<direct name="outpad" input="io.outpad" output="outpad.outpad">
<delay_constant max="1.394e-11" in_port="io.outpad" out_port="outpad.outpad"/>
</direct>
</interconnect>
</mode>
<!-- Every input pin is driven by 15% of the tracks in a channel, every output pin is driven by 10% of the tracks in a channel -->
<!-- IOs go on the periphery of the FPGA, for consistency,
make it physically equivalent on all sides so that only one definition of I/Os is needed.
If I do not make a physically equivalent definition, then I need to define 4 different I/Os, one for each side of the FPGA
-->
<!-- Place I/Os on the sides of the FPGA -->
<power method="ignore"/>
</pb_type>
<!-- Define I/O pads ends -->
<!-- Define general purpose logic block (CLB) begin -->
<!--- Area calculation: Total Stratix IV tile area is about 8100 um^2, and a minimum width transistor
area is 60 L^2 yields a tile area of 84375 MWTAs.
Routing at W=300 is 30481 MWTAs, leaving us with a total of 53000 MWTAs for logic block area
This means that only 37% of our area is in the general routing, and 63% is inside the logic
block. Note that the crossbar / local interconnect is considered part of the logic block
area in this analysis. That is a lower proportion of routing area than most academics
assume, but note that the total routing area really includes the crossbar, which would push
routing area up significantly, we estimate into the ~70% range.
-->
<pb_type name="clb">
<!-- Logic cluster: 4 'fle' elements sharing 10 inputs, 4 outputs and 2 clock pins -->
<input name="I" num_pins="10" equivalent="full"/>
<output name="O" num_pins="4" equivalent="none"/>
<clock name="clk" num_pins="2"/>
<!-- Describe basic logic element.
Each basic logic element has a 4-LUT that can be optionally registered
-->
<pb_type name="fle" num_pb="4">
<input name="in" num_pins="4"/>
<output name="out" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<!-- 4-LUT mode definition begin -->
<mode name="n1_lut4">
<!-- Define 4-LUT mode -->
<pb_type name="ble4" num_pb="1">
<input name="in" num_pins="4"/>
<output name="out" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<!-- Define LUT -->
<pb_type name="lut4" blif_model=".names" num_pb="1" class="lut">
<input name="in" num_pins="4" port_class="lut_in"/>
<output name="out" num_pins="1" port_class="lut_out"/>
<!-- LUT timing using delay matrix: one entry per LUT input, all 261 ps -->
<delay_matrix type="max" in_port="lut4.in" out_port="lut4.out">
261e-12
261e-12
261e-12
261e-12
</delay_matrix>
</pb_type>
<!-- Define flip-flop -->
<pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
<input name="D" num_pins="1" port_class="D"/>
<output name="Q" num_pins="1" port_class="Q"/>
<clock name="clk" num_pins="1" port_class="clock"/>
<T_setup value="66e-12" port="ff.D" clock="clk"/>
<T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
</pb_type>
<interconnect>
<direct name="direct1" input="ble4.in" output="lut4[0:0].in"/>
<direct name="direct2" input="lut4.out" output="ff.D">
<!-- Advanced user option that tells CAD tool to find LUT+FF pairs in netlist -->
<pack_pattern name="ble4" in_port="lut4.out" out_port="ff.D"/>
</direct>
<direct name="direct3" input="ble4.clk" output="ff.clk"/>
<!-- Output mux selects between the registered (ff.Q) and combinational (lut4.out) result -->
<mux name="mux1" input="ff.Q lut4.out" output="ble4.out">
<!-- LUT to output is faster than FF to output on a Stratix IV -->
<delay_constant max="25e-12" in_port="lut4.out" out_port="ble4.out"/>
<delay_constant max="45e-12" in_port="ff.Q" out_port="ble4.out"/>
</mux>
</interconnect>
</pb_type>
<interconnect>
<direct name="direct1" input="fle.in" output="ble4.in"/>
<direct name="direct2" input="ble4.out" output="fle.out[0:0]"/>
<direct name="direct3" input="fle.clk" output="ble4.clk"/>
</interconnect>
</mode>
<!-- 4-LUT mode definition end -->
</pb_type>
<interconnect>
<!-- We use a full crossbar to get logical equivalence at inputs of CLB
The delays below come from Stratix IV. the delay through a connection block
input mux + the crossbar in Stratix IV is 167 ps. We already have a 72 ps
delay on the connection block input mux (modeled by Ian Kuon), so the remaining
delay within the crossbar is 95 ps.
The delays of cluster feedbacks in Stratix IV is 100 ps, when driven by a LUT.
Since all our LUT outputs go to a BLE output, and have a delay of
25 ps to do so, we subtract 25 ps from the 100 ps delay of a feedback
to get the part that should be marked on the crossbar. -->
<complete name="crossbar" input="clb.I fle[3:0].out" output="fle[3:0].in">
<delay_constant max="95e-12" in_port="clb.I" out_port="fle[3:0].in"/>
<delay_constant max="75e-12" in_port="fle[3:0].out" out_port="fle[3:0].in"/>
</complete>
<!-- Complete interconnect from the 2 CLB clock pins to the clock pin of each of the 4 fle; no delay is annotated -->
<complete name="clks" input="clb.clk" output="fle[3:0].clk">
</complete>
<!-- This way of specifying direct connection to clb outputs is important because this architecture uses automatic spreading of opins.
By grouping the output pins in this fashion, if a logic block is completely filled by 4-LUTs,
then the outputs those 4-LUTs take get evenly distributed across all four sides of the CLB instead of clumped on two sides (which is what happens with a more
naive specification).
NOTE(review): the clb tile in this file uses pinlocations pattern="custom" with pins on the top and right sides only,
so confirm that the remark about automatic spreading still applies here.
-->
<direct name="clbouts1" input="fle[3:0].out" output="clb.O"/>
</interconnect>
<!-- Every input pin is driven by 15% of the tracks in a channel, every output pin is driven by 10% of the tracks in a channel -->
<!-- Place this general purpose logic block in any unspecified column -->
</pb_type>
<!-- Define general purpose logic block (CLB) ends -->
</complexblocklist>
</architecture>