adapt top function of tileable rr_graph builder

This commit is contained in:
tangxifan 2020-03-06 15:24:26 -07:00
parent 441a307100
commit 3eb59d201f
8 changed files with 413 additions and 49 deletions

View File

@ -583,6 +583,12 @@ class RRGraph {
/* Validate is the edge id does exist in the RRGraph */
bool valid_edge_id(const RREdgeId& edge) const;
/* Validate switch list */
bool valid_switch_id(const RRSwitchId& switch_id) const;
/* Validate segment list */
bool valid_segment_id(const RRSegmentId& segment_id) const;
public: /* Mutators */
/* Reserve the lists of nodes, edges, switches etc. to be memory efficient.
* This function is mainly used to reserve memory space inside RRGraph,
@ -835,12 +841,6 @@ class RRGraph {
bool validate_edge_src_nodes() const;
bool validate_edge_sink_nodes() const;
/* Validate switch list */
bool valid_switch_id(const RRSwitchId& switch_id) const;
/* Validate segment list */
bool valid_segment_id(const RRSegmentId& segment_id) const;
private: /* Internal Data */
/* Node related data */
size_t num_nodes_; /* Range of node ids */

View File

@ -1,6 +1,7 @@
#ifndef CHECK_RR_GRAPH_H
#define CHECK_RR_GRAPH_H
#include "physical_types.h"
#include "vpr_context.h"
void check_rr_graph(const t_graph_type graph_type,
const DeviceGrid& grid,

View File

@ -236,30 +236,12 @@ void uniquify_edges(t_rr_edge_info_set& rr_edges_to_create);
void alloc_and_load_edges(RRGraph& rr_graph,
const t_rr_edge_info_set& rr_edges_to_create);
static void alloc_and_load_rr_switch_inf(const int num_arch_switches,
const float R_minW_nmos,
const float R_minW_pmos,
const int wire_to_arch_ipin_switch,
int* wire_to_rr_ipin_switch);
static
t_rr_switch_inf create_rr_switch_from_arch_switch(int arch_switch_idx,
const float R_minW_nmos,
const float R_minW_pmos);
static void remap_rr_node_switch_indices(const t_arch_switch_fanin& switch_fanin);
static void load_rr_switch_inf(const int num_arch_switches, const float R_minW_nmos, const float R_minW_pmos, const t_arch_switch_fanin& switch_fanin);
static void alloc_rr_switch_inf(t_arch_switch_fanin& switch_fanin);
static void rr_graph_externals(const std::vector<t_segment_inf>& segment_inf,
int max_chan_width,
int wire_to_rr_ipin_switch,
enum e_base_cost_type base_cost_type);
static t_clb_to_clb_directs* alloc_and_load_clb_to_clb_directs(const t_direct_inf* directs, const int num_directs, const int delayless_switch);
static void free_type_track_to_pin_map(t_track_to_pin_lookup& track_to_pin_map,
const std::vector<t_physical_tile_type>& types,
int max_chan_width);
@ -267,15 +249,6 @@ static void free_type_track_to_pin_map(t_track_to_pin_lookup& track_to_pin_map,
static t_seg_details* alloc_and_load_global_route_seg_details(const int global_route_switch,
int* num_seg_details = nullptr);
static std::vector<vtr::Matrix<int>> alloc_and_load_actual_fc(const std::vector<t_physical_tile_type>& types,
const int max_pins,
const std::vector<t_segment_inf>& segment_inf,
const int* sets_per_seg_type,
const int max_chan_width,
const e_fc_type fc_type,
const enum e_directionality directionality,
bool* Fc_clipped);
static RRNodeId pick_best_direct_connect_target_rr_node(const RRGraph& rr_graph,
const RRNodeId& from_rr,
const std::vector<RRNodeId>& candidate_rr_nodes);
@ -836,7 +809,7 @@ static void build_rr_graph(const t_graph_type graph_type,
* and count how many different fan-ins exist for each arch switch.
* Then we create these rr switches and update the switch indices
* of rr_nodes to index into the rr_switch_inf array. */
static void alloc_and_load_rr_switch_inf(const int num_arch_switches, const float R_minW_nmos, const float R_minW_pmos, const int wire_to_arch_ipin_switch, int* wire_to_rr_ipin_switch) {
void alloc_and_load_rr_switch_inf(const int num_arch_switches, const float R_minW_nmos, const float R_minW_pmos, const int wire_to_arch_ipin_switch, int* wire_to_rr_ipin_switch) {
/* we will potentially be creating a couple of versions of each arch switch where
* each version corresponds to a different fan-in. We will need to fill device_ctx.rr_switch_inf
* with this expanded list of switches.
@ -962,7 +935,6 @@ static void load_rr_switch_inf(const int num_arch_switches, const float R_minW_n
}
static
t_rr_switch_inf create_rr_switch_from_arch_switch(int arch_switch_idx,
const float R_minW_nmos,
const float R_minW_pmos) {
@ -1057,10 +1029,10 @@ static void remap_rr_node_switch_indices(const t_arch_switch_fanin& switch_fanin
}
}
static void rr_graph_externals(const std::vector<t_segment_inf>& segment_inf,
int max_chan_width,
int wire_to_rr_ipin_switch,
enum e_base_cost_type base_cost_type) {
void rr_graph_externals(const std::vector<t_segment_inf>& segment_inf,
int max_chan_width,
int wire_to_rr_ipin_switch,
enum e_base_cost_type base_cost_type) {
auto& device_ctx = g_vpr_ctx.device();
add_rr_graph_C_from_switches(device_ctx.rr_switch_inf[wire_to_rr_ipin_switch].Cin);
@ -1152,14 +1124,14 @@ static t_seg_details* alloc_and_load_global_route_seg_details(const int global_r
}
/* Calculates the number of track connections from each block pin to each segment type */
static std::vector<vtr::Matrix<int>> alloc_and_load_actual_fc(const std::vector<t_physical_tile_type>& types,
const int max_pins,
const std::vector<t_segment_inf>& segment_inf,
const int* sets_per_seg_type,
const int max_chan_width,
const e_fc_type fc_type,
const enum e_directionality directionality,
bool* Fc_clipped) {
std::vector<vtr::Matrix<int>> alloc_and_load_actual_fc(const std::vector<t_physical_tile_type>& types,
const int max_pins,
const std::vector<t_segment_inf>& segment_inf,
const int* sets_per_seg_type,
const int max_chan_width,
const e_fc_type fc_type,
const enum e_directionality directionality,
bool* Fc_clipped) {
//Initialize Fc of all blocks to zero
auto zeros = vtr::Matrix<int>({size_t(max_pins), segment_inf.size()}, 0);
std::vector<vtr::Matrix<int>> Fc(types.size(), zeros);
@ -2775,7 +2747,7 @@ static void build_unidir_rr_opins(const int i, const int j,
 * This data structure supplements the info in the "directs" data structure
 * TODO: The function that does this parsing in placement is poorly done because it lacks generality on heterogeneity, should replace with this one
*/
static t_clb_to_clb_directs* alloc_and_load_clb_to_clb_directs(const t_direct_inf* directs, const int num_directs, int delayless_switch) {
t_clb_to_clb_directs* alloc_and_load_clb_to_clb_directs(const t_direct_inf* directs, const int num_directs, int delayless_switch) {
int i;
t_clb_to_clb_directs* clb_to_clb_directs;
char *tile_name, *port_name;

View File

@ -7,6 +7,7 @@
#define INCLUDE_TRACK_BUFFERS false
#include "device_grid.h"
#include "clb2clb_directs.h"
enum e_graph_type {
GRAPH_GLOBAL, /* One node per channel with wire capacity > 1 and full connectivity */
@ -56,4 +57,30 @@ void load_rr_switch_from_arch_switch(int arch_switch_idx,
t_non_configurable_rr_sets identify_non_configurable_rr_sets();
void rr_graph_externals(const std::vector<t_segment_inf>& segment_inf,
int max_chan_width,
int wire_to_rr_ipin_switch,
enum e_base_cost_type base_cost_type);
void alloc_and_load_rr_switch_inf(const int num_arch_switches,
const float R_minW_nmos,
const float R_minW_pmos,
const int wire_to_arch_ipin_switch,
int* wire_to_rr_ipin_switch);
t_clb_to_clb_directs* alloc_and_load_clb_to_clb_directs(const t_direct_inf* directs, const int num_directs, const int delayless_switch);
std::vector<vtr::Matrix<int>> alloc_and_load_actual_fc(const std::vector<t_physical_tile_type>& types,
const int max_pins,
const std::vector<t_segment_inf>& segment_inf,
const int* sets_per_seg_type,
const int max_chan_width,
const e_fc_type fc_type,
const enum e_directionality directionality,
bool* Fc_clipped);
t_rr_switch_inf create_rr_switch_from_arch_switch(int arch_switch_idx,
const float R_minW_nmos,
const float R_minW_pmos);
#endif

View File

@ -26,7 +26,6 @@ namespace openfpga {
* therefore, we assign tracks one by one until we meet the frequency requirement
 * In this way, we can assign the number of tracks with respect to frequency
***********************************************************************/
static
std::vector<size_t> get_num_tracks_per_seg_type(const size_t& chan_width,
const std::vector<t_segment_inf>& segment_inf,
const bool& use_full_seg_groups) {

View File

@ -15,6 +15,10 @@
/* begin namespace openfpga */
namespace openfpga {
std::vector<size_t> get_num_tracks_per_seg_type(const size_t& chan_width,
const std::vector<t_segment_inf>& segment_inf,
const bool& use_full_seg_groups);
int adapt_to_tileable_route_chan_width(const int& chan_width, const std::vector<t_segment_inf>& segment_inf);
ChanNodeDetails build_unidir_chan_node_details(const size_t& chan_width,

View File

@ -0,0 +1,323 @@
/************************************************************************
* This file contains a builder for the complex rr_graph data structure
* Different from VPR rr_graph builders, this builder aims to create a
* highly regular rr_graph, where each Connection Block (CB), Switch
* Block (SB) is the same (except for those on the borders). Thus, the
* rr_graph is called tileable, which brings significant advantage in
* producing large FPGA fabrics.
***********************************************************************/
/* Headers from vtrutil library */
#include "vtr_assert.h"
#include "vtr_time.h"
#include "vtr_log.h"
#include "vtr_memory.h"
#include "vpr_error.h"
#include "vpr_utils.h"
#include "rr_graph.h"
#include "check_rr_graph.h"
#include "check_rr_graph_obj.h"
#include "rr_graph_builder_utils.h"
#include "tileable_chan_details_builder.h"
#include "tileable_rr_graph_node_builder.h"
#include "tileable_rr_graph_edge_builder.h"
#include "tileable_rr_graph_builder.h"
#include "globals.h"
/* begin namespace openfpga */
namespace openfpga {
/************************************************************************
* Main function of this file
* Builder for a detailed uni-directional tileable rr_graph
* Global graph is not supported here, the VPR rr_graph generator can be used
* It follows the procedures to complete the rr_graph generation
* 1. Assign the segments for each routing channel,
* To be specific, for each routing track, we assign a routing segment.
* The assignment is subject to users' specifications, such as
* a. length of each type of segment
* b. frequency of each type of segment.
* c. routing channel width
* 2. Estimate the number of nodes in the rr_graph
* This will estimate the number of
* a. IPINs, input pins of each grid
* b. OPINs, output pins of each grid
* c. SOURCE, virtual node which drives OPINs
* d. SINK, virtual node which is connected to IPINs
* e. CHANX and CHANY, routing segments of each channel
* 3. Create the connectivity of OPINs
 *    a. Evenly assign connections from OPINs to routing tracks
* b. the connection pattern should be same across the fabric
* 4. Create the connectivity of IPINs
* a. Evenly assign connections from routing tracks to IPINs
* b. the connection pattern should be same across the fabric
* 5. Create the switch block patterns,
* It is based on the type of switch block, the supported patterns are
* a. Disjoint, which connects routing track (i)th from (i)th and (i)th routing segments
* b. Universal, which connects routing track (i)th from (i)th and (M-i)th routing segments
* c. Wilton, which rotates the connection of Disjoint by 1 track
* 6. Allocate rr_graph, fill the node information
* For each node, fill
* a. basic information: coordinate(xlow, xhigh, ylow, yhigh), ptc_num
 *    b. edges (both incoming and outgoing)
* c. handle direct-connections
* 7. Build fast look-up for the rr_graph
* 8. Allocate external data structures
* a. cost_index
* b. RC tree
***********************************************************************/
/* Top-level builder for a detailed, uni-directional, tileable rr_graph.
 *
 * The routing graph is written into the mutable global device context
 * (g_vpr_ctx.mutable_device().rr_graph). Global routing graphs are NOT
 * supported here; use the VPR rr_graph generator for those.
 *
 * Parameters:
 *   types                    - physical tile types of the device
 *   grids                    - device grid (per-tile layout)
 *   chan_width               - routing channel width specification
 *   sb_type/Fs               - switch block pattern and Fs for the main SB
 *   sb_subtype/subFs         - switch block pattern and Fs for the sub SB
 *   segment_inf              - routing segment specifications
 *   delayless_switch         - arch switch id used for delayless connections
 *   wire_to_arch_ipin_switch - arch switch id used for track->IPIN edges
 *   R_minW_nmos/R_minW_pmos  - transistor resistances used to size rr switches
 *   base_cost_type           - base cost model for rr_indexed_data
 *   directs/num_directs      - direct (OPIN->IPIN) connection specifications
 *   wire_to_rr_ipin_switch   - [out] rr switch id of the track->IPIN switch
 *   wire_opposite_side       - IPIN connectivity option for edge creation
 *   Warnings                 - [out] RR_GRAPH_* warning flags
 *
 * Throws VPR_ERROR_ROUTE if the built rr_graph fails validation. */
void build_tileable_unidir_rr_graph(const std::vector<t_physical_tile_type>& types,
                                    const DeviceGrid& grids,
                                    const t_chan_width& chan_width,
                                    const e_switch_block_type& sb_type, const int& Fs,
                                    const e_switch_block_type& sb_subtype, const int& subFs,
                                    const std::vector<t_segment_inf>& segment_inf,
                                    const int& delayless_switch,
                                    const int& wire_to_arch_ipin_switch,
                                    const float R_minW_nmos,
                                    const float R_minW_pmos,
                                    const enum e_base_cost_type& base_cost_type,
                                    const t_direct_inf* directs,
                                    const int& num_directs,
                                    int* wire_to_rr_ipin_switch,
                                    const bool& wire_opposite_side,
                                    int* Warnings) {
    vtr::ScopedStartFinishTimer timer("Build tileable routing resource graph");

    /* Reset warning flag */
    *Warnings = RR_GRAPH_NO_WARN;

    /* Create a vector of channel width; we support X-direction and Y-direction having different W */
    vtr::Point<size_t> device_chan_width(chan_width.x_max, chan_width.y_max);

    VTR_LOG("X-direction routing channel width is %lu\n", device_chan_width.x());
    VTR_LOG("Y-direction routing channel width is %lu\n", device_chan_width.y());

    /* Get a mutable device ctx so that we have a mutable rr_graph */
    DeviceContext& device_ctx = g_vpr_ctx.mutable_device();

    /* The number of segments is in general small; reserving segments may not bring
     * significant memory efficiency, but it is cheap */
    device_ctx.rr_graph.reserve_segments(segment_inf.size());
    /* Create the segments */
    for (size_t iseg = 0; iseg < segment_inf.size(); ++iseg) {
        device_ctx.rr_graph.create_segment(segment_inf[iseg]);
    }

    /* TODO: Load architecture switch to rr_graph switches
     * Draft the switches as internal data of RRGraph object
     * These are temporary switches copied from arch switches
     * We use them to build the edges
     * We will reset all the switches in the function
     *   alloc_and_load_rr_switch_inf()
     */
    /* Spot the two special switch ids in the architecture switch list:
     * one drives track->IPIN edges, the other drives delayless (direct/virtual) edges */
    RRSwitchId wire_to_ipin_rr_switch = RRSwitchId::INVALID();
    RRSwitchId delayless_rr_switch = RRSwitchId::INVALID();

    device_ctx.rr_graph.reserve_switches(device_ctx.num_arch_switches);
    /* Create the switches */
    for (int iswitch = 0; iswitch < device_ctx.num_arch_switches; ++iswitch) {
        const t_rr_switch_inf& temp_rr_switch = create_rr_switch_from_arch_switch(iswitch, R_minW_nmos, R_minW_pmos);
        RRSwitchId rr_switch = device_ctx.rr_graph.create_switch(temp_rr_switch);
        if (iswitch == wire_to_arch_ipin_switch) {
            wire_to_ipin_rr_switch = rr_switch;
        }
        if (iswitch == delayless_switch) {
            delayless_rr_switch = rr_switch;
        }
    }
    /* Validate the special switches */
    VTR_ASSERT(true == device_ctx.rr_graph.valid_switch_id(wire_to_ipin_rr_switch));
    VTR_ASSERT(true == device_ctx.rr_graph.valid_switch_id(delayless_rr_switch));

    /* A temp data about the driver switch ids for each rr_node */
    vtr::vector<RRNodeId, RRSwitchId> rr_node_driver_switches;

    /* A temp data about the track ids for each CHANX and CHANY rr_node */
    std::map<RRNodeId, std::vector<size_t>> rr_node_track_ids;

    /************************
     * Allocate the rr_nodes
     ************************/
    alloc_tileable_rr_graph_nodes(device_ctx.rr_graph,
                                  rr_node_driver_switches,
                                  grids,
                                  device_chan_width,
                                  segment_inf);

    /************************
     * Create all the rr_nodes
     ************************/
    create_tileable_rr_graph_nodes(device_ctx.rr_graph,
                                   rr_node_driver_switches,
                                   rr_node_track_ids,
                                   grids,
                                   device_chan_width,
                                   segment_inf,
                                   wire_to_ipin_rr_switch,
                                   delayless_rr_switch);

    /************************************************************************
     * Create the connectivity of OPINs
     *   a. Evenly assign connections from OPINs to routing tracks
     *   b. the connection pattern should be same across the fabric
     *
     * Create the connectivity of IPINs
     *   a. Evenly assign connections from routing tracks to IPINs
     *   b. the connection pattern should be same across the fabric
     ***********************************************************************/
    /* Global routing uses a single longwire track */
    int max_chan_width = find_unidir_routing_channel_width(chan_width.max);
    VTR_ASSERT(max_chan_width > 0);

    /* get maximum number of pins across all blocks */
    int max_pins = types[0].num_pins;
    for (const auto& type : types) {
        if (is_empty_type(&type)) {
            continue;
        }
        if (type.num_pins > max_pins) {
            max_pins = type.num_pins;
        }
    }

    /* Fc assignment still uses the old function from VPR.
     * Should use tileable version so that we can have full control
     */
    std::vector<size_t> num_tracks = get_num_tracks_per_seg_type(max_chan_width / 2, segment_inf, false);
    VTR_ASSERT(num_tracks.size() == segment_inf.size());
    /* BUGFIX: use std::vector instead of vtr::malloc()/free() so the buffer
     * cannot leak when vpr_throw() is triggered by the validators below */
    std::vector<int> sets_per_seg_type(segment_inf.size(), 0);
    for (size_t iseg = 0; iseg < num_tracks.size(); ++iseg) {
        sets_per_seg_type[iseg] = static_cast<int>(num_tracks[iseg]);
    }

    bool Fc_clipped = false;
    /* [0..num_types-1][0..num_pins-1] */
    std::vector<vtr::Matrix<int>> Fc_in;
    Fc_in = alloc_and_load_actual_fc(types, max_pins, segment_inf, sets_per_seg_type.data(), max_chan_width,
                                     e_fc_type::IN, UNI_DIRECTIONAL, &Fc_clipped);
    if (Fc_clipped) {
        *Warnings |= RR_GRAPH_WARN_FC_CLIPPED;
    }

    Fc_clipped = false;
    /* [0..num_types-1][0..num_pins-1] */
    std::vector<vtr::Matrix<int>> Fc_out;
    Fc_out = alloc_and_load_actual_fc(types, max_pins, segment_inf, sets_per_seg_type.data(), max_chan_width,
                                      e_fc_type::OUT, UNI_DIRECTIONAL, &Fc_clipped);
    if (Fc_clipped) {
        *Warnings |= RR_GRAPH_WARN_FC_CLIPPED;
    }

    /************************************************************************
     * Build the connections tile by tile:
     * We classify rr_nodes into a general switch block (GSB) data structure
     * where we create edges to each rr_nodes in the GSB with respect to
     * Fc_in and Fc_out, switch block patterns
     * In addition, we will also handle direct-connections:
     * Add edges that bridge OPINs and IPINs to the rr_graph
     ***********************************************************************/
    /* Create edges for a tileable rr_graph */
    build_rr_graph_edges(device_ctx.rr_graph,
                         rr_node_driver_switches,
                         grids,
                         device_chan_width,
                         segment_inf,
                         Fc_in, Fc_out,
                         sb_type, Fs, sb_subtype, subFs,
                         wire_opposite_side);

    /************************************************************************
     * Build direction connection lists
     * TODO: use tile direct builder
     ***********************************************************************/
    /* Create data structure of direct-connections */
    t_clb_to_clb_directs* clb_to_clb_directs = nullptr;
    if (num_directs > 0) {
        clb_to_clb_directs = alloc_and_load_clb_to_clb_directs(directs, num_directs, delayless_switch);
    }

    std::vector<t_direct_inf> arch_directs;
    std::vector<t_clb_to_clb_directs> clb2clb_directs;
    for (int idirect = 0; idirect < num_directs; ++idirect) {
        arch_directs.push_back(directs[idirect]);
        clb2clb_directs.push_back(clb_to_clb_directs[idirect]);
    }
    /* BUGFIX: release the C-style array as soon as its contents are copied,
     * so the early vpr_throw() exits below cannot leak it. Nothing reads the
     * raw array after this point; only clb2clb_directs is used. */
    if (nullptr != clb_to_clb_directs) {
        free(clb_to_clb_directs);
        clb_to_clb_directs = nullptr;
    }

    build_rr_graph_direct_connections(device_ctx.rr_graph, grids, delayless_rr_switch,
                                      arch_directs, clb2clb_directs);

    /* First time to build edges so that we can remap the architecture switch to rr_switch
     * This is a must-do before function alloc_and_load_rr_switch_inf()
     */
    device_ctx.rr_graph.rebuild_node_edges();

    /* Allocate and load routing resource switches, which are derived from the switches from the architecture file,
     * based on their fanin in the rr graph. This routine also adjusts the rr nodes to point to these new rr switches */
    alloc_and_load_rr_switch_inf(device_ctx.num_arch_switches, R_minW_nmos, R_minW_pmos, wire_to_arch_ipin_switch, wire_to_rr_ipin_switch);

    /* Save the channel widths for the newly constructed graph */
    device_ctx.chan_width = chan_width;

    /************************************************************************
     * Allocate external data structures
     *  a. cost_index
     *  b. RC tree
     ***********************************************************************/
    rr_graph_externals(segment_inf, max_chan_width,
                       *wire_to_rr_ipin_switch, base_cost_type);

    /* Rebuild the link between RRGraph node and segments
     * Should be called only AFTER the function
     *   rr_graph_externals()
     */
    for (const RRNodeId& inode : device_ctx.rr_graph.nodes()) {
        /* Only routing tracks carry a segment id */
        if ((CHANX != device_ctx.rr_graph.node_type(inode))
            && (CHANY != device_ctx.rr_graph.node_type(inode))) {
            continue;
        }
        short irc_data = device_ctx.rr_graph.node_cost_index(inode);
        short iseg = device_ctx.rr_indexed_data[irc_data].seg_index;
        device_ctx.rr_graph.set_node_segment(inode, RRSegmentId(iseg));
    }

    /************************************************************************
     * Sanitizer for the rr_graph, check connectivities of rr_nodes
     ***********************************************************************/
    /* Essential check for rr_graph, build look-up */
    if (false == device_ctx.rr_graph.validate()) {
        /* Error out if built-in validator of rr_graph fails */
        vpr_throw(VPR_ERROR_ROUTE,
                  __FILE__,
                  __LINE__,
                  "Fundamental errors occurred when validating rr_graph object!\n");
    }

    check_rr_graph(GRAPH_UNIDIR, grids, types);

    /* Error out if advanced checker of rr_graph fails */
    if (false == check_rr_graph(device_ctx.rr_graph)) {
        vpr_throw(VPR_ERROR_ROUTE,
                  __FILE__,
                  __LINE__,
                  "Advanced checking rr_graph object fails! Routing may still work "
                  "but not smooth\n");
    }
}
} /* end namespace openfpga */

View File

@ -0,0 +1,38 @@
#ifndef TILEABLE_RR_GRAPH_BUILDER_H
#define TILEABLE_RR_GRAPH_BUILDER_H
/********************************************************************
 * Include header files that are required by function declaration
 *******************************************************************/
#include <vector>
#include "physical_types.h"
#include "device_grid.h"
/********************************************************************
 * Function declaration
 *******************************************************************/
/* begin namespace openfpga */
namespace openfpga {
/* Build a detailed, uni-directional, tileable routing resource graph
 * into the global device context (g_vpr_ctx).
 * Unlike the VPR rr_graph builders, this builder aims at a highly regular
 * rr_graph where every Connection Block and Switch Block is identical
 * (except those on the device borders), i.e. tileable.
 *
 * Outputs:
 *   wire_to_rr_ipin_switch - rr switch id used for track->IPIN edges
 *   Warnings               - RR_GRAPH_* warning flags (e.g. Fc clipped)
 * NOTE(review): presumably throws VPR_ERROR_ROUTE on validation failure —
 * confirm against the .cpp implementation. */
void build_tileable_unidir_rr_graph(const std::vector<t_physical_tile_type>& types,
const DeviceGrid& grids,
const t_chan_width& chan_width,
const e_switch_block_type& sb_type, const int& Fs,
const e_switch_block_type& sb_subtype, const int& subFs,
const std::vector<t_segment_inf>& segment_inf,
const int& delayless_switch,
const int& wire_to_arch_ipin_switch,
const float R_minW_nmos,
const float R_minW_pmos,
const enum e_base_cost_type& base_cost_type,
const t_direct_inf *directs,
const int& num_directs,
int* wire_to_rr_ipin_switch,
const bool& wire_opposite_side,
int *Warnings);
} /* end namespace openfpga */
#endif