/* OpenFPGA/vpr7_x2p/vpr/SRC/route/rr_graph.c */

#include <stdio.h>
#include <math.h>
#include <assert.h>
#include <string.h>
#include "util.h"
#include "vpr_types.h"
#include "globals.h"
#include "rr_graph_util.h"
#include "rr_graph.h"
#include "rr_graph2.h"
#include "rr_graph_sbox.h"
#include "check_rr_graph.h"
#include "rr_graph_timing_params.h"
#include "rr_graph_indexed_data.h"
#include "vpr_utils.h"
#include "read_xml_arch_file.h"
#include "ReadOptions.h"
#include "tileable_rr_graph_builder.h"
#include "rr_graph_builder_utils.h"
/* Xifan TANG: SWSEG SUPPORT */
#include "rr_graph_swseg.h"
/* end */
/* Xifan TANG: opin_to_cb support */
#include "rr_graph_opincb.h"
/* end */
/* mrFPGA: Xifan TANG */
#include "mrfpga_globals.h"
/* end */
/* #define ENABLE_DUMP */
/* #define MUX_SIZE_DIST_DISPLAY */
/* mux size statistic data structures */
typedef struct s_mux {
int size;
struct s_mux *next;
} t_mux;
typedef struct s_mux_size_distribution {
int mux_count;
int max_index;
int *distr;
struct s_mux_size_distribution *next;
} t_mux_size_distribution;
/*
typedef struct s_clb_to_clb_directs {
t_type_descriptor *from_clb_type;
int from_clb_pin_start_index;
int from_clb_pin_end_index;
t_type_descriptor *to_clb_type;
int to_clb_pin_start_index;
int to_clb_pin_end_index;
} t_clb_to_clb_directs;
*/
/* Xifan TANG: opin_to_cb support */
#include "pb_pin_eq_auto_detect.h"
/* end */
/* UDSD Modifications by WMF End */
/******************* Variables local to this module. ***********************/
/* Chunk memory used by this module. If chunk_ptr_head is NULL, no rr_graph
 * exists right now. Tracks the status of the current chunk being dished out
 * by calls to my_chunk_malloc. */
static t_chunk rr_mem_ch = {NULL, 0, NULL};
/********************* Subroutines local to this module. *******************/
static void build_bidir_rr_opins(INP int i, INP int j,
INOUTP t_rr_node * L_rr_node, INP t_ivec *** L_rr_node_indices,
INP int *****opin_to_track_map, INP int **Fc_out,
INP boolean * L_rr_edge_done, INP t_seg_details * seg_details,
INP struct s_grid_tile **L_grid, INP int delayless_switch,
INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs);
static void build_unidir_rr_opins(INP int i, INP int j,
INP struct s_grid_tile **L_grid, INP int **Fc_out,
INP int nodes_per_chan, INP t_seg_details * seg_details,
INOUTP int **Fc_xofs, INOUTP int **Fc_yofs,
INOUTP t_rr_node * L_rr_node, INOUTP boolean * L_rr_edge_done,
OUTP boolean * Fc_clipped, INP t_ivec *** L_rr_node_indices, INP int delayless_switch,
INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs);
static int get_opin_direct_connecions(int x, int y, int opin, INOUTP t_linked_edge ** edge_list_ptr, INP t_ivec *** L_rr_node_indices,
INP int delayless_switch, INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs);
static void alloc_and_load_rr_graph(INP int num_nodes,
INP t_rr_node * L_rr_node, INP int num_seg_types,
INP t_seg_details * seg_details, INP boolean * L_rr_edge_done,
INP struct s_ivec ****track_to_ipin_lookup,
INP int *****opin_to_track_map, INP struct s_ivec ***switch_block_conn,
INP struct s_grid_tile **L_grid, INP int L_nx, INP int L_ny, INP int Fs,
INP short *****sblock_pattern, INP int **Fc_out, INP int **Fc_xofs,
INP int **Fc_yofs, INP t_ivec *** L_rr_node_indices,
INP int nodes_per_chan, INP enum e_switch_block_type sb_type,
INP int delayless_switch, INP enum e_directionality directionality,
INP int wire_to_ipin_switch, OUTP boolean * Fc_clipped, INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs);
static void load_uniform_switch_pattern(INP t_type_ptr type,
INOUTP int ****tracks_connected_to_pin, INP int num_phys_pins,
INP int *pin_num_ordering, INP int *side_ordering,
INP int *offset_ordering, INP int nodes_per_chan, INP int Fc,
INP enum e_directionality directionality);
static void load_perturbed_switch_pattern(INP t_type_ptr type,
INOUTP int ****tracks_connected_to_pin, INP int num_phys_pins,
INP int *pin_num_ordering, INP int *side_ordering,
INP int *offset_ordering, INP int nodes_per_chan, INP int Fc,
INP enum e_directionality directionality);
static void check_all_tracks_reach_pins(t_type_ptr type,
int ****tracks_connected_to_pin, int nodes_per_chan, int Fc,
enum e_pin_type ipin_or_opin);
static void build_rr_sinks_sources(INP int i, INP int j,
INP t_rr_node * L_rr_node, INP t_ivec *** L_rr_node_indices,
INP int delayless_switch, INP struct s_grid_tile **L_grid);
static void build_rr_xchan(INP int i, INP int j,
INP struct s_ivec ****track_to_ipin_lookup,
INP struct s_ivec ***switch_block_conn, INP int cost_index_offset,
INP int nodes_per_chan, INP int *opin_mux_size,
INP short *****sblock_pattern, INP int Fs_per_side,
INP t_seg_details * seg_details, INP t_ivec *** L_rr_node_indices,
INP boolean * L_rr_edge_done, INOUTP t_rr_node * L_rr_node,
INP int wire_to_ipin_switch, INP enum e_directionality directionality);
static void build_rr_ychan(INP int i, INP int j,
INP struct s_ivec ****track_to_ipin_lookup,
INP struct s_ivec ***switch_block_conn, INP int cost_index_offset,
INP int nodes_per_chan, INP int *opin_mux_size,
INP short *****sblock_pattern, INP int Fs_per_side,
INP t_seg_details * seg_details, INP t_ivec *** L_rr_node_indices,
INP boolean * L_rr_edge_done, INOUTP t_rr_node * L_rr_node,
INP int wire_to_ipin_switch, INP enum e_directionality directionality);
void alloc_and_load_edges_and_switches(INP t_rr_node * L_rr_node, INP int inode,
INP int num_edges, INP boolean * L_rr_edge_done,
INP t_linked_edge * edge_list_head);
static void alloc_net_rr_terminals(void);
static void alloc_and_load_rr_clb_source(t_ivec *** L_rr_node_indices);
/*
static t_clb_to_clb_directs *alloc_and_load_clb_to_clb_directs(INP t_direct_inf *directs, INP int num_directs);
*/
#if 0
static void load_uniform_opin_switch_pattern_paired(INP int *Fc_out,
INP int num_pins,
INP int *pins_in_chan_seg,
INP int num_wire_inc_muxes,
INP int num_wire_dec_muxes,
INP int *wire_inc_muxes,
INP int *wire_dec_muxes,
INOUTP t_rr_node * L_rr_node,
INOUTP boolean *
L_rr_edge_done,
INP t_seg_details *
seg_details,
OUTP boolean * Fc_clipped);
#endif
void watch_edges(int inode, t_linked_edge * edge_list_head);
#ifdef MUX_SIZE_DIST_DISPLAY
static void view_mux_size_distribution(t_ivec *** L_rr_node_indices,
int nodes_per_chan,
t_seg_details * seg_details_x,
t_seg_details * seg_details_y);
static void print_distribution(FILE * fptr,
t_mux_size_distribution * distr_struct);
#endif
static t_seg_details *alloc_and_load_global_route_seg_details(
INP int nodes_per_chan, INP int global_route_switch);
static
void build_classic_rr_graph(INP t_graph_type graph_type, INP int L_num_types,
INP t_type_ptr types, INP int L_nx, INP int L_ny,
INP struct s_grid_tile **L_grid, INP int chan_width,
INP struct s_chan_width_dist *chan_capacity_inf,
INP enum e_switch_block_type sb_type, INP int Fs, INP int num_seg_types,
INP int num_switches, INP t_segment_inf * segment_inf,
INP int global_route_switch, INP int delayless_switch,
INP t_timing_inf timing_inf, INP int wire_to_ipin_switch,
INP enum e_base_cost_type base_cost_type, INP t_direct_inf *directs,
INP int num_directs, INP boolean ignore_Fc_0, OUTP int *Warnings,
/*Xifan TANG: Switch Segment Pattern Support*/
INP int num_swseg_pattern, INP t_swseg_pattern_inf* swseg_patterns,
INP boolean opin_to_cb_fast_edges, INP boolean opin_logic_eq_edges);
/* UDSD Modifications by WMF End */
/******************* Subroutine definitions *******************************/
/*************************************************************************
* Top-level function of rr_graph builder
* Xifan TANG: this top function can branch between tileable rr_graph generator
* and the classical rr_graph generator
************************************************************************/
void build_rr_graph(INP t_graph_type graph_type, INP int L_num_types,
INP t_type_ptr types, INP int L_nx, INP int L_ny,
INP struct s_grid_tile **L_grid, INP int chan_width,
INP struct s_chan_width_dist *chan_capacity_inf,
INP enum e_switch_block_type sb_type, INP int Fs,
INP enum e_switch_block_type sb_sub_type, INP int sub_Fs, INP boolean wire_opposite_side,
INP int num_seg_types,
INP int num_switches, INP t_segment_inf * segment_inf,
INP int global_route_switch, INP int delayless_switch,
INP t_timing_inf timing_inf, INP int wire_to_ipin_switch,
INP enum e_base_cost_type base_cost_type, INP t_direct_inf *directs,
INP int num_directs, INP boolean ignore_Fc_0, OUTP int *Warnings,
/*Xifan TANG: Switch Segment Pattern Support*/
INP int num_swseg_pattern, INP t_swseg_pattern_inf* swseg_patterns,
INP boolean opin_to_cb_fast_edges, INP boolean opin_logic_eq_edges) {
/* Branch here */
if (GRAPH_UNIDIR_TILEABLE == graph_type) {
build_tileable_unidir_rr_graph(L_num_types, types,
L_nx, L_ny, L_grid,
chan_width,
sb_type, Fs,
sb_sub_type, sub_Fs, wire_opposite_side,
num_seg_types, segment_inf,
num_switches, delayless_switch,
timing_inf, wire_to_ipin_switch,
base_cost_type, directs, num_directs, ignore_Fc_0, Warnings);
} else {
build_classic_rr_graph(graph_type, L_num_types, types,
L_nx, L_ny, L_grid,
chan_width, chan_capacity_inf,
sb_type, Fs, num_seg_types, num_switches, segment_inf,
global_route_switch, delayless_switch,
timing_inf, wire_to_ipin_switch,
base_cost_type, directs, num_directs, ignore_Fc_0, Warnings,
num_swseg_pattern, swseg_patterns,
opin_to_cb_fast_edges, opin_logic_eq_edges);
}
/* Print statistics of RR graph */
print_rr_graph_stats();
return;
}
/* Xifan TANG: I rename the classical rr_graph builder here.
* We can have a clean build_rr_graph top function,
* where we branch for tileable routing and classical */
static
void build_classic_rr_graph(INP t_graph_type graph_type, INP int L_num_types,
INP t_type_ptr types, INP int L_nx, INP int L_ny,
INP struct s_grid_tile **L_grid, INP int chan_width,
INP struct s_chan_width_dist *chan_capacity_inf,
INP enum e_switch_block_type sb_type, INP int Fs, INP int num_seg_types,
INP int num_switches, INP t_segment_inf * segment_inf,
INP int global_route_switch, INP int delayless_switch,
INP t_timing_inf timing_inf, INP int wire_to_ipin_switch,
INP enum e_base_cost_type base_cost_type, INP t_direct_inf *directs,
INP int num_directs, INP boolean ignore_Fc_0, OUTP int *Warnings,
/*Xifan TANG: Switch Segment Pattern Support*/
INP int num_swseg_pattern, INP t_swseg_pattern_inf* swseg_patterns,
INP boolean opin_to_cb_fast_edges, INP boolean opin_logic_eq_edges) {
/* Temp structures used to build graph */
int nodes_per_chan, i, j;
t_seg_details *seg_details = NULL;
int **Fc_in = NULL; /* [0..num_types-1][0..num_pins-1] */
int **Fc_out = NULL; /* [0..num_types-1][0..num_pins-1] */
int *****opin_to_track_map = NULL; /* [0..num_types-1][0..num_pins-1][0..height][0..3][0..Fc-1] */
int *****ipin_to_track_map = NULL; /* [0..num_types-1][0..num_pins-1][0..height][0..3][0..Fc-1] */
t_ivec ****track_to_ipin_lookup = NULL; /* [0..num_types-1][0..nodes_per_chan-1][0..height][0..3] */
t_ivec ***switch_block_conn = NULL;
short *****unidir_sb_pattern = NULL;
boolean *L_rr_edge_done = NULL;
boolean is_global_graph;
boolean Fc_clipped;
boolean use_full_seg_groups;
boolean *perturb_ipins = NULL;
enum e_directionality directionality;
int **Fc_xofs = NULL; /* [0..ny-1][0..nx-1] */
int **Fc_yofs = NULL; /* [0..nx-1][0..ny-1] */
t_clb_to_clb_directs *clb_to_clb_directs;
rr_node_indices = NULL;
rr_node = NULL;
num_rr_nodes = 0;
/* Reset warning flag */
*Warnings = RR_GRAPH_NO_WARN;
/* Decode the graph_type */
is_global_graph = FALSE;
if (GRAPH_GLOBAL == graph_type) {
is_global_graph = TRUE;
}
use_full_seg_groups = FALSE;
if (GRAPH_UNIDIR_TILEABLE == graph_type) {
use_full_seg_groups = TRUE;
}
directionality = UNI_DIRECTIONAL;
if (GRAPH_BIDIR == graph_type) {
directionality = BI_DIRECTIONAL;
}
if (is_global_graph) {
directionality = BI_DIRECTIONAL;
}
/* Global routing uses a single longwire track */
nodes_per_chan = (is_global_graph ? 1 : chan_width);
assert(nodes_per_chan > 0);
clb_to_clb_directs = NULL;
if(num_directs > 0) {
clb_to_clb_directs = alloc_and_load_clb_to_clb_directs(directs, num_directs);
}
/* START SEG_DETAILS */
if (is_global_graph) {
/* Sets up a single unit length segment type for global routing. */
seg_details = alloc_and_load_global_route_seg_details(nodes_per_chan,
global_route_switch);
} else {
		/* Set up segments, including distributing tracks and staggering.
		 * If use_full_seg_groups is specified, nodes_per_chan may be
		 * changed. A warning should be signaled to the caller if this happens. */
seg_details = alloc_and_load_seg_details(&nodes_per_chan,
/* std::max(L_nx, L_ny), */ /* Original VPR */
std::max(L_nx, L_ny) + ( is_stack ? 1 : 0 ), /* mrFPGA: Xifan TANG */
num_seg_types, segment_inf,
use_full_seg_groups, is_global_graph, directionality);
if ((is_global_graph ? 1 : chan_width) != nodes_per_chan) {
*Warnings |= RR_GRAPH_WARN_CHAN_WIDTH_CHANGED;
}
if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_SEG_DETAILS)) {
dump_seg_details(seg_details, nodes_per_chan,
getEchoFileName(E_ECHO_SEG_DETAILS));
}
}
/* END SEG_DETAILS */
/* START FC */
/* Determine the actual value of Fc */
if (is_global_graph) {
		/* Size the matrices by the widest pin count (the original code
		 * allocated only the row pointers, leaving the rows unallocated). */
		int max_pins = types[0].num_pins;
		for (i = 1; i < L_num_types; ++i) {
			if (types[i].num_pins > max_pins)
				max_pins = types[i].num_pins;
		}
		Fc_in = (int **) alloc_matrix(0, L_num_types, 0, max_pins, sizeof(int));
		Fc_out = (int **) alloc_matrix(0, L_num_types, 0, max_pins, sizeof(int));
		for (i = 0; i < L_num_types; ++i) {
			for (j = 0; j < types[i].num_pins; ++j) {
				Fc_in[i][j] = 1;
				Fc_out[i][j] = 1;
			}
		}
} else {
Fc_clipped = FALSE;
Fc_in = alloc_and_load_actual_fc(L_num_types, types, nodes_per_chan,
FALSE, directionality, &Fc_clipped, ignore_Fc_0);
if (Fc_clipped) {
*Warnings |= RR_GRAPH_WARN_FC_CLIPPED;
}
Fc_clipped = FALSE;
Fc_out = alloc_and_load_actual_fc(L_num_types, types, nodes_per_chan,
TRUE, directionality, &Fc_clipped, ignore_Fc_0);
if (Fc_clipped) {
*Warnings |= RR_GRAPH_WARN_FC_CLIPPED;
}
#ifdef VERBOSE
for (i = 1; i < L_num_types; ++i) { /* Skip "<EMPTY>" */
for (j = 0; j < type_descriptors[i].num_pins; ++j) {
if (type_descriptors[i].is_Fc_full_flex[j]) {
vpr_printf(TIO_MESSAGE_INFO, "Fc Actual Values: type = %s, Fc_out = full, Fc_in = %d.\n",
type_descriptors[i].name, Fc_in[i][j]);
}
else {
vpr_printf(TIO_MESSAGE_INFO, "Fc Actual Values: type = %s, Fc_out = %d, Fc_in = %d.\n",
type_descriptors[i].name, Fc_out[i][j], Fc_in[i][j]);
}
}
}
#endif /* VERBOSE */
}
perturb_ipins = alloc_and_load_perturb_ipins(nodes_per_chan, L_num_types,
Fc_in, Fc_out, directionality);
/* END FC */
/* Alloc node lookups, count nodes, alloc rr nodes */
num_rr_nodes = 0;
rr_node_indices = alloc_and_load_rr_node_indices(nodes_per_chan, L_nx, L_ny,
&num_rr_nodes, seg_details);
rr_node = (t_rr_node *) my_malloc(sizeof(t_rr_node) * num_rr_nodes);
memset(rr_node, 0, sizeof(t_rr_node) * num_rr_nodes);
L_rr_edge_done = (boolean *) my_malloc(sizeof(boolean) * num_rr_nodes);
memset(L_rr_edge_done, 0, sizeof(boolean) * num_rr_nodes);
	/* These are data structures used by the unidir opin mapping. */
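	/* Fc_xofs[y][x] and Fc_yofs[x][y] hold a running count of the OPIN
	 * connections already assigned to the CHANX/CHANY segment at each
	 * location; build_unidir_rr_opins uses them as moving offsets so that
	 * successive OPINs spread across different track groups. */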
if (UNI_DIRECTIONAL == directionality) {
Fc_xofs = (int **) alloc_matrix(0, L_ny, 0, L_nx, sizeof(int));
Fc_yofs = (int **) alloc_matrix(0, L_nx, 0, L_ny, sizeof(int));
for (i = 0; i <= L_nx; ++i) {
for (j = 0; j <= L_ny; ++j) {
Fc_xofs[j][i] = 0;
Fc_yofs[i][j] = 0;
}
}
}
/* START SB LOOKUP */
/* Alloc and load the switch block lookup */
if (is_global_graph) {
assert(nodes_per_chan == 1);
switch_block_conn = alloc_and_load_switch_block_conn(1, SUBSET, 3);
} else if (BI_DIRECTIONAL == directionality) {
switch_block_conn = alloc_and_load_switch_block_conn(nodes_per_chan,
sb_type, Fs);
} else {
assert(UNI_DIRECTIONAL == directionality);
unidir_sb_pattern = alloc_sblock_pattern_lookup(L_nx, L_ny,
nodes_per_chan);
for (i = 0; i <= L_nx; i++) {
for (j = 0; j <= L_ny; j++) {
load_sblock_pattern_lookup(i, j, nodes_per_chan, seg_details,
Fs, sb_type, unidir_sb_pattern);
}
}
}
/* END SB LOOKUP */
/* START IPIN MAP */
/* Create ipin map lookups */
ipin_to_track_map = (int *****) my_malloc(sizeof(int ****) * L_num_types);
track_to_ipin_lookup = (struct s_ivec ****) my_malloc(
sizeof(struct s_ivec ***) * L_num_types);
for (i = 0; i < L_num_types; ++i) {
ipin_to_track_map[i] = alloc_and_load_pin_to_track_map(RECEIVER,
nodes_per_chan, Fc_in[i], &types[i], perturb_ipins[i],
directionality);
track_to_ipin_lookup[i] = alloc_and_load_track_to_pin_lookup(
ipin_to_track_map[i], Fc_in[i], types[i].height,
types[i].num_pins, nodes_per_chan);
}
/* END IPIN MAP */
/* START OPIN MAP */
/* Create opin map lookups */
if (BI_DIRECTIONAL == directionality) {
opin_to_track_map = (int *****) my_malloc(
sizeof(int ****) * L_num_types);
for (i = 0; i < L_num_types; ++i) {
opin_to_track_map[i] = alloc_and_load_pin_to_track_map(DRIVER,
nodes_per_chan, Fc_out[i], &types[i], FALSE, directionality);
}
}
/* END OPIN MAP */
/* UDSD Modifications by WMF begin */
/* I'm adding 2 new fields to t_rr_node, and I want them initialized to 0. */
for (i = 0; i < num_rr_nodes; i++) {
rr_node[i].num_wire_drivers = 0;
rr_node[i].num_opin_drivers = 0;
}
alloc_and_load_rr_graph(num_rr_nodes, rr_node, num_seg_types, seg_details,
L_rr_edge_done, track_to_ipin_lookup, opin_to_track_map,
switch_block_conn, L_grid, L_nx, L_ny, Fs, unidir_sb_pattern,
Fc_out, Fc_xofs, Fc_yofs, rr_node_indices, nodes_per_chan, sb_type,
delayless_switch, directionality, wire_to_ipin_switch, &Fc_clipped, directs, num_directs, clb_to_clb_directs);
#ifdef MUX_SIZE_DIST_DISPLAY
if (UNI_DIRECTIONAL == directionality)
{
view_mux_size_distribution(rr_node_indices, nodes_per_chan,
seg_details, seg_details);
}
#endif
/* Update rr_nodes capacities if global routing */
if (graph_type == GRAPH_GLOBAL) {
for (i = 0; i < num_rr_nodes; i++) {
if (rr_node[i].type == CHANX || rr_node[i].type == CHANY) {
rr_node[i].capacity = chan_width;
}
}
}
/* Xifan TANG: Add fast interconnection from LB OPINs to adjacent LB IPINs */
if (TRUE == opin_to_cb_fast_edges) { /* Only when a detailed rr_graph is built */
vpr_printf(TIO_MESSAGE_INFO,"Adding %d fast edges from logic block OPIN to logic block IPIN ...\n",
add_rr_graph_fast_edge_opin_to_cb(rr_node_indices));
}
/*END*/
/*Xifan TANG: Switch Segment Pattern Support*/
if (NULL != swseg_patterns) { /* Only when patterns are provided */
vpr_printf(TIO_MESSAGE_INFO,"Applying Switch Segment Pattern...\n");
if (UNI_DIRECTIONAL == directionality) {
add_rr_graph_switch_segment_pattern(directionality,nodes_per_chan, num_swseg_pattern, swseg_patterns, rr_node_indices, seg_details, seg_details);
} else {
vpr_printf(TIO_MESSAGE_ERROR,"Switch Segment Pattern is only applicable to uni-directional routing architecture!\n");
exit(1);
}
}
/*END*/
/* Xifan TANG: Check logic equivalence of LB OPINs and IPINs. Then modify the associated rr_graph */
/* Use the net_rr_terminals array to find the SOURCE rr_node for each net */
if (TRUE == opin_logic_eq_edges) { /* Only when a detailed rr_graph is built */
vpr_printf(TIO_MESSAGE_INFO,"Adding %d logic equivalent edges for logic block OPIN ...\n",
// alloc_and_add_grids_fully_capacity_sb_rr_edges(rr_node_indices, num_directs, clb_to_clb_directs));
alloc_and_add_grids_fully_capacity_rr_edges(rr_node_indices, num_directs, clb_to_clb_directs));
}
/*END*/
rr_graph_externals(timing_inf, segment_inf, num_seg_types, nodes_per_chan,
wire_to_ipin_switch, base_cost_type);
if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_RR_GRAPH)) {
dump_rr_graph(getEchoFileName(E_ECHO_RR_GRAPH));
	}
check_rr_graph(graph_type, L_nx, L_ny,
num_switches, Fc_in);
/* Free all temp structs */
if (seg_details) {
free_seg_details(seg_details, nodes_per_chan);
seg_details = NULL;
}
if (Fc_in) {
free_matrix(Fc_in,0, L_num_types, 0, sizeof(int));
Fc_in = NULL;
}
if (Fc_out) {
free_matrix(Fc_out,0, L_num_types, 0, sizeof(int));
Fc_out = NULL;
}
if (perturb_ipins) {
free(perturb_ipins);
perturb_ipins = NULL;
}
if (switch_block_conn) {
free_switch_block_conn(switch_block_conn, nodes_per_chan);
switch_block_conn = NULL;
}
if (L_rr_edge_done) {
free(L_rr_edge_done);
L_rr_edge_done = NULL;
}
if (Fc_xofs) {
free_matrix(Fc_xofs, 0, L_ny, 0, sizeof(int));
Fc_xofs = NULL;
}
if (Fc_yofs) {
free_matrix(Fc_yofs, 0, L_nx, 0, sizeof(int));
Fc_yofs = NULL;
}
if (unidir_sb_pattern) {
free_sblock_pattern_lookup(unidir_sb_pattern);
unidir_sb_pattern = NULL;
}
if (opin_to_track_map) {
for (i = 0; i < L_num_types; ++i) {
free_matrix4(opin_to_track_map[i], 0, types[i].num_pins - 1, 0,
types[i].height - 1, 0, 3, 0, sizeof(int));
}
free(opin_to_track_map);
}
free_type_pin_to_track_map(ipin_to_track_map, types);
free_type_track_to_ipin_map(track_to_ipin_lookup, types, nodes_per_chan);
if(clb_to_clb_directs != NULL) {
free(clb_to_clb_directs);
}
}
void rr_graph_externals(const t_timing_inf timing_inf,
const t_segment_inf * segment_inf, const int num_seg_types, const int nodes_per_chan,
const int wire_to_ipin_switch, const enum e_base_cost_type base_cost_type) {
add_rr_graph_C_from_switches(timing_inf.C_ipin_cblock);
alloc_and_load_rr_indexed_data(segment_inf, num_seg_types, rr_node_indices,
nodes_per_chan, wire_to_ipin_switch, base_cost_type);
alloc_net_rr_terminals();
load_net_rr_terminals(rr_node_indices);
alloc_and_load_rr_clb_source(rr_node_indices);
}
boolean *
alloc_and_load_perturb_ipins(INP int nodes_per_chan, INP int L_num_types,
INP int **Fc_in, INP int **Fc_out, INP enum e_directionality directionality) {
int i;
float Fc_ratio;
boolean *result = NULL;
result = (boolean *) my_malloc(L_num_types * sizeof(boolean));
if (BI_DIRECTIONAL == directionality) {
result[0] = FALSE;
for (i = 1; i < L_num_types; ++i) {
result[i] = FALSE;
if (Fc_in[i][0] > Fc_out[i][0]) {
Fc_ratio = (float) Fc_in[i][0] / (float) Fc_out[i][0];
} else {
Fc_ratio = (float) Fc_out[i][0] / (float) Fc_in[i][0];
}
if ((Fc_in[i][0] <= nodes_per_chan - 2)
&& (fabs(Fc_ratio - nint(Fc_ratio))
< (0.5 / (float) nodes_per_chan))) {
result[i] = TRUE;
}
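			/* Worked example: with nodes_per_chan = 10, Fc_in = 4 and
			 * Fc_out = 2, Fc_ratio = 2.0 and fabs(2.0 - nint(2.0)) = 0
			 * < 0.5/10, so this type's ipins get a perturbed pattern;
			 * with Fc_out = 3 the ratio 1.33 is far from an integer and
			 * no perturbation is applied. */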
}
} else {
/* Unidirectional routing uses mux balancing patterns and
* thus shouldn't need perturbation. */
assert(UNI_DIRECTIONAL == directionality);
for (i = 0; i < L_num_types; ++i) {
result[i] = FALSE;
}
}
return result;
}
static t_seg_details *
alloc_and_load_global_route_seg_details(INP int nodes_per_chan,
INP int global_route_switch) {
t_seg_details *result = NULL;
assert(nodes_per_chan == 1);
result = (t_seg_details *) my_malloc(sizeof(t_seg_details));
result->index = 0;
result->length = 1;
result->wire_switch = global_route_switch;
result->opin_switch = global_route_switch;
result->longline = FALSE;
result->direction = BI_DIRECTION;
result->Cmetal = 0.0;
result->Rmetal = 0.0;
result->start = 1;
result->drivers = MULTI_BUFFERED;
result->cb = (boolean *) my_malloc(sizeof(boolean) * 1);
result->cb[0] = TRUE;
result->sb = (boolean *) my_malloc(sizeof(boolean) * 2);
result->sb[0] = TRUE;
result->sb[1] = TRUE;
result->group_size = 1;
result->group_start = 0;
return result;
}
/* Calculates the actual Fc values for the given nodes_per_chan value */
int **
alloc_and_load_actual_fc(INP int L_num_types, INP t_type_ptr types,
INP int nodes_per_chan, INP boolean is_Fc_out,
INP enum e_directionality directionality, OUTP boolean * Fc_clipped, INP boolean ignore_Fc_0) {
int i, j;
int **Result = NULL;
int fac, num_sets;
*Fc_clipped = FALSE;
/* Unidir tracks formed in pairs, otherwise no effect. */
fac = 1;
if (UNI_DIRECTIONAL == directionality) {
fac = 2;
}
assert((nodes_per_chan % fac) == 0);
num_sets = nodes_per_chan / fac;
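	/* Worked example: with nodes_per_chan = 100 under uni-directional
	 * routing, fac = 2 and num_sets = 50; a fractional Fc of 0.1 then
	 * yields fac * nint(50 * 0.1) = 10 tracks for that pin (see the
	 * fractional branch below). */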
int max_pins = types[0].num_pins;
for (i = 1; i < L_num_types; ++i) {
if (types[i].num_pins > max_pins) {
max_pins = types[i].num_pins;
}
}
Result = (int **) alloc_matrix(0, L_num_types, 0, max_pins, sizeof(int));
for (i = 1; i < L_num_types; ++i) {
float *Fc = (float *) my_malloc(sizeof(float) * types[i].num_pins); /* [0..num_pins-1] */
for (j = 0; j < types[i].num_pins; ++j) {
Fc[j] = types[i].Fc[j];
/* Xifan Tang: give an initial value! */
Result[i][j] = -1;
if(Fc[j] == 0 && ignore_Fc_0 == FALSE) {
/* Special case indicating that this pin does not connect to general-purpose routing */
Result[i][j] = 0;
} else {
/* General case indicating that this pin connects to general-purpose routing */
if (types[i].is_Fc_frac[j]) {
Result[i][j] = fac * nint(num_sets * Fc[j]);
} else {
Result[i][j] = (int)Fc[j];
}
if (is_Fc_out && types[i].is_Fc_full_flex[j]) {
Result[i][j] = nodes_per_chan;
}
Result[i][j] = std::max(Result[i][j], fac);
if (Result[i][j] > nodes_per_chan) {
*Fc_clipped = TRUE;
Result[i][j] = nodes_per_chan;
}
}
assert(Result[i][j] % fac == 0);
}
free(Fc);
}
return Result;
}
/* frees the track to ipin mapping for each physical grid type */
void free_type_track_to_ipin_map(struct s_ivec**** track_to_pin_map,
t_type_ptr types, int nodes_per_chan) {
int i, itrack, ioff, iside;
for (i = 0; i < num_types; i++) {
if (track_to_pin_map[i] != NULL) {
for (itrack = 0; itrack < nodes_per_chan; itrack++) {
for (ioff = 0; ioff < types[i].height; ioff++) {
for (iside = 0; iside < 4; iside++) {
if (track_to_pin_map[i][itrack][ioff][iside].list
!= NULL) {
free(track_to_pin_map[i][itrack][ioff][iside].list);
}
}
}
}
free_matrix3(track_to_pin_map[i], 0, nodes_per_chan - 1, 0,
types[i].height - 1, 0, sizeof(struct s_ivec));
}
}
free(track_to_pin_map);
}
/* frees the ipin to track mapping for each physical grid type */
void free_type_pin_to_track_map(int***** ipin_to_track_map,
t_type_ptr types) {
int i;
for (i = 0; i < num_types; i++) {
free_matrix4(ipin_to_track_map[i], 0, types[i].num_pins - 1, 0,
types[i].height - 1, 0, 3, 0, sizeof(int));
}
free(ipin_to_track_map);
}
/* Does the actual work of allocating the rr_graph and filling all the *
* appropriate values. Everything up to this was just a prelude! */
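/* The loading below proceeds in three passes over the grid: (1) SOURCE and
 * SINK nodes plus their edges to OPINs/IPINs, (2) OPIN-to-track edges (in
 * the bidir or unidir flavor), and (3) the CHANX/CHANY tracks themselves,
 * with their track-to-track and track-to-IPIN edges. */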
static void alloc_and_load_rr_graph(INP int num_nodes,
INP t_rr_node * L_rr_node, INP int num_seg_types,
INP t_seg_details * seg_details, INP boolean * L_rr_edge_done,
INP struct s_ivec ****track_to_ipin_lookup,
INP int *****opin_to_track_map, INP struct s_ivec ***switch_block_conn,
INP struct s_grid_tile **L_grid, INP int L_nx, INP int L_ny, INP int Fs,
INP short *****sblock_pattern, INP int **Fc_out, INP int **Fc_xofs,
INP int **Fc_yofs, INP t_ivec *** L_rr_node_indices,
INP int nodes_per_chan, INP enum e_switch_block_type sb_type,
INP int delayless_switch, INP enum e_directionality directionality,
INP int wire_to_ipin_switch, OUTP boolean * Fc_clipped,
INP t_direct_inf *directs, INP int num_directs,
INP t_clb_to_clb_directs *clb_to_clb_directs) {
int i, j;
boolean clipped;
int *opin_mux_size = NULL;
/* If Fc gets clipped, this will be flagged to true */
*Fc_clipped = FALSE;
	/* Connect SINKS and SOURCES to their pins. */
for (i = 0; i <= (L_nx + 1); i++) {
for (j = 0; j <= (L_ny + 1); j++) {
build_rr_sinks_sources(i, j, L_rr_node, L_rr_node_indices,
delayless_switch, L_grid);
}
}
/* Build opins */
for (i = 0; i <= (L_nx + 1); ++i) {
for (j = 0; j <= (L_ny + 1); ++j) {
if (BI_DIRECTIONAL == directionality) {
build_bidir_rr_opins(i, j, L_rr_node, L_rr_node_indices,
opin_to_track_map, Fc_out, L_rr_edge_done, seg_details,
L_grid, delayless_switch,
directs, num_directs, clb_to_clb_directs);
} else {
assert(UNI_DIRECTIONAL == directionality);
build_unidir_rr_opins(i, j, L_grid, Fc_out, nodes_per_chan,
seg_details, Fc_xofs, Fc_yofs, L_rr_node,
L_rr_edge_done, &clipped, L_rr_node_indices, delayless_switch,
directs, num_directs, clb_to_clb_directs);
if (clipped) {
*Fc_clipped = TRUE;
}
}
}
}
	/* Snapshot the current fan-in values so we know how many OPINs
	 * drive each mux before the track-to-track edges are added. */
opin_mux_size = (int *) my_malloc(sizeof(int) * num_nodes);
for (i = 0; i < num_nodes; ++i) {
opin_mux_size[i] = L_rr_node[i].fan_in;
}
/* Build channels */
assert(Fs % 3 == 0);
for (i = 0; i <= L_nx; i++) {
for (j = 0; j <= L_ny; j++) {
if (i > 0) {
build_rr_xchan(i, j, track_to_ipin_lookup, switch_block_conn,
CHANX_COST_INDEX_START, nodes_per_chan, opin_mux_size,
sblock_pattern, Fs / 3, seg_details, L_rr_node_indices,
L_rr_edge_done, L_rr_node, wire_to_ipin_switch,
directionality);
}
if (j > 0) {
build_rr_ychan(i, j, track_to_ipin_lookup, switch_block_conn,
CHANX_COST_INDEX_START + num_seg_types, nodes_per_chan,
opin_mux_size, sblock_pattern, Fs / 3, seg_details,
L_rr_node_indices, L_rr_edge_done, L_rr_node,
wire_to_ipin_switch, directionality);
}
}
}
free(opin_mux_size);
}
static void build_bidir_rr_opins(INP int i, INP int j,
INOUTP t_rr_node * L_rr_node, INP t_ivec *** L_rr_node_indices,
INP int *****opin_to_track_map, INP int **Fc_out,
INP boolean * L_rr_edge_done, INP t_seg_details * seg_details,
INP struct s_grid_tile **L_grid, INP int delayless_switch,
INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs) {
int ipin, inode, num_edges, *Fc, ofs;
t_type_ptr type;
struct s_linked_edge *edge_list, *next;
	/* OPIN edges need to be done all at once, so let the offset-0
	 * block do the work. */
if (L_grid[i][j].offset > 0) {
return;
}
type = L_grid[i][j].type;
Fc = Fc_out[type->index];
for (ipin = 0; ipin < type->num_pins; ++ipin) {
/* We only are working with opins so skip non-drivers */
if (type->class_inf[type->pin_class[ipin]].type != DRIVER) {
continue;
}
num_edges = 0;
edge_list = NULL;
if(Fc[ipin] != 0) {
for (ofs = 0; ofs < type->height; ++ofs) {
num_edges += get_bidir_opin_connections(i, j + ofs, ipin,
&edge_list, opin_to_track_map, Fc[ipin], L_rr_edge_done,
L_rr_node_indices, seg_details);
}
}
/* Add in direct connections */
num_edges += get_opin_direct_connecions(i, j, ipin, &edge_list, L_rr_node_indices, delayless_switch, directs, num_directs, clb_to_clb_directs);
inode = get_rr_node_index(i, j, OPIN, ipin, L_rr_node_indices);
alloc_and_load_edges_and_switches(L_rr_node, inode, num_edges,
L_rr_edge_done, edge_list);
while (edge_list != NULL) {
next = edge_list->next;
free(edge_list);
edge_list = next;
}
}
}
void free_rr_graph(void) {
int i;
	/* Frees all the routing graph data structures, if they have been     *
	 * allocated. I use rr_mem_ch.chunk_ptr_head as a flag to indicate    *
	 * whether or not the graph has been allocated -- if it is not NULL,  *
	 * a routing graph exists and can be freed. Hence, you can call this  *
	 * routine even if you're not sure of whether a rr_graph exists or not. */
if (rr_mem_ch.chunk_ptr_head == NULL) /* Nothing to free. */
return;
free_chunk_memory(&rr_mem_ch); /* Frees ALL "chunked" data */
/* Before adding any more free calls here, be sure the data is NOT chunk *
* allocated, as ALL the chunk allocated data is already free! */
if(net_rr_terminals != NULL) {
free(net_rr_terminals);
}
for (i = 0; i < num_rr_nodes; i++) {
if (rr_node[i].edges != NULL) {
free(rr_node[i].edges);
}
if (rr_node[i].switches != NULL) {
free(rr_node[i].switches);
}
}
assert(rr_node_indices);
free_rr_node_indices(rr_node_indices);
free(rr_node);
free(rr_indexed_data);
for (i = 0; i < num_blocks; i++) {
free(rr_blk_source[i]);
}
free(rr_blk_source);
rr_blk_source = NULL;
net_rr_terminals = NULL;
rr_node = NULL;
rr_node_indices = NULL;
rr_indexed_data = NULL;
num_rr_nodes = 0;
}
static void alloc_net_rr_terminals(void) {
int inet;
net_rr_terminals = (int **) my_malloc(num_nets * sizeof(int *));
for (inet = 0; inet < num_nets; inet++) {
net_rr_terminals[inet] = (int *) my_chunk_malloc(
(clb_net[inet].num_sinks + 1) * sizeof(int),
&rr_mem_ch);
}
}
void load_net_rr_terminals(t_ivec *** L_rr_node_indices) {
/* Allocates and loads the net_rr_terminals data structure. For each net *
* it stores the rr_node index of the SOURCE of the net and all the SINKs *
* of the net. [0..num_nets-1][0..num_pins-1]. Entry [inet][pnum] stores *
* the rr index corresponding to the SOURCE (opin) or SINK (ipin) of pnum. */
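	/* For example, a net with one driver and two sinks fills entries
	 * [inet][0] (its SOURCE rr index) and [inet][1..2] (its SINK rr
	 * indices). */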
int inet, ipin, inode, iblk, i, j, node_block_pin, iclass;
t_type_ptr type;
for (inet = 0; inet < num_nets; inet++) {
for (ipin = 0; ipin <= clb_net[inet].num_sinks; ipin++) {
iblk = clb_net[inet].node_block[ipin];
i = block[iblk].x;
j = block[iblk].y;
type = block[iblk].type;
/* In the routing graph, each (x, y) location has unique pins on it
* so when there is capacity, blocks are packed and their pin numbers
* are offset to get their actual rr_node */
node_block_pin = clb_net[inet].node_block_pin[ipin];
iclass = type->pin_class[node_block_pin];
inode = get_rr_node_index(i, j, (ipin == 0 ? SOURCE : SINK), /* First pin is driver */
iclass, L_rr_node_indices);
net_rr_terminals[inet][ipin] = inode;
}
}
}
static void alloc_and_load_rr_clb_source(t_ivec *** L_rr_node_indices) {
/* Saves the rr_node corresponding to each SOURCE and SINK in each CLB *
* in the FPGA. Currently only the SOURCE rr_node values are used, and *
* they are used only to reserve pins for locally used OPINs in the router. *
* [0..num_blocks-1][0..num_class-1]. The values for blocks that are pads *
* are NOT valid. */
int iblk, i, j, iclass, inode;
int class_low, class_high;
t_rr_type rr_type;
t_type_ptr type;
rr_blk_source = (int **) my_malloc(num_blocks * sizeof(int *));
for (iblk = 0; iblk < num_blocks; iblk++) {
type = block[iblk].type;
get_class_range_for_block(iblk, &class_low, &class_high);
rr_blk_source[iblk] = (int *) my_malloc(type->num_class * sizeof(int));
for (iclass = 0; iclass < type->num_class; iclass++) {
if (iclass >= class_low && iclass <= class_high) {
i = block[iblk].x;
j = block[iblk].y;
if (type->class_inf[iclass].type == DRIVER)
rr_type = SOURCE;
else
rr_type = SINK;
inode = get_rr_node_index(i, j, rr_type, iclass,
L_rr_node_indices);
rr_blk_source[iblk][iclass] = inode;
} else {
rr_blk_source[iblk][iclass] = OPEN;
}
}
}
}
static void build_rr_sinks_sources(INP int i, INP int j,
INP t_rr_node * L_rr_node, INP t_ivec *** L_rr_node_indices,
INP int delayless_switch, INP struct s_grid_tile **L_grid) {
	/* Loads IPIN, SINK, SOURCE, and OPIN nodes.
	 * Loads IPIN-to-SINK edges and SOURCE-to-OPIN edges. */
int ipin, iclass, inode, pin_num, to_node, num_edges;
int num_class, num_pins;
t_type_ptr type;
struct s_class *class_inf;
int *pin_class;
const t_pb_graph_node *pb_graph_node;
int iport, ipb_pin, iporttype, z;
	/* Since we share nodes within a large block, only the
	 * start tile can initialize sinks, sources, and pins */
if (L_grid[i][j].offset > 0)
return;
type = L_grid[i][j].type;
num_class = type->num_class;
class_inf = type->class_inf;
num_pins = type->num_pins;
pin_class = type->pin_class;
z = 0;
	/* SINKs, and SOURCE-to-OPIN edges */
for (iclass = 0; iclass < num_class; iclass++) {
if (class_inf[iclass].type == DRIVER) { /* SOURCE */
inode = get_rr_node_index(i, j, SOURCE, iclass, L_rr_node_indices);
num_edges = class_inf[iclass].num_pins;
L_rr_node[inode].num_edges = num_edges;
L_rr_node[inode].edges = (int *) my_malloc(num_edges * sizeof(int));
L_rr_node[inode].switches = (short *) my_malloc(
num_edges * sizeof(short));
for (ipin = 0; ipin < class_inf[iclass].num_pins; ipin++) {
pin_num = class_inf[iclass].pinlist[ipin];
to_node = get_rr_node_index(i, j, OPIN, pin_num,
L_rr_node_indices);
L_rr_node[inode].edges[ipin] = to_node;
L_rr_node[inode].switches[ipin] = delayless_switch;
++L_rr_node[to_node].fan_in;
}
L_rr_node[inode].cost_index = SOURCE_COST_INDEX;
L_rr_node[inode].type = SOURCE;
} else { /* SINK */
assert(class_inf[iclass].type == RECEIVER);
inode = get_rr_node_index(i, j, SINK, iclass, L_rr_node_indices);
/* NOTE: To allow route throughs through clbs, change the lines below to *
* make an edge from the input SINK to the output SOURCE. Do for just the *
* special case of INPUTS = class 0 and OUTPUTS = class 1 and see what it *
* leads to. If route throughs are allowed, you may want to increase the *
* base cost of OPINs and/or SOURCES so they aren't used excessively. */
			/* Initialize to unconnected so the values are well-defined */
L_rr_node[inode].num_edges = 0;
L_rr_node[inode].edges = NULL;
L_rr_node[inode].switches = NULL;
L_rr_node[inode].cost_index = SINK_COST_INDEX;
L_rr_node[inode].type = SINK;
}
/* Things common to both SOURCEs and SINKs. */
L_rr_node[inode].capacity = class_inf[iclass].num_pins;
L_rr_node[inode].occ = 0;
L_rr_node[inode].xlow = i;
L_rr_node[inode].xhigh = i;
L_rr_node[inode].ylow = j;
L_rr_node[inode].yhigh = j + type->height - 1;
L_rr_node[inode].R = 0;
L_rr_node[inode].C = 0;
L_rr_node[inode].ptc_num = iclass;
L_rr_node[inode].direction = (enum e_direction)OPEN;
L_rr_node[inode].drivers = (enum e_drivers)OPEN;
}
iporttype = iport = ipb_pin = 0;
pb_graph_node = type->pb_graph_head;
if(pb_graph_node != NULL && pb_graph_node->num_input_ports == 0) {
iporttype = 1;
}
	/* Connect IPINs to SINKs; OPINs are initialized with no edges */
for (ipin = 0; ipin < num_pins; ipin++) {
iclass = pin_class[ipin];
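		/* z identifies which capacity instance (sub-block) of the type
		 * this pin belongs to: pins of stacked instances are numbered
		 * consecutively, so dividing by the per-instance pin count
		 * recovers the instance index. */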
z = ipin / (type->pb_type->num_clock_pins + type->pb_type->num_output_pins + type->pb_type->num_input_pins);
if (class_inf[iclass].type == RECEIVER) {
inode = get_rr_node_index(i, j, IPIN, ipin, L_rr_node_indices);
to_node = get_rr_node_index(i, j, SINK, iclass, L_rr_node_indices);
L_rr_node[inode].num_edges = 1;
L_rr_node[inode].edges = (int *) my_malloc(sizeof(int));
L_rr_node[inode].switches = (short *) my_malloc(sizeof(short));
L_rr_node[inode].edges[0] = to_node;
L_rr_node[inode].switches[0] = delayless_switch;
++L_rr_node[to_node].fan_in;
L_rr_node[inode].cost_index = IPIN_COST_INDEX;
L_rr_node[inode].type = IPIN;
/* Add in information so that I can identify which cluster pin this rr_node connects to later */
L_rr_node[inode].z = z;
if(iporttype == 0) {
L_rr_node[inode].pb_graph_pin = &pb_graph_node->input_pins[iport][ipb_pin];
ipb_pin++;
if(ipb_pin >= pb_graph_node->num_input_pins[iport]) {
iport++;
ipb_pin = 0;
if(iport >= pb_graph_node->num_input_ports) {
iporttype++;
iport = 0;
if(pb_graph_node->num_clock_ports == 0) {
iporttype = 0;
}
}
}
} else {
assert(iporttype == 1);
L_rr_node[inode].pb_graph_pin = &pb_graph_node->clock_pins[iport][ipb_pin];
				ipb_pin++; /* Xifan TANG: Original VPR does not have this increment! */
if(ipb_pin >= pb_graph_node->num_clock_pins[iport]) {
iport++;
ipb_pin = 0;
if(iport >= pb_graph_node->num_clock_ports) {
iporttype = 0;
iport = 0;
if(pb_graph_node->num_input_ports == 0) {
iporttype = 1;
}
}
}
}
} else {
assert(class_inf[iclass].type == DRIVER);
inode = get_rr_node_index(i, j, OPIN, ipin, L_rr_node_indices);
/* Add in information so that I can identify which cluster pin this rr_node connects to later */
L_rr_node[inode].z = z;
L_rr_node[inode].num_edges = 0;
L_rr_node[inode].edges = NULL;
L_rr_node[inode].switches = NULL;
L_rr_node[inode].cost_index = OPIN_COST_INDEX;
L_rr_node[inode].type = OPIN;
L_rr_node[inode].pb_graph_pin = &pb_graph_node->output_pins[iport][ipb_pin];
			ipb_pin++; /* Xifan TANG: Original VPR does not have this increment! */
if(ipb_pin >= pb_graph_node->num_output_pins[iport]) {
iport++;
ipb_pin = 0;
if(iport >= pb_graph_node->num_output_ports) {
iport = 0;
if(pb_graph_node->num_input_ports == 0) {
iporttype = 1;
} else {
iporttype = 0;
}
}
}
}
/* Common to both DRIVERs and RECEIVERs */
L_rr_node[inode].capacity = 1;
L_rr_node[inode].occ = 0;
L_rr_node[inode].xlow = i;
L_rr_node[inode].xhigh = i;
L_rr_node[inode].ylow = j;
L_rr_node[inode].yhigh = j + type->height - 1;
L_rr_node[inode].C = 0;
L_rr_node[inode].R = 0;
L_rr_node[inode].ptc_num = ipin;
L_rr_node[inode].direction = (enum e_direction)OPEN;
L_rr_node[inode].drivers = (enum e_drivers)OPEN;
}
}
static void build_rr_xchan(INP int i, INP int j,
INP struct s_ivec ****track_to_ipin_lookup,
INP struct s_ivec ***switch_block_conn, INP int cost_index_offset,
INP int nodes_per_chan, INP int *opin_mux_size,
INP short *****sblock_pattern, INP int Fs_per_side,
INP t_seg_details * seg_details, INP t_ivec *** L_rr_node_indices,
INOUTP boolean * L_rr_edge_done, INOUTP t_rr_node * L_rr_node,
INP int wire_to_ipin_switch, INP enum e_directionality directionality) {
/* Loads up all the routing resource nodes in the x-directed channel *
* segments starting at (i,j). */
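	/* One rr_node spans an entire wire segment: e.g. a length-4 CHANX wire
	 * occupying columns 1..4 of row j has istart = 1, iend = 4, and only
	 * the iteration with i == istart builds the node -- the other columns
	 * skip it via the "not the start of this segment" checks below. */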
int itrack, istart, iend, num_edges, inode, length;
struct s_linked_edge *edge_list, *next;
/* mrFPGA: Xifan TANG */
int jstart, jend;
/* END */
for (itrack = 0; itrack < nodes_per_chan; itrack++) {
/* First count number of edges and put the edges in a linked list. */
num_edges = 0;
edge_list = NULL;
/* mrFPGA : Xifan TANG*/
if ( is_stack ) {
jstart = get_seg_start (seg_details, itrack, i, j);
if ( jstart != j )
continue;
jend = get_seg_end (seg_details, itrack, jstart, i, ny);
istart = i;
iend = i;
num_edges += get_track_to_ipins(jstart, i, itrack, &edge_list, L_rr_node_indices,
track_to_ipin_lookup, seg_details, CHANX, nx, wire_to_ipin_switch,
directionality);
num_edges += get_track_to_tracks(i, jstart, itrack, CHANX, i-1, CHANY, ny,
nodes_per_chan, opin_mux_size, Fs_per_side, sblock_pattern,
&edge_list, seg_details, directionality, L_rr_node_indices,
L_rr_edge_done, switch_block_conn);
num_edges += get_track_to_tracks(i, jstart, itrack, CHANX, i, CHANY, ny,
nodes_per_chan, opin_mux_size, Fs_per_side, sblock_pattern,
&edge_list, seg_details, directionality, L_rr_node_indices,
L_rr_edge_done, switch_block_conn);
if( jstart > 0 ) {
num_edges += get_track_to_tracks(i, jstart, itrack, CHANX, jstart - 1, CHANX, ny,
nodes_per_chan, opin_mux_size, Fs_per_side,sblock_pattern, &edge_list,
seg_details, directionality, L_rr_node_indices, L_rr_edge_done, switch_block_conn);
}
if( jend < ny ) {
num_edges += get_track_to_tracks(i, jstart, itrack, CHANX, jend + 1, CHANX, ny,
nodes_per_chan, opin_mux_size, Fs_per_side, sblock_pattern, &edge_list,
seg_details, directionality, L_rr_node_indices, L_rr_edge_done, switch_block_conn);
}
} else { /* end */
			/* Xifan TANG: I removed the accurate part; the original VPR code follows */
			/* Original VPR part */
istart = get_seg_start(seg_details, itrack, j, i);
iend = get_seg_end(seg_details, itrack, istart, j, nx);
if (i > istart)
continue; /* Not the start of this segment. */
jstart = j;
jend = j;
/* end */
/* First count number of edges and put the edges in a linked list. */
num_edges = 0;
edge_list = NULL;
num_edges += get_track_to_ipins(istart, j, itrack, &edge_list,
L_rr_node_indices, track_to_ipin_lookup, seg_details, CHANX, nx,
wire_to_ipin_switch, directionality);
if (j > 0) {
num_edges += get_track_to_tracks(j, istart, itrack, CHANX, j, CHANY,
nx, nodes_per_chan, opin_mux_size, Fs_per_side,
sblock_pattern, &edge_list, seg_details, directionality,
L_rr_node_indices, L_rr_edge_done, switch_block_conn);
}
if (j < ny) {
num_edges += get_track_to_tracks(j, istart, itrack, CHANX, j + 1,
CHANY, nx, nodes_per_chan, opin_mux_size, Fs_per_side,
sblock_pattern, &edge_list, seg_details, directionality,
L_rr_node_indices, L_rr_edge_done, switch_block_conn);
}
if (istart > 1) {
num_edges += get_track_to_tracks(j, istart, itrack, CHANX,
istart - 1, CHANX, nx, nodes_per_chan, opin_mux_size,
Fs_per_side, sblock_pattern, &edge_list, seg_details,
directionality, L_rr_node_indices, L_rr_edge_done,
switch_block_conn);
}
if (iend < nx) {
num_edges += get_track_to_tracks(j, istart, itrack, CHANX, iend + 1,
CHANX, nx, nodes_per_chan, opin_mux_size, Fs_per_side,
sblock_pattern, &edge_list, seg_details, directionality,
L_rr_node_indices, L_rr_edge_done, switch_block_conn);
}
}
/* END */
inode = get_rr_node_index(i, j, CHANX, itrack, L_rr_node_indices);
alloc_and_load_edges_and_switches(L_rr_node, inode, num_edges,
L_rr_edge_done, edge_list);
while (edge_list != NULL) {
next = edge_list->next;
free(edge_list);
edge_list = next;
}
/* Edge arrays have now been built up. Do everything else. */
L_rr_node[inode].cost_index = cost_index_offset
+ seg_details[itrack].index;
L_rr_node[inode].occ = 0;
L_rr_node[inode].capacity = 1; /* GLOBAL routing handled elsewhere */
if (is_stack) {
/* mrFPGA: Xifan TANG */
L_rr_node[inode].xlow = istart;
L_rr_node[inode].xhigh = iend;
L_rr_node[inode].ylow = jstart;
L_rr_node[inode].yhigh = jend;
} else {
/* Original VPR */
L_rr_node[inode].xlow = istart;
L_rr_node[inode].xhigh = iend;
L_rr_node[inode].ylow = j;
L_rr_node[inode].yhigh = j;
}
/* mrFPGA: Xifan TANG */
length = is_stack ? (jend - jstart) : (iend - istart + 1);
//length = (iend - istart + 1);
/* END */
L_rr_node[inode].R = length * seg_details[itrack].Rmetal;
L_rr_node[inode].C = length * seg_details[itrack].Cmetal;
L_rr_node[inode].ptc_num = itrack;
L_rr_node[inode].type = CHANX;
L_rr_node[inode].direction = seg_details[itrack].direction;
L_rr_node[inode].drivers = seg_details[itrack].drivers;
/* Xifan TANG:(For SPICE Modeling) Fill the segment inf */
//LL_rr_node[inode].seg_index = seg_details[itrack].index;
}
}
static void build_rr_ychan(INP int i, INP int j,
INP struct s_ivec ****track_to_ipin_lookup,
INP struct s_ivec ***switch_block_conn, INP int cost_index_offset,
INP int nodes_per_chan, INP int *opin_mux_size,
INP short *****sblock_pattern, INP int Fs_per_side,
INP t_seg_details * seg_details, INP t_ivec *** L_rr_node_indices,
INP boolean * L_rr_edge_done, INOUTP t_rr_node * L_rr_node,
INP int wire_to_ipin_switch, INP enum e_directionality directionality) {
/* Loads up all the routing resource nodes in the y-directed channel *
* segments starting at (i,j). */
int itrack, istart, iend, num_edges, inode, length;
struct s_linked_edge *edge_list, *next;
/* mrFPGA: Xifan TANG*/
int jstart, jend;
/* END */
for (itrack = 0; itrack < nodes_per_chan; itrack++) {
/* First count number of edges and put the edges in a linked list. */
num_edges = 0;
edge_list = NULL;
/* mrFPGA */
if ( is_stack ) {
istart = get_seg_start (seg_details, itrack, j, i);
if ( istart != i )
continue;
iend = get_seg_end (seg_details, itrack, istart, j, nx);
/* mrFPGA: Xifan TANG */
jstart = j;
jend = j;
/* end */
num_edges += get_track_to_ipins(istart, j, itrack, &edge_list, L_rr_node_indices,
track_to_ipin_lookup, seg_details, CHANY, nx, wire_to_ipin_switch,
directionality);
num_edges += get_track_to_tracks(j, istart, itrack, CHANY, j-1, CHANX, nx,
nodes_per_chan, opin_mux_size, Fs_per_side, sblock_pattern,
&edge_list, seg_details, directionality, L_rr_node_indices,
L_rr_edge_done, switch_block_conn);
num_edges += get_track_to_tracks(j, istart, itrack, CHANY, j, CHANX, nx,
nodes_per_chan, opin_mux_size, Fs_per_side, sblock_pattern,
&edge_list, seg_details, directionality, L_rr_node_indices,
L_rr_edge_done, switch_block_conn);
if (istart > 0) {
num_edges += get_track_to_tracks(j, istart, itrack, CHANY, istart - 1, CHANY, nx,
nodes_per_chan, opin_mux_size, Fs_per_side, sblock_pattern, &edge_list,
seg_details, directionality, L_rr_node_indices, L_rr_edge_done,
switch_block_conn);
}
if (iend < nx) {
num_edges += get_track_to_tracks(j, istart, itrack, CHANY, iend + 1,
CHANY, nx, nodes_per_chan, opin_mux_size, Fs_per_side,
sblock_pattern, &edge_list, seg_details, directionality,
L_rr_node_indices, L_rr_edge_done, switch_block_conn);
}
} else { /* end */
istart = get_seg_start(seg_details, itrack, i, j);
iend = get_seg_end(seg_details, itrack, istart, i, ny);
if (j > istart)
continue; /* Not the start of this segment. */
/* mrFPGA: Xifan TANG*/
jstart = i;
jend = i;
/* END */
/* Original VPR */
/* First count number of edges and put the edges in a linked list. */
num_edges = 0;
edge_list = NULL;
num_edges += get_track_to_ipins(istart, i, itrack, &edge_list,
L_rr_node_indices, track_to_ipin_lookup, seg_details, CHANY, ny,
wire_to_ipin_switch, directionality);
if (i > 0) {
num_edges += get_track_to_tracks(i, istart, itrack, CHANY, i, CHANX,
ny, nodes_per_chan, opin_mux_size, Fs_per_side,
sblock_pattern, &edge_list, seg_details, directionality,
L_rr_node_indices, L_rr_edge_done, switch_block_conn);
}
if (i < nx) {
num_edges += get_track_to_tracks(i, istart, itrack, CHANY, i + 1,
CHANX, ny, nodes_per_chan, opin_mux_size, Fs_per_side,
sblock_pattern, &edge_list, seg_details, directionality,
L_rr_node_indices, L_rr_edge_done, switch_block_conn);
}
if (istart > 1) {
num_edges += get_track_to_tracks(i, istart, itrack, CHANY,
istart - 1, CHANY, ny, nodes_per_chan, opin_mux_size,
Fs_per_side, sblock_pattern, &edge_list, seg_details,
directionality, L_rr_node_indices, L_rr_edge_done,
switch_block_conn);
}
if (iend < ny) {
num_edges += get_track_to_tracks(i, istart, itrack, CHANY, iend + 1,
CHANY, ny, nodes_per_chan, opin_mux_size, Fs_per_side,
sblock_pattern, &edge_list, seg_details, directionality,
L_rr_node_indices, L_rr_edge_done, switch_block_conn);
}
}
/* END */
inode = get_rr_node_index(i, j, CHANY, itrack, L_rr_node_indices);
alloc_and_load_edges_and_switches(L_rr_node, inode, num_edges,
L_rr_edge_done, edge_list);
while (edge_list != NULL) {
next = edge_list->next;
free(edge_list);
edge_list = next;
}
/* Edge arrays have now been built up. Do everything else. */
L_rr_node[inode].cost_index = cost_index_offset
+ seg_details[itrack].index;
L_rr_node[inode].occ = 0;
L_rr_node[inode].capacity = 1; /* GLOBAL routing handled elsewhere */
if (is_stack) {
/* mrFPGA: Xifan TANG */
L_rr_node[inode].xlow = istart;
L_rr_node[inode].xhigh = iend;
L_rr_node[inode].ylow = jstart;
L_rr_node[inode].yhigh = jend;
} else {
/* Original VPR */
L_rr_node[inode].xlow = i;
L_rr_node[inode].xhigh = i;
L_rr_node[inode].ylow = istart;
L_rr_node[inode].yhigh = iend;
}
/* mrFPGA : Xifan TANG*/
length = is_stack ? (iend - istart) : (iend - istart + 1);
//length = (iend - istart + 1);
/* END */
L_rr_node[inode].R = length * seg_details[itrack].Rmetal;
L_rr_node[inode].C = length * seg_details[itrack].Cmetal;
L_rr_node[inode].ptc_num = itrack;
L_rr_node[inode].type = CHANY;
L_rr_node[inode].direction = seg_details[itrack].direction;
L_rr_node[inode].drivers = seg_details[itrack].drivers;
/* Xifan TANG:(For SPICE Modeling) Fill the segment inf */
//LL_rr_node[inode].seg_index = seg_details[itrack].index;
}
}
void watch_edges(int inode, t_linked_edge * edge_list_head) {
t_linked_edge *list_ptr;
int i, to_node;
list_ptr = edge_list_head;
i = 0;
vpr_printf(TIO_MESSAGE_TRACE, "!!! Watching Node %d !!!!\n", inode);
print_rr_node(stdout, rr_node, inode);
vpr_printf(TIO_MESSAGE_TRACE, "Currently connects to:\n");
while (list_ptr != NULL) {
to_node = list_ptr->edge;
print_rr_node(stdout, rr_node, to_node);
list_ptr = list_ptr->next;
i++;
}
}
void alloc_and_load_edges_and_switches(INP t_rr_node * L_rr_node, INP int inode,
INP int num_edges, INOUTP boolean * L_rr_edge_done,
INP t_linked_edge * edge_list_head) {
	/* Sets up all the edge related information for rr_node inode (num_edges, *
	 * the edges array and the switches array). The edge_list_head points to  *
	 * a list of the num_edges edges and switches to put in the arrays. The   *
	 * caller remains responsible for freeing that linked list. This routine  *
	 * also resets the rr_edge_done array for the next rr_node (i.e. sets it  *
	 * so that no edges are marked as having been seen before). */
t_linked_edge *list_ptr;
int i;
/* Check we aren't overwriting edges */
assert(L_rr_node[inode].num_edges < 1);
assert(NULL == L_rr_node[inode].edges);
assert(NULL == L_rr_node[inode].switches);
L_rr_node[inode].num_edges = num_edges;
L_rr_node[inode].edges = (int *) my_malloc(num_edges * sizeof(int));
L_rr_node[inode].switches = (short *) my_malloc(num_edges * sizeof(short));
i = 0;
list_ptr = edge_list_head;
while (list_ptr && (i < num_edges)) {
L_rr_node[inode].edges[i] = list_ptr->edge;
L_rr_node[inode].switches[i] = list_ptr->iswitch;
++L_rr_node[list_ptr->edge].fan_in;
/* Unmark the edge since we are done considering fanout from node. */
L_rr_edge_done[list_ptr->edge] = FALSE;
list_ptr = list_ptr->next;
++i;
}
assert(list_ptr == NULL);
assert(i == num_edges);
}
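/* Illustrative usage sketch (not part of the original source): callers build
 * up a linked edge list, hand it to alloc_and_load_edges_and_switches(), and
 * then free the list themselves -- the callee does not free it. This is the
 * pattern used by build_bidir_rr_opins() and build_rr_xchan()/ychan() above. */
#if 0
	t_linked_edge *edge_list = NULL, *next;
	int num_edges = 0;
	/* ... num_edges += get_bidir_opin_connections(..., &edge_list, ...); ... */
	alloc_and_load_edges_and_switches(rr_node, inode, num_edges,
			L_rr_edge_done, edge_list);
	while (edge_list != NULL) {
		next = edge_list->next;
		free(edge_list);
		edge_list = next;
	}
#endif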
int ****
alloc_and_load_pin_to_track_map(INP enum e_pin_type pin_type,
INP int nodes_per_chan, INP int *Fc, INP t_type_ptr Type,
INP boolean perturb_switch_pattern,
INP enum e_directionality directionality) {
int **num_dir; /* [0..height][0..3] Number of *physical* pins on each side. */
	int ***dir_list; /* [0..height][0..3][0..num_pins-1] list of pins of correct type *
	                  * on each side. Max possible space allocated for simplicity.    */
int i, j, k, iside, ipin, iclass, num_phys_pins, pindex, ioff;
int *pin_num_ordering, *side_ordering, *offset_ordering;
int **num_done_per_dir; /* [0..height][0..3] */
int ****tracks_connected_to_pin; /* [0..num_pins-1][0..height][0..3][0..Fc-1] */
/* NB: This wastes some space. Could set tracks_..._pin[ipin][ioff][iside] =
* NULL if there is no pin on that side, or that pin is of the wrong type.
* Probably not enough memory to worry about, esp. as it's temporary.
* If pin ipin on side iside does not exist or is of the wrong type,
	 * tracks_connected_to_pin[ipin][ioff][iside][0] = OPEN. */
if (Type->num_pins < 1) {
return NULL;
}
/* Currently, only two possible Fc values exist: 0 or default.
* Finding the max. value of Fc in block will result in the
* default value, which works for now. In the future, when
* the Fc values of all pins can vary, the max value will continue
* to work for matrix (de)allocation purposes. However, all looping
* will have to be modified to account for pin-based Fc values. */
int max_Fc = 0;
for (i = 0; i < Type->num_pins; ++i) {
iclass = Type->pin_class[i];
if (Fc[i] > max_Fc && Type->class_inf[iclass].type == pin_type) {
max_Fc = Fc[i];
}
}
tracks_connected_to_pin = (int ****) alloc_matrix4(0, Type->num_pins - 1, 0,
Type->height - 1, 0, 3, 0, max_Fc, sizeof(int));
for (ipin = 0; ipin < Type->num_pins; ipin++) {
for (ioff = 0; ioff < Type->height; ioff++) {
for (iside = 0; iside < 4; iside++) {
for (i = 0; i < max_Fc; ++i) {
tracks_connected_to_pin[ipin][ioff][iside][i] = OPEN; /* Unconnected. */
}
}
}
}
num_dir = (int **) alloc_matrix(0, Type->height - 1, 0, 3, sizeof(int));
dir_list = (int ***) alloc_matrix3(0, Type->height - 1, 0, 3, 0,
Type->num_pins - 1, sizeof(int));
/* Defensive coding. Try to crash hard if I use an unset entry. */
for (i = 0; i < Type->height; i++)
for (j = 0; j < 4; j++)
for (k = 0; k < Type->num_pins; k++)
dir_list[i][j][k] = (-1);
for (i = 0; i < Type->height; i++)
for (j = 0; j < 4; j++)
num_dir[i][j] = 0;
for (ipin = 0; ipin < Type->num_pins; ipin++) {
iclass = Type->pin_class[ipin];
if (Type->class_inf[iclass].type != pin_type) /* Doing either ipins OR opins */
continue;
/* Pins connecting only to global resources get no switches -> keeps the *
* area model accurate. */
if (Type->is_global_pin[ipin])
continue;
for (ioff = 0; ioff < Type->height; ioff++) {
for (iside = 0; iside < 4; iside++) {
if (Type->pinloc[ioff][iside][ipin] == 1) {
dir_list[ioff][iside][num_dir[ioff][iside]] = ipin;
num_dir[ioff][iside]++;
}
}
}
}
num_phys_pins = 0;
for (ioff = 0; ioff < Type->height; ioff++) {
for (iside = 0; iside < 4; iside++)
num_phys_pins += num_dir[ioff][iside]; /* Num. physical pins per type */
}
num_done_per_dir = (int **) alloc_matrix(0, Type->height - 1, 0, 3,
sizeof(int));
for (ioff = 0; ioff < Type->height; ioff++) {
for (iside = 0; iside < 4; iside++) {
num_done_per_dir[ioff][iside] = 0;
}
}
pin_num_ordering = (int *) my_malloc(num_phys_pins * sizeof(int));
side_ordering = (int *) my_malloc(num_phys_pins * sizeof(int));
offset_ordering = (int *) my_malloc(num_phys_pins * sizeof(int));
/* Connection block I use distributes pins evenly across the tracks *
* of ALL sides of the clb at once. Ensures that each pin connects *
* to spaced out tracks in its connection block, and that the other *
* pins (potentially in other C blocks) connect to the remaining tracks *
* first. Doesn't matter for large Fc, but should make a fairly *
* good low Fc block that leverages the fact that usually lots of pins *
* are logically equivalent. */
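	/* For a height-1 block, the walk below cycles TOP, RIGHT, BOTTOM,
	 * LEFT, bumping pindex once per full lap, so consecutive physical
	 * pins land on different sides of the block. */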
iside = LEFT;
ioff = Type->height - 1;
ipin = 0;
pindex = -1;
while (ipin < num_phys_pins) {
if (iside == TOP) {
iside = RIGHT;
} else if (iside == RIGHT) {
if (ioff <= 0) {
iside = BOTTOM;
} else {
ioff--;
}
} else if (iside == BOTTOM) {
iside = LEFT;
} else {
assert(iside == LEFT);
if (ioff >= Type->height - 1) {
pindex++;
iside = TOP;
} else {
ioff++;
}
}
assert(pindex < num_phys_pins);
/* Number of physical pins bounds number of logical pins */
if (num_done_per_dir[ioff][iside] >= num_dir[ioff][iside])
continue;
pin_num_ordering[ipin] = dir_list[ioff][iside][pindex];
side_ordering[ipin] = iside;
offset_ordering[ipin] = ioff;
assert(Type->pinloc[ioff][iside][dir_list[ioff][iside][pindex]]);
num_done_per_dir[ioff][iside]++;
ipin++;
}
if (perturb_switch_pattern) {
load_perturbed_switch_pattern(Type, tracks_connected_to_pin,
num_phys_pins, pin_num_ordering, side_ordering, offset_ordering,
nodes_per_chan, max_Fc, directionality);
} else {
load_uniform_switch_pattern(Type, tracks_connected_to_pin,
num_phys_pins, pin_num_ordering, side_ordering, offset_ordering,
nodes_per_chan, max_Fc, directionality);
}
check_all_tracks_reach_pins(Type, tracks_connected_to_pin, nodes_per_chan,
max_Fc, pin_type);
/* Free all temporary storage. */
free_matrix(num_dir, 0, Type->height - 1, 0, sizeof(int));
free_matrix3(dir_list, 0, Type->height - 1, 0, 3, 0, sizeof(int));
free_matrix(num_done_per_dir, 0, Type->height - 1, 0, sizeof(int));
free(pin_num_ordering);
free(side_ordering);
free(offset_ordering);
return tracks_connected_to_pin;
}
static void load_uniform_switch_pattern(INP t_type_ptr type,
INOUTP int ****tracks_connected_to_pin, INP int num_phys_pins,
INP int *pin_num_ordering, INP int *side_ordering,
INP int *offset_ordering, INP int nodes_per_chan, INP int Fc,
enum e_directionality directionality) {
/* Loads the tracks_connected_to_pin array with an even distribution of *
* switches across the tracks for each pin. For example, each pin connects *
* to every 4.3rd track in a channel, with exactly which tracks a pin *
* connects to staggered from pin to pin. */
int i, j, ipin, iside, ioff, itrack, k;
float f_track, fc_step;
int group_size;
float step_size;
/* Uni-directional drive is implemented to ensure no directional bias, which
 * imposes the two requirements noted below */
/* 1. Spacing should be (W/2)/(Fc/2), and step_size should be spacing/(num_phys_pins);
 *    lay down 2 switches on an adjacent pair of tracks at a time to ensure
 *    no directional bias. Basically, treat W (even) as W/2 pairs of tracks, and
 *    assign switches to a pair at a time. We can do this because W is guaranteed to
 *    be even; however, the same approach cannot be applied to the Fc_out pattern
 *    when L > 1 and W is not a multiple of 2L.
 *
 * 2. This generic pattern should be considered the tileable physical layout,
 *    meaning all track numbers here are physical numbers, so the caller must later
 *    apply the vpr_to_phy conversion to find the actual logical numbers to connect.
 *    This also means get_output_block_companion_track is not used to ensure
 *    no bias, since that routine describes a logical number and would be confusing here. */
step_size = (float) nodes_per_chan / (float) (Fc * num_phys_pins);
if (directionality == BI_DIRECTIONAL) {
group_size = 1;
} else {
assert(directionality == UNI_DIRECTIONAL);
group_size = 2;
}
assert((nodes_per_chan % group_size == 0) && (Fc % group_size == 0));
fc_step = (float) nodes_per_chan / (float) Fc;
for (i = 0; i < num_phys_pins; i++) {
ipin = pin_num_ordering[i];
iside = side_ordering[i];
ioff = offset_ordering[i];
/* Bi-directional treats each track separately, uni-directional works with pairs of tracks */
for (j = 0; j < (Fc / group_size); j++) {
f_track = (i * step_size) + (j * fc_step);
itrack = ((int) f_track) * group_size;
/* Catch possible floating point round error */
itrack = std::min(itrack, nodes_per_chan - group_size);
/* Assign the group of tracks for the Fc pattern */
for (k = 0; k < group_size; ++k) {
tracks_connected_to_pin[ipin][ioff][iside][group_size * j + k] =
itrack + k;
}
}
}
}
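/* Worked example of the arithmetic above (a standalone sketch, kept out of
 * the build with #if 0; the parameter values are illustrative assumptions).
 * With nodes_per_chan = 8, Fc = 4, num_phys_pins = 2 and UNI_DIRECTIONAL
 * routing: group_size = 2, step_size = 8/(4*2) = 1.0 and fc_step = 8/4 = 2.0,
 * where f_track is measured in units of group_size tracks (pairs here).
 * Pin 0 gets track pairs {0,1} and {4,5}; pin 1 gets {2,3} and {6,7},
 * i.e. evenly spaced and staggered from pin to pin. */
#if 0
static void demo_uniform_switch_pattern(void) {
	int nodes_per_chan = 8, Fc = 4, num_phys_pins = 2, group_size = 2;
	float step_size = (float) nodes_per_chan / (float) (Fc * num_phys_pins);
	float fc_step = (float) nodes_per_chan / (float) Fc;
	int i, j, k, itrack;
	for (i = 0; i < num_phys_pins; i++) {
		for (j = 0; j < (Fc / group_size); j++) {
			itrack = ((int) ((i * step_size) + (j * fc_step))) * group_size;
			if (itrack > nodes_per_chan - group_size)
				itrack = nodes_per_chan - group_size; /* guard against rounding */
			for (k = 0; k < group_size; ++k)
				printf("pin %d -> track %d\n", i, itrack + k);
		}
	}
}
#endif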
static void load_perturbed_switch_pattern(INP t_type_ptr type,
INOUTP int ****tracks_connected_to_pin, INP int num_phys_pins,
INP int *pin_num_ordering, INP int *side_ordering,
INP int *offset_ordering, INP int nodes_per_chan, INP int Fc,
enum e_directionality directionality) {
/* Loads the tracks_connected_to_pin array with an unevenly distributed *
* set of switches across the channel. This is done for inputs when *
* Fc_input = Fc_output to avoid creating "pin domains" -- certain output *
* pins being able to talk only to certain input pins because their switch *
* patterns exactly line up. Distribute Fc/2 + 1 switches over half the *
* channel and Fc/2 - 1 switches over the other half to make the switch *
* pattern different from the uniform one of the outputs. Also, have half *
* the pins put the "dense" part of their connections in the first half of *
* the channel and the other half put the "dense" part in the second half, *
* to make sure each track can connect to about the same number of ipins. */
int i, j, ipin, iside, itrack, ihalf, iconn, ioff;
int Fc_dense, Fc_sparse, Fc_half[2];
float f_track, spacing_dense, spacing_sparse, spacing[2];
float step_size;
assert(directionality == BI_DIRECTIONAL);
step_size = (float) nodes_per_chan / (float) (Fc * num_phys_pins);
Fc_dense = (Fc / 2) + 1;
Fc_sparse = Fc - Fc_dense; /* Works for even or odd Fc */
spacing_dense = (float) nodes_per_chan / (float) (2 * Fc_dense);
spacing_sparse = (float) nodes_per_chan / (float) (2 * Fc_sparse);
for (i = 0; i < num_phys_pins; i++) {
ipin = pin_num_ordering[i];
iside = side_ordering[i];
ioff = offset_ordering[i];
/* Flip every pin to balance switch density */
spacing[i % 2] = spacing_dense;
Fc_half[i % 2] = Fc_dense;
spacing[(i + 1) % 2] = spacing_sparse;
Fc_half[(i + 1) % 2] = Fc_sparse;
f_track = i * step_size; /* Start point. Staggered from pin to pin */
iconn = 0;
for (ihalf = 0; ihalf < 2; ihalf++) { /* For both dense and sparse halves. */
for (j = 0; j < Fc_half[ihalf]; ++j) {
itrack = (int) f_track;
/* Wraparound can occasionally occur due to floating point rounding.
This is okay because the starting position is > 0 when it occurs,
so the connection is still valid. */
itrack = itrack % nodes_per_chan;
tracks_connected_to_pin[ipin][ioff][iside][iconn] = itrack;
f_track += spacing[ihalf];
iconn++;
}
}
} /* End for all physical pins. */
}
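/* Worked example of the dense/sparse split above (illustrative numbers):
 * with nodes_per_chan = 12 and Fc = 6, Fc_dense = 6/2 + 1 = 4 and
 * Fc_sparse = 2, so spacing_dense = 12/8 = 1.5 and spacing_sparse = 12/4 = 3.0.
 * Pin 0 then connects to tracks {0, 1, 3, 4} in the dense half followed by
 * {6, 9} in the sparse half; pin 1 starts staggered by step_size and puts
 * its dense part in the other half of the channel. */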
static void check_all_tracks_reach_pins(t_type_ptr type,
int ****tracks_connected_to_pin, int nodes_per_chan, int Fc,
enum e_pin_type ipin_or_opin) {
/* Checks that all tracks can be reached by some pin. */
int iconn, iside, itrack, ipin, ioff;
int *num_conns_to_track; /* [0..nodes_per_chan-1] */
assert(nodes_per_chan > 0);
num_conns_to_track = (int *) my_calloc(nodes_per_chan, sizeof(int));
for (ipin = 0; ipin < type->num_pins; ipin++) {
for (ioff = 0; ioff < type->height; ioff++) {
for (iside = 0; iside < 4; iside++) {
if (tracks_connected_to_pin[ipin][ioff][iside][0] != OPEN) { /* Pin exists */
for (iconn = 0; iconn < Fc; iconn++) {
itrack =
tracks_connected_to_pin[ipin][ioff][iside][iconn];
num_conns_to_track[itrack]++;
}
}
}
}
}
for (itrack = 0; itrack < nodes_per_chan; itrack++) {
if (num_conns_to_track[itrack] <= 0) {
vpr_printf(TIO_MESSAGE_ERROR, "check_all_tracks_reach_pins: Track %d does not connect to any CLB %ss.\n",
itrack, (ipin_or_opin == DRIVER ? "OPIN" : "IPIN"));
}
}
free(num_conns_to_track);
}
/* Allocates and loads the track to ipin lookup for each physical grid type. This
* is the same information as the ipin_to_track map but accessed in a different way. */
struct s_ivec ***
alloc_and_load_track_to_pin_lookup(INP int ****pin_to_track_map, INP int *Fc,
INP int height, INP int num_pins, INP int nodes_per_chan) {
int ipin, iside, itrack, iconn, ioff, pin_counter;
struct s_ivec ***track_to_pin_lookup;
/* [0..nodes_per_chan-1][0..height-1][0..3]. For each track number it stores a vector
* for each of the four sides. x-directed channels will use the TOP and
* BOTTOM vectors to figure out what clb input pins they connect to above
* and below them, respectively, while y-directed channels use the LEFT
* and RIGHT vectors. Each vector contains an nelem field saying how many
* ipins it connects to. The list[0..nelem-1] array then gives the pin
* numbers. */
/* Note that a clb pin that connects to a channel on its RIGHT means that *
* that channel connects to a clb pin on its LEFT. The convention used *
* here is always in the perspective of the CLB */
if (num_pins < 1) {
return NULL;
}
/* Alloc and zero the lookup table */
track_to_pin_lookup = (struct s_ivec ***) alloc_matrix3(0,
nodes_per_chan - 1, 0, height - 1, 0, 3, sizeof(struct s_ivec));
for (itrack = 0; itrack < nodes_per_chan; itrack++) {
for (ioff = 0; ioff < height; ioff++) {
for (iside = 0; iside < 4; iside++) {
track_to_pin_lookup[itrack][ioff][iside].nelem = 0;
track_to_pin_lookup[itrack][ioff][iside].list = NULL;
}
}
}
/* Counting pass. */
for (ipin = 0; ipin < num_pins; ipin++) {
for (ioff = 0; ioff < height; ioff++) {
for (iside = 0; iside < 4; iside++) {
if (pin_to_track_map[ipin][ioff][iside][0] == OPEN)
continue;
for (iconn = 0; iconn < Fc[ipin]; iconn++) {
itrack = pin_to_track_map[ipin][ioff][iside][iconn];
track_to_pin_lookup[itrack][ioff][iside].nelem++;
}
}
}
}
/* Allocate space. */
for (itrack = 0; itrack < nodes_per_chan; itrack++) {
for (ioff = 0; ioff < height; ioff++) {
for (iside = 0; iside < 4; iside++) {
track_to_pin_lookup[itrack][ioff][iside].list = NULL; /* Defensive code */
if (track_to_pin_lookup[itrack][ioff][iside].nelem != 0) {
track_to_pin_lookup[itrack][ioff][iside].list =
(int *) my_malloc(
track_to_pin_lookup[itrack][ioff][iside].nelem
* sizeof(int));
track_to_pin_lookup[itrack][ioff][iside].nelem = 0;
}
}
}
}
/* Loading pass. */
for (ipin = 0; ipin < num_pins; ipin++) {
for (ioff = 0; ioff < height; ioff++) {
for (iside = 0; iside < 4; iside++) {
if (pin_to_track_map[ipin][ioff][iside][0] == OPEN)
continue;
for (iconn = 0; iconn < Fc[ipin]; iconn++) {
itrack = pin_to_track_map[ipin][ioff][iside][iconn];
pin_counter =
track_to_pin_lookup[itrack][ioff][iside].nelem;
track_to_pin_lookup[itrack][ioff][iside].list[pin_counter] =
ipin;
track_to_pin_lookup[itrack][ioff][iside].nelem++;
}
}
}
}
return track_to_pin_lookup;
}
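/* Usage sketch (a hypothetical caller, kept out of the build with #if 0):
 * walk every ipin that a given track can reach on the TOP side of a
 * height-1 tile, assuming "lookup" came from
 * alloc_and_load_track_to_pin_lookup above. */
#if 0
static void demo_walk_track_to_pins(struct s_ivec ***lookup, int itrack) {
	int k;
	for (k = 0; k < lookup[itrack][0][TOP].nelem; k++) {
		printf("track %d -> ipin %d\n", itrack, lookup[itrack][0][TOP].list[k]);
	}
}
#endif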
/* A utility routine to dump the contents of the routing resource graph *
* (everything -- connectivity, occupancy, cost, etc.) into a file. Used *
* only for debugging. */
void dump_rr_graph(INP const char *file_name) {
int inode;
FILE *fp;
fp = my_fopen(file_name, "w", 0);
for (inode = 0; inode < num_rr_nodes; inode++) {
print_rr_node(fp, rr_node, inode);
fprintf(fp, "\n");
}
#if 0
fprintf(fp, "\n\n%d rr_indexed_data entries.\n\n", num_rr_indexed_data);
for (index = 0; index < num_rr_indexed_data; index++)
{
print_rr_indexed_data(fp, index);
fprintf(fp, "\n");
}
#endif
fclose(fp);
}
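/* Typical debug usage (illustrative): call dump_rr_graph("rr_graph.dump")
 * once the rr_graph has been fully built, then diff the dumps across runs. */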
/* Prints all the data about node inode to file fp. */
void print_rr_node(FILE * fp, t_rr_node * L_rr_node, int inode) {
static const char *name_type[] = { "SOURCE", "SINK", "IPIN", "OPIN",
"CHANX", "CHANY", "INTRA_CLUSTER_EDGE" };
static const char *direction_name[] = { "OPEN", "INC_DIRECTION",
"DEC_DIRECTION", "BI_DIRECTION" };
static const char *drivers_name[] = { "OPEN", "MULTI_BUFFER", "SINGLE" };
t_rr_type rr_type;
int iconn;
rr_type = L_rr_node[inode].type;
/* Make sure we don't overrun const arrays */
assert((int)rr_type < (int)(sizeof(name_type) / sizeof(char *)));
assert(
(L_rr_node[inode].direction + 1) < (int)(sizeof(direction_name) / sizeof(char *)));
assert(
(L_rr_node[inode].drivers + 1) < (int)(sizeof(drivers_name) / sizeof(char *)));
fprintf(fp, "Node: %d %s ", inode, name_type[rr_type]);
if ((L_rr_node[inode].xlow == L_rr_node[inode].xhigh)
&& (L_rr_node[inode].ylow == L_rr_node[inode].yhigh)) {
fprintf(fp, "(%d, %d) ", L_rr_node[inode].xlow, L_rr_node[inode].ylow);
} else {
fprintf(fp, "(%d, %d) to (%d, %d) ", L_rr_node[inode].xlow,
L_rr_node[inode].ylow, L_rr_node[inode].xhigh,
L_rr_node[inode].yhigh);
}
fprintf(fp, "Ptc_num: %d ", L_rr_node[inode].ptc_num);
fprintf(fp, "Direction: %s ",
direction_name[L_rr_node[inode].direction + 1]);
fprintf(fp, "Drivers: %s ", drivers_name[L_rr_node[inode].drivers + 1]);
fprintf(fp, "\n");
fprintf(fp, "%d edge(s):", L_rr_node[inode].num_edges);
for (iconn = 0; iconn < L_rr_node[inode].num_edges; iconn++)
fprintf(fp, " %d", L_rr_node[inode].edges[iconn]);
fprintf(fp, "\n");
fprintf(fp, "Switch types:");
for (iconn = 0; iconn < L_rr_node[inode].num_edges; iconn++)
fprintf(fp, " %d", L_rr_node[inode].switches[iconn]);
fprintf(fp, "\n");
fprintf(fp, "Occ: %d Capacity: %d\n", L_rr_node[inode].occ,
L_rr_node[inode].capacity);
if (rr_type != INTRA_CLUSTER_EDGE) {
fprintf(fp, "R: %g C: %g\n", L_rr_node[inode].R, L_rr_node[inode].C);
}
fprintf(fp, "Cost_index: %d\n", L_rr_node[inode].cost_index);
}
/* Prints all the rr_indexed_data of index to file fp. */
void print_rr_indexed_data(FILE * fp, int index) {
fprintf(fp, "Index: %d\n", index);
fprintf(fp, "ortho_cost_index: %d ",
rr_indexed_data[index].ortho_cost_index);
fprintf(fp, "base_cost: %g ", rr_indexed_data[index].saved_base_cost);
fprintf(fp, "saved_base_cost: %g\n",
rr_indexed_data[index].saved_base_cost);
fprintf(fp, "Seg_index: %d ", rr_indexed_data[index].seg_index);
fprintf(fp, "inv_length: %g\n", rr_indexed_data[index].inv_length);
fprintf(fp, "T_linear: %g ", rr_indexed_data[index].T_linear);
fprintf(fp, "T_quadratic: %g ", rr_indexed_data[index].T_quadratic);
fprintf(fp, "C_load: %g\n", rr_indexed_data[index].C_load);
}
static void build_unidir_rr_opins(INP int i, INP int j,
INP struct s_grid_tile **L_grid, INP int **Fc_out,
INP int nodes_per_chan, INP t_seg_details * seg_details,
INOUTP int **Fc_xofs, INOUTP int **Fc_yofs,
INOUTP t_rr_node * L_rr_node, INOUTP boolean * L_rr_edge_done,
OUTP boolean * Fc_clipped, INP t_ivec *** L_rr_node_indices, INP int delayless_switch,
INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs) {
/* Builds the edges for all the opins of the block at (i, j): each opin
* is connected to the uni-directional channel segments it borders, plus
* any direct clb-to-clb connections. */
t_type_ptr type;
int ipin, iclass, ofs, chan, seg, max_len, inode, max_Fc = -1;
enum e_side side;
t_rr_type chan_type;
t_linked_edge *edge_list = NULL, *next;
boolean clipped, vert, pos_dir;
int num_edges;
int **Fc_ofs;
*Fc_clipped = FALSE;
/* Only the base block of a set should use this function */
if (L_grid[i][j].offset > 0) {
return;
}
type = L_grid[i][j].type;
/* Currently, only two possible Fc values exist: 0 or the default.
* Finding the max value of Fc in the block yields the default value,
* which works for now. In the future, when the Fc values of all pins
* can vary, the max value will continue to work for matrix allocation
* purposes. However, all looping will have to be modified to account
* for pin-based Fc values. */
if (type->index > 0) {
max_Fc = 0;
for (ipin = 0; ipin < type->num_pins; ++ipin) {
iclass = type->pin_class[ipin];
if (Fc_out[type->index][ipin] > max_Fc && type->class_inf[iclass].type == DRIVER) {
max_Fc = Fc_out[type->index][ipin];
}
}
}
/* Go through each pin and find its fanout. */
for (ipin = 0; ipin < type->num_pins; ++ipin) {
/* Skip global pins and ipins */
iclass = type->pin_class[ipin];
if (type->class_inf[iclass].type != DRIVER) {
continue;
}
if (type->is_global_pin[ipin]) {
continue;
}
num_edges = 0;
edge_list = NULL;
if(Fc_out[type->index][ipin] != 0) {
for (ofs = 0; ofs < type->height; ++ofs) {
for (side = (enum e_side)0; side < 4; side = (enum e_side)(side + 1)) {
/* Can't do anything if pin isn't at this location */
if (0 == type->pinloc[ofs][side][ipin]) {
continue;
}
/* Figure out the chan seg at that side.
* side is the side of the logic or io block. */
vert = (boolean) ((side == TOP) || (side == BOTTOM));
/* mrFPGA */
if (is_stack) {
vert = (boolean)(!vert);
}
/* END */
pos_dir = (boolean) ((side == TOP) || (side == RIGHT));
/* mrFPGA */
//chan_type = (vert ? CHANX : CHANY);
chan_type = ((is_stack ? !vert : vert) ? CHANX : CHANY);
/* END */
chan = (vert ? (j + ofs) : i);
seg = (vert ? i : (j + ofs));
max_len = (vert ? nx : ny);
Fc_ofs = (vert ? Fc_xofs : Fc_yofs);
if (FALSE == pos_dir) {
/* mrFPGA */
if (is_stack) {
--seg;
/* END */
} else {
--chan;
}
}
/* mrFPGA */
/* Skip the location if there is no channel. */
if (chan < (is_stack ? 1:0)) {
continue;
}
if (seg < (is_stack ? 0:1)) {
continue;
}
/* END */
if (seg > (vert ? nx : ny)) {
continue;
}
if (chan > (vert ? ny : nx)) {
continue;
}
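/* At this point (chan, seg) names the channel segment bordering this
* pin location. Example (standard FPGA, is_stack == FALSE): a BOTTOM
* pin of a height-1 block at (i, j) maps to the CHANX channel at
* chan = j - 1, seg = i. */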
/* Get the list of opin to mux connections for that chan seg. */
num_edges += get_unidir_opin_connections(chan, seg,
max_Fc, chan_type, seg_details, &edge_list,
Fc_ofs, L_rr_edge_done, max_len, nodes_per_chan,
L_rr_node_indices, &clipped);
if (clipped) {
*Fc_clipped = TRUE;
}
}
}
}
/* Add in direct connections */
num_edges += get_opin_direct_connecions(i, j, ipin, &edge_list, L_rr_node_indices, delayless_switch, directs, num_directs, clb_to_clb_directs);
/* Add the edges */
inode = get_rr_node_index(i, j, OPIN, ipin, L_rr_node_indices);
alloc_and_load_edges_and_switches(rr_node, inode, num_edges,
L_rr_edge_done, edge_list);
while (edge_list != NULL) {
next = edge_list->next;
free(edge_list);
edge_list = next;
}
}
}
#if 0
static void
load_uniform_opin_switch_pattern_paired(INP int *Fc_out,
INP int num_pins,
INP int *pins_in_chan_seg,
INP int num_wire_inc_muxes,
INP int num_wire_dec_muxes,
INP int *wire_inc_muxes,
INP int *wire_dec_muxes,
INOUTP t_rr_node * L_rr_node,
INOUTP boolean * L_rr_edge_done,
INP t_seg_details * seg_details,
OUTP boolean * Fc_clipped)
{
/* Directionality is assumed to be uni-directional */
/* Make turn-based assignment to avoid overlap when Fc_output is low. This is a bipartite
* matching problem. Out of "num_wire_muxes" muxes, "Fc_output" of them are assigned
* to each outpin (total "num_pins" of them); the assignment is uniform (spacing - spreadout)
* and staggered to avoid overlap when Fc_output is low. */
/* The natural order in which wire muxes are stored in wire_muxes already alternates in
* directionality (by my implementation), so nothing extra is needed to avoid directional bias */
/* TODO: Due to spacing, it's possible to have directional bias: all Fc_out wires connected
* to one opin goes in either INC or DEC -> whereas I want a mix of both.
* SOLUTION: Use quantization of 2 to ensure that if an opin connects to one wire, it
* must also connect to its companion wire, which runs in the opposite direction. This
* means instead of having num_wire_muxes as the matching set, pick out the INC wires
* in num_wires_muxes as the matching set (the DEC wires are their companions) April 17, 2007
* NEWS: That solution does not work, as treating wires in groups leads to serious
* abnormal patterns (conns crossing multiple blocks) when W is not quantized to multiples of 2L.
* So, I'm changing that approach to a new one that avoids directional bias: I will separate
* the INC muxes and DEC muxes into two sets. Each set is uniformly assigned to opins with
* Fc_output/2; this should be identical to before for normal cases and contains all conns
* in the same chan segment for the nonquantized cases. */
/* Finally, separated the two approaches: 1. Take all wire muxes and assign them to opins
* one at a time (load_uniform_opin_switch_pattern) 2. Take pairs (by companion)
* of wire muxes and assign them to opins a pair at a time (load_uniform_opin_switch_pattern_paired).
* The first is used for fringe channel segments (ends of channels, where
* there are lots of muxes due to partial wire segments) and the second is used in core */
/* float spacing, step_size, f_mux; */
int ipin, iconn, num_edges, init_mux;
int from_node, to_node, to_track;
int xlow, ylow;
t_linked_edge *edge_list;
int *wire_muxes;
int k, num_wire_muxes, Fc_output_per_side, CurFc;
int count_inc, count_dec;
t_type_ptr type;
*Fc_clipped = FALSE;
count_inc = count_dec = 0;
for (ipin = 0; ipin < num_pins; ipin++)
{
from_node = pins_in_chan_seg[ipin];
xlow = L_rr_node[from_node].xlow;
ylow = L_rr_node[from_node].ylow;
type = grid[xlow][ylow].type;
edge_list = NULL;
num_edges = 0;
/* Assigning the INC muxes first, then DEC muxes */
for (k = 0; k < 2; ++k)
{
if (k == 0)
{
num_wire_muxes = num_wire_inc_muxes;
wire_muxes = wire_inc_muxes;
}
else
{
num_wire_muxes = num_wire_dec_muxes;
wire_muxes = wire_dec_muxes;
}
/* Half the Fc will be assigned for each direction. */
assert(Fc_out[type->index] % 2 == 0);
Fc_output_per_side = Fc_out[type->index] / 2;
/* Clip the demand. Make sure to use a new variable so
* on the second pass it is not clipped. */
CurFc = Fc_output_per_side;
if (Fc_output_per_side > num_wire_muxes)
{
*Fc_clipped = TRUE;
CurFc = num_wire_muxes;
}
if (k == 0)
{
init_mux = (count_inc) % num_wire_muxes;
count_inc += CurFc;
}
else
{
init_mux = (count_dec) % num_wire_muxes;
count_dec += CurFc;
}
for (iconn = 0; iconn < CurFc; iconn++)
{
/* FINALLY, make the outpin to mux connection */
/* Latest update: I'm not using Uniform Pattern, but a similarly staggered pattern */
to_node = wire_muxes[(init_mux + iconn) % num_wire_muxes];
L_rr_node[to_node].num_opin_drivers++; /* keep track of mux size */
to_track = L_rr_node[to_node].ptc_num;
if (FALSE == L_rr_edge_done[to_node])
{
/* Use of alloc_and_load_edges_and_switches
* must be accompanied by rr_edge_done check. */
L_rr_edge_done[to_node] = TRUE;
edge_list = insert_in_edge_list(edge_list, to_node,
seg_details[to_track].wire_switch);
num_edges++;
}
}
}
if (num_edges < 1)
{
vpr_printf(TIO_MESSAGE_ERROR, "opin %d at (%d,%d) does not connect to any tracks.\n",
L_rr_node[from_node].ptc_num, L_rr_node[from_node].xlow, L_rr_node[from_node].ylow);
exit(1);
}
alloc_and_load_edges_and_switches(L_rr_node, from_node, num_edges,
L_rr_edge_done, edge_list);
}
}
#endif
#if MUX_SIZE_DIST_DISPLAY
/* This routine prints and dumps statistics on the mux sizes on a sblock
* per sblock basis, over the entire chip. Mux sizes should be balanced (off by
* at most 1) for all muxes in the same sblock in the core, and corner sblocks.
* Fringe sblocks will have imbalance due to missing one side and constraints on
* where wires must connect. Comparing two core sblocks, muxes need not
* be balanced if W is not quantized to 2L multiples, again because there can
* be sblocks with different numbers of muxes but the same number of incoming
* wires that need to make connections to these muxes (we don't want to under-connect
* user-specified Fc and Fs). */
static void
view_mux_size_distribution(t_ivec *** L_rr_node_indices,
int nodes_per_chan,
t_seg_details * seg_details_x,
t_seg_details * seg_details_y)
{
int i, j, itrack, seg_num, chan_num, max_len;
int start, end, inode, max_value, min_value;
int array_count, k, num_muxes;
short direction, side;
float *percent_range_array;
float percent_range, percent_range_sum, avg_percent_range;
float std_dev_percent_range, deviation_f;
int range, *range_array, global_max_range;
float avg_range, range_sum, std_dev_range;
t_seg_details *seg_details;
t_mux *new_mux, *sblock_mux_list_head, *current, *next;
#ifdef ENABLE_DUMP
FILE *dump_file_per_sblock, *dump_file;
#endif /* ENABLE_DUMP */
t_mux_size_distribution *distr_list, *distr_current, *new_distribution,
*distr_next;
#ifdef ENABLE_DUMP
dump_file = my_fopen("mux_size_dump.txt", "w", 0);
dump_file_per_sblock = my_fopen("mux_size_per_sblock_dump.txt", "w", 0);
#endif /* ENABLE_DUMP */
sblock_mux_list_head = NULL;
percent_range_array =
(float *)my_malloc((nx - 1) * (ny - 1) * sizeof(float));
range_array = (int *)my_malloc((nx - 1) * (ny - 1) * sizeof(int));
array_count = 0;
percent_range_sum = 0.0;
range_sum = 0.0;
global_max_range = 0;
min_value = 0;
max_value = 0;
seg_num = 0;
chan_num = 0;
direction = 0;
seg_details = 0;
max_len = 0;
distr_list = NULL;
/* With the specified range, I'm only looking at core sblocks */
for (j = (ny - 1); j > 0; j--)
{
for (i = 1; i < nx; i++)
{
num_muxes = 0;
for (side = 0; side < 4; side++)
{
switch (side)
{
case LEFT:
seg_num = i;
chan_num = j;
direction = DEC_DIRECTION; /* only DEC have muxes in that sblock */
seg_details = seg_details_x;
max_len = nx;
break;
case RIGHT:
seg_num = i + 1;
chan_num = j;
direction = INC_DIRECTION;
seg_details = seg_details_x;
max_len = nx;
break;
case TOP:
seg_num = j + 1;
chan_num = i;
direction = INC_DIRECTION;
seg_details = seg_details_y;
max_len = ny;
break;
case BOTTOM:
seg_num = j;
chan_num = i;
direction = DEC_DIRECTION;
seg_details = seg_details_y;
max_len = ny;
break;
default:
assert(FALSE);
}
assert(nodes_per_chan > 0);
for (itrack = 0; itrack < nodes_per_chan; itrack++)
{
start = get_seg_start(seg_details, itrack, seg_num, chan_num);
end = get_seg_end(seg_details, itrack, start, chan_num, max_len);
if ((seg_details[itrack].direction == direction)
&& (((start == seg_num) && (direction == INC_DIRECTION))
|| ((end == seg_num) && (direction == DEC_DIRECTION))))
{ /* mux found */
num_muxes++;
if (side == LEFT || side == RIGHT)
{ /* CHANX */
inode = get_rr_node_index(seg_num, chan_num,
CHANX, itrack, L_rr_node_indices);
}
else
{
assert((side == TOP) || (side == BOTTOM)); /* CHANY */
inode = get_rr_node_index(chan_num, seg_num,
CHANY, itrack, L_rr_node_indices);
}
new_mux = (t_mux *) my_malloc(sizeof(t_mux));
new_mux->size = rr_node[inode].num_wire_drivers
+ rr_node[inode].num_opin_drivers;
new_mux->next = NULL;
/* insert in linked list, descending */
if (sblock_mux_list_head == NULL)
{
/* first entry */
sblock_mux_list_head = new_mux;
}
else if (sblock_mux_list_head->size < new_mux->size)
{
/* insert before head */
new_mux->next = sblock_mux_list_head;
sblock_mux_list_head = new_mux;
}
else
{
/* insert after head */
current = sblock_mux_list_head;
next = current->next;
while ((next != NULL) && (next->size > new_mux->size))
{
current = next;
next = current->next;
}
if (next == NULL)
{
current->next = new_mux;
}
else
{
new_mux->next = current->next;
current->next = new_mux;
}
}
/* end of insert in linked list */
}
}
} /* end of mux searching over all four sides of sblock */
/* now sblock_mux_list_head holds a linked list of all muxes in this sblock */
current = sblock_mux_list_head;
#ifdef ENABLE_DUMP
fprintf(dump_file_per_sblock,
"sblock at (%d, %d) has mux sizes: {", i, j);
#endif /* ENABLE_DUMP */
if (current != NULL)
{
max_value = min_value = current->size;
}
while (current != NULL)
{
if (max_value < current->size)
max_value = current->size;
if (min_value > current->size)
min_value = current->size;
#ifdef ENABLE_DUMP
fprintf(dump_file_per_sblock, "%d ",
current->size);
fprintf(dump_file, "%d\n", current->size);
#endif /* ENABLE_DUMP */
current = current->next;
}
#ifdef ENABLE_DUMP
fprintf(dump_file_per_sblock, "}\n\tmax: %d\tmin:%d",
max_value, min_value);
#endif /* ENABLE_DUMP */
range = max_value - min_value;
percent_range = ((float)range) / ((float)min_value);
if (global_max_range < range)
global_max_range = range;
#ifdef ENABLE_DUMP
fprintf(dump_file_per_sblock,
"\t\trange: %d\t\tpercent range:%.2f\n",
range, percent_range);
#endif /* ENABLE_DUMP */
percent_range_array[array_count] = percent_range;
range_array[array_count] = range;
percent_range_sum += percent_range;
range_sum += range;
array_count++;
/* I will use a distribution for each (core) sblock type.
* There is more than one type of sblock when
* quantization of W to 2L multiples is not observed. */
distr_current = distr_list;
while (distr_current != NULL
&& distr_current->mux_count != num_muxes)
{
distr_current = distr_current->next;
}
if (distr_current == NULL)
{
/* Create a distribution for the new sblock type,
* and put it as head of linked list by convention */
new_distribution = (t_mux_size_distribution *)
my_malloc(sizeof(t_mux_size_distribution));
new_distribution->mux_count = num_muxes;
new_distribution->max_index = max_value;
new_distribution->distr =
(int *)my_calloc(max_value + 1, sizeof(int));
/* filling in the distribution */
current = sblock_mux_list_head;
while (current != NULL)
{
assert(current->size <=
new_distribution->max_index);
new_distribution->distr[current->size]++;
current = current->next;
}
/* add it to head */
new_distribution->next = distr_list;
distr_list = new_distribution;
}
else
{
/* distr_current->mux_count == num_muxes so add this sblock's mux sizes in this distribution */
current = sblock_mux_list_head;
while (current != NULL)
{
if (current->size >
distr_current->max_index)
{
/* needs to realloc to expand the distribution array to hold the new large-valued data */
distr_current->distr =
my_realloc(distr_current->
distr,
(current->size +
1) * sizeof(int));
/* initializing the newly allocated elements */
for (k =
(distr_current->max_index +
1); k <= current->size; k++)
distr_current->distr[k] = 0;
distr_current->max_index =
current->size;
distr_current->distr[current->
size]++;
}
else
{
distr_current->distr[current->
size]++;
}
current = current->next;
}
}
/* done - now free memory */
current = sblock_mux_list_head;
while (current != NULL)
{
next = current->next;
free(current);
current = next;
}
sblock_mux_list_head = NULL;
}
}
avg_percent_range = (float)percent_range_sum / array_count;
avg_range = (float)range_sum / array_count;
percent_range_sum = 0.0;
range_sum = 0.0;
for (k = 0; k < array_count; k++)
{
deviation_f = (percent_range_array[k] - avg_percent_range);
percent_range_sum += deviation_f * deviation_f;
deviation_f = ((float)range_array[k] - avg_range);
range_sum += deviation_f * deviation_f;
}
std_dev_percent_range =
sqrt(percent_range_sum / ((float)array_count - 1.0));
std_dev_range = sqrt(range_sum / ((float)array_count - 1.0));
vpr_printf(TIO_MESSAGE_INFO, "==== MUX size statistics ====\n");
vpr_printf(TIO_MESSAGE_INFO, "Max range of mux size within a sblock: %d\n", global_max_range);
vpr_printf(TIO_MESSAGE_INFO, "Average range of mux size within a sblock: %.2f\n", avg_range);
vpr_printf(TIO_MESSAGE_INFO, "Std dev of range of mux size within a sblock: %.2f\n", std_dev_range);
vpr_printf(TIO_MESSAGE_INFO, "Average percent range of mux size within a sblock: %.2f%%\n", avg_percent_range * 100.0);
vpr_printf(TIO_MESSAGE_INFO, "Std dev of percent range of mux size within a sblock: %.2f%%\n", std_dev_percent_range * 100.0);
vpr_printf(TIO_MESSAGE_INFO, " -- Detailed MUX size distribution by sblock type -- \n");
distr_current = distr_list;
while (distr_current != NULL)
{
print_distribution(stdout, distr_current);
/* free */
distr_next = distr_current->next;
free(distr_current->distr);
free(distr_current);
distr_current = distr_next;
}
free(percent_range_array);
free(range_array);
#ifdef ENABLE_DUMP
fclose(dump_file_per_sblock);
fclose(dump_file);
#endif /* ENABLE_DUMP */
}
static void
print_distribution(FILE * fptr,
t_mux_size_distribution * distr_struct)
{
int *distr;
int k;
float sum;
boolean zeros;
distr = distr_struct->distr;
fprintf(fptr,
"For Sblocks containing %d MUXes, the MUX size distribution is:\n",
distr_struct->mux_count);
fprintf(fptr, "\t\t\tSize\t\t\tFrequency (percent)\n");
sum = 0.0;
for (k = 0; k <= distr_struct->max_index; k++)
sum += distr[k];
zeros = TRUE;
for (k = 0; k <= distr_struct->max_index; k++)
{
if (zeros && (distr[k] == 0))
{
/* do nothing for leading string of zeros */
}
else
{
zeros = FALSE; /* leading string of zeros ended */
fprintf(fptr, "\t\t\t%d\t\t\t%d (%.2f%%)\n", k, distr[k],
(float)distr[k] / sum * 100.0);
}
}
fprintf(fptr, "\nEnd of this Sblock MUX size distribution.\n");
}
#endif
/**
* Parse out which CLB pins should connect directly to which other CLB pins, then store that in a clb_to_clb_directs data structure.
* This data structure supplements the info in the "directs" data structure.
* TODO: The function that does this parsing in placement lacks generality for heterogeneous blocks; it should be replaced with this one.
static t_clb_to_clb_directs * alloc_and_load_clb_to_clb_directs(INP t_direct_inf *directs, INP int num_directs) {
int i, j;
t_clb_to_clb_directs *clb_to_clb_directs;
char *pb_type_name, *port_name;
int start_pin_index, end_pin_index;
t_pb_type *pb_type;
clb_to_clb_directs = (t_clb_to_clb_directs*)my_calloc(num_directs, sizeof(t_clb_to_clb_directs));
pb_type_name = NULL;
port_name = NULL;
for(i = 0; i < num_directs; i++) {
pb_type_name = (char*)my_malloc((strlen(directs[i].from_pin) + strlen(directs[i].to_pin)) * sizeof(char));
port_name = (char*)my_malloc((strlen(directs[i].from_pin) + strlen(directs[i].to_pin)) * sizeof(char));
// Load from pins
// Parse out the pb_type name, port name, and pin range
parse_direct_pin_name(directs[i].from_pin, directs[i].line, &start_pin_index, &end_pin_index, pb_type_name, port_name);
// Figure out which type, port, and pin is used
for(j = 0; j < num_types; j++) {
if(strcmp(type_descriptors[j].name, pb_type_name) == 0) {
break;
}
}
assert(j < num_types);
clb_to_clb_directs[i].from_clb_type = &type_descriptors[j];
pb_type = clb_to_clb_directs[i].from_clb_type->pb_type;
for(j = 0; j < pb_type->num_ports; j++) {
if(strcmp(pb_type->ports[j].name, port_name) == 0) {
break;
}
}
assert(j < pb_type->num_ports);
if(start_pin_index == OPEN) {
assert(start_pin_index == end_pin_index);
start_pin_index = 0;
end_pin_index = pb_type->ports[j].num_pins - 1;
}
get_blk_pin_from_port_pin(clb_to_clb_directs[i].from_clb_type->index, j, start_pin_index, &clb_to_clb_directs[i].from_clb_pin_start_index);
get_blk_pin_from_port_pin(clb_to_clb_directs[i].from_clb_type->index, j, end_pin_index, &clb_to_clb_directs[i].from_clb_pin_end_index);
// Load to pins
// Parse out the pb_type name, port name, and pin range
parse_direct_pin_name(directs[i].to_pin, directs[i].line, &start_pin_index, &end_pin_index, pb_type_name, port_name);
// Figure out which type, port, and pin is used
for(j = 0; j < num_types; j++) {
if(strcmp(type_descriptors[j].name, pb_type_name) == 0) {
break;
}
}
assert(j < num_types);
clb_to_clb_directs[i].to_clb_type = &type_descriptors[j];
pb_type = clb_to_clb_directs[i].to_clb_type->pb_type;
for(j = 0; j < pb_type->num_ports; j++) {
if(strcmp(pb_type->ports[j].name, port_name) == 0) {
break;
}
}
assert(j < pb_type->num_ports);
if(start_pin_index == OPEN) {
assert(start_pin_index == end_pin_index);
start_pin_index = 0;
end_pin_index = pb_type->ports[j].num_pins - 1;
}
get_blk_pin_from_port_pin(clb_to_clb_directs[i].to_clb_type->index, j, start_pin_index, &clb_to_clb_directs[i].to_clb_pin_start_index);
get_blk_pin_from_port_pin(clb_to_clb_directs[i].to_clb_type->index, j, end_pin_index, &clb_to_clb_directs[i].to_clb_pin_end_index);
if(abs(clb_to_clb_directs[i].from_clb_pin_start_index - clb_to_clb_directs[i].from_clb_pin_end_index) != abs(clb_to_clb_directs[i].to_clb_pin_start_index - clb_to_clb_directs[i].to_clb_pin_end_index)) {
vpr_printf(TIO_MESSAGE_ERROR, "[LINE %d] Range mismatch from %s to %s.\n", directs[i].line, directs[i].from_pin, directs[i].to_pin);
exit(1);
}
free(pb_type_name);
free(port_name);
}
return clb_to_clb_directs;
}
*/
/* Add all direct clb-pin-to-clb-pin edges for the given opin */
static int get_opin_direct_connecions(int x, int y, int opin, INOUTP t_linked_edge ** edge_list_ptr, INP t_ivec *** L_rr_node_indices,
INP int delayless_switch, INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs) {
t_type_ptr type;
int grid_ofs;
int i, ipin, inode;
t_linked_edge *edge_list_head;
int max_index, min_index, offset, swap;
int new_edges;
type = grid[x][y].type;
edge_list_head = *edge_list_ptr;
new_edges = 0;
/* Iterate through all direct connections */
for(i = 0; i < num_directs; i++) {
/* Find matching direct clb-to-clb connections with the same type as current grid location */
if(clb_to_clb_directs[i].from_clb_type == type) {
/* Compute index of opin with regards to given pins */
if(clb_to_clb_directs[i].from_clb_pin_start_index > clb_to_clb_directs[i].from_clb_pin_end_index) {
swap = TRUE;
max_index = clb_to_clb_directs[i].from_clb_pin_start_index;
min_index = clb_to_clb_directs[i].from_clb_pin_end_index;
} else {
swap = FALSE;
min_index = clb_to_clb_directs[i].from_clb_pin_start_index;
max_index = clb_to_clb_directs[i].from_clb_pin_end_index;
}
if(max_index >= opin && min_index <= opin) {
offset = opin - min_index;
/* This opin is specified to connect directly to an ipin, now compute which ipin to connect to */
if(x + directs[i].x_offset < nx + 1 &&
x + directs[i].x_offset > 0 &&
y + directs[i].y_offset < ny + 1 &&
y + directs[i].y_offset > 0) {
ipin = OPEN;
if(clb_to_clb_directs[i].to_clb_pin_start_index > clb_to_clb_directs[i].to_clb_pin_end_index) {
if(swap == TRUE) {
ipin = clb_to_clb_directs[i].to_clb_pin_end_index + offset;
} else {
ipin = clb_to_clb_directs[i].to_clb_pin_start_index - offset;
}
} else {
if(swap == TRUE) {
ipin = clb_to_clb_directs[i].to_clb_pin_end_index - offset;
} else {
ipin = clb_to_clb_directs[i].to_clb_pin_start_index + offset;
}
}
/* Add new ipin edge to list of edges */
grid_ofs = grid[x + directs[i].x_offset][y + directs[i].y_offset].offset;
inode = get_rr_node_index(x + directs[i].x_offset, y + directs[i].y_offset - grid_ofs, IPIN, ipin, L_rr_node_indices);
edge_list_head = insert_in_edge_list(edge_list_head, inode, delayless_switch);
new_edges++;
}
}
}
}
*edge_list_ptr = edge_list_head;
return new_edges;
}
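/* Worked example of the swap/offset logic above (illustrative pin ranges):
 * a direct from pins 5..2 (start > end, so swap = TRUE, min = 2, max = 5)
 * to pins 10..13 (start < end) maps opin 3 to offset = 3 - 2 = 1 and
 * ipin = to_clb_pin_end_index - offset = 13 - 1 = 12, preserving the
 * pairing 5->10, 4->11, 3->12, 2->13. */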