/* rr_graph.c -- routing-resource (rr) graph construction for VPR,
 * including Xifan Tang's extensions (switch-segment patterns,
 * OPIN-to-CB fast edges, mrFPGA support). */
#include <stdio.h>
|
|
#include <math.h>
|
|
#include <assert.h>
|
|
#include <string.h>
|
|
#include "util.h"
|
|
#include "vpr_types.h"
|
|
#include "globals.h"
|
|
#include "rr_graph_util.h"
|
|
#include "rr_graph.h"
|
|
#include "rr_graph2.h"
|
|
#include "rr_graph_sbox.h"
|
|
#include "check_rr_graph.h"
|
|
#include "rr_graph_timing_params.h"
|
|
#include "rr_graph_indexed_data.h"
|
|
#include "vpr_utils.h"
|
|
#include "read_xml_arch_file.h"
|
|
#include "ReadOptions.h"
|
|
|
|
/* Xifan TANG: SWSEG SUPPORT */
|
|
#include "rr_graph_swseg.h"
|
|
/* end */
|
|
/* Xifan TANG: opin_to_cb support */
|
|
#include "rr_graph_opincb.h"
|
|
/* end */
|
|
|
|
/* mrFPGA: Xifan TANG */
|
|
#include "mrfpga_globals.h"
|
|
/* end */
|
|
|
|
/* #define ENABLE_DUMP */
|
|
/* #define MUX_SIZE_DIST_DISPLAY */
|
|
|
|
/* mux size statistic data structures */
|
|
|
|
/* Node of a singly-linked list collecting the fan-in size of one routing
 * mux; used only for the optional mux-size distribution statistics
 * (see MUX_SIZE_DIST_DISPLAY). */
typedef struct s_mux {
	int size;           /* Fan-in (number of inputs) of one routing mux. */
	struct s_mux *next; /* Next mux in the list, or NULL at the tail. */
} t_mux;
|
|
|
|
/* Histogram of routing-mux sizes for one channel segment, chained into a
 * list so a distribution can be kept per location; used only for the
 * optional mux-size statistics display. */
typedef struct s_mux_size_distribution {
	int mux_count;  /* Total number of muxes counted in this distribution. */
	int max_index;  /* Largest mux size seen (valid upper index of distr). */
	int *distr;     /* distr[s] = number of muxes with fan-in s. */
	struct s_mux_size_distribution *next; /* Next distribution in the chain. */
} t_mux_size_distribution;
|
|
|
|
/*
|
|
typedef struct s_clb_to_clb_directs {
|
|
t_type_descriptor *from_clb_type;
|
|
int from_clb_pin_start_index;
|
|
int from_clb_pin_end_index;
|
|
t_type_descriptor *to_clb_type;
|
|
int to_clb_pin_start_index;
|
|
int to_clb_pin_end_index;
|
|
} t_clb_to_clb_directs;
|
|
*/
|
|
|
|
/* Xifan TANG: opin_to_cb support */
|
|
#include "pb_pin_eq_auto_detect.h"
|
|
/* end */
|
|
|
|
/* UDSD Modifications by WMF End */
|
|
|
|
/******************* Variables local to this module. ***********************/
|
|
|
|
/* Used to free "chunked" memory. If NULL, no rr_graph exists right now. */
|
|
static t_chunk rr_mem_ch = {NULL, 0, NULL};
|
|
|
|
/* Status of current chunk being dished out by calls to my_chunk_malloc. */
|
|
|
|
/********************* Subroutines local to this module. *******************/
|
|
static int ****alloc_and_load_pin_to_track_map(INP enum e_pin_type pin_type,
|
|
INP int nodes_per_chan, INP int *Fc, INP t_type_ptr Type,
|
|
INP boolean perturb_switch_pattern,
|
|
INP enum e_directionality directionality);
|
|
|
|
static struct s_ivec ***alloc_and_load_track_to_pin_lookup(
|
|
INP int ****pin_to_track_map, INP int *Fc, INP int height,
|
|
INP int num_pins, INP int nodes_per_chan);
|
|
|
|
static void build_bidir_rr_opins(INP int i, INP int j,
|
|
INOUTP t_rr_node * L_rr_node, INP t_ivec *** L_rr_node_indices,
|
|
INP int *****opin_to_track_map, INP int **Fc_out,
|
|
INP boolean * L_rr_edge_done, INP t_seg_details * seg_details,
|
|
INP struct s_grid_tile **L_grid, INP int delayless_switch,
|
|
INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs);
|
|
|
|
static void build_unidir_rr_opins(INP int i, INP int j,
|
|
INP struct s_grid_tile **L_grid, INP int **Fc_out,
|
|
INP int nodes_per_chan, INP t_seg_details * seg_details,
|
|
INOUTP int **Fc_xofs, INOUTP int **Fc_yofs,
|
|
INOUTP t_rr_node * L_rr_node, INOUTP boolean * L_rr_edge_done,
|
|
OUTP boolean * Fc_clipped, INP t_ivec *** L_rr_node_indices, INP int delayless_switch,
|
|
INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs);
|
|
|
|
static int get_opin_direct_connecions(int x, int y, int opin, INOUTP t_linked_edge ** edge_list_ptr, INP t_ivec *** L_rr_node_indices,
|
|
INP int delayless_switch, INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs);
|
|
|
|
static void alloc_and_load_rr_graph(INP int num_nodes,
|
|
INP t_rr_node * L_rr_node, INP int num_seg_types,
|
|
INP t_seg_details * seg_details, INP boolean * L_rr_edge_done,
|
|
INP struct s_ivec ****track_to_ipin_lookup,
|
|
INP int *****opin_to_track_map, INP struct s_ivec ***switch_block_conn,
|
|
INP struct s_grid_tile **L_grid, INP int L_nx, INP int L_ny, INP int Fs,
|
|
INP short *****sblock_pattern, INP int **Fc_out, INP int **Fc_xofs,
|
|
INP int **Fc_yofs, INP t_ivec *** L_rr_node_indices,
|
|
INP int nodes_per_chan, INP enum e_switch_block_type sb_type,
|
|
INP int delayless_switch, INP enum e_directionality directionality,
|
|
INP int wire_to_ipin_switch, OUTP boolean * Fc_clipped, INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs);
|
|
|
|
static void load_uniform_switch_pattern(INP t_type_ptr type,
|
|
INOUTP int ****tracks_connected_to_pin, INP int num_phys_pins,
|
|
INP int *pin_num_ordering, INP int *side_ordering,
|
|
INP int *offset_ordering, INP int nodes_per_chan, INP int Fc,
|
|
INP enum e_directionality directionality);
|
|
|
|
static void load_perturbed_switch_pattern(INP t_type_ptr type,
|
|
INOUTP int ****tracks_connected_to_pin, INP int num_phys_pins,
|
|
INP int *pin_num_ordering, INP int *side_ordering,
|
|
INP int *offset_ordering, INP int nodes_per_chan, INP int Fc,
|
|
INP enum e_directionality directionality);
|
|
|
|
static void check_all_tracks_reach_pins(t_type_ptr type,
|
|
int ****tracks_connected_to_pin, int nodes_per_chan, int Fc,
|
|
enum e_pin_type ipin_or_opin);
|
|
|
|
static boolean *alloc_and_load_perturb_ipins(INP int nodes_per_chan,
|
|
INP int L_num_types, INP int **Fc_in, INP int **Fc_out,
|
|
INP enum e_directionality directionality);
|
|
|
|
static void build_rr_sinks_sources(INP int i, INP int j,
|
|
INP t_rr_node * L_rr_node, INP t_ivec *** L_rr_node_indices,
|
|
INP int delayless_switch, INP struct s_grid_tile **L_grid);
|
|
|
|
static void build_rr_xchan(INP int i, INP int j,
|
|
INP struct s_ivec ****track_to_ipin_lookup,
|
|
INP struct s_ivec ***switch_block_conn, INP int cost_index_offset,
|
|
INP int nodes_per_chan, INP int *opin_mux_size,
|
|
INP short *****sblock_pattern, INP int Fs_per_side,
|
|
INP t_seg_details * seg_details, INP t_ivec *** L_rr_node_indices,
|
|
INP boolean * L_rr_edge_done, INOUTP t_rr_node * L_rr_node,
|
|
INP int wire_to_ipin_switch, INP enum e_directionality directionality);
|
|
|
|
static void build_rr_ychan(INP int i, INP int j,
|
|
INP struct s_ivec ****track_to_ipin_lookup,
|
|
INP struct s_ivec ***switch_block_conn, INP int cost_index_offset,
|
|
INP int nodes_per_chan, INP int *opin_mux_size,
|
|
INP short *****sblock_pattern, INP int Fs_per_side,
|
|
INP t_seg_details * seg_details, INP t_ivec *** L_rr_node_indices,
|
|
INP boolean * L_rr_edge_done, INOUTP t_rr_node * L_rr_node,
|
|
INP int wire_to_ipin_switch, INP enum e_directionality directionality);
|
|
|
|
static void rr_graph_externals(t_timing_inf timing_inf,
|
|
t_segment_inf * segment_inf, int num_seg_types, int nodes_per_chan,
|
|
int wire_to_ipin_switch, enum e_base_cost_type base_cost_type);
|
|
|
|
void alloc_and_load_edges_and_switches(INP t_rr_node * L_rr_node, INP int inode,
|
|
INP int num_edges, INP boolean * L_rr_edge_done,
|
|
INP t_linked_edge * edge_list_head);
|
|
|
|
static void alloc_net_rr_terminals(void);
|
|
|
|
static void alloc_and_load_rr_clb_source(t_ivec *** L_rr_node_indices);
|
|
|
|
/*
|
|
static t_clb_to_clb_directs *alloc_and_load_clb_to_clb_directs(INP t_direct_inf *directs, INP int num_directs);
|
|
*/
|
|
|
|
#if 0
|
|
static void load_uniform_opin_switch_pattern_paired(INP int *Fc_out,
|
|
INP int num_pins,
|
|
INP int *pins_in_chan_seg,
|
|
INP int num_wire_inc_muxes,
|
|
INP int num_wire_dec_muxes,
|
|
INP int *wire_inc_muxes,
|
|
INP int *wire_dec_muxes,
|
|
INOUTP t_rr_node * L_rr_node,
|
|
INOUTP boolean *
|
|
L_rr_edge_done,
|
|
INP t_seg_details *
|
|
seg_details,
|
|
OUTP boolean * Fc_clipped);
|
|
#endif
|
|
void watch_edges(int inode, t_linked_edge * edge_list_head);
|
|
#if MUX_SIZE_DIST_DISPLAY
|
|
static void view_mux_size_distribution(t_ivec *** L_rr_node_indices,
|
|
int nodes_per_chan,
|
|
t_seg_details * seg_details_x,
|
|
t_seg_details * seg_details_y);
|
|
static void print_distribution(FILE * fptr,
|
|
t_mux_size_distribution * distr_struct);
|
|
#endif
|
|
|
|
static void free_type_pin_to_track_map(int***** ipin_to_track_map,
|
|
t_type_ptr types);
|
|
|
|
static void free_type_track_to_ipin_map(struct s_ivec**** track_to_pin_map,
|
|
t_type_ptr types, int nodes_per_chan);
|
|
|
|
static t_seg_details *alloc_and_load_global_route_seg_details(
|
|
INP int nodes_per_chan, INP int global_route_switch);
|
|
|
|
/* UDSD Modifications by WMF End */
|
|
|
|
static int **alloc_and_load_actual_fc(INP int L_num_types, INP t_type_ptr types,
|
|
INP int nodes_per_chan, INP boolean is_Fc_out,
|
|
INP enum e_directionality directionality, OUTP boolean * Fc_clipped, INP boolean ignore_Fc_0);
|
|
|
|
/******************* Subroutine definitions *******************************/
|
|
|
|
/* Builds the complete routing-resource graph for the given grid, channel
 * width and segment description, storing the result in the module/global
 * structures rr_node, rr_node_indices and num_rr_nodes.
 *
 * graph_type selects global vs. detailed (bidir/unidir/tileable) routing;
 * Warnings is OR-ed with RR_GRAPH_WARN_* flags (channel width changed,
 * Fc clipped).  The Xifan Tang extensions optionally add OPIN->IPIN fast
 * edges, switch-segment patterns and logic-equivalence edges after the
 * base graph is built.  All temporary lookup structures are freed before
 * returning. */
void build_rr_graph(INP t_graph_type graph_type, INP int L_num_types,
		INP t_type_ptr types, INP int L_nx, INP int L_ny,
		INP struct s_grid_tile **L_grid, INP int chan_width,
		INP struct s_chan_width_dist *chan_capacity_inf,
		INP enum e_switch_block_type sb_type, INP int Fs, INP int num_seg_types,
		INP int num_switches, INP t_segment_inf * segment_inf,
		INP int global_route_switch, INP int delayless_switch,
		INP t_timing_inf timing_inf, INP int wire_to_ipin_switch,
		INP enum e_base_cost_type base_cost_type, INP t_direct_inf *directs,
		INP int num_directs, INP boolean ignore_Fc_0, OUTP int *Warnings,
		/*Xifan TANG: Switch Segment Pattern Support*/
		INP int num_swseg_pattern, INP t_swseg_pattern_inf* swseg_patterns,
		INP boolean opin_to_cb_fast_edges, INP boolean opin_logic_eq_edges) {
	/* Temp structures used to build graph */
	int nodes_per_chan, i, j;
	t_seg_details *seg_details = NULL;
	int **Fc_in = NULL; /* [0..num_types-1][0..num_pins-1] */
	int **Fc_out = NULL; /* [0..num_types-1][0..num_pins-1] */

	int *****opin_to_track_map = NULL; /* [0..num_types-1][0..num_pins-1][0..height][0..3][0..Fc-1] */
	int *****ipin_to_track_map = NULL; /* [0..num_types-1][0..num_pins-1][0..height][0..3][0..Fc-1] */
	t_ivec ****track_to_ipin_lookup = NULL; /* [0..num_types-1][0..nodes_per_chan-1][0..height][0..3] */
	t_ivec ***switch_block_conn = NULL;
	short *****unidir_sb_pattern = NULL;
	boolean *L_rr_edge_done = NULL;
	boolean is_global_graph;
	boolean Fc_clipped;
	boolean use_full_seg_groups;
	boolean *perturb_ipins = NULL;
	enum e_directionality directionality;
	int **Fc_xofs = NULL; /* [0..ny-1][0..nx-1] */
	int **Fc_yofs = NULL; /* [0..nx-1][0..ny-1] */
	t_clb_to_clb_directs *clb_to_clb_directs;

	rr_node_indices = NULL;
	rr_node = NULL;
	num_rr_nodes = 0;

	/* Reset warning flag */
	*Warnings = RR_GRAPH_NO_WARN;

	/* Decode the graph_type */
	is_global_graph = FALSE;
	if (GRAPH_GLOBAL == graph_type) {
		is_global_graph = TRUE;
	}
	use_full_seg_groups = FALSE;
	if (GRAPH_UNIDIR_TILEABLE == graph_type) {
		use_full_seg_groups = TRUE;
	}
	directionality = UNI_DIRECTIONAL;
	if (GRAPH_BIDIR == graph_type) {
		directionality = BI_DIRECTIONAL;
	}
	if (is_global_graph) {
		/* Global routing is always modelled bidirectionally. */
		directionality = BI_DIRECTIONAL;
	}

	/* Global routing uses a single longwire track */
	nodes_per_chan = (is_global_graph ? 1 : chan_width);
	assert(nodes_per_chan > 0);

	clb_to_clb_directs = NULL;
	if (num_directs > 0) {
		clb_to_clb_directs = alloc_and_load_clb_to_clb_directs(directs, num_directs);
	}

	/* START SEG_DETAILS */
	if (is_global_graph) {
		/* Sets up a single unit length segment type for global routing. */
		seg_details = alloc_and_load_global_route_seg_details(nodes_per_chan,
				global_route_switch);
	} else {
		/* Setup segments including distrubuting tracks and staggering.
		 * If use_full_seg_groups is specified, nodes_per_chan may be
		 * changed. Warning should be singled to caller if this happens. */
		seg_details = alloc_and_load_seg_details(&nodes_per_chan,
				/* std::max(L_nx, L_ny), */ /* Original VPR */
				std::max(L_nx, L_ny) + ( is_stack ? 1 : 0 ), /* mrFPGA: Xifan TANG */
				num_seg_types, segment_inf,
				use_full_seg_groups, is_global_graph, directionality);
		if ((is_global_graph ? 1 : chan_width) != nodes_per_chan) {
			*Warnings |= RR_GRAPH_WARN_CHAN_WIDTH_CHANGED;
		}
		if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_SEG_DETAILS)) {
			dump_seg_details(seg_details, nodes_per_chan,
					getEchoFileName(E_ECHO_SEG_DETAILS));
		} else
			;
	}
	/* END SEG_DETAILS */

	/* START FC */
	/* Determine the actual value of Fc */
	if (is_global_graph) {
		/* BUG FIX: the original code allocated the row-pointer arrays with
		 * sizeof(int) instead of sizeof(int *) and never allocated the
		 * per-type rows at all, so Fc_in[i][j] / Fc_out[i][j] wrote through
		 * uninitialized pointers (undefined behavior).  Allocate with
		 * alloc_matrix using the same bounds as alloc_and_load_actual_fc so
		 * that the free_matrix() calls at the end of this routine stay
		 * valid for both code paths. */
		int max_pins = types[0].num_pins;
		for (i = 1; i < L_num_types; ++i) {
			if (types[i].num_pins > max_pins) {
				max_pins = types[i].num_pins;
			}
		}
		Fc_in = (int **) alloc_matrix(0, L_num_types, 0, max_pins, sizeof(int));
		Fc_out = (int **) alloc_matrix(0, L_num_types, 0, max_pins, sizeof(int));
		for (i = 0; i < L_num_types; ++i) {
			for (j = 0; j < types[i].num_pins; ++j) {
				/* Every pin connects to the single global-routing track. */
				Fc_in[i][j] = 1;
				Fc_out[i][j] = 1;
			}
		}
	} else {
		Fc_clipped = FALSE;
		Fc_in = alloc_and_load_actual_fc(L_num_types, types, nodes_per_chan,
				FALSE, directionality, &Fc_clipped, ignore_Fc_0);
		if (Fc_clipped) {
			*Warnings |= RR_GRAPH_WARN_FC_CLIPPED;
		}
		Fc_clipped = FALSE;
		Fc_out = alloc_and_load_actual_fc(L_num_types, types, nodes_per_chan,
				TRUE, directionality, &Fc_clipped, ignore_Fc_0);
		if (Fc_clipped) {
			*Warnings |= RR_GRAPH_WARN_FC_CLIPPED;
		}

#ifdef VERBOSE
		for (i = 1; i < L_num_types; ++i) { /* Skip "<EMPTY>" */
			for (j = 0; j < type_descriptors[i].num_pins; ++j) {
				if (type_descriptors[i].is_Fc_full_flex[j]) {
					vpr_printf(TIO_MESSAGE_INFO, "Fc Actual Values: type = %s, Fc_out = full, Fc_in = %d.\n",
							type_descriptors[i].name, Fc_in[i][j]);
				}
				else {
					vpr_printf(TIO_MESSAGE_INFO, "Fc Actual Values: type = %s, Fc_out = %d, Fc_in = %d.\n",
							type_descriptors[i].name, Fc_out[i][j], Fc_in[i][j]);
				}
			}
		}
#endif /* VERBOSE */
	}

	perturb_ipins = alloc_and_load_perturb_ipins(nodes_per_chan, L_num_types,
			Fc_in, Fc_out, directionality);
	/* END FC */

	/* Alloc node lookups, count nodes, alloc rr nodes */
	num_rr_nodes = 0;
	rr_node_indices = alloc_and_load_rr_node_indices(nodes_per_chan, L_nx, L_ny,
			&num_rr_nodes, seg_details);
	rr_node = (t_rr_node *) my_malloc(sizeof(t_rr_node) * num_rr_nodes);
	memset(rr_node, 0, sizeof(t_rr_node) * num_rr_nodes);
	L_rr_edge_done = (boolean *) my_malloc(sizeof(boolean) * num_rr_nodes);
	memset(L_rr_edge_done, 0, sizeof(boolean) * num_rr_nodes);

	/* These are data structures used by the the unidir opin mapping. */
	if (UNI_DIRECTIONAL == directionality) {
		Fc_xofs = (int **) alloc_matrix(0, L_ny, 0, L_nx, sizeof(int));
		Fc_yofs = (int **) alloc_matrix(0, L_nx, 0, L_ny, sizeof(int));
		for (i = 0; i <= L_nx; ++i) {
			for (j = 0; j <= L_ny; ++j) {
				Fc_xofs[j][i] = 0;
				Fc_yofs[i][j] = 0;
			}
		}
	}

	/* START SB LOOKUP */
	/* Alloc and load the switch block lookup */
	if (is_global_graph) {
		assert(nodes_per_chan == 1);
		switch_block_conn = alloc_and_load_switch_block_conn(1, SUBSET, 3);
	} else if (BI_DIRECTIONAL == directionality) {
		switch_block_conn = alloc_and_load_switch_block_conn(nodes_per_chan,
				sb_type, Fs);
	} else {
		assert(UNI_DIRECTIONAL == directionality);

		unidir_sb_pattern = alloc_sblock_pattern_lookup(L_nx, L_ny,
				nodes_per_chan);
		for (i = 0; i <= L_nx; i++) {
			for (j = 0; j <= L_ny; j++) {
				load_sblock_pattern_lookup(i, j, nodes_per_chan, seg_details,
						Fs, sb_type, unidir_sb_pattern);
			}
		}
	}
	/* END SB LOOKUP */

	/* START IPINP MAP */
	/* Create ipin map lookups */
	ipin_to_track_map = (int *****) my_malloc(sizeof(int ****) * L_num_types);
	track_to_ipin_lookup = (struct s_ivec ****) my_malloc(
			sizeof(struct s_ivec ***) * L_num_types);
	for (i = 0; i < L_num_types; ++i) {
		ipin_to_track_map[i] = alloc_and_load_pin_to_track_map(RECEIVER,
				nodes_per_chan, Fc_in[i], &types[i], perturb_ipins[i],
				directionality);
		track_to_ipin_lookup[i] = alloc_and_load_track_to_pin_lookup(
				ipin_to_track_map[i], Fc_in[i], types[i].height,
				types[i].num_pins, nodes_per_chan);
	}
	/* END IPINP MAP */

	/* START OPINP MAP */
	/* Create opin map lookups (bidir only; unidir computes opins on the fly) */
	if (BI_DIRECTIONAL == directionality) {
		opin_to_track_map = (int *****) my_malloc(
				sizeof(int ****) * L_num_types);
		for (i = 0; i < L_num_types; ++i) {
			opin_to_track_map[i] = alloc_and_load_pin_to_track_map(DRIVER,
					nodes_per_chan, Fc_out[i], &types[i], FALSE, directionality);
		}
	}
	/* END OPINP MAP */

	/* UDSD Modifications by WMF begin */
	/* I'm adding 2 new fields to t_rr_node, and I want them initialized to 0. */
	for (i = 0; i < num_rr_nodes; i++) {
		rr_node[i].num_wire_drivers = 0;
		rr_node[i].num_opin_drivers = 0;
	}

	alloc_and_load_rr_graph(num_rr_nodes, rr_node, num_seg_types, seg_details,
			L_rr_edge_done, track_to_ipin_lookup, opin_to_track_map,
			switch_block_conn, L_grid, L_nx, L_ny, Fs, unidir_sb_pattern,
			Fc_out, Fc_xofs, Fc_yofs, rr_node_indices, nodes_per_chan, sb_type,
			delayless_switch, directionality, wire_to_ipin_switch, &Fc_clipped, directs, num_directs, clb_to_clb_directs);

#ifdef MUX_SIZE_DIST_DISPLAY
	if (UNI_DIRECTIONAL == directionality)
	{
		view_mux_size_distribution(rr_node_indices, nodes_per_chan,
				seg_details, seg_details);
	}
#endif

	/* Update rr_nodes capacities if global routing */
	if (graph_type == GRAPH_GLOBAL) {
		for (i = 0; i < num_rr_nodes; i++) {
			if (rr_node[i].type == CHANX || rr_node[i].type == CHANY) {
				rr_node[i].capacity = chan_width;
			}
		}
	}

	/* Xifan TANG: Add Fast Interconnection from LB OPINs to adjacent LB IPINs*/
	if (TRUE == opin_to_cb_fast_edges) { // Do only detailed rr_graph is needed
		vpr_printf(TIO_MESSAGE_INFO,"Adding %d fast edges from logic block OPIN to logic block IPIN ...\n",
				add_rr_graph_fast_edge_opin_to_cb(rr_node_indices));
	}
	/*END*/

	/*Xifan TANG: Switch Segment Pattern Support*/
	if (NULL != swseg_patterns) { // Do only the pointer is not NULL
		vpr_printf(TIO_MESSAGE_INFO,"Applying Switch Segment Pattern...\n");
		if (UNI_DIRECTIONAL == directionality) {
			add_rr_graph_switch_segment_pattern(directionality,nodes_per_chan, num_swseg_pattern, swseg_patterns, rr_node_indices, seg_details, seg_details);
		} else {
			vpr_printf(TIO_MESSAGE_ERROR,"Switch Segment Pattern is only applicable to uni-directional routing architecture!\n");
			exit(1);
		}
	}
	/*END*/

	/* Xifan TANG: Check logic equivalence of LB OPINs and IPINs. Then modify the associated rr_graph */
	/* use net_rr_terminal array to find SOURCE rr_node for each net*/
	if (TRUE == opin_logic_eq_edges) { // Do only detailed rr_graph is needed
		vpr_printf(TIO_MESSAGE_INFO,"Adding %d logic equivalent edges for logic block OPIN ...\n",
				// alloc_and_add_grids_fully_capacity_sb_rr_edges(rr_node_indices, num_directs, clb_to_clb_directs));
				alloc_and_add_grids_fully_capacity_rr_edges(rr_node_indices, num_directs, clb_to_clb_directs));
	}
	/*END*/

	rr_graph_externals(timing_inf, segment_inf, num_seg_types, nodes_per_chan,
			wire_to_ipin_switch, base_cost_type);

	if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_RR_GRAPH)) {
		dump_rr_graph(getEchoFileName(E_ECHO_RR_GRAPH));
	} else
		;

	check_rr_graph(graph_type, types, L_nx, L_ny, nodes_per_chan, Fs,
			num_seg_types, num_switches, segment_inf, global_route_switch,
			delayless_switch, wire_to_ipin_switch, seg_details, Fc_in, Fc_out,
			opin_to_track_map, ipin_to_track_map, track_to_ipin_lookup,
			switch_block_conn, perturb_ipins);

	/* Free all temp structs */
	if (seg_details) {
		free_seg_details(seg_details, nodes_per_chan);
		seg_details = NULL;
	}
	if (Fc_in) {
		free_matrix(Fc_in,0, L_num_types, 0, sizeof(int));
		Fc_in = NULL;
	}
	if (Fc_out) {
		free_matrix(Fc_out,0, L_num_types, 0, sizeof(int));
		Fc_out = NULL;
	}
	if (perturb_ipins) {
		free(perturb_ipins);
		perturb_ipins = NULL;
	}
	if (switch_block_conn) {
		free_switch_block_conn(switch_block_conn, nodes_per_chan);
		switch_block_conn = NULL;
	}
	if (L_rr_edge_done) {
		free(L_rr_edge_done);
		L_rr_edge_done = NULL;
	}
	if (Fc_xofs) {
		free_matrix(Fc_xofs, 0, L_ny, 0, sizeof(int));
		Fc_xofs = NULL;
	}
	if (Fc_yofs) {
		free_matrix(Fc_yofs, 0, L_nx, 0, sizeof(int));
		Fc_yofs = NULL;
	}
	if (unidir_sb_pattern) {
		free_sblock_pattern_lookup(unidir_sb_pattern);
		unidir_sb_pattern = NULL;
	}
	if (opin_to_track_map) {
		for (i = 0; i < L_num_types; ++i) {
			free_matrix4(opin_to_track_map[i], 0, types[i].num_pins - 1, 0,
					types[i].height - 1, 0, 3, 0, sizeof(int));
		}
		free(opin_to_track_map);
	}

	free_type_pin_to_track_map(ipin_to_track_map, types);
	free_type_track_to_ipin_map(track_to_ipin_lookup, types, nodes_per_chan);
	if (clb_to_clb_directs != NULL) {
		free(clb_to_clb_directs);
	}
}
|
|
|
|
/* Loads the global data structures that depend on the freshly built
 * rr_graph: connection-block capacitance, the per-cost-index rr_indexed_data,
 * and the per-net SOURCE/SINK terminal lookup.  Must run after the graph
 * itself is complete; call order below matters (terminals must be
 * allocated before they are loaded). */
static void rr_graph_externals(t_timing_inf timing_inf,
		t_segment_inf * segment_inf, int num_seg_types, int nodes_per_chan,
		int wire_to_ipin_switch, enum e_base_cost_type base_cost_type) {
	/* Annotate input-pin connection-block capacitance onto the graph. */
	add_rr_graph_C_from_switches(timing_inf.C_ipin_cblock);
	/* Build rr_indexed_data (per-cost-index timing and base-cost info). */
	alloc_and_load_rr_indexed_data(segment_inf, num_seg_types, rr_node_indices,
			nodes_per_chan, wire_to_ipin_switch, base_cost_type);

	alloc_net_rr_terminals();
	load_net_rr_terminals(rr_node_indices);
	alloc_and_load_rr_clb_source(rr_node_indices);
}
|
|
|
|
/* Decides, per block type, whether the IPIN-to-track connection pattern
 * should be perturbed.  For bidirectional architectures, perturbation is
 * enabled when Fc_in and Fc_out are near-integer multiples of one another
 * (a uniform pattern would then align IPIN and OPIN connections on the
 * same tracks).  Unidirectional architectures never perturb.
 *
 * Returns a heap-allocated [0..L_num_types-1] boolean array; caller frees. */
static boolean *
alloc_and_load_perturb_ipins(INP int nodes_per_chan, INP int L_num_types,
		INP int **Fc_in, INP int **Fc_out, INP enum e_directionality directionality) {
	int i;
	float Fc_ratio;
	boolean *result = NULL;

	result = (boolean *) my_malloc(L_num_types * sizeof(boolean));

	if (BI_DIRECTIONAL == directionality) {
		result[0] = FALSE; /* Type 0 is "<EMPTY>"; never perturbed. */
		for (i = 1; i < L_num_types; ++i) {
			result[i] = FALSE;

			/* ROBUSTNESS FIX: a pin that does not connect to general
			 * routing has Fc == 0; the original code divided by zero
			 * here.  Such types keep the default (no perturbation). */
			if (Fc_in[i][0] == 0 || Fc_out[i][0] == 0) {
				continue;
			}

			/* Ratio of the larger Fc to the smaller one (always >= 1). */
			if (Fc_in[i][0] > Fc_out[i][0]) {
				Fc_ratio = (float) Fc_in[i][0] / (float) Fc_out[i][0];
			} else {
				Fc_ratio = (float) Fc_out[i][0] / (float) Fc_in[i][0];
			}

			/* Perturb when the ratio is (nearly) integral and Fc_in leaves
			 * at least two free tracks to shift connections onto. */
			if ((Fc_in[i][0] <= nodes_per_chan - 2)
					&& (fabs(Fc_ratio - nint(Fc_ratio))
							< (0.5 / (float) nodes_per_chan))) {
				result[i] = TRUE;
			}
		}
	} else {
		/* Unidirectional routing uses mux balancing patterns and
		 * thus shouldn't need perturbation. */
		assert(UNI_DIRECTIONAL == directionality);
		for (i = 0; i < L_num_types; ++i) {
			result[i] = FALSE;
		}
	}

	return result;
}
|
|
|
|
/* Builds the single unit-length, bidirectional segment type used when the
 * router operates in global mode (exactly one track per channel).
 * Returns a heap-allocated t_seg_details; the cb/sb arrays inside it are
 * allocated here as well and are released via free_seg_details(). */
static t_seg_details *
alloc_and_load_global_route_seg_details(INP int nodes_per_chan,
		INP int global_route_switch) {
	t_seg_details *details = NULL;

	assert(nodes_per_chan == 1);

	details = (t_seg_details *) my_malloc(sizeof(t_seg_details));

	/* One ungrouped, non-longline segment of length 1. */
	details->index = 0;
	details->length = 1;
	details->longline = FALSE;
	details->direction = BI_DIRECTION;
	details->drivers = MULTI_BUFFERED;
	details->group_size = 1;
	details->group_start = 0;
	details->start = 1;

	/* Both wire-to-wire and opin-to-wire hops use the global-route switch. */
	details->wire_switch = global_route_switch;
	details->opin_switch = global_route_switch;

	/* Ideal wire: no metal parasitics are modelled for global routing. */
	details->Rmetal = 0.0;
	details->Cmetal = 0.0;

	/* Connection box at the single track position along the segment. */
	details->cb = (boolean *) my_malloc(sizeof(boolean) * 1);
	details->cb[0] = TRUE;

	/* Switch boxes present at both ends of the segment. */
	details->sb = (boolean *) my_malloc(sizeof(boolean) * 2);
	details->sb[0] = TRUE;
	details->sb[1] = TRUE;

	return details;
}
|
|
|
|
/* Calculates the actual Fc values for the given nodes_per_chan value */
|
|
static int **
|
|
alloc_and_load_actual_fc(INP int L_num_types, INP t_type_ptr types,
|
|
INP int nodes_per_chan, INP boolean is_Fc_out,
|
|
INP enum e_directionality directionality, OUTP boolean * Fc_clipped, INP boolean ignore_Fc_0) {
|
|
|
|
int i, j;
|
|
int **Result = NULL;
|
|
int fac, num_sets;
|
|
|
|
*Fc_clipped = FALSE;
|
|
|
|
/* Unidir tracks formed in pairs, otherwise no effect. */
|
|
fac = 1;
|
|
if (UNI_DIRECTIONAL == directionality) {
|
|
fac = 2;
|
|
}
|
|
|
|
assert((nodes_per_chan % fac) == 0);
|
|
num_sets = nodes_per_chan / fac;
|
|
|
|
int max_pins = types[0].num_pins;
|
|
for (i = 1; i < L_num_types; ++i) {
|
|
if (types[i].num_pins > max_pins) {
|
|
max_pins = types[i].num_pins;
|
|
}
|
|
}
|
|
|
|
Result = (int **) alloc_matrix(0, L_num_types, 0, max_pins, sizeof(int));
|
|
|
|
for (i = 1; i < L_num_types; ++i) {
|
|
float *Fc = (float *) my_malloc(sizeof(float) * types[i].num_pins); /* [0..num_pins-1] */
|
|
for (j = 0; j < types[i].num_pins; ++j) {
|
|
Fc[j] = types[i].Fc[j];
|
|
|
|
if(Fc[j] == 0 && ignore_Fc_0 == FALSE) {
|
|
/* Special case indicating that this pin does not connect to general-purpose routing */
|
|
Result[i][j] = 0;
|
|
} else {
|
|
/* General case indicating that this pin connects to general-purpose routing */
|
|
if (types[i].is_Fc_frac[j]) {
|
|
Result[i][j] = fac * nint(num_sets * Fc[j]);
|
|
} else {
|
|
Result[i][j] = (int)Fc[j];
|
|
}
|
|
|
|
if (is_Fc_out && types[i].is_Fc_full_flex[j]) {
|
|
Result[i][j] = nodes_per_chan;
|
|
}
|
|
|
|
Result[i][j] = std::max(Result[i][j], fac);
|
|
if (Result[i][j] > nodes_per_chan) {
|
|
*Fc_clipped = TRUE;
|
|
Result[i][j] = nodes_per_chan;
|
|
}
|
|
}
|
|
assert(Result[i][j] % fac == 0);
|
|
}
|
|
free(Fc);
|
|
}
|
|
|
|
return Result;
|
|
}
|
|
|
|
/* frees the track to ipin mapping for each physical grid type */
|
|
static void free_type_track_to_ipin_map(struct s_ivec**** track_to_pin_map,
|
|
t_type_ptr types, int nodes_per_chan) {
|
|
int i, itrack, ioff, iside;
|
|
for (i = 0; i < num_types; i++) {
|
|
if (track_to_pin_map[i] != NULL) {
|
|
for (itrack = 0; itrack < nodes_per_chan; itrack++) {
|
|
for (ioff = 0; ioff < types[i].height; ioff++) {
|
|
for (iside = 0; iside < 4; iside++) {
|
|
if (track_to_pin_map[i][itrack][ioff][iside].list
|
|
!= NULL) {
|
|
free(track_to_pin_map[i][itrack][ioff][iside].list);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
free_matrix3(track_to_pin_map[i], 0, nodes_per_chan - 1, 0,
|
|
types[i].height - 1, 0, sizeof(struct s_ivec));
|
|
}
|
|
}
|
|
free(track_to_pin_map);
|
|
}
|
|
|
|
/* frees the ipin to track mapping for each physical grid type */
|
|
static void free_type_pin_to_track_map(int***** ipin_to_track_map,
|
|
t_type_ptr types) {
|
|
int i;
|
|
for (i = 0; i < num_types; i++) {
|
|
free_matrix4(ipin_to_track_map[i], 0, types[i].num_pins - 1, 0,
|
|
types[i].height - 1, 0, 3, 0, sizeof(int));
|
|
}
|
|
free(ipin_to_track_map);
|
|
}
|
|
|
|
/* Does the actual work of allocating the rr_graph and filling all the *
 * appropriate values.  Everything up to this was just a prelude!      *
 *                                                                     *
 * Order matters: SINK/SOURCE/pin nodes first, then OPIN edges (which  *
 * update node fan-in), then the channel wires, which consult the      *
 * fan-in snapshot (opin_mux_size) taken in between.                   */
static void alloc_and_load_rr_graph(INP int num_nodes,
		INP t_rr_node * L_rr_node, INP int num_seg_types,
		INP t_seg_details * seg_details, INP boolean * L_rr_edge_done,
		INP struct s_ivec ****track_to_ipin_lookup,
		INP int *****opin_to_track_map, INP struct s_ivec ***switch_block_conn,
		INP struct s_grid_tile **L_grid, INP int L_nx, INP int L_ny, INP int Fs,
		INP short *****sblock_pattern, INP int **Fc_out, INP int **Fc_xofs,
		INP int **Fc_yofs, INP t_ivec *** L_rr_node_indices,
		INP int nodes_per_chan, INP enum e_switch_block_type sb_type,
		INP int delayless_switch, INP enum e_directionality directionality,
		INP int wire_to_ipin_switch, OUTP boolean * Fc_clipped,
		INP t_direct_inf *directs, INP int num_directs,
		INP t_clb_to_clb_directs *clb_to_clb_directs) {

	int i, j;
	boolean clipped;
	int *opin_mux_size = NULL;

	/* If Fc gets clipped, this will be flagged to true */
	*Fc_clipped = FALSE;

	/* Connection SINKS and SOURCES to their pins. */
	/* The grid spans 0..nx+1 / 0..ny+1 to include the I/O border tiles. */
	for (i = 0; i <= (L_nx + 1); i++) {
		for (j = 0; j <= (L_ny + 1); j++) {
			build_rr_sinks_sources(i, j, L_rr_node, L_rr_node_indices,
					delayless_switch, L_grid);
		}
	}

	/* Build opins */
	for (i = 0; i <= (L_nx + 1); ++i) {
		for (j = 0; j <= (L_ny + 1); ++j) {
			if (BI_DIRECTIONAL == directionality) {
				build_bidir_rr_opins(i, j, L_rr_node, L_rr_node_indices,
						opin_to_track_map, Fc_out, L_rr_edge_done, seg_details,
						L_grid, delayless_switch,
						directs, num_directs, clb_to_clb_directs);
			} else {
				assert(UNI_DIRECTIONAL == directionality);
				build_unidir_rr_opins(i, j, L_grid, Fc_out, nodes_per_chan,
						seg_details, Fc_xofs, Fc_yofs, L_rr_node,
						L_rr_edge_done, &clipped, L_rr_node_indices, delayless_switch,
						directs, num_directs, clb_to_clb_directs);
				if (clipped) {
					*Fc_clipped = TRUE;
				}
			}
		}
	}

	/* We make a copy of the current fanin values for the nodes to
	 * know the number of OPINs driving each mux presently */
	opin_mux_size = (int *) my_malloc(sizeof(int) * num_nodes);
	for (i = 0; i < num_nodes; ++i) {
		opin_mux_size[i] = L_rr_node[i].fan_in;
	}

	/* Build channels */
	/* Fs must split evenly across the three other channel sides. */
	assert(Fs % 3 == 0);
	for (i = 0; i <= L_nx; i++) {
		for (j = 0; j <= L_ny; j++) {
			if (i > 0) { /* no x-channel in the leftmost border column */
				build_rr_xchan(i, j, track_to_ipin_lookup, switch_block_conn,
						CHANX_COST_INDEX_START, nodes_per_chan, opin_mux_size,
						sblock_pattern, Fs / 3, seg_details, L_rr_node_indices,
						L_rr_edge_done, L_rr_node, wire_to_ipin_switch,
						directionality);
			}
			if (j > 0) { /* no y-channel in the bottom border row */
				build_rr_ychan(i, j, track_to_ipin_lookup, switch_block_conn,
						CHANX_COST_INDEX_START + num_seg_types, nodes_per_chan,
						opin_mux_size, sblock_pattern, Fs / 3, seg_details,
						L_rr_node_indices, L_rr_edge_done, L_rr_node,
						wire_to_ipin_switch, directionality);
			}
		}
	}
	free(opin_mux_size);
}
|
|
|
|
/* Creates the outgoing edges for every OPIN of the block at grid
 * location (i, j) in a bidirectional architecture: channel connections
 * per the precomputed opin_to_track_map, plus any clb-to-clb direct
 * connections.  Edges are gathered into a temporary linked list, copied
 * into the rr node, then the list is freed. */
static void build_bidir_rr_opins(INP int i, INP int j,
		INOUTP t_rr_node * L_rr_node, INP t_ivec *** L_rr_node_indices,
		INP int *****opin_to_track_map, INP int **Fc_out,
		INP boolean * L_rr_edge_done, INP t_seg_details * seg_details,
		INP struct s_grid_tile **L_grid, INP int delayless_switch,
		INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs) {

	int ipin, inode, num_edges, *Fc, ofs;
	t_type_ptr type;
	struct s_linked_edge *edge_list, *next;

	/* OPINP edges need to be done at once so let the offset 0
	 * block do the work. */
	if (L_grid[i][j].offset > 0) {
		return;
	}

	type = L_grid[i][j].type;
	Fc = Fc_out[type->index];

	for (ipin = 0; ipin < type->num_pins; ++ipin) {
		/* We only are working with opins so skip non-drivers */
		if (type->class_inf[type->pin_class[ipin]].type != DRIVER) {
			continue;
		}

		num_edges = 0;
		edge_list = NULL;
		/* Fc == 0 means this pin does not connect to general routing. */
		if (Fc[ipin] != 0) {
			/* Collect channel connections for every row spanned by a
			 * (possibly multi-height) block. */
			for (ofs = 0; ofs < type->height; ++ofs) {
				num_edges += get_bidir_opin_connections(i, j + ofs, ipin,
						&edge_list, opin_to_track_map, Fc[ipin], L_rr_edge_done,
						L_rr_node_indices, seg_details);
			}
		}

		/* Add in direct connections */
		num_edges += get_opin_direct_connecions(i, j, ipin, &edge_list, L_rr_node_indices, delayless_switch, directs, num_directs, clb_to_clb_directs);

		inode = get_rr_node_index(i, j, OPIN, ipin, L_rr_node_indices);
		alloc_and_load_edges_and_switches(L_rr_node, inode, num_edges,
				L_rr_edge_done, edge_list);
		/* Edge data has been copied into the node; free the temp list. */
		while (edge_list != NULL) {
			next = edge_list->next;
			free(edge_list);
			edge_list = next;
		}
	}
}
|
|
|
|
void free_rr_graph(void) {
|
|
int i;
|
|
|
|
/* Frees all the routing graph data structures, if they have been *
|
|
* allocated. I use rr_mem_chunk_list_head as a flag to indicate *
|
|
* whether or not the graph has been allocated -- if it is not NULL, *
|
|
* a routing graph exists and can be freed. Hence, you can call this *
|
|
* routine even if you're not sure of whether a rr_graph exists or not. */
|
|
|
|
if (rr_mem_ch.chunk_ptr_head == NULL) /* Nothing to free. */
|
|
return;
|
|
|
|
free_chunk_memory(&rr_mem_ch); /* Frees ALL "chunked" data */
|
|
|
|
/* Before adding any more free calls here, be sure the data is NOT chunk *
|
|
* allocated, as ALL the chunk allocated data is already free! */
|
|
|
|
if(net_rr_terminals != NULL) {
|
|
free(net_rr_terminals);
|
|
}
|
|
for (i = 0; i < num_rr_nodes; i++) {
|
|
if (rr_node[i].edges != NULL) {
|
|
free(rr_node[i].edges);
|
|
}
|
|
if (rr_node[i].switches != NULL) {
|
|
free(rr_node[i].switches);
|
|
}
|
|
}
|
|
|
|
assert(rr_node_indices);
|
|
free_rr_node_indices(rr_node_indices);
|
|
free(rr_node);
|
|
free(rr_indexed_data);
|
|
for (i = 0; i < num_blocks; i++) {
|
|
free(rr_blk_source[i]);
|
|
}
|
|
free(rr_blk_source);
|
|
rr_blk_source = NULL;
|
|
net_rr_terminals = NULL;
|
|
rr_node = NULL;
|
|
rr_node_indices = NULL;
|
|
rr_indexed_data = NULL;
|
|
num_rr_nodes = 0;
|
|
}
|
|
|
|
static void alloc_net_rr_terminals(void) {
|
|
int inet;
|
|
|
|
net_rr_terminals = (int **) my_malloc(num_nets * sizeof(int *));
|
|
|
|
for (inet = 0; inet < num_nets; inet++) {
|
|
net_rr_terminals[inet] = (int *) my_chunk_malloc(
|
|
(clb_net[inet].num_sinks + 1) * sizeof(int),
|
|
&rr_mem_ch);
|
|
}
|
|
}
|
|
|
|
void load_net_rr_terminals(t_ivec *** L_rr_node_indices) {

	/* Loads the net_rr_terminals data structure.  For each net it stores   *
	 * the rr_node index of the SOURCE of the net and all the SINKs of the  *
	 * net: [0..num_nets-1][0..num_pins-1].  Entry [inet][pnum] holds the   *
	 * rr index of the SOURCE (pnum == 0, the driver) or SINK (pnum > 0)    *
	 * terminal.  Assumes alloc_net_rr_terminals() has already run.         */

	int inet, ipin;
	int iblk, x, y, blk_pin, iclass;
	t_type_ptr blk_type;

	for (inet = 0; inet < num_nets; inet++) {
		for (ipin = 0; ipin <= clb_net[inet].num_sinks; ipin++) {
			iblk = clb_net[inet].node_block[ipin];
			x = block[iblk].x;
			y = block[iblk].y;
			blk_type = block[iblk].type;

			/* Each (x, y) location owns a unique set of pin numbers, so when
			 * a tile has capacity > 1 the packed blocks' pins are already
			 * offset to land on the right rr_node. */
			blk_pin = clb_net[inet].node_block_pin[ipin];
			iclass = blk_type->pin_class[blk_pin];

			/* Pin 0 is the net's driver -> SOURCE; all others are SINKs. */
			net_rr_terminals[inet][ipin] = get_rr_node_index(x, y,
					(ipin == 0 ? SOURCE : SINK), iclass, L_rr_node_indices);
		}
	}
}
|
|
|
|
static void alloc_and_load_rr_clb_source(t_ivec *** L_rr_node_indices) {

	/* Saves the rr_node corresponding to each SOURCE and SINK class of     *
	 * every CLB: rr_blk_source[0..num_blocks-1][0..num_class-1].  Classes  *
	 * outside the block's own class range get OPEN.  Currently only the    *
	 * SOURCE entries are consumed, to reserve pins for locally used OPINs  *
	 * in the router.  Values for pad blocks are NOT valid.                 */

	int iblk, iclass;
	int class_low, class_high;
	int x, y;
	t_rr_type rr_type;
	t_type_ptr blk_type;

	rr_blk_source = (int **) my_malloc(num_blocks * sizeof(int *));

	for (iblk = 0; iblk < num_blocks; iblk++) {
		blk_type = block[iblk].type;
		get_class_range_for_block(iblk, &class_low, &class_high);
		rr_blk_source[iblk] = (int *) my_malloc(
				blk_type->num_class * sizeof(int));

		for (iclass = 0; iclass < blk_type->num_class; iclass++) {
			if (iclass < class_low || iclass > class_high) {
				/* Class does not belong to this (sub)block. */
				rr_blk_source[iblk][iclass] = OPEN;
				continue;
			}

			x = block[iblk].x;
			y = block[iblk].y;
			/* Driver classes map to SOURCE nodes, receiver classes to SINKs. */
			rr_type = (blk_type->class_inf[iclass].type == DRIVER) ? SOURCE
					: SINK;
			rr_blk_source[iblk][iclass] = get_rr_node_index(x, y, rr_type,
					iclass, L_rr_node_indices);
		}
	}
}
|
|
|
|
/* Builds the intra-block rr_nodes for the tile at (i,j): one SOURCE per
 * driver class, one SINK per receiver class, one OPIN/IPIN per physical pin,
 * plus the SOURCE->OPIN and IPIN->SINK edges (all using delayless_switch).
 * It also annotates every pin node with its sub-block index (z) and its
 * t_pb_graph_pin, by walking the type's pb_graph input/clock/output ports in
 * step with the pin ordering (iporttype 0 = input ports, 1 = clock ports;
 * output pins are tracked by the same iport/ipb_pin counters in the DRIVER
 * branch).  NOTE(review): this walk assumes the physical pin order matches
 * the pb_graph port order — preserved verbatim, do not reorder. */
static void build_rr_sinks_sources(INP int i, INP int j,
		INP t_rr_node * L_rr_node, INP t_ivec *** L_rr_node_indices,
		INP int delayless_switch, INP struct s_grid_tile **L_grid) {

	/* Loads IPIN, SINK, SOURCE, and OPIN.
	 * Loads IPINP to SINK edges, and SOURCE to OPINP edges */

	int ipin, iclass, inode, pin_num, to_node, num_edges;
	int num_class, num_pins;
	t_type_ptr type;
	struct s_class *class_inf;
	int *pin_class;
	const t_pb_graph_node *pb_graph_node;
	int iport, ipb_pin, iporttype, z;

	/* Since we share nodes within a large block, only
	 * start tile can initialize sinks, sources, and pins */
	if (L_grid[i][j].offset > 0)
		return;

	type = L_grid[i][j].type;
	num_class = type->num_class;
	class_inf = type->class_inf;
	num_pins = type->num_pins;
	pin_class = type->pin_class;
	z = 0;

	/* SINKS and SOURCE to OPINP edges */
	for (iclass = 0; iclass < num_class; iclass++) {
		if (class_inf[iclass].type == DRIVER) { /* SOURCE */
			inode = get_rr_node_index(i, j, SOURCE, iclass, L_rr_node_indices);

			/* A SOURCE fans out to every OPIN of its class. */
			num_edges = class_inf[iclass].num_pins;
			L_rr_node[inode].num_edges = num_edges;
			L_rr_node[inode].edges = (int *) my_malloc(num_edges * sizeof(int));
			L_rr_node[inode].switches = (short *) my_malloc(
					num_edges * sizeof(short));

			for (ipin = 0; ipin < class_inf[iclass].num_pins; ipin++) {
				pin_num = class_inf[iclass].pinlist[ipin];
				to_node = get_rr_node_index(i, j, OPIN, pin_num,
						L_rr_node_indices);
				L_rr_node[inode].edges[ipin] = to_node;
				L_rr_node[inode].switches[ipin] = delayless_switch;

				++L_rr_node[to_node].fan_in;
			}

			L_rr_node[inode].cost_index = SOURCE_COST_INDEX;
			L_rr_node[inode].type = SOURCE;
		} else { /* SINK */
			assert(class_inf[iclass].type == RECEIVER);
			inode = get_rr_node_index(i, j, SINK, iclass, L_rr_node_indices);

			/* NOTE: To allow route throughs through clbs, change the lines below to *
			 * make an edge from the input SINK to the output SOURCE.  Do for just the *
			 * special case of INPUTS = class 0 and OUTPUTS = class 1 and see what it *
			 * leads to.  If route throughs are allowed, you may want to increase the *
			 * base cost of OPINs and/or SOURCES so they aren't used excessively. */

			/* Initialize to unconnected to fix values */
			L_rr_node[inode].num_edges = 0;
			L_rr_node[inode].edges = NULL;
			L_rr_node[inode].switches = NULL;

			L_rr_node[inode].cost_index = SINK_COST_INDEX;
			L_rr_node[inode].type = SINK;
		}

		/* Things common to both SOURCEs and SINKs. */
		L_rr_node[inode].capacity = class_inf[iclass].num_pins;
		L_rr_node[inode].occ = 0;
		L_rr_node[inode].xlow = i;
		L_rr_node[inode].xhigh = i;
		L_rr_node[inode].ylow = j;
		L_rr_node[inode].yhigh = j + type->height - 1; /* spans the whole block */
		L_rr_node[inode].R = 0;
		L_rr_node[inode].C = 0;
		L_rr_node[inode].ptc_num = iclass;
		L_rr_node[inode].direction = (enum e_direction)OPEN;
		L_rr_node[inode].drivers = (enum e_drivers)OPEN;
	}

	/* Walk the pb_graph ports in lock-step with the pin loop below so each
	 * IPIN/OPIN can be tagged with its pb_graph_pin. */
	iporttype = iport = ipb_pin = 0;

	pb_graph_node = type->pb_graph_head;
	if(pb_graph_node != NULL && pb_graph_node->num_input_ports == 0) {
		/* No input ports at all: start the receiver walk on clock ports. */
		iporttype = 1;
	}
	/* Connect IPINS to SINKS and dummy for OPINS */
	for (ipin = 0; ipin < num_pins; ipin++) {
		iclass = pin_class[ipin];
		/* Sub-block index for tiles with capacity > 1. */
		z = ipin / (type->pb_type->num_clock_pins + type->pb_type->num_output_pins + type->pb_type->num_input_pins);

		if (class_inf[iclass].type == RECEIVER) {
			inode = get_rr_node_index(i, j, IPIN, ipin, L_rr_node_indices);
			to_node = get_rr_node_index(i, j, SINK, iclass, L_rr_node_indices);

			/* Every IPIN has exactly one edge: to its class SINK. */
			L_rr_node[inode].num_edges = 1;
			L_rr_node[inode].edges = (int *) my_malloc(sizeof(int));
			L_rr_node[inode].switches = (short *) my_malloc(sizeof(short));

			L_rr_node[inode].edges[0] = to_node;
			L_rr_node[inode].switches[0] = delayless_switch;

			++L_rr_node[to_node].fan_in;

			L_rr_node[inode].cost_index = IPIN_COST_INDEX;
			L_rr_node[inode].type = IPIN;

			/* Add in information so that I can identify which cluster pin this rr_node connects to later */
			L_rr_node[inode].z = z;
			if(iporttype == 0) {
				/* Currently consuming input ports. */
				L_rr_node[inode].pb_graph_pin = &pb_graph_node->input_pins[iport][ipb_pin];
				ipb_pin++;
				if(ipb_pin >= pb_graph_node->num_input_pins[iport]) {
					iport++;
					ipb_pin = 0;
					if(iport >= pb_graph_node->num_input_ports) {
						/* Input ports exhausted: switch to clock ports,
						 * unless there are none (then wrap back to inputs). */
						iporttype++;
						iport = 0;
						if(pb_graph_node->num_clock_ports == 0) {
							iporttype = 0;
						}
					}
				}
			} else {
				/* Currently consuming clock ports. */
				assert(iporttype == 1);
				L_rr_node[inode].pb_graph_pin = &pb_graph_node->clock_pins[iport][ipb_pin];
				ipb_pin++; /* Xifan TANG: Original VPR does not have this incremental!!! */
				if(ipb_pin >= pb_graph_node->num_clock_pins[iport]) {
					iport++;
					ipb_pin = 0;
					if(iport >= pb_graph_node->num_clock_ports) {
						/* Clock ports exhausted: wrap back to input ports,
						 * unless there are none. */
						iporttype = 0;
						iport = 0;
						if(pb_graph_node->num_input_ports == 0) {
							iporttype = 1;
						}
					}
				}
			}
		} else {
			assert(class_inf[iclass].type == DRIVER);

			inode = get_rr_node_index(i, j, OPIN, ipin, L_rr_node_indices);

			/* Add in information so that I can identify which cluster pin this rr_node connects to later */
			L_rr_node[inode].z = z;

			/* OPIN fanout (to tracks) is filled in elsewhere. */
			L_rr_node[inode].num_edges = 0;
			L_rr_node[inode].edges = NULL;

			L_rr_node[inode].switches = NULL;

			L_rr_node[inode].cost_index = OPIN_COST_INDEX;
			L_rr_node[inode].type = OPIN;

			L_rr_node[inode].pb_graph_pin = &pb_graph_node->output_pins[iport][ipb_pin];
			ipb_pin++; /* Xifan TANG: Original VPR does not have this incremental!!! */
			if(ipb_pin >= pb_graph_node->num_output_pins[iport]) {
				iport++;
				ipb_pin = 0;
				if(iport >= pb_graph_node->num_output_ports) {
					/* Output ports exhausted: reset the receiver walk for
					 * the next sub-block's pins. */
					iport = 0;
					if(pb_graph_node->num_input_ports == 0) {
						iporttype = 1;
					} else {
						iporttype = 0;
					}
				}
			}
		}

		/* Common to both DRIVERs and RECEIVERs */
		L_rr_node[inode].capacity = 1;
		L_rr_node[inode].occ = 0;
		L_rr_node[inode].xlow = i;
		L_rr_node[inode].xhigh = i;
		L_rr_node[inode].ylow = j;
		L_rr_node[inode].yhigh = j + type->height - 1;
		L_rr_node[inode].C = 0;
		L_rr_node[inode].R = 0;
		L_rr_node[inode].ptc_num = ipin;
		L_rr_node[inode].direction = (enum e_direction)OPEN;
		L_rr_node[inode].drivers = (enum e_drivers)OPEN;
	}
}
|
|
|
|
/* Builds the CHANX rr_nodes whose segments start at (i,j): gathers
 * track->IPIN edges and track->track (switch-block) edges into a linked
 * list, commits them, then fills in the node's bounding box, cost index,
 * R/C (scaled by segment length) and per-track segment attributes.
 * Two modes: the mrFPGA "is_stack" path treats the channel with swapped
 * axes (segments indexed along y — note its length uses jend - jstart,
 * without the +1 of the original path), while the else-branch is the
 * original VPR construction. */
static void build_rr_xchan(INP int i, INP int j,
		INP struct s_ivec ****track_to_ipin_lookup,
		INP struct s_ivec ***switch_block_conn, INP int cost_index_offset,
		INP int nodes_per_chan, INP int *opin_mux_size,
		INP short *****sblock_pattern, INP int Fs_per_side,
		INP t_seg_details * seg_details, INP t_ivec *** L_rr_node_indices,
		INOUTP boolean * L_rr_edge_done, INOUTP t_rr_node * L_rr_node,
		INP int wire_to_ipin_switch, INP enum e_directionality directionality) {

	/* Loads up all the routing resource nodes in the x-directed channel *
	 * segments starting at (i,j). */

	int itrack, istart, iend, num_edges, inode, length;
	struct s_linked_edge *edge_list, *next;
	/* mrFPGA: Xifan TANG */
	int jstart, jend;
	/* END */

	for (itrack = 0; itrack < nodes_per_chan; itrack++) {
		/* First count number of edges and put the edges in a linked list. */
		num_edges = 0;
		edge_list = NULL;

		/* mrFPGA : Xifan TANG*/
		if ( is_stack ) {
			/* Stacked mode: the segment runs along y; only process the
			 * track at its starting coordinate. */
			jstart = get_seg_start (seg_details, itrack, i, j);
			if ( jstart != j )
				continue;
			jend = get_seg_end (seg_details, itrack, jstart, i, ny);

			istart = i;
			iend = i;

			num_edges += get_track_to_ipins(jstart, i, itrack, &edge_list, L_rr_node_indices,
					track_to_ipin_lookup, seg_details, CHANX, nx, wire_to_ipin_switch,
					directionality);

			/* Switch-block edges to the perpendicular (CHANY) channels on
			 * both sides of this track. */
			num_edges += get_track_to_tracks(i, jstart, itrack, CHANX, i-1, CHANY, ny,
					nodes_per_chan, opin_mux_size, Fs_per_side, sblock_pattern,
					&edge_list, seg_details, directionality, L_rr_node_indices,
					L_rr_edge_done, switch_block_conn);

			num_edges += get_track_to_tracks(i, jstart, itrack, CHANX, i, CHANY, ny,
					nodes_per_chan, opin_mux_size, Fs_per_side, sblock_pattern,
					&edge_list, seg_details, directionality, L_rr_node_indices,
					L_rr_edge_done, switch_block_conn);

			/* Same-direction (CHANX) neighbours just before and after the
			 * segment's span, if they exist. */
			if( jstart > 0 ) {
				num_edges += get_track_to_tracks(i, jstart, itrack, CHANX, jstart - 1, CHANX, ny,
						nodes_per_chan, opin_mux_size, Fs_per_side,sblock_pattern, &edge_list,
						seg_details, directionality, L_rr_node_indices, L_rr_edge_done, switch_block_conn);
			}
			if( jend < ny ) {
				num_edges += get_track_to_tracks(i, jstart, itrack, CHANX, jend + 1, CHANX, ny,
						nodes_per_chan, opin_mux_size, Fs_per_side, sblock_pattern, &edge_list,
						seg_details, directionality, L_rr_node_indices, L_rr_edge_done, switch_block_conn);
			}
		} else { /* end */
			/* Xifan TANG: I remove the accurate part for Original VPR*/
			/* Original VPR part*/
			istart = get_seg_start(seg_details, itrack, j, i);
			iend = get_seg_end(seg_details, itrack, istart, j, nx);
			if (i > istart)
				continue; /* Not the start of this segment. */
			jstart = j;
			jend = j;
			/* end */

			/* First count number of edges and put the edges in a linked list. */
			num_edges = 0;
			edge_list = NULL;

			num_edges += get_track_to_ipins(istart, j, itrack, &edge_list,
					L_rr_node_indices, track_to_ipin_lookup, seg_details, CHANX, nx,
					wire_to_ipin_switch, directionality);

			/* CHANY channels below and above this x-channel. */
			if (j > 0) {
				num_edges += get_track_to_tracks(j, istart, itrack, CHANX, j, CHANY,
						nx, nodes_per_chan, opin_mux_size, Fs_per_side,
						sblock_pattern, &edge_list, seg_details, directionality,
						L_rr_node_indices, L_rr_edge_done, switch_block_conn);
			}

			if (j < ny) {
				num_edges += get_track_to_tracks(j, istart, itrack, CHANX, j + 1,
						CHANY, nx, nodes_per_chan, opin_mux_size, Fs_per_side,
						sblock_pattern, &edge_list, seg_details, directionality,
						L_rr_node_indices, L_rr_edge_done, switch_block_conn);
			}

			/* CHANX neighbours just before and after the segment's span. */
			if (istart > 1) {
				num_edges += get_track_to_tracks(j, istart, itrack, CHANX,
						istart - 1, CHANX, nx, nodes_per_chan, opin_mux_size,
						Fs_per_side, sblock_pattern, &edge_list, seg_details,
						directionality, L_rr_node_indices, L_rr_edge_done,
						switch_block_conn);
			}

			if (iend < nx) {
				num_edges += get_track_to_tracks(j, istart, itrack, CHANX, iend + 1,
						CHANX, nx, nodes_per_chan, opin_mux_size, Fs_per_side,
						sblock_pattern, &edge_list, seg_details, directionality,
						L_rr_node_indices, L_rr_edge_done, switch_block_conn);
			}
		}
		/* END */

		/* Commit the collected edges to the node, then free the list. */
		inode = get_rr_node_index(i, j, CHANX, itrack, L_rr_node_indices);
		alloc_and_load_edges_and_switches(L_rr_node, inode, num_edges,
				L_rr_edge_done, edge_list);

		while (edge_list != NULL) {
			next = edge_list->next;
			free(edge_list);
			edge_list = next;
		}

		/* Edge arrays have now been built up.  Do everything else. */
		L_rr_node[inode].cost_index = cost_index_offset
				+ seg_details[itrack].index;
		L_rr_node[inode].occ = 0;

		L_rr_node[inode].capacity = 1; /* GLOBAL routing handled elsewhere */

		if (is_stack) {
			/* mrFPGA: Xifan TANG */
			L_rr_node[inode].xlow = istart;
			L_rr_node[inode].xhigh = iend;
			L_rr_node[inode].ylow = jstart;
			L_rr_node[inode].yhigh = jend;
		} else {
			/* Original VPR */
			L_rr_node[inode].xlow = istart;
			L_rr_node[inode].xhigh = iend;
			L_rr_node[inode].ylow = j;
			L_rr_node[inode].yhigh = j;
		}

		/* mrFPGA: Xifan TANG */
		/* Segment length scales metal R and C below.  NOTE(review): the
		 * stacked form has no "+ 1" — confirm that is intentional. */
		length = is_stack ? (jend - jstart) : (iend - istart + 1);
		//length = (iend - istart + 1);
		/* END */

		L_rr_node[inode].R = length * seg_details[itrack].Rmetal;
		L_rr_node[inode].C = length * seg_details[itrack].Cmetal;

		L_rr_node[inode].ptc_num = itrack;
		L_rr_node[inode].type = CHANX;
		L_rr_node[inode].direction = seg_details[itrack].direction;
		L_rr_node[inode].drivers = seg_details[itrack].drivers;
		/* Xifan TANG:(For SPICE Modeling) Fill the segment inf */
		//LL_rr_node[inode].seg_index = seg_details[itrack].index;
	}
}
|
|
|
|
/* Builds the CHANY rr_nodes whose segments start at (i,j).  Mirror image of
 * build_rr_xchan: collects track->IPIN and track->track edges into a linked
 * list, commits them, then fills in bounding box, cost index, R/C and
 * per-track segment attributes.  The mrFPGA "is_stack" path indexes
 * segments along x (and its length omits the +1 used by the original VPR
 * path); the else-branch is the original VPR construction. */
static void build_rr_ychan(INP int i, INP int j,
		INP struct s_ivec ****track_to_ipin_lookup,
		INP struct s_ivec ***switch_block_conn, INP int cost_index_offset,
		INP int nodes_per_chan, INP int *opin_mux_size,
		INP short *****sblock_pattern, INP int Fs_per_side,
		INP t_seg_details * seg_details, INP t_ivec *** L_rr_node_indices,
		INP boolean * L_rr_edge_done, INOUTP t_rr_node * L_rr_node,
		INP int wire_to_ipin_switch, INP enum e_directionality directionality) {

	/* Loads up all the routing resource nodes in the y-directed channel *
	 * segments starting at (i,j). */

	int itrack, istart, iend, num_edges, inode, length;
	struct s_linked_edge *edge_list, *next;

	/* mrFPGA: Xifan TANG*/
	int jstart, jend;
	/* END */

	for (itrack = 0; itrack < nodes_per_chan; itrack++) {
		/* First count number of edges and put the edges in a linked list. */
		num_edges = 0;
		edge_list = NULL;

		/* mrFPGA */
		if ( is_stack ) {
			/* Stacked mode: the segment runs along x; only process the
			 * track at its starting coordinate. */
			istart = get_seg_start (seg_details, itrack, j, i);
			if ( istart != i )
				continue;
			iend = get_seg_end (seg_details, itrack, istart, j, nx);
			/* mrFPGA: Xifan TANG */
			jstart = j;
			jend = j;
			/* end */

			num_edges += get_track_to_ipins(istart, j, itrack, &edge_list, L_rr_node_indices,
					track_to_ipin_lookup, seg_details, CHANY, nx, wire_to_ipin_switch,
					directionality);

			/* Switch-block edges to the perpendicular (CHANX) channels on
			 * both sides of this track. */
			num_edges += get_track_to_tracks(j, istart, itrack, CHANY, j-1, CHANX, nx,
					nodes_per_chan, opin_mux_size, Fs_per_side, sblock_pattern,
					&edge_list, seg_details, directionality, L_rr_node_indices,
					L_rr_edge_done, switch_block_conn);

			num_edges += get_track_to_tracks(j, istart, itrack, CHANY, j, CHANX, nx,
					nodes_per_chan, opin_mux_size, Fs_per_side, sblock_pattern,
					&edge_list, seg_details, directionality, L_rr_node_indices,
					L_rr_edge_done, switch_block_conn);

			/* Same-direction (CHANY) neighbours just before and after the
			 * segment's span, if they exist. */
			if (istart > 0) {
				num_edges += get_track_to_tracks(j, istart, itrack, CHANY, istart - 1, CHANY, nx,
						nodes_per_chan, opin_mux_size, Fs_per_side, sblock_pattern, &edge_list,
						seg_details, directionality, L_rr_node_indices, L_rr_edge_done,
						switch_block_conn);
			}

			if (iend < nx) {
				num_edges += get_track_to_tracks(j, istart, itrack, CHANY, iend + 1,
						CHANY, nx, nodes_per_chan, opin_mux_size, Fs_per_side,
						sblock_pattern, &edge_list, seg_details, directionality,
						L_rr_node_indices, L_rr_edge_done, switch_block_conn);

			}
		} else { /* end */
			/* Original VPR: segment runs along y; istart/iend are y coords. */
			istart = get_seg_start(seg_details, itrack, i, j);
			iend = get_seg_end(seg_details, itrack, istart, i, ny);

			if (j > istart)
				continue; /* Not the start of this segment. */
			/* mrFPGA: Xifan TANG*/
			jstart = i;
			jend = i;
			/* END */

			/* Original VPR */
			/* First count number of edges and put the edges in a linked list. */
			num_edges = 0;
			edge_list = NULL;

			num_edges += get_track_to_ipins(istart, i, itrack, &edge_list,
					L_rr_node_indices, track_to_ipin_lookup, seg_details, CHANY, ny,
					wire_to_ipin_switch, directionality);

			/* CHANX channels to the left and right of this y-channel. */
			if (i > 0) {
				num_edges += get_track_to_tracks(i, istart, itrack, CHANY, i, CHANX,
						ny, nodes_per_chan, opin_mux_size, Fs_per_side,
						sblock_pattern, &edge_list, seg_details, directionality,
						L_rr_node_indices, L_rr_edge_done, switch_block_conn);
			}

			if (i < nx) {
				num_edges += get_track_to_tracks(i, istart, itrack, CHANY, i + 1,
						CHANX, ny, nodes_per_chan, opin_mux_size, Fs_per_side,
						sblock_pattern, &edge_list, seg_details, directionality,
						L_rr_node_indices, L_rr_edge_done, switch_block_conn);
			}

			/* CHANY neighbours just before and after the segment's span. */
			if (istart > 1) {
				num_edges += get_track_to_tracks(i, istart, itrack, CHANY,
						istart - 1, CHANY, ny, nodes_per_chan, opin_mux_size,
						Fs_per_side, sblock_pattern, &edge_list, seg_details,
						directionality, L_rr_node_indices, L_rr_edge_done,
						switch_block_conn);
			}

			if (iend < ny) {
				num_edges += get_track_to_tracks(i, istart, itrack, CHANY, iend + 1,
						CHANY, ny, nodes_per_chan, opin_mux_size, Fs_per_side,
						sblock_pattern, &edge_list, seg_details, directionality,
						L_rr_node_indices, L_rr_edge_done, switch_block_conn);
			}
		}
		/* END */

		/* Commit the collected edges to the node, then free the list. */
		inode = get_rr_node_index(i, j, CHANY, itrack, L_rr_node_indices);
		alloc_and_load_edges_and_switches(L_rr_node, inode, num_edges,
				L_rr_edge_done, edge_list);

		while (edge_list != NULL) {
			next = edge_list->next;
			free(edge_list);
			edge_list = next;
		}

		/* Edge arrays have now been built up.  Do everything else. */
		L_rr_node[inode].cost_index = cost_index_offset
				+ seg_details[itrack].index;
		L_rr_node[inode].occ = 0;

		L_rr_node[inode].capacity = 1; /* GLOBAL routing handled elsewhere */

		if (is_stack) {
			/* mrFPGA: Xifan TANG */
			L_rr_node[inode].xlow = istart;
			L_rr_node[inode].xhigh = iend;
			L_rr_node[inode].ylow = jstart;
			L_rr_node[inode].yhigh = jend;
		} else {
			/* Original VPR */
			L_rr_node[inode].xlow = i;
			L_rr_node[inode].xhigh = i;
			L_rr_node[inode].ylow = istart;
			L_rr_node[inode].yhigh = iend;
		}

		/* mrFPGA : Xifan TANG*/
		/* Segment length scales metal R and C below.  NOTE(review): the
		 * stacked form has no "+ 1" — confirm that is intentional. */
		length = is_stack ? (iend - istart) : (iend - istart + 1);
		//length = (iend - istart + 1);
		/* END */

		L_rr_node[inode].R = length * seg_details[itrack].Rmetal;
		L_rr_node[inode].C = length * seg_details[itrack].Cmetal;

		L_rr_node[inode].ptc_num = itrack;
		L_rr_node[inode].type = CHANY;
		L_rr_node[inode].direction = seg_details[itrack].direction;
		L_rr_node[inode].drivers = seg_details[itrack].drivers;
		/* Xifan TANG:(For SPICE Modeling) Fill the segment inf */
		//LL_rr_node[inode].seg_index = seg_details[itrack].index;
	}
}
|
|
|
|
/* Debug helper: prints rr_node `inode` followed by every node on the given
 * edge list (i.e. the fanout currently being collected for inode).  Output
 * goes through vpr_printf / print_rr_node on stdout; the list is not
 * modified or freed.
 * Fix: the original kept a counter `i` that was incremented but never read —
 * removed as dead code. */
void watch_edges(int inode, t_linked_edge * edge_list_head) {
	t_linked_edge *list_ptr;

	vpr_printf(TIO_MESSAGE_TRACE, "!!! Watching Node %d !!!!\n", inode);
	print_rr_node(stdout, rr_node, inode);
	vpr_printf(TIO_MESSAGE_TRACE, "Currently connects to:\n");
	for (list_ptr = edge_list_head; list_ptr != NULL; list_ptr = list_ptr->next) {
		print_rr_node(stdout, rr_node, list_ptr->edge);
	}
}
|
|
|
|
void alloc_and_load_edges_and_switches(INP t_rr_node * L_rr_node, INP int inode,
		INP int num_edges, INOUTP boolean * L_rr_edge_done,
		INP t_linked_edge * edge_list_head) {

	/* Sets up all the edge related information for rr_node inode (num_edges, *
	 * the edges array and the switches array).  The edge_list_head points to *
	 * a list of the num_edges edges and switches to put in the arrays.  This *
	 * linked list is freed by this routine.  This routine also resets the    *
	 * rr_edge_done array for the next rr_node (i.e. set it so that no edges  *
	 * are marked as having been seen before).                                *
	 * NOTE(review): despite the comment above, the list nodes themselves are *
	 * freed by the CALLERS in this file, not here — this routine only copies *
	 * the entries out and clears the done flags.                             */

	t_linked_edge *list_ptr;
	int i;

	/* Check we aren't overwriting edges */
	assert(L_rr_node[inode].num_edges < 1);
	assert(NULL == L_rr_node[inode].edges);
	assert(NULL == L_rr_node[inode].switches);

	L_rr_node[inode].num_edges = num_edges;
	L_rr_node[inode].edges = (int *) my_malloc(num_edges * sizeof(int));
	L_rr_node[inode].switches = (short *) my_malloc(num_edges * sizeof(short));

	/* Copy each (edge, switch) pair out of the list, bumping the fanin count
	 * of each destination node as we go. */
	i = 0;
	list_ptr = edge_list_head;
	while (list_ptr && (i < num_edges)) {
		L_rr_node[inode].edges[i] = list_ptr->edge;
		L_rr_node[inode].switches[i] = list_ptr->iswitch;

		++L_rr_node[list_ptr->edge].fan_in;

		/* Unmark the edge since we are done considering fanout from node. */
		L_rr_edge_done[list_ptr->edge] = FALSE;

		list_ptr = list_ptr->next;
		++i;
	}
	/* The list length and num_edges must agree exactly. */
	assert(list_ptr == NULL);
	assert(i == num_edges);
}
|
|
|
|
/* Builds and returns the pin-to-track connection map for one block type:
 * tracks_connected_to_pin[0..num_pins-1][0..height-1][0..3][0..max_Fc],
 * where entry [ipin][ioff][iside][k] is the k-th track pin ipin connects to
 * on side iside at height offset ioff (OPEN where unconnected).  Only pins
 * whose class matches pin_type (ipins OR opins) and that are not global are
 * assigned tracks; the actual track numbers are chosen by the uniform or
 * perturbed switch-pattern loader.  Returns NULL for a type with no pins.
 * Caller owns the returned matrix. */
static int ****
alloc_and_load_pin_to_track_map(INP enum e_pin_type pin_type,
		INP int nodes_per_chan, INP int *Fc, INP t_type_ptr Type,
		INP boolean perturb_switch_pattern,
		INP enum e_directionality directionality) {

	int **num_dir; /* [0..height][0..3] Number of *physical* pins on each side. */
	int ***dir_list; /* [0..height][0..3][0..num_pins-1] list of pins of correct type *
	 * * on each side. Max possible space alloced for simplicity */

	int i, j, k, iside, ipin, iclass, num_phys_pins, pindex, ioff;
	int *pin_num_ordering, *side_ordering, *offset_ordering;
	int **num_done_per_dir; /* [0..height][0..3] */
	int ****tracks_connected_to_pin; /* [0..num_pins-1][0..height][0..3][0..Fc-1] */

	/* NB: This wastes some space.  Could set tracks_..._pin[ipin][ioff][iside] =
	 * NULL if there is no pin on that side, or that pin is of the wrong type.
	 * Probably not enough memory to worry about, esp. as it's temporary.
	 * If pin ipin on side iside does not exist or is of the wrong type,
	 * tracks_connected_to_pin[ipin][iside][0] = OPEN. */

	if (Type->num_pins < 1) {
		return NULL;
	}

	/* Currently, only two possible Fc values exist: 0 or default.
	 * Finding the max. value of Fc in block will result in the
	 * default value, which works for now.  In the future, when
	 * the Fc values of all pins can vary, the max value will continue
	 * to work for matrix (de)allocation purposes.  However, all looping
	 * will have to be modified to account for pin-based Fc values. */
	int max_Fc = 0;
	for (i = 0; i < Type->num_pins; ++i) {
		iclass = Type->pin_class[i];
		if (Fc[i] > max_Fc && Type->class_inf[iclass].type == pin_type) {
			max_Fc = Fc[i];
		}
	}

	tracks_connected_to_pin = (int ****) alloc_matrix4(0, Type->num_pins - 1, 0,
			Type->height - 1, 0, 3, 0, max_Fc, sizeof(int));

	/* Start fully unconnected. */
	for (ipin = 0; ipin < Type->num_pins; ipin++) {
		for (ioff = 0; ioff < Type->height; ioff++) {
			for (iside = 0; iside < 4; iside++) {
				for (i = 0; i < max_Fc; ++i) {
					tracks_connected_to_pin[ipin][ioff][iside][i] = OPEN; /* Unconnected. */
				}
			}
		}
	}

	num_dir = (int **) alloc_matrix(0, Type->height - 1, 0, 3, sizeof(int));
	dir_list = (int ***) alloc_matrix3(0, Type->height - 1, 0, 3, 0,
			Type->num_pins - 1, sizeof(int));

	/* Defensive coding.  Try to crash hard if I use an unset entry. */
	for (i = 0; i < Type->height; i++)
		for (j = 0; j < 4; j++)
			for (k = 0; k < Type->num_pins; k++)
				dir_list[i][j][k] = (-1);

	for (i = 0; i < Type->height; i++)
		for (j = 0; j < 4; j++)
			num_dir[i][j] = 0;

	/* Bucket each matching, non-global pin by (height offset, side). */
	for (ipin = 0; ipin < Type->num_pins; ipin++) {
		iclass = Type->pin_class[ipin];
		if (Type->class_inf[iclass].type != pin_type) /* Doing either ipins OR opins */
			continue;

		/* Pins connecting only to global resources get no switches -> keeps the *
		 * area model accurate. */

		if (Type->is_global_pin[ipin])
			continue;
		for (ioff = 0; ioff < Type->height; ioff++) {
			for (iside = 0; iside < 4; iside++) {
				if (Type->pinloc[ioff][iside][ipin] == 1) {
					dir_list[ioff][iside][num_dir[ioff][iside]] = ipin;
					num_dir[ioff][iside]++;
				}
			}
		}
	}

	num_phys_pins = 0;
	for (ioff = 0; ioff < Type->height; ioff++) {
		for (iside = 0; iside < 4; iside++)
			num_phys_pins += num_dir[ioff][iside]; /* Num. physical pins per type */
	}
	num_done_per_dir = (int **) alloc_matrix(0, Type->height - 1, 0, 3,
			sizeof(int));
	for (ioff = 0; ioff < Type->height; ioff++) {
		for (iside = 0; iside < 4; iside++) {
			num_done_per_dir[ioff][iside] = 0;
		}
	}
	pin_num_ordering = (int *) my_malloc(num_phys_pins * sizeof(int));
	side_ordering = (int *) my_malloc(num_phys_pins * sizeof(int));
	offset_ordering = (int *) my_malloc(num_phys_pins * sizeof(int));

	/* Connection block I use distributes pins evenly across the tracks      *
	 * of ALL sides of the clb at once.  Ensures that each pin connects      *
	 * to spaced out tracks in its connection block, and that the other      *
	 * pins (potentially in other C blocks) connect to the remaining tracks  *
	 * first.  Doesn't matter for large Fc, but should make a fairly         *
	 * good low Fc block that leverages the fact that usually lots of pins   *
	 * are logically equivalent.                                             */

	/* Walk the sides in a fixed rotation (TOP -> RIGHT -> BOTTOM -> LEFT,
	 * stepping the height offset on RIGHT/LEFT) so pin k of every
	 * (offset, side) bucket is consumed before pin k+1 of any bucket. */
	iside = LEFT;
	ioff = Type->height - 1;
	ipin = 0;
	pindex = -1;

	while (ipin < num_phys_pins) {
		if (iside == TOP) {
			iside = RIGHT;
		} else if (iside == RIGHT) {
			if (ioff <= 0) {
				iside = BOTTOM;
			} else {
				ioff--;
			}
		} else if (iside == BOTTOM) {
			iside = LEFT;
		} else {
			assert(iside == LEFT);
			if (ioff >= Type->height - 1) {
				pindex++;
				iside = TOP;
			} else {
				ioff++;
			}
		}

		assert(pindex < num_phys_pins);
		/* Number of physical pins bounds number of logical pins */

		/* This bucket exhausted at the current depth: skip it. */
		if (num_done_per_dir[ioff][iside] >= num_dir[ioff][iside])
			continue;
		pin_num_ordering[ipin] = dir_list[ioff][iside][pindex];
		side_ordering[ipin] = iside;
		offset_ordering[ipin] = ioff;
		assert(Type->pinloc[ioff][iside][dir_list[ioff][iside][pindex]]);
		num_done_per_dir[ioff][iside]++;
		ipin++;
	}

	/* Hand the interleaved pin ordering to the chosen pattern loader, then
	 * sanity-check that every track can reach some pin. */
	if (perturb_switch_pattern) {
		load_perturbed_switch_pattern(Type, tracks_connected_to_pin,
				num_phys_pins, pin_num_ordering, side_ordering, offset_ordering,
				nodes_per_chan, max_Fc, directionality);
	} else {
		load_uniform_switch_pattern(Type, tracks_connected_to_pin,
				num_phys_pins, pin_num_ordering, side_ordering, offset_ordering,
				nodes_per_chan, max_Fc, directionality);
	}
	check_all_tracks_reach_pins(Type, tracks_connected_to_pin, nodes_per_chan,
			max_Fc, pin_type);

	/* Free all temporary storage. */
	free_matrix(num_dir, 0, Type->height - 1, 0, sizeof(int));
	free_matrix3(dir_list, 0, Type->height - 1, 0, 3, 0, sizeof(int));
	free_matrix(num_done_per_dir, 0, Type->height - 1, 0, sizeof(int));
	free(pin_num_ordering);
	free(side_ordering);
	free(offset_ordering);

	return tracks_connected_to_pin;
}
|
|
|
|
static void load_uniform_switch_pattern(INP t_type_ptr type,
		INOUTP int ****tracks_connected_to_pin, INP int num_phys_pins,
		INP int *pin_num_ordering, INP int *side_ordering,
		INP int *offset_ordering, INP int nodes_per_chan, INP int Fc,
		enum e_directionality directionality) {

	/* Fills tracks_connected_to_pin with an evenly spread switch pattern:
	 * each pin connects to every (W/Fc)-th track of the channel, with the
	 * starting track staggered from pin to pin so different pins reach
	 * different tracks. */

	int iphys, iconn, kk, ipin, iside, ioff, itrack;
	int group_size;
	float track_pos, conn_spacing, pin_stagger;

	/* For uni-directional routing, treat the W (always even) tracks as W/2
	 * adjacent pairs and lay down switches a pair at a time so neither
	 * direction is favoured.  All track numbers produced here are physical
	 * numbers; logical numbering is derived later via vpr_to_phy mapping. */

	pin_stagger = (float) nodes_per_chan / (float) (Fc * num_phys_pins);

	if (directionality == BI_DIRECTIONAL) {
		group_size = 1; /* bi-dir: one track at a time */
	} else {
		assert(directionality == UNI_DIRECTIONAL);
		group_size = 2; /* uni-dir: an INC/DEC pair at a time */
	}

	assert((nodes_per_chan % group_size == 0) && (Fc % group_size == 0));

	/* Distance (in tracks) between two consecutive connections of a pin. */
	conn_spacing = (float) nodes_per_chan / (float) Fc;

	for (iphys = 0; iphys < num_phys_pins; iphys++) {
		ipin = pin_num_ordering[iphys];
		iside = side_ordering[iphys];
		ioff = offset_ordering[iphys];

		/* Each pin gets Fc/group_size groups of group_size tracks. */
		for (iconn = 0; iconn < (Fc / group_size); iconn++) {
			track_pos = (iphys * pin_stagger) + (iconn * conn_spacing);
			itrack = ((int) track_pos) * group_size;

			/* Clamp so floating point round-off can't step past the
			 * last full group in the channel. */
			itrack = std::min(itrack, nodes_per_chan - group_size);

			/* Record every track of the group for the Fc pattern. */
			for (kk = 0; kk < group_size; ++kk) {
				tracks_connected_to_pin[ipin][ioff][iside][group_size * iconn + kk] =
						itrack + kk;
			}
		}
	}
}
|
|
|
|
static void load_perturbed_switch_pattern(INP t_type_ptr type,
		INOUTP int ****tracks_connected_to_pin, INP int num_phys_pins,
		INP int *pin_num_ordering, INP int *side_ordering,
		INP int *offset_ordering, INP int nodes_per_chan, INP int Fc,
		enum e_directionality directionality) {

	/* Fills tracks_connected_to_pin with an intentionally non-uniform
	 * pattern.  Used for inputs when Fc_input == Fc_output so that input
	 * and output switch patterns do not line up exactly and create "pin
	 * domains".  Fc/2 + 1 switches are spread over one half of the channel
	 * and Fc/2 - 1 over the other; alternating pins swap which half is
	 * dense so every track still sees about the same number of ipins. */

	int iphys, jconn, ipin, iside, itrack, ihalf, iconn, ioff;
	int Fc_dense, Fc_sparse, cur_Fc;
	float track_pos, spacing_dense, spacing_sparse, cur_spacing;
	float pin_stagger;

	assert(directionality == BI_DIRECTIONAL);

	pin_stagger = (float) nodes_per_chan / (float) (Fc * num_phys_pins);

	Fc_dense = (Fc / 2) + 1;
	Fc_sparse = Fc - Fc_dense; /* Works for even or odd Fc */

	spacing_dense = (float) nodes_per_chan / (float) (2 * Fc_dense);
	spacing_sparse = (float) nodes_per_chan / (float) (2 * Fc_sparse);

	for (iphys = 0; iphys < num_phys_pins; iphys++) {
		ipin = pin_num_ordering[iphys];
		iside = side_ordering[iphys];
		ioff = offset_ordering[iphys];

		/* Start point, staggered from pin to pin; keeps accumulating
		 * across both halves below. */
		track_pos = iphys * pin_stagger;
		iconn = 0;

		for (ihalf = 0; ihalf < 2; ihalf++) { /* dense and sparse halves */
			/* Alternate pins flip which half gets the dense spacing. */
			if (ihalf == (iphys % 2)) {
				cur_spacing = spacing_dense;
				cur_Fc = Fc_dense;
			} else {
				cur_spacing = spacing_sparse;
				cur_Fc = Fc_sparse;
			}

			for (jconn = 0; jconn < cur_Fc; ++jconn) {
				itrack = (int) track_pos;

				/* Occasional wraparound from floating point rounding is
				 * okay: the start position is > 0 when it happens, so the
				 * wrapped connection is still valid. */
				itrack = itrack % nodes_per_chan;

				tracks_connected_to_pin[ipin][ioff][iside][iconn] = itrack;

				track_pos += cur_spacing;
				iconn++;
			}
		}
	} /* End for all physical pins. */
}
|
|
|
|
static void check_all_tracks_reach_pins(t_type_ptr type,
		int ****tracks_connected_to_pin, int nodes_per_chan, int Fc,
		enum e_pin_type ipin_or_opin) {

	/* Sanity check: every track in the channel should be reachable from at
	 * least one pin of this block type.  Emits an error message for any
	 * track with no connections. */

	int iconn, iside, itrack, ipin, ioff;
	int *track_conn_count; /* [0..nodes_per_chan-1] connection tallies */

	assert(nodes_per_chan > 0);

	track_conn_count = (int *) my_calloc(nodes_per_chan, sizeof(int));

	/* Tally how many pin connections land on each track. */
	for (ipin = 0; ipin < type->num_pins; ipin++) {
		for (ioff = 0; ioff < type->height; ioff++) {
			for (iside = 0; iside < 4; iside++) {
				/* An OPEN first entry means no pin at this location. */
				if (tracks_connected_to_pin[ipin][ioff][iside][0] == OPEN)
					continue;
				for (iconn = 0; iconn < Fc; iconn++) {
					itrack = tracks_connected_to_pin[ipin][ioff][iside][iconn];
					track_conn_count[itrack]++;
				}
			}
		}
	}

	/* Report every orphan track. */
	for (itrack = 0; itrack < nodes_per_chan; itrack++) {
		if (track_conn_count[itrack] <= 0) {
			vpr_printf(TIO_MESSAGE_ERROR, "check_all_tracks_reach_pins: Track %d does not connect to any CLB %ss.\n",
					itrack, (ipin_or_opin == DRIVER ? "OPIN" : "IPIN"));
		}
	}

	free(track_conn_count);
}
|
|
|
|
/* Allocates and loads the track to ipin lookup for each physical grid type. This
|
|
* is the same information as the ipin_to_track map but accessed in a different way. */
|
|
|
|
static struct s_ivec ***
alloc_and_load_track_to_pin_lookup(INP int ****pin_to_track_map, INP int *Fc,
		INP int height, INP int num_pins, INP int nodes_per_chan) {
	int ipin, iside, itrack, iconn, ioff, pin_counter;
	struct s_ivec ***track_to_pin_lookup;

	/* [0..nodes_per_chan-1][0..height][0..3]. For each track number it stores a vector
	 * for each of the four sides. x-directed channels will use the TOP and
	 * BOTTOM vectors to figure out what clb input pins they connect to above
	 * and below them, respectively, while y-directed channels use the LEFT
	 * and RIGHT vectors. Each vector contains an nelem field saying how many
	 * ipins it connects to. The list[0..nelem-1] array then gives the pin
	 * numbers. */

	/* Note that a clb pin that connects to a channel on its RIGHT means that *
	 * that channel connects to a clb pin on its LEFT. The convention used   *
	 * here is always in the perspective of the CLB                          */

	if (num_pins < 1) {
		/* Nothing to build for a type with no pins. */
		return NULL;
	}

	/* Alloc and zero the the lookup table */
	track_to_pin_lookup = (struct s_ivec ***) alloc_matrix3(0,
			nodes_per_chan - 1, 0, height - 1, 0, 3, sizeof(struct s_ivec));
	for (itrack = 0; itrack < nodes_per_chan; itrack++) {
		for (ioff = 0; ioff < height; ioff++) {
			for (iside = 0; iside < 4; iside++) {
				track_to_pin_lookup[itrack][ioff][iside].nelem = 0;
				track_to_pin_lookup[itrack][ioff][iside].list = NULL;
			}
		}
	}

	/* Counting pass: determine how many pins each (track, offset, side)
	 * entry will hold, so the exact list size can be allocated below. */
	for (ipin = 0; ipin < num_pins; ipin++) {
		for (ioff = 0; ioff < height; ioff++) {
			for (iside = 0; iside < 4; iside++) {
				/* An OPEN first connection means the pin is absent at
				 * this (offset, side) location. */
				if (pin_to_track_map[ipin][ioff][iside][0] == OPEN)
					continue;

				for (iconn = 0; iconn < Fc[ipin]; iconn++) {
					itrack = pin_to_track_map[ipin][ioff][iside][iconn];
					track_to_pin_lookup[itrack][ioff][iside].nelem++;
				}
			}
		}
	}

	/* Allocate space.  Note the trick: nelem is reset to 0 right after the
	 * allocation so the loading pass below can reuse it as a fill cursor;
	 * it ends up back at the counted value once loading completes. */
	for (itrack = 0; itrack < nodes_per_chan; itrack++) {
		for (ioff = 0; ioff < height; ioff++) {
			for (iside = 0; iside < 4; iside++) {
				track_to_pin_lookup[itrack][ioff][iside].list = NULL; /* Defensive code */
				if (track_to_pin_lookup[itrack][ioff][iside].nelem != 0) {
					track_to_pin_lookup[itrack][ioff][iside].list =
							(int *) my_malloc(
									track_to_pin_lookup[itrack][ioff][iside].nelem
											* sizeof(int));
					track_to_pin_lookup[itrack][ioff][iside].nelem = 0;
				}
			}
		}
	}

	/* Loading pass: walk the pin->track map a second time and append each
	 * pin to the list of every track it connects to. */
	for (ipin = 0; ipin < num_pins; ipin++) {
		for (ioff = 0; ioff < height; ioff++) {
			for (iside = 0; iside < 4; iside++) {
				if (pin_to_track_map[ipin][ioff][iside][0] == OPEN)
					continue;

				for (iconn = 0; iconn < Fc[ipin]; iconn++) {
					itrack = pin_to_track_map[ipin][ioff][iside][iconn];
					pin_counter =
							track_to_pin_lookup[itrack][ioff][iside].nelem;
					track_to_pin_lookup[itrack][ioff][iside].list[pin_counter] =
							ipin;
					track_to_pin_lookup[itrack][ioff][iside].nelem++;
				}
			}
		}
	}

	return track_to_pin_lookup;
}
|
|
|
|
/* A utility routine to dump the contents of the routing resource graph *
|
|
* (everything -- connectivity, occupancy, cost, etc.) into a file. Used *
|
|
* only for debugging. */
|
|
void dump_rr_graph(INP const char *file_name) {

	/* Debug-only utility: writes every rr_node in the routing resource
	 * graph (connectivity, occupancy, cost, etc.) to file_name. */

	FILE *fp;
	int inode;

	fp = my_fopen(file_name, "w", 0);

	for (inode = 0; inode < num_rr_nodes; ++inode) {
		print_rr_node(fp, rr_node, inode);
		fprintf(fp, "\n");
	}

#if 0
	fprintf(fp, "\n\n%d rr_indexed_data entries.\n\n", num_rr_indexed_data);

	for (index = 0; index < num_rr_indexed_data; index++)
	{
		print_rr_indexed_data(fp, index);
		fprintf(fp, "\n");
	}
#endif

	fclose(fp);
}
|
|
|
|
/* Prints all the data about node inode to file fp. */
|
|
/* Prints all the data about node inode to file fp: type, coordinate span,
 * ptc number, direction/driver info, fanout edges with their switch types,
 * occupancy/capacity, RC values and cost index. */
void print_rr_node(FILE * fp, t_rr_node * L_rr_node, int inode) {

	/* Name tables indexed by the corresponding enum values.  direction and
	 * drivers are offset by +1 below so that OPEN (-1) maps to slot 0. */
	static const char *name_type[] = { "SOURCE", "SINK", "IPIN", "OPIN",
			"CHANX", "CHANY", "INTRA_CLUSTER_EDGE" };
	static const char *direction_name[] = { "OPEN", "INC_DIRECTION",
			"DEC_DIRECTION", "BI_DIRECTION" };
	static const char *drivers_name[] = { "OPEN", "MULTI_BUFFER", "SINGLE" };

	t_rr_type rr_type;
	int iconn;

	rr_type = L_rr_node[inode].type;

	/* Make sure we don't overrun const arrays */
	assert((int)rr_type < (int)(sizeof(name_type) / sizeof(char *)));
	assert(
			(L_rr_node[inode].direction + 1) < (int)(sizeof(direction_name) / sizeof(char *)));
	assert(
			(L_rr_node[inode].drivers + 1) < (int)(sizeof(drivers_name) / sizeof(char *)));

	fprintf(fp, "Node: %d %s ", inode, name_type[rr_type]);
	/* Single-tile nodes print one coordinate; spanning nodes print both ends. */
	if ((L_rr_node[inode].xlow == L_rr_node[inode].xhigh)
			&& (L_rr_node[inode].ylow == L_rr_node[inode].yhigh)) {
		fprintf(fp, "(%d, %d) ", L_rr_node[inode].xlow, L_rr_node[inode].ylow);
	} else {
		fprintf(fp, "(%d, %d) to (%d, %d) ", L_rr_node[inode].xlow,
				L_rr_node[inode].ylow, L_rr_node[inode].xhigh,
				L_rr_node[inode].yhigh);
	}
	fprintf(fp, "Ptc_num: %d ", L_rr_node[inode].ptc_num);
	fprintf(fp, "Direction: %s ",
			direction_name[L_rr_node[inode].direction + 1]);
	fprintf(fp, "Drivers: %s ", drivers_name[L_rr_node[inode].drivers + 1]);
	fprintf(fp, "\n");

	/* Fanout: destination node index of every outgoing edge. */
	fprintf(fp, "%d edge(s):", L_rr_node[inode].num_edges);
	for (iconn = 0; iconn < L_rr_node[inode].num_edges; iconn++)
		fprintf(fp, " %d", L_rr_node[inode].edges[iconn]);
	fprintf(fp, "\n");

	/* Switch id used by each edge, printed in the same order as above. */
	fprintf(fp, "Switch types:");
	for (iconn = 0; iconn < L_rr_node[inode].num_edges; iconn++)
		fprintf(fp, " %d", L_rr_node[inode].switches[iconn]);
	fprintf(fp, "\n");

	fprintf(fp, "Occ: %d Capacity: %d\n", L_rr_node[inode].occ,
			L_rr_node[inode].capacity);
	/* Intra-cluster edges carry no physical RC data, so skip R and C. */
	if (rr_type != INTRA_CLUSTER_EDGE) {
		fprintf(fp, "R: %g C: %g\n", L_rr_node[inode].R, L_rr_node[inode].C);
	}
	fprintf(fp, "Cost_index: %d\n", L_rr_node[inode].cost_index);
}
|
|
|
|
/* Prints all the rr_indexed_data of index to file fp. */
|
|
/* Prints all the rr_indexed_data of index to file fp: cost indices,
 * base costs, segment info, and the timing parameters. */
void print_rr_indexed_data(FILE * fp, int index) {

	fprintf(fp, "Index: %d\n", index);

	fprintf(fp, "ortho_cost_index: %d ",
			rr_indexed_data[index].ortho_cost_index);
	/* Bug fix: this line previously printed saved_base_cost under the
	 * "base_cost" label, so the live base_cost was never dumped and
	 * saved_base_cost appeared twice under two different labels. */
	fprintf(fp, "base_cost: %g ", rr_indexed_data[index].base_cost);
	fprintf(fp, "saved_base_cost: %g\n",
			rr_indexed_data[index].saved_base_cost);

	fprintf(fp, "Seg_index: %d ", rr_indexed_data[index].seg_index);
	fprintf(fp, "inv_length: %g\n", rr_indexed_data[index].inv_length);

	fprintf(fp, "T_linear: %g ", rr_indexed_data[index].T_linear);
	fprintf(fp, "T_quadratic: %g ", rr_indexed_data[index].T_quadratic);
	fprintf(fp, "C_load: %g\n", rr_indexed_data[index].C_load);
}
|
|
|
|
/* Builds the OPIN -> wire-mux edges for the block at grid (i, j) when
 * routing is uni-directional.  For every driver (output) pin of the block,
 * finds the channel segments adjacent to each side/offset the pin appears
 * on, collects opin->mux connections there, adds any direct clb-to-clb
 * connections, and loads the resulting edges into the rr graph.
 * *Fc_clipped is set TRUE if any pin's Fc demand had to be clipped. */
static void build_unidir_rr_opins(INP int i, INP int j,
		INP struct s_grid_tile **L_grid, INP int **Fc_out,
		INP int nodes_per_chan, INP t_seg_details * seg_details,
		INOUTP int **Fc_xofs, INOUTP int **Fc_yofs,
		INOUTP t_rr_node * L_rr_node, INOUTP boolean * L_rr_edge_done,
		OUTP boolean * Fc_clipped, INP t_ivec *** L_rr_node_indices, INP int delayless_switch,
		INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs) {
	/* This routine returns a list of the opins rr_nodes on each
	 * side/offset of the block. You must free the result with
	 * free_matrix. */

	t_type_ptr type;
	int ipin, iclass, ofs, chan, seg, max_len, inode, max_Fc = -1;
	enum e_side side;
	t_rr_type chan_type;
	t_linked_edge *edge_list = NULL, *next;
	boolean clipped, vert, pos_dir;
	int num_edges;
	int **Fc_ofs;

	*Fc_clipped = FALSE;

	/* Only the base block of a set should use this function */
	if (L_grid[i][j].offset > 0) {
		return;
	}

	type = L_grid[i][j].type;

	/* Currently, only two possible Fc values exist: 0 or default.
	 * Finding the max. value of Fc in block will result in the
	 * default value, which works for now. In the future, when
	 * the Fc values of all pins can vary, the max value will continue
	 * to work for matrix allocation purposes. However, all looping
	 * will have to be modified to account for pin-based Fc values. */
	if (type->index > 0) {
		max_Fc = 0;
		for (ipin = 0; ipin < type->num_pins; ++ipin) {
			iclass = type->pin_class[ipin];
			if (Fc_out[type->index][ipin] > max_Fc && type->class_inf[iclass].type == DRIVER) {
				max_Fc = Fc_out[type->index][ipin];
			}
		}
	}

	/* Go through each pin and find its fanout. */
	for (ipin = 0; ipin < type->num_pins; ++ipin) {
		/* Skip global pins and ipins */
		iclass = type->pin_class[ipin];
		if (type->class_inf[iclass].type != DRIVER) {
			continue;
		}
		if (type->is_global_pin[ipin]) {
			continue;
		}

		num_edges = 0;
		edge_list = NULL;
		if(Fc_out[type->index][ipin] != 0) {
			for (ofs = 0; ofs < type->height; ++ofs) {
				for (side = (enum e_side)0; side < 4; side = (enum e_side)(side + 1)) {
					/* Can't do anything if pin isn't at this location */
					if (0 == type->pinloc[ofs][side][ipin]) {
						continue;
					}

					/* Figure out the chan seg at that side.
					 * side is the side of the logic or io block. */
					vert = (boolean) ((side == TOP) || (side == BOTTOM));
					/* mrFPGA: stacked (3D) layout swaps the channel
					 * orientation relative to the block sides. */
					if (is_stack) {
						vert = (boolean)(!vert);
					}
					/* END */
					pos_dir = (boolean) ((side == TOP) || (side == RIGHT));
					/* mrFPGA: channel type must follow the ORIGINAL
					 * (pre-swap) orientation, hence the un-swap here. */
					//chan_type = (vert ? CHANX : CHANY);
					chan_type = ((is_stack ? !vert : vert) ? CHANX : CHANY);
					/* END */
					chan = (vert ? (j + ofs) : i);
					seg = (vert ? i : (j + ofs));
					max_len = (vert ? nx : ny);
					Fc_ofs = (vert ? Fc_xofs : Fc_yofs);
					/* Channels on the negative side sit one coordinate
					 * lower than the block. */
					if (FALSE == pos_dir) {
						/* mrFPGA: in stacked mode the roles of chan and
						 * seg are exchanged, so decrement seg instead. */
						if (is_stack) {
							--seg;
						/* END */
						} else {
							--chan;
						}
					}

					/* mrFPGA */
					/* Skip the location if there is no channel. */
					if (chan < (is_stack ? 1:0)) {
						continue;
					}
					if (seg < (is_stack ? 0:1)) {
						continue;
					}
					/* END */
					if (seg > (vert ? nx : ny)) {
						continue;
					}
					if (chan > (vert ? ny : nx)) {
						continue;
					}

					/* Get the list of opin to mux connections for that chan seg. */
					num_edges += get_unidir_opin_connections(chan, seg,
							max_Fc, chan_type, seg_details, &edge_list,
							Fc_ofs, L_rr_edge_done, max_len, nodes_per_chan,
							L_rr_node_indices, &clipped);
					if (clipped) {
						*Fc_clipped = TRUE;
					}
				}
			}
		}

		/* Add in direct connections */
		num_edges += get_opin_direct_connecions(i, j, ipin, &edge_list, L_rr_node_indices, delayless_switch, directs, num_directs, clb_to_clb_directs);

		/* Add the edges, then release the temporary edge list. */
		inode = get_rr_node_index(i, j, OPIN, ipin, L_rr_node_indices);
		alloc_and_load_edges_and_switches(rr_node, inode, num_edges,
				L_rr_edge_done, edge_list);
		while (edge_list != NULL) {
			next = edge_list->next;
			free(edge_list);
			edge_list = next;
		}
	}
}
|
|
#if 0
|
|
static void
load_uniform_opin_switch_pattern_paired(INP int *Fc_out,
		INP int num_pins,
		INP int *pins_in_chan_seg,
		INP int num_wire_inc_muxes,
		INP int num_wire_dec_muxes,
		INP int *wire_inc_muxes,
		INP int *wire_dec_muxes,
		INOUTP t_rr_node * L_rr_node,
		INOUTP boolean * L_rr_edge_done,
		INP t_seg_details * seg_details,
		OUTP boolean * Fc_clipped)
{

	/* NOTE(review): this function is compiled out (#if 0) — kept for
	 * reference only.  Directionality is assumed to be uni-directional. */

	/* Make turn-based assignment to avoid overlap when Fc_ouput is low. This is a bipartite
	 * matching problem. Out of "num_wire_muxes" muxes "Fc_output" of them is assigned
	 * to each outpin (total "num_pins" of them); assignment is uniform (spacing - spreadout)
	 * and staggered to avoid overlap when Fc_output is low. */

	/* The natural order wires muxes are stored in wire_muxes is alternating in directionality
	 * already (by my implementation), so no need to do anything extra to avoid directional bias */

	/* History: an earlier approach used quantization of 2 (companion wires) to avoid
	 * directional bias, but that produced abnormal patterns when W is not a multiple of 2L.
	 * The current approach separates INC muxes and DEC muxes into two sets, each uniformly
	 * assigned to opins with Fc_output/2 — identical to before for normal cases and well
	 * behaved for the nonquantized cases. */

	/* Finally, separated the two approaches: 1. Take all wire muxes and assign them to opins
	 * one at a time (load_uniform_opin_switch_pattern) 2. Take pairs (by companion)
	 * of wire muxes and assign them to opins a pair at a time (load_uniform_opin_switch_pattern_paired).
	 * The first is used for fringe channel segments (ends of channels, where
	 * there are lots of muxes due to partial wire segments) and the second is used in core */

	int ipin, iconn, num_edges, init_mux;
	int from_node, to_node, to_track;
	int xlow, ylow;
	t_linked_edge *edge_list;
	int *wire_muxes;
	int k, num_wire_muxes, Fc_output_per_side, CurFc;
	int count_inc, count_dec;
	t_type_ptr type;

	*Fc_clipped = FALSE;

	/* Running counters stagger the starting mux from pin to pin. */
	count_inc = count_dec = 0;

	for (ipin = 0; ipin < num_pins; ipin++)
	{
		from_node = pins_in_chan_seg[ipin];
		xlow = L_rr_node[from_node].xlow;
		ylow = L_rr_node[from_node].ylow;
		type = grid[xlow][ylow].type;
		edge_list = NULL;
		num_edges = 0;

		/* Assigning the INC muxes first, then DEC muxes */
		for (k = 0; k < 2; ++k)
		{
			if (k == 0)
			{
				num_wire_muxes = num_wire_inc_muxes;
				wire_muxes = wire_inc_muxes;
			}
			else
			{
				num_wire_muxes = num_wire_dec_muxes;
				wire_muxes = wire_dec_muxes;
			}

			/* Half the Fc will be assigned for each direction. */
			assert(Fc_out[type->index] % 2 == 0);
			Fc_output_per_side = Fc_out[type->index] / 2;

			/* Clip the demand. Make sure to use a new variable so
			 * on the second pass it is not clipped. */
			CurFc = Fc_output_per_side;
			if (Fc_output_per_side > num_wire_muxes)
			{
				*Fc_clipped = TRUE;
				CurFc = num_wire_muxes;
			}

			if (k == 0)
			{
				init_mux = (count_inc) % num_wire_muxes;
				count_inc += CurFc;
			}
			else
			{
				init_mux = (count_dec) % num_wire_muxes;
				count_dec += CurFc;
			}

			for (iconn = 0; iconn < CurFc; iconn++)
			{
				/* FINALLY, make the outpin to mux connection */
				/* Latest update: I'm not using Uniform Pattern, but a similarly staggered pattern */
				to_node = wire_muxes[(init_mux + iconn) % num_wire_muxes];

				L_rr_node[to_node].num_opin_drivers++; /* keep track of mux size */
				to_track = L_rr_node[to_node].ptc_num;

				if (FALSE == L_rr_edge_done[to_node])
				{
					/* Use of alloc_and_load_edges_and_switches
					 * must be accompanied by rr_edge_done check. */
					L_rr_edge_done[to_node] = TRUE;
					edge_list =
						insert_in_edge_list(edge_list,
								to_node,
								seg_details[to_track].wire_switch);
					num_edges++;
				}
			}
		}

		if (num_edges < 1)
		{
			vpr_printf(TIO_MESSAGE_ERROR, "opin %d at (%d,%d) does not connect to any tracks.\n",
					L_rr_node[from_node].ptc_num, L_rr_node[from_node].xlow, L_rr_node[from_node].ylow);
			exit(1);
		}

		alloc_and_load_edges_and_switches(L_rr_node, from_node, num_edges,
				L_rr_edge_done, edge_list);
	}
}
|
|
#endif
|
|
|
|
#if MUX_SIZE_DIST_DISPLAY
|
|
/* This routine prints and dumps statistics on the mux sizes on a sblock
|
|
* per sblock basis, over the entire chip. Mux sizes should be balanced (off by
|
|
* at most 1) for all muxes in the same sblock in the core, and corner sblocks.
|
|
* Fringe sblocks will have imbalance due to missing one side and constrains on
|
|
* where wires must connect. Comparing two core sblock sblocks, muxes need not
|
|
* be balanced if W is not quantized to 2L multiples, again for reasons that
|
|
* there could be sblocks with different number of muxes but same number of incoming
|
|
* wires that need to make connections to these muxes (we don't want to under-connect
|
|
* user-specified Fc and Fs). */
|
|
/* This routine prints and dumps statistics on the mux sizes on a sblock
 * per sblock basis, over the core of the chip (fringe sblocks are skipped
 * because they are inherently imbalanced).  For each core sblock it builds
 * a size-sorted linked list of all its muxes, records range statistics,
 * and accumulates a per-sblock-type size histogram that is printed at the
 * end.  Only compiled when MUX_SIZE_DIST_DISPLAY is set. */
static void
view_mux_size_distribution(t_ivec *** L_rr_node_indices,
		int nodes_per_chan,
		t_seg_details * seg_details_x,
		t_seg_details * seg_details_y)
{

	int i, j, itrack, seg_num, chan_num, max_len;
	int start, end, inode, max_value, min_value;
	int array_count, k, num_muxes;
	short direction, side;
	float *percent_range_array;
	float percent_range, percent_range_sum, avg_percent_range;
	float std_dev_percent_range, deviation_f;
	int range, *range_array, global_max_range;
	float avg_range, range_sum, std_dev_range;
	t_seg_details *seg_details;
	t_mux *new_mux, *sblock_mux_list_head, *current, *next;

#ifdef ENABLE_DUMP
	FILE *dump_file_per_sblock, *dump_file;
#endif /* ENABLE_DUMP */
	t_mux_size_distribution *distr_list, *distr_current, *new_distribution,
			*distr_next;

#ifdef ENABLE_DUMP
	dump_file = my_fopen("mux_size_dump.txt", "w", 0);
	dump_file_per_sblock = my_fopen("mux_size_per_sblock_dump.txt", "w", 0);
#endif /* ENABLE_DUMP */

	sblock_mux_list_head = NULL;
	percent_range_array =
			(float *)my_malloc((nx - 1) * (ny - 1) * sizeof(float));
	range_array = (int *)my_malloc((nx - 1) * (ny - 1) * sizeof(int));
	array_count = 0;
	percent_range_sum = 0.0;
	range_sum = 0.0;
	global_max_range = 0;
	min_value = 0;
	max_value = 0;
	seg_num = 0;
	chan_num = 0;
	direction = 0;
	seg_details = 0;
	max_len = 0;
	distr_list = NULL;

	/* With the specified range, I'm only looking at core sblocks */
	for (j = (ny - 1); j > 0; j--)
	{
		for (i = 1; i < nx; i++)
		{
			num_muxes = 0;
			for (side = 0; side < 4; side++)
			{
				/* Map each sblock side to the channel segment whose muxes
				 * live at this sblock, and the wire direction that has a
				 * mux here. */
				switch (side)
				{
				case LEFT:
					seg_num = i;
					chan_num = j;
					direction = DEC_DIRECTION; /* only DEC have muxes in that sblock */
					seg_details = seg_details_x;
					max_len = nx;
					break;

				case RIGHT:
					seg_num = i + 1;
					chan_num = j;
					direction = INC_DIRECTION;
					seg_details = seg_details_x;
					max_len = nx;
					break;

				case TOP:
					seg_num = j + 1;
					chan_num = i;
					direction = INC_DIRECTION;
					seg_details = seg_details_y;
					max_len = ny;
					break;

				case BOTTOM:
					seg_num = j;
					chan_num = i;
					direction = DEC_DIRECTION;
					seg_details = seg_details_y;
					max_len = ny;
					break;

				default:
					assert(FALSE);
				}

				assert(nodes_per_chan > 0);
				for (itrack = 0; itrack < nodes_per_chan; itrack++)
				{
					start = get_seg_start(seg_details, itrack,
							seg_num, chan_num);
					end = get_seg_end(seg_details, itrack,
							start, chan_num, max_len);

					/* A wire has a mux at this sblock when its driven end
					 * (start for INC wires, end for DEC wires) is here. */
					if ((seg_details[itrack].direction == direction)
							&& (((start == seg_num) && (direction == INC_DIRECTION))
									|| ((end == seg_num) && (direction == DEC_DIRECTION))))
					{ /* mux found */
						num_muxes++;
						if (side == LEFT || side == RIGHT)
						{ /* CHANX */
							inode = get_rr_node_index(seg_num, chan_num,
									CHANX, itrack, L_rr_node_indices);
						}
						else
						{
							assert((side == TOP) || (side == BOTTOM)); /* CHANY */
							inode = get_rr_node_index(chan_num, seg_num,
									CHANY, itrack, L_rr_node_indices);
						}

						/* Mux size = wire fanin + opin fanin. */
						new_mux = (t_mux *) my_malloc(sizeof(t_mux));
						new_mux->size =
								rr_node[inode].num_wire_drivers +
								rr_node[inode].num_opin_drivers;
						new_mux->next = NULL;

						/* insert in linked list, descending */
						if (sblock_mux_list_head == NULL)
						{
							/* first entry */
							sblock_mux_list_head = new_mux;
						}
						else if (sblock_mux_list_head->size < new_mux->size)
						{
							/* insert before head */
							new_mux->next = sblock_mux_list_head;
							sblock_mux_list_head = new_mux;
						}
						else
						{
							/* insert after head */
							current = sblock_mux_list_head;
							next = current->next;

							while ((next != NULL)
									&& (next->size > new_mux->size))
							{
								current = next;
								next = current->next;
							}

							if (next == NULL)
							{
								current->next = new_mux;
							}
							else
							{
								new_mux->next = current->next;
								current->next = new_mux;
							}
						}
						/* end of insert in linked list */
					}
				}
			} /* end of mux searching over all four sides of sblock */
			/* now sblock_mux_list_head holds a linked list of all muxes in this sblock */

			current = sblock_mux_list_head;

#ifdef ENABLE_DUMP
			fprintf(dump_file_per_sblock,
					"sblock at (%d, %d) has mux sizes: {", i, j);
#endif /* ENABLE_DUMP */

			/* NOTE(review): if a sblock has no muxes, max_value/min_value
			 * keep their values from the previous sblock and percent_range
			 * below divides by a possibly stale (or zero) min_value —
			 * looks like this is assumed never to happen for core sblocks;
			 * confirm. */
			if (current != NULL)
			{
				max_value = min_value = current->size;
			}
			while (current != NULL)
			{
				if (max_value < current->size)
					max_value = current->size;
				if (min_value > current->size)
					min_value = current->size;

#ifdef ENABLE_DUMP
				fprintf(dump_file_per_sblock, "%d ",
						current->size);
				fprintf(dump_file, "%d\n", current->size);
#endif /* ENABLE_DUMP */

				current = current->next;
			}

#ifdef ENABLE_DUMP
			fprintf(dump_file_per_sblock, "}\n\tmax: %d\tmin:%d",
					max_value, min_value);
#endif /* ENABLE_DUMP */

			range = max_value - min_value;
			percent_range = ((float)range) / ((float)min_value);

			if (global_max_range < range)
				global_max_range = range;

#ifdef ENABLE_DUMP
			fprintf(dump_file_per_sblock,
					"\t\trange: %d\t\tpercent range:%.2f\n",
					range, percent_range);
#endif /* ENABLE_DUMP */

			percent_range_array[array_count] = percent_range;
			range_array[array_count] = range;

			percent_range_sum += percent_range;
			range_sum += range;

			array_count++;

			/* I will use a distribution for each (core) sblock type.
			 * There are more than 1 type of sblocks,
			 * when quantization of W to 2L multiples is not observed. */

			distr_current = distr_list;
			while (distr_current != NULL
					&& distr_current->mux_count != num_muxes)
			{
				distr_current = distr_current->next;
			}

			if (distr_current == NULL)
			{
				/* Create a distribution for the new sblock type,
				 * and put it as head of linked list by convention */

				new_distribution = (t_mux_size_distribution *)
						my_malloc(sizeof(t_mux_size_distribution));
				new_distribution->mux_count = num_muxes;
				new_distribution->max_index = max_value;
				new_distribution->distr =
						(int *)my_calloc(max_value + 1, sizeof(int));

				/* filling in the distribution */
				current = sblock_mux_list_head;
				while (current != NULL)
				{
					assert(current->size <=
							new_distribution->max_index);
					new_distribution->distr[current->size]++;
					current = current->next;
				}

				/* add it to head */
				new_distribution->next = distr_list;
				distr_list = new_distribution;
			}
			else
			{
				/* distr_current->mux_count == num_muxes so add this sblock's mux sizes in this distribution */
				current = sblock_mux_list_head;

				while (current != NULL)
				{
					if (current->size > distr_current->max_index)
					{
						/* needs to realloc to expand the distribution array to hold the new large-valued data */
						distr_current->distr =
								my_realloc(distr_current->distr,
										(current->size + 1) * sizeof(int));

						/* initializing the newly allocated elements */
						for (k = (distr_current->max_index + 1); k <= current->size; k++)
							distr_current->distr[k] = 0;

						distr_current->max_index = current->size;
						distr_current->distr[current->size]++;
					}
					else
					{
						distr_current->distr[current->size]++;
					}
					current = current->next;
				}
			}

			/* done - now free memory */
			current = sblock_mux_list_head;
			while (current != NULL)
			{
				next = current->next;
				free(current);
				current = next;
			}
			sblock_mux_list_head = NULL;
		}
	}

	/* Summary statistics over all core sblocks. */
	avg_percent_range = (float)percent_range_sum / array_count;
	avg_range = (float)range_sum / array_count;

	percent_range_sum = 0.0;
	range_sum = 0.0;
	for (k = 0; k < array_count; k++)
	{
		deviation_f = (percent_range_array[k] - avg_percent_range);
		percent_range_sum += deviation_f * deviation_f;

		deviation_f = ((float)range_array[k] - avg_range);
		range_sum += deviation_f * deviation_f;
	}
	std_dev_percent_range =
			sqrt(percent_range_sum / ((float)array_count - 1.0));
	std_dev_range = sqrt(range_sum / ((float)array_count - 1.0));
	vpr_printf(TIO_MESSAGE_INFO, "==== MUX size statistics ====\n");
	vpr_printf(TIO_MESSAGE_INFO, "Max range of mux size within a sblock: %d\n", global_max_range);
	vpr_printf(TIO_MESSAGE_INFO, "Average range of mux size within a sblock: %.2f\n", avg_range);
	vpr_printf(TIO_MESSAGE_INFO, "Std dev of range of mux size within a sblock: %.2f\n", std_dev_range);
	vpr_printf(TIO_MESSAGE_INFO, "Average percent range of mux size within a sblock: %.2f%%\n", avg_percent_range * 100.0);
	vpr_printf(TIO_MESSAGE_INFO, "Std dev of percent range of mux size within a sblock: %.2f%%\n", std_dev_percent_range * 100.0);

	vpr_printf(TIO_MESSAGE_INFO, " -- Detailed MUX size distribution by sblock type -- \n");
	distr_current = distr_list;
	while (distr_current != NULL)
	{
		print_distribution(stdout, distr_current);

		/* free */
		distr_next = distr_current->next;

		free(distr_current->distr);
		free(distr_current);

		distr_current = distr_next;
	}

	free(percent_range_array);
	free(range_array);
#ifdef ENABLE_DUMP
	fclose(dump_file_per_sblock);
	fclose(dump_file);
#endif /* ENABLE_DUMP */
}
|
|
|
|
static void
|
|
print_distribution(FILE * fptr,
|
|
t_mux_size_distribution * distr_struct)
|
|
{
|
|
int *distr;
|
|
int k;
|
|
float sum;
|
|
boolean zeros;
|
|
|
|
distr = distr_struct->distr;
|
|
fprintf(fptr,
|
|
"For Sblocks containing %d MUXes, the MUX size distribution is:\n",
|
|
distr_struct->mux_count);
|
|
fprintf(fptr, "\t\t\tSize\t\t\tFrequency (percent)\n");
|
|
|
|
sum = 0.0;
|
|
for (k = 0; k <= distr_struct->max_index; k++)
|
|
sum += distr[k];
|
|
|
|
zeros = TRUE;
|
|
for (k = 0; k <= distr_struct->max_index; k++)
|
|
{
|
|
if (zeros && (distr[k] == 0))
|
|
{
|
|
/* do nothing for leading string of zeros */
|
|
}
|
|
else
|
|
{
|
|
zeros = FALSE; /* leading string of zeros ended */
|
|
fprintf(fptr, "\t\t\t%d\t\t\t%d (%.2f%%)\n", k, distr[k],
|
|
(float)distr[k] / sum * 100.0);
|
|
}
|
|
}
|
|
fprintf(fptr, "\nEnd of this Sblock MUX size distribution.\n");
|
|
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
* Parse out which CLB pins should connect directly to which other CLB pins then store that in a clb_to_clb_directs data structure
|
|
 * This data structure supplements the info in the "directs" data structure
|
|
 * TODO: The function that does this parsing in placement is poorly done because it lacks generality on heterogeneity; it should be replaced with this one
|
|
static t_clb_to_clb_directs * alloc_and_load_clb_to_clb_directs(INP t_direct_inf *directs, INP int num_directs) {
|
|
int i, j;
|
|
t_clb_to_clb_directs *clb_to_clb_directs;
|
|
char *pb_type_name, *port_name;
|
|
int start_pin_index, end_pin_index;
|
|
t_pb_type *pb_type;
|
|
|
|
clb_to_clb_directs = (t_clb_to_clb_directs*)my_calloc(num_directs, sizeof(t_clb_to_clb_directs));
|
|
|
|
pb_type_name = NULL;
|
|
port_name = NULL;
|
|
|
|
for(i = 0; i < num_directs; i++) {
|
|
pb_type_name = (char*)my_malloc((strlen(directs[i].from_pin) + strlen(directs[i].to_pin)) * sizeof(char));
|
|
port_name = (char*)my_malloc((strlen(directs[i].from_pin) + strlen(directs[i].to_pin)) * sizeof(char));
|
|
|
|
// Load from pins
|
|
// Parse out the pb_type name, port name, and pin range
|
|
parse_direct_pin_name(directs[i].from_pin, directs[i].line, &start_pin_index, &end_pin_index, pb_type_name, port_name);
|
|
|
|
// Figure out which type, port, and pin is used
|
|
for(j = 0; j < num_types; j++) {
|
|
if(strcmp(type_descriptors[j].name, pb_type_name) == 0) {
|
|
break;
|
|
}
|
|
}
|
|
assert(j < num_types);
|
|
clb_to_clb_directs[i].from_clb_type = &type_descriptors[j];
|
|
pb_type = clb_to_clb_directs[i].from_clb_type->pb_type;
|
|
|
|
for(j = 0; j < pb_type->num_ports; j++) {
|
|
if(strcmp(pb_type->ports[j].name, port_name) == 0) {
|
|
break;
|
|
}
|
|
}
|
|
assert(j < pb_type->num_ports);
|
|
|
|
if(start_pin_index == OPEN) {
|
|
assert(start_pin_index == end_pin_index);
|
|
start_pin_index = 0;
|
|
end_pin_index = pb_type->ports[j].num_pins - 1;
|
|
}
|
|
get_blk_pin_from_port_pin(clb_to_clb_directs[i].from_clb_type->index, j, start_pin_index, &clb_to_clb_directs[i].from_clb_pin_start_index);
|
|
get_blk_pin_from_port_pin(clb_to_clb_directs[i].from_clb_type->index, j, end_pin_index, &clb_to_clb_directs[i].from_clb_pin_end_index);
|
|
|
|
// Load to pins
|
|
// Parse out the pb_type name, port name, and pin range
|
|
parse_direct_pin_name(directs[i].to_pin, directs[i].line, &start_pin_index, &end_pin_index, pb_type_name, port_name);
|
|
|
|
// Figure out which type, port, and pin is used
|
|
for(j = 0; j < num_types; j++) {
|
|
if(strcmp(type_descriptors[j].name, pb_type_name) == 0) {
|
|
break;
|
|
}
|
|
}
|
|
assert(j < num_types);
|
|
clb_to_clb_directs[i].to_clb_type = &type_descriptors[j];
|
|
pb_type = clb_to_clb_directs[i].to_clb_type->pb_type;
|
|
|
|
for(j = 0; j < pb_type->num_ports; j++) {
|
|
if(strcmp(pb_type->ports[j].name, port_name) == 0) {
|
|
break;
|
|
}
|
|
}
|
|
assert(j < pb_type->num_ports);
|
|
|
|
if(start_pin_index == OPEN) {
|
|
assert(start_pin_index == end_pin_index);
|
|
start_pin_index = 0;
|
|
end_pin_index = pb_type->ports[j].num_pins - 1;
|
|
}
|
|
|
|
get_blk_pin_from_port_pin(clb_to_clb_directs[i].to_clb_type->index, j, start_pin_index, &clb_to_clb_directs[i].to_clb_pin_start_index);
|
|
get_blk_pin_from_port_pin(clb_to_clb_directs[i].to_clb_type->index, j, end_pin_index, &clb_to_clb_directs[i].to_clb_pin_end_index);
|
|
|
|
if(abs(clb_to_clb_directs[i].from_clb_pin_start_index - clb_to_clb_directs[i].from_clb_pin_end_index) != abs(clb_to_clb_directs[i].to_clb_pin_start_index - clb_to_clb_directs[i].to_clb_pin_end_index)) {
|
|
vpr_printf(TIO_MESSAGE_ERROR, "[LINE %d] Range mismatch from %s to %s.\n", directs[i].line, directs[i].from_pin, directs[i].to_pin);
|
|
exit(1);
|
|
}
|
|
|
|
free(pb_type_name);
|
|
free(port_name);
|
|
}
|
|
return clb_to_clb_directs;
|
|
}
|
|
*/
|
|
|
|
/* Add all direct clb-pin-to-clb-pin edges to given opin */
|
|
static int get_opin_direct_connecions(int x, int y, int opin, INOUTP t_linked_edge ** edge_list_ptr, INP t_ivec *** L_rr_node_indices,
|
|
INP int delayless_switch, INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs) {
|
|
t_type_ptr type;
|
|
int grid_ofs;
|
|
int i, ipin, inode;
|
|
t_linked_edge *edge_list_head;
|
|
int max_index, min_index, offset, swap;
|
|
int new_edges;
|
|
|
|
type = grid[x][y].type;
|
|
edge_list_head = *edge_list_ptr;
|
|
new_edges = 0;
|
|
|
|
/* Iterate through all direct connections */
|
|
for(i = 0; i < num_directs; i++) {
|
|
/* Find matching direct clb-to-clb connections with the same type as current grid location */
|
|
if(clb_to_clb_directs[i].from_clb_type == type) {
|
|
/* Compute index of opin with regards to given pins */
|
|
if(clb_to_clb_directs[i].from_clb_pin_start_index > clb_to_clb_directs[i].from_clb_pin_end_index) {
|
|
swap = TRUE;
|
|
max_index = clb_to_clb_directs[i].from_clb_pin_start_index;
|
|
min_index = clb_to_clb_directs[i].from_clb_pin_end_index;
|
|
} else {
|
|
swap = FALSE;
|
|
min_index = clb_to_clb_directs[i].from_clb_pin_start_index;
|
|
max_index = clb_to_clb_directs[i].from_clb_pin_end_index;
|
|
}
|
|
if(max_index >= opin && min_index <= opin) {
|
|
offset = opin - min_index;
|
|
/* This opin is specified to connect directly to an ipin, now compute which ipin to connect to */
|
|
if(x + directs[i].x_offset < nx + 1 &&
|
|
x + directs[i].x_offset > 0 &&
|
|
y + directs[i].y_offset < ny + 1 &&
|
|
y + directs[i].y_offset > 0) {
|
|
ipin = OPEN;
|
|
if(clb_to_clb_directs[i].to_clb_pin_start_index > clb_to_clb_directs[i].to_clb_pin_end_index) {
|
|
if(swap == TRUE) {
|
|
ipin = clb_to_clb_directs[i].to_clb_pin_end_index + offset;
|
|
} else {
|
|
ipin = clb_to_clb_directs[i].to_clb_pin_start_index - offset;
|
|
}
|
|
} else {
|
|
if(swap == TRUE) {
|
|
ipin = clb_to_clb_directs[i].to_clb_pin_end_index - offset;
|
|
} else {
|
|
ipin = clb_to_clb_directs[i].to_clb_pin_start_index + offset;
|
|
}
|
|
}
|
|
/* Add new ipin edge to list of edges */
|
|
grid_ofs = grid[x + directs[i].x_offset][y + directs[i].y_offset].offset;
|
|
inode = get_rr_node_index(x + directs[i].x_offset, y + directs[i].y_offset - grid_ofs, IPIN, ipin, L_rr_node_indices);
|
|
edge_list_head = insert_in_edge_list(edge_list_head, inode, delayless_switch);
|
|
new_edges++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
*edge_list_ptr = edge_list_head;
|
|
return new_edges;
|
|
}
|
|
|
|
|
|
|