OpenFPGA/vpr7_x2p/vpr/SRC/route/rr_graph_indexed_data.c

339 lines
13 KiB
C
Executable File

#include <math.h> /* Needed only for sqrt call (remove if sqrt removed) */
#include "util.h"
#include "vpr_types.h"
#include "globals.h"
#include "rr_graph_util.h"
#include "rr_graph2.h"
#include "rr_graph_indexed_data.h"
#include "read_xml_arch_file.h"
/* mrFPGA: Xifan TANG */
#include "mrfpga_globals.h"
/* end */
/******************* Subroutines local to this module ************************/
/* Loads the base_cost (and saved_base_cost) member of every             *
 * rr_indexed_data entry according to base_cost_type.                    */
static void load_rr_indexed_data_base_costs(int nodes_per_chan,
t_ivec *** L_rr_node_indices, enum e_base_cost_type base_cost_type,
int wire_to_ipin_switch);
/* Returns the average delay to go 1 CLB distance along a wire.          */
static float get_delay_normalization_fac(int nodes_per_chan,
t_ivec *** L_rr_node_indices);
/* Returns the average delay from an OPIN to a wire in an adjacent       *
 * channel.                                                              */
static float get_average_opin_delay(t_ivec *** L_rr_node_indices,
int nodes_per_chan);
/* Loads T_linear, T_quadratic and C_load for num_indices_to_load cost   *
 * indices starting at index_start, for channel nodes of type rr_type.   */
static void load_rr_indexed_data_T_values(int index_start,
int num_indices_to_load, t_rr_type rr_type, int nodes_per_chan,
t_ivec *** L_rr_node_indices, const t_segment_inf * segment_inf);
/******************** Subroutine definitions *********************************/
/* Allocates the global rr_indexed_data array and fills in every entry.      *
 *                                                                           *
 * The array holds one entry per cost index:                                 *
 *   - SOURCE_COST_INDEX .. IPIN_COST_INDEX: one shared entry for each of    *
 *     SOURCEs, SINKs, OPINs and IPINs.  Only base_cost is meaningful for    *
 *     these (plus T_linear for IPINs); every other field is set to OPEN.    *
 *   - CHANX_COST_INDEX_START .. +num_segment-1: one entry per segment type  *
 *     in an x-directed channel.                                             *
 *   - the following num_segment entries: one entry per segment type in a    *
 *     y-directed channel.                                                   *
 *                                                                           *
 * Each channel entry records its segment type (seg_index), the cost index   *
 * of the same segment type in the orthogonal direction (ortho_cost_index)   *
 * and the reciprocal of its length in blocks (inv_length).  The timing      *
 * fields and the base costs are then loaded by the helpers of this module.  *
 *                                                                           *
 * To make some pins more expensive than others, allocate more entries here  *
 * and give the corresponding rr_nodes different cost_index values.          */
void alloc_and_load_rr_indexed_data(INP const t_segment_inf * segment_inf,
		INP int num_segment, INP t_ivec *** L_rr_node_indices,
		INP int nodes_per_chan, int wire_to_ipin_switch,
		enum e_base_cost_type base_cost_type) {

	const int chany_start = CHANX_COST_INDEX_START + num_segment;
	int seg, pin_index;

	num_rr_indexed_data = CHANX_COST_INDEX_START + (2 * num_segment);
	rr_indexed_data = (t_rr_indexed_data *) my_malloc(
			num_rr_indexed_data * sizeof(t_rr_indexed_data));

	/* Non-channel entries: flag everything except base_cost as invalid. */
	for (pin_index = SOURCE_COST_INDEX; pin_index <= IPIN_COST_INDEX;
			pin_index++) {
		t_rr_indexed_data *entry = &rr_indexed_data[pin_index];

		entry->ortho_cost_index = OPEN;
		entry->seg_index = OPEN;
		entry->inv_length = OPEN;
		entry->T_linear = OPEN;
		entry->T_quadratic = OPEN;
		entry->C_load = OPEN;
	}

	/* IPINs are the one exception: their T_linear is the delay of the *
	 * wire-to-input-pin switch.                                        */
	rr_indexed_data[IPIN_COST_INDEX].T_linear =
			switch_inf[wire_to_ipin_switch].Tdel;

	/* Geometry of the x- and y-directed entries of every segment type. */
	for (seg = 0; seg < num_segment; seg++) {
		const int x_index = CHANX_COST_INDEX_START + seg;
		const int y_index = chany_start + seg;
		int x_span, y_span;

		/* A long line spans the whole device; any other segment is *
		 * clipped to the device size.                               */
		if (segment_inf[seg].longline) {
			x_span = nx;
			y_span = ny;
		} else {
			x_span = std::min(segment_inf[seg].length, nx);
			y_span = std::min(segment_inf[seg].length, ny);
		}

		rr_indexed_data[x_index].ortho_cost_index = y_index;
		rr_indexed_data[x_index].inv_length = 1. / x_span;
		rr_indexed_data[x_index].seg_index = seg;

		rr_indexed_data[y_index].ortho_cost_index = x_index;
		rr_indexed_data[y_index].inv_length = 1. / y_span;
		rr_indexed_data[y_index].seg_index = seg;
	}

	/* Timing values need seg_index to be set, so they come afterwards. */
	load_rr_indexed_data_T_values(CHANX_COST_INDEX_START, num_segment, CHANX,
			nodes_per_chan, L_rr_node_indices, segment_inf);
	load_rr_indexed_data_T_values(chany_start, num_segment, CHANY,
			nodes_per_chan, L_rr_node_indices, segment_inf);

	/* Base costs read the timing values (and inv_length), so they are last. */
	load_rr_indexed_data_base_costs(nodes_per_chan, L_rr_node_indices,
			base_cost_type, wire_to_ipin_switch);
}
static void load_rr_indexed_data_base_costs(int nodes_per_chan,
t_ivec *** L_rr_node_indices, enum e_base_cost_type base_cost_type,
int wire_to_ipin_switch) {
/* Loads the base_cost member of rr_indexed_data according to the specified *
* base_cost_type. */
float delay_normalization_fac;
int index;
if (base_cost_type == DELAY_NORMALIZED) {
delay_normalization_fac = get_delay_normalization_fac(nodes_per_chan,
L_rr_node_indices);
} else {
delay_normalization_fac = 1.;
}
if (base_cost_type == DEMAND_ONLY || base_cost_type == DELAY_NORMALIZED) {
rr_indexed_data[SOURCE_COST_INDEX].base_cost = delay_normalization_fac;
/* rr_indexed_data[SOURCE_COST_INDEX].base_cost = 0; Xifan TANG: TODO: Update routing cost to 1*/
rr_indexed_data[SINK_COST_INDEX].base_cost = 0.;
rr_indexed_data[OPIN_COST_INDEX].base_cost = delay_normalization_fac;
/*rr_indexed_data[OPIN_COST_INDEX].base_cost = 0.95; Xifan TANG: TODO: Update routing cost to 1*/
#ifndef SPEC
rr_indexed_data[IPIN_COST_INDEX].base_cost = 0.95
* delay_normalization_fac;
#else /* Avoid roundoff for SPEC */
rr_indexed_data[IPIN_COST_INDEX].base_cost =
delay_normalization_fac;
#endif
}
else if (base_cost_type == INTRINSIC_DELAY) {
rr_indexed_data[SOURCE_COST_INDEX].base_cost = 0.;
rr_indexed_data[SINK_COST_INDEX].base_cost = 0.;
rr_indexed_data[OPIN_COST_INDEX].base_cost = get_average_opin_delay(
L_rr_node_indices, nodes_per_chan);
rr_indexed_data[IPIN_COST_INDEX].base_cost =
switch_inf[wire_to_ipin_switch].Tdel;
}
/* Load base costs for CHANX and CHANY segments */
for (index = CHANX_COST_INDEX_START; index < num_rr_indexed_data; index++) {
if (base_cost_type == INTRINSIC_DELAY)
rr_indexed_data[index].base_cost = rr_indexed_data[index].T_linear
+ rr_indexed_data[index].T_quadratic;
else
/* rr_indexed_data[index].base_cost = delay_normalization_fac /
rr_indexed_data[index].inv_length; */
rr_indexed_data[index].base_cost = delay_normalization_fac;
/* rr_indexed_data[index].base_cost = delay_normalization_fac *
sqrt (1. / rr_indexed_data[index].inv_length); */
/* rr_indexed_data[index].base_cost = delay_normalization_fac *
(1. + 1. / rr_indexed_data[index].inv_length); */
}
/* Save a copy of the base costs -- if dynamic costing is used by the *
* router, the base_cost values will get changed all the time and being *
* able to restore them from a saved version is useful. */
for (index = 0; index < num_rr_indexed_data; index++) {
rr_indexed_data[index].saved_base_cost =
rr_indexed_data[index].base_cost;
}
}
static float get_delay_normalization_fac(int nodes_per_chan,
t_ivec *** L_rr_node_indices) {
/* Returns the average delay to go 1 CLB distance along a wire. */
const int clb_dist = 3; /* Number of CLBs I think the average conn. goes. */
int inode, itrack, cost_index;
float Tdel, Tdel_sum, frac_num_seg;
Tdel_sum = 0.;
for (itrack = 0; itrack < nodes_per_chan; itrack++) {
inode = get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, CHANX, itrack,
L_rr_node_indices);
cost_index = rr_node[inode].cost_index;
frac_num_seg = clb_dist * rr_indexed_data[cost_index].inv_length;
Tdel = frac_num_seg * rr_indexed_data[cost_index].T_linear
+ frac_num_seg * frac_num_seg
* rr_indexed_data[cost_index].T_quadratic;
Tdel_sum += Tdel / (float) clb_dist;
}
for (itrack = 0; itrack < nodes_per_chan; itrack++) {
inode = get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, CHANY, itrack,
L_rr_node_indices);
cost_index = rr_node[inode].cost_index;
frac_num_seg = clb_dist * rr_indexed_data[cost_index].inv_length;
Tdel = frac_num_seg * rr_indexed_data[cost_index].T_linear
+ frac_num_seg * frac_num_seg
* rr_indexed_data[cost_index].T_quadratic;
Tdel_sum += Tdel / (float) clb_dist;
}
return (Tdel_sum / (2. * nodes_per_chan));
}
/* Returns the average delay from an OPIN to a wire in an adjacent channel.  *
 *                                                                           *
 * For every block type visited, each pin whose class is a DRIVER is looked  *
 * up as an OPIN rr_node at location ((nx + 1) / 2, (ny + 1) / 2).  Every    *
 * outgoing edge of that node contributes                                    *
 *     C(target node) * R(switch) + Tdel(switch)                             *
 * and the result is the mean over all edges seen.  If no edge is seen the   *
 * function returns 0 instead of dividing by zero.                           *
 *                                                                           *
 * nodes_per_chan is not used; it is kept so the signature stays unchanged.  *
 *                                                                           *
 * RESEARCH TODO: Got to think if this heuristic needs to change for hetero, *
 * right now, I'll calculate the average delay of non-IO blocks.             */
static float get_average_opin_delay(t_ivec *** L_rr_node_indices,
		int nodes_per_chan) {

	int inode, ipin, iclass, iedge, itype, num_edges, to_switch, to_node;
	int num_conn = 0;
	float Cload;
	float Tdel = 0.;

	/* NOTE(review): this loop STOPS at the first type equal to IO_TYPE; it *
	 * does not skip it.  Types stored after IO_TYPE are therefore never    *
	 * visited.  Whether that matches the "non-IO blocks" intent depends on *
	 * the ordering of type_descriptors -- confirm before changing it.      */
	for (itype = 0; itype < num_types && &type_descriptors[itype] != IO_TYPE;
			itype++) {
		for (ipin = 0; ipin < type_descriptors[itype].num_pins; ipin++) {
			iclass = type_descriptors[itype].pin_class[ipin];
			if (type_descriptors[itype].class_inf[iclass].type != DRIVER) {
				continue; /* Not an OPIN. */
			}

			inode = get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, OPIN, ipin,
					L_rr_node_indices);
			num_edges = rr_node[inode].num_edges;

			for (iedge = 0; iedge < num_edges; iedge++) {
				to_node = rr_node[inode].edges[iedge];
				to_switch = rr_node[inode].switches[iedge];
				Cload = rr_node[to_node].C;
				Tdel += Cload * switch_inf[to_switch].R
						+ switch_inf[to_switch].Tdel;
				num_conn++;
			}
		}
	}

	/* Without this guard 0. / 0 yields NaN, which would become the OPIN *
	 * base cost.                                                         */
	if (num_conn == 0) {
		return (0.);
	}

	Tdel /= (float) num_conn;
	return (Tdel);
}
/* Loads the average propagation times through segments of each index type  *
 * for either all CHANX segment types or all CHANY segment types (rr_type). *
 *                                                                          *
 * All tracks of one channel at location ((nx + 1) / 2, (ny + 1) / 2) are   *
 * inspected and the R and C values of the nodes sharing a cost index are   *
 * averaged.  For each cost index in                                        *
 * [index_start, index_start + num_indices_to_load) the averages are        *
 * combined with the segment's wire switch (segment_inf[].wire_switch) to   *
 * fill in T_linear, T_quadratic and C_load.  A cost index with no node in  *
 * the sampled channel gets OPEN in all three fields.                       *
 *                                                                          *
 * rr_indexed_data[].seg_index must already be set for the indices loaded.  */
static void load_rr_indexed_data_T_values(int index_start,
		int num_indices_to_load, t_rr_type rr_type, int nodes_per_chan,
		t_ivec *** L_rr_node_indices, const t_segment_inf * segment_inf) {

	int itrack, iseg, inode, cost_index, iswitch;
	float *C_total, *R_total; /* [0..num_rr_indexed_data - 1] */
	int *num_nodes_of_index; /* [0..num_rr_indexed_data - 1] */
	float Rnode, Cnode, Rsw, Tsw;

	num_nodes_of_index = (int *) my_calloc(num_rr_indexed_data, sizeof(int));
	C_total = (float *) my_calloc(num_rr_indexed_data, sizeof(float));
	R_total = (float *) my_calloc(num_rr_indexed_data, sizeof(float));

	/* Get average C and R values for all the segments of this type in one *
	 * channel segment, near the middle of the array.                      */
	for (itrack = 0; itrack < nodes_per_chan; itrack++) {
		inode = get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, rr_type, itrack,
				L_rr_node_indices);
		cost_index = rr_node[inode].cost_index;
		num_nodes_of_index[cost_index]++;
		C_total[cost_index] += rr_node[inode].C;
		R_total[cost_index] += rr_node[inode].R;
	}

	for (cost_index = index_start;
			cost_index < index_start + num_indices_to_load; cost_index++) {

		if (num_nodes_of_index[cost_index] == 0) { /* Segments don't exist. */
			rr_indexed_data[cost_index].T_linear = OPEN;
			rr_indexed_data[cost_index].T_quadratic = OPEN;
			rr_indexed_data[cost_index].C_load = OPEN;
			continue;
		}

		Rnode = R_total[cost_index] / num_nodes_of_index[cost_index];
		Cnode = C_total[cost_index] / num_nodes_of_index[cost_index];

		/* Resolve the wire switch of this segment type first: the *
		 * isolation adjustment just below needs it.                */
		iseg = rr_indexed_data[cost_index].seg_index;
		iswitch = segment_inf[iseg].wire_switch;

		/* mrFPGA: Xifan TANG */
		if (is_isolation) {
			/* Add the switch's input and output capacitance to the wire. */
			Cnode += switch_inf[iswitch].Cin + switch_inf[iswitch].Cout;
		}
		/* end */

		Rsw = switch_inf[iswitch].R;
		Tsw = switch_inf[iswitch].Tdel;

		if (switch_inf[iswitch].buffered) {
			rr_indexed_data[cost_index].T_linear = Tsw + Rsw * Cnode
					+ 0.5 * Rnode * Cnode;
			rr_indexed_data[cost_index].T_quadratic = 0.;
			rr_indexed_data[cost_index].C_load = 0.;
		} else { /* Pass transistor */
			rr_indexed_data[cost_index].C_load = Cnode;

			/* See Dec. 23, 1997 notes for derivation of formulae. */
			rr_indexed_data[cost_index].T_linear = Tsw + 0.5 * Rsw * Cnode;
			rr_indexed_data[cost_index].T_quadratic = (Rsw + Rnode) * 0.5
					* Cnode;

			/* mrFPGA: Xifan TANG */
			if (is_mrFPGA && is_wire_buffer) {
				/* With wire buffers the quadratic term is folded into the *
				 * linear one and then cleared.                             */
				rr_indexed_data[cost_index].T_linear += wire_buffer_inf.R * Cnode
						+ wire_buffer_inf.C * (Rnode + Rsw)
						+ sqrt(rr_indexed_data[cost_index].T_quadratic
								* (wire_buffer_inf.Tdel
										+ wire_buffer_inf.R * wire_buffer_inf.C));
				rr_indexed_data[cost_index].T_quadratic = 0.;
			}
			/* end */
		}
	}

	free(num_nodes_of_index);
	free(C_total);
	free(R_total);
}