#include /* Needed only for sqrt call (remove if sqrt removed) */ #include "util.h" #include "vpr_types.h" #include "globals.h" #include "rr_graph_util.h" #include "rr_graph2.h" #include "rr_graph_indexed_data.h" #include "read_xml_arch_file.h" /* mrFPGA: Xifan TANG */ #include "mrfpga_globals.h" /* end */ /******************* Subroutines local to this module ************************/ static void load_rr_indexed_data_base_costs(int nodes_per_chan, t_ivec *** L_rr_node_indices, enum e_base_cost_type base_cost_type, int wire_to_ipin_switch); static float get_delay_normalization_fac(int nodes_per_chan, t_ivec *** L_rr_node_indices); static float get_average_opin_delay(t_ivec *** L_rr_node_indices, int nodes_per_chan); static void load_rr_indexed_data_T_values(int index_start, int num_indices_to_load, t_rr_type rr_type, int nodes_per_chan, t_ivec *** L_rr_node_indices, const t_segment_inf * segment_inf); /******************** Subroutine definitions *********************************/ /* Allocates the rr_indexed_data array and loads it with appropriate values. * * It currently stores the segment type (or OPEN if the index doesn't * * correspond to an CHANX or CHANY type), the base cost of nodes of that * * type, and some info to allow rapid estimates of time to get to a target * * to be computed by the router. * * * Right now all SOURCES have the same base cost; and similarly there's only * * one base cost for each of SINKs, OPINs, and IPINs (four total). This can * * be changed just by allocating more space in the array below and changing * * the cost_index values for these rr_nodes, if you want to make some pins * * etc. more expensive than others. I give each segment type in an * * x-channel its own cost_index, and each segment type in a y-channel its * * own cost_index. */ void alloc_and_load_rr_indexed_data(INP const t_segment_inf * segment_inf, INP int num_segment, INP t_ivec *** L_rr_node_indices, INP int nodes_per_chan, int wire_to_ipin_switch, enum e_base_cost_type base_cost_type) { int iseg, length, i, index; num_rr_indexed_data = CHANX_COST_INDEX_START + (2 * num_segment); rr_indexed_data = (t_rr_indexed_data *) my_malloc( num_rr_indexed_data * sizeof(t_rr_indexed_data)); /* For rr_types that aren't CHANX or CHANY, base_cost is valid, but most * * * other fields are invalid. For IPINs, the T_linear field is also valid; * * * all other fields are invalid. For SOURCES, SINKs and OPINs, all fields * * * other than base_cost are invalid. Mark invalid fields as OPEN for safety. */ for (i = SOURCE_COST_INDEX; i <= IPIN_COST_INDEX; i++) { rr_indexed_data[i].ortho_cost_index = OPEN; rr_indexed_data[i].seg_index = OPEN; rr_indexed_data[i].inv_length = OPEN; rr_indexed_data[i].T_linear = OPEN; rr_indexed_data[i].T_quadratic = OPEN; rr_indexed_data[i].C_load = OPEN; } rr_indexed_data[IPIN_COST_INDEX].T_linear = switch_inf[wire_to_ipin_switch].Tdel; /* X-directed segments. */ for (iseg = 0; iseg < num_segment; iseg++) { index = CHANX_COST_INDEX_START + iseg; rr_indexed_data[index].ortho_cost_index = index + num_segment; if (segment_inf[iseg].longline) length = nx; else length = std::min(segment_inf[iseg].length, nx); rr_indexed_data[index].inv_length = 1. / length; rr_indexed_data[index].seg_index = iseg; } load_rr_indexed_data_T_values(CHANX_COST_INDEX_START, num_segment, CHANX, nodes_per_chan, L_rr_node_indices, segment_inf); /* Y-directed segments. */ for (iseg = 0; iseg < num_segment; iseg++) { index = CHANX_COST_INDEX_START + num_segment + iseg; rr_indexed_data[index].ortho_cost_index = index - num_segment; if (segment_inf[iseg].longline) length = ny; else length = std::min(segment_inf[iseg].length, ny); rr_indexed_data[index].inv_length = 1. / length; rr_indexed_data[index].seg_index = iseg; } load_rr_indexed_data_T_values((CHANX_COST_INDEX_START + num_segment), num_segment, CHANY, nodes_per_chan, L_rr_node_indices, segment_inf); load_rr_indexed_data_base_costs(nodes_per_chan, L_rr_node_indices, base_cost_type, wire_to_ipin_switch); } static void load_rr_indexed_data_base_costs(int nodes_per_chan, t_ivec *** L_rr_node_indices, enum e_base_cost_type base_cost_type, int wire_to_ipin_switch) { /* Loads the base_cost member of rr_indexed_data according to the specified * * base_cost_type. */ float delay_normalization_fac; int index; if (base_cost_type == DELAY_NORMALIZED) { delay_normalization_fac = get_delay_normalization_fac(nodes_per_chan, L_rr_node_indices); } else { delay_normalization_fac = 1.; } if (base_cost_type == DEMAND_ONLY || base_cost_type == DELAY_NORMALIZED) { rr_indexed_data[SOURCE_COST_INDEX].base_cost = delay_normalization_fac; /* rr_indexed_data[SOURCE_COST_INDEX].base_cost = 0; Xifan TANG: TODO: Update routing cost to 1*/ rr_indexed_data[SINK_COST_INDEX].base_cost = 0.; rr_indexed_data[OPIN_COST_INDEX].base_cost = delay_normalization_fac; /*rr_indexed_data[OPIN_COST_INDEX].base_cost = 0.95; Xifan TANG: TODO: Update routing cost to 1*/ #ifndef SPEC rr_indexed_data[IPIN_COST_INDEX].base_cost = 0.95 * delay_normalization_fac; #else /* Avoid roundoff for SPEC */ rr_indexed_data[IPIN_COST_INDEX].base_cost = delay_normalization_fac; #endif } else if (base_cost_type == INTRINSIC_DELAY) { rr_indexed_data[SOURCE_COST_INDEX].base_cost = 0.; rr_indexed_data[SINK_COST_INDEX].base_cost = 0.; rr_indexed_data[OPIN_COST_INDEX].base_cost = get_average_opin_delay( L_rr_node_indices, nodes_per_chan); rr_indexed_data[IPIN_COST_INDEX].base_cost = switch_inf[wire_to_ipin_switch].Tdel; } /* Load base costs for CHANX and CHANY segments */ for (index = CHANX_COST_INDEX_START; index < num_rr_indexed_data; index++) { if (base_cost_type == INTRINSIC_DELAY) rr_indexed_data[index].base_cost = rr_indexed_data[index].T_linear + rr_indexed_data[index].T_quadratic; else /* rr_indexed_data[index].base_cost = delay_normalization_fac / rr_indexed_data[index].inv_length; */ rr_indexed_data[index].base_cost = delay_normalization_fac; /* rr_indexed_data[index].base_cost = delay_normalization_fac * sqrt (1. / rr_indexed_data[index].inv_length); */ /* rr_indexed_data[index].base_cost = delay_normalization_fac * (1. + 1. / rr_indexed_data[index].inv_length); */ } /* Save a copy of the base costs -- if dynamic costing is used by the * * router, the base_cost values will get changed all the time and being * * able to restore them from a saved version is useful. */ for (index = 0; index < num_rr_indexed_data; index++) { rr_indexed_data[index].saved_base_cost = rr_indexed_data[index].base_cost; } } static float get_delay_normalization_fac(int nodes_per_chan, t_ivec *** L_rr_node_indices) { /* Returns the average delay to go 1 CLB distance along a wire. */ const int clb_dist = 3; /* Number of CLBs I think the average conn. goes. */ int inode, itrack, cost_index; float Tdel, Tdel_sum, frac_num_seg; Tdel_sum = 0.; for (itrack = 0; itrack < nodes_per_chan; itrack++) { inode = get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, CHANX, itrack, L_rr_node_indices); cost_index = rr_node[inode].cost_index; frac_num_seg = clb_dist * rr_indexed_data[cost_index].inv_length; Tdel = frac_num_seg * rr_indexed_data[cost_index].T_linear + frac_num_seg * frac_num_seg * rr_indexed_data[cost_index].T_quadratic; Tdel_sum += Tdel / (float) clb_dist; } for (itrack = 0; itrack < nodes_per_chan; itrack++) { inode = get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, CHANY, itrack, L_rr_node_indices); cost_index = rr_node[inode].cost_index; frac_num_seg = clb_dist * rr_indexed_data[cost_index].inv_length; Tdel = frac_num_seg * rr_indexed_data[cost_index].T_linear + frac_num_seg * frac_num_seg * rr_indexed_data[cost_index].T_quadratic; Tdel_sum += Tdel / (float) clb_dist; } return (Tdel_sum / (2. * nodes_per_chan)); } static float get_average_opin_delay(t_ivec *** L_rr_node_indices, int nodes_per_chan) { /* Returns the average delay from an OPIN to a wire in an adjacent channel. */ /* RESEARCH TODO: Got to think if this heuristic needs to change for hetero, right now, I'll calculate * the average delay of non-IO blocks */ int inode, ipin, iclass, iedge, itype, num_edges, to_switch, to_node, num_conn; float Cload, Tdel; Tdel = 0.; num_conn = 0; for (itype = 0; itype < num_types && &type_descriptors[itype] != IO_TYPE; itype++) { for (ipin = 0; ipin < type_descriptors[itype].num_pins; ipin++) { iclass = type_descriptors[itype].pin_class[ipin]; if (type_descriptors[itype].class_inf[iclass].type == DRIVER) { /* OPIN */ inode = get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, OPIN, ipin, L_rr_node_indices); num_edges = rr_node[inode].num_edges; for (iedge = 0; iedge < num_edges; iedge++) { to_node = rr_node[inode].edges[iedge]; to_switch = rr_node[inode].switches[iedge]; Cload = rr_node[to_node].C; Tdel += Cload * switch_inf[to_switch].R + switch_inf[to_switch].Tdel; num_conn++; } } } } Tdel /= (float) num_conn; return (Tdel); } static void load_rr_indexed_data_T_values(int index_start, int num_indices_to_load, t_rr_type rr_type, int nodes_per_chan, t_ivec *** L_rr_node_indices, const t_segment_inf * segment_inf) { /* Loads the average propagation times through segments of each index type * * for either all CHANX segment types or all CHANY segment types. It does * * this by looking at all the segments in one channel in the middle of the * * array and averaging the R and C values of all segments of the same type * * and using them to compute average delay values for this type of segment. */ int itrack, iseg, inode, cost_index, iswitch; float *C_total, *R_total; /* [0..num_rr_indexed_data - 1] */ int *num_nodes_of_index; /* [0..num_rr_indexed_data - 1] */ float Rnode, Cnode, Rsw, Tsw; num_nodes_of_index = (int *) my_calloc(num_rr_indexed_data, sizeof(int)); C_total = (float *) my_calloc(num_rr_indexed_data, sizeof(float)); R_total = (float *) my_calloc(num_rr_indexed_data, sizeof(float)); /* Get average C and R values for all the segments of this type in one * * channel segment, near the middle of the array. */ for (itrack = 0; itrack < nodes_per_chan; itrack++) { inode = get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, rr_type, itrack, L_rr_node_indices); cost_index = rr_node[inode].cost_index; num_nodes_of_index[cost_index]++; C_total[cost_index] += rr_node[inode].C; R_total[cost_index] += rr_node[inode].R; } for (cost_index = index_start; cost_index < index_start + num_indices_to_load; cost_index++) { if (num_nodes_of_index[cost_index] == 0) { /* Segments don't exist. */ rr_indexed_data[cost_index].T_linear = OPEN; rr_indexed_data[cost_index].T_quadratic = OPEN; rr_indexed_data[cost_index].C_load = OPEN; } else { Rnode = R_total[cost_index] / num_nodes_of_index[cost_index]; Cnode = C_total[cost_index] / num_nodes_of_index[cost_index]; /* mrFPGA: Xifan TANG */ if (is_isolation) { Cnode += switch_inf[iswitch].Cin + switch_inf[iswitch].Cout; } /* end */ iseg = rr_indexed_data[cost_index].seg_index; iswitch = segment_inf[iseg].wire_switch; Rsw = switch_inf[iswitch].R; Tsw = switch_inf[iswitch].Tdel; if (switch_inf[iswitch].buffered) { rr_indexed_data[cost_index].T_linear = Tsw + Rsw * Cnode + 0.5 * Rnode * Cnode; rr_indexed_data[cost_index].T_quadratic = 0.; rr_indexed_data[cost_index].C_load = 0.; } else { /* Pass transistor */ rr_indexed_data[cost_index].C_load = Cnode; /* See Dec. 23, 1997 notes for deriviation of formulae. */ rr_indexed_data[cost_index].T_linear = Tsw + 0.5 * Rsw * Cnode; rr_indexed_data[cost_index].T_quadratic = (Rsw + Rnode) * 0.5 * Cnode; /* mrFPGA: Xifan TANG */ if (is_mrFPGA && is_wire_buffer) { rr_indexed_data[cost_index].T_linear += wire_buffer_inf.R * Cnode + wire_buffer_inf.C * (Rnode + Rsw) + sqrt(rr_indexed_data[cost_index].T_quadratic * (wire_buffer_inf.Tdel + wire_buffer_inf.R * wire_buffer_inf.C)); rr_indexed_data[cost_index].T_quadratic = 0.; } /* end */ } } } free(num_nodes_of_index); free(C_total); free(R_total); }