Bug fix for memory leaks in pb_rr_graph allocation and power estimation

tangxifan 2019-06-15 12:23:36 -06:00
parent d3296d0975
commit c8bf456097
4 changed files with 34 additions and 9 deletions

View File

@ -403,7 +403,7 @@ void alloc_and_load_rr_graph_route_structs(t_rr_graph* local_rr_graph) {
int inode;
local_rr_graph->rr_node_route_inf = (t_rr_node_route_inf *) my_malloc(local_rr_graph->num_rr_nodes * sizeof(t_rr_node_route_inf));
local_rr_graph->rr_node_route_inf = (t_rr_node_route_inf *) my_calloc(local_rr_graph->num_rr_nodes, sizeof(t_rr_node_route_inf));
for (inode = 0; inode < local_rr_graph->num_rr_nodes; inode++) {
local_rr_graph->rr_node_route_inf[inode].prev_node = NO_PREVIOUS;
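
For reference, the one-line change above replaces VPR's my_malloc wrapper with my_calloc, so the per-node routing bookkeeping starts zero-initialized instead of holding indeterminate values. Below is a minimal standalone sketch of the difference, using the standard allocators and a hypothetical stand-in struct rather than the real t_rr_node_route_inf:

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for t_rr_node_route_inf */
typedef struct {
    int prev_node;
    float path_cost;
} route_inf_t;

int main(void) {
    const int num_rr_nodes = 4;

    /* malloc: contents are indeterminate, every field must be written before use */
    route_inf_t *a = (route_inf_t *) malloc(num_rr_nodes * sizeof(route_inf_t));

    /* calloc: memory is zero-filled, so fields the init loop never touches start at 0 */
    route_inf_t *b = (route_inf_t *) calloc(num_rr_nodes, sizeof(route_inf_t));

    if (a == NULL || b == NULL) {
        return 1;
    }

    printf("calloc'd prev_node = %d, path_cost = %f\n", b[0].prev_node, (double) b[0].path_cost);

    free(a);
    free(b);
    return 0;
}

The explicit loop in the real code still sets prev_node to NO_PREVIOUS; calloc just guarantees a defined starting value for any field the loop does not touch.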

View File

@ -33,6 +33,8 @@
#include "fpga_x2p_pbtypes_utils.h"
#include "fpga_x2p_globals.h"
#include "fpga_x2p_pb_rr_graph.h"
/* Count the number of rr_graph nodes that should be allocated
* (a) INPUT pins at the top-level pb_graph_node should each get a local_rr_node plus a SOURCE
* (b) CLOCK pins at the top-level pb_graph_node should each get a local_rr_node plus a SOURCE
@ -104,6 +106,7 @@ void init_one_rr_node_pack_cost_for_phy_graph_node(INP t_pb_graph_pin* cur_pb_gr
/* Override the fan-in and fan-out for a top/primitive pb_graph_node */
static
void override_one_rr_node_for_top_primitive_phy_pb_graph_node(INP t_pb_graph_pin* cur_pb_graph_pin,
INOUTP t_rr_graph* local_rr_graph,
int cur_rr_node_index,
@ -151,6 +154,7 @@ void override_one_rr_node_for_top_primitive_phy_pb_graph_node(INP t_pb_graph_pin
}
/* initialize a rr_node in a rr_graph of a physical pb_graph_node */
static
void init_one_rr_node_for_phy_pb_graph_node(INP t_pb_graph_pin* cur_pb_graph_pin,
INOUTP t_rr_graph* local_rr_graph,
int cur_rr_node_index,
@ -206,7 +210,9 @@ void init_one_rr_node_for_phy_pb_graph_node(INP t_pb_graph_pin* cur_pb_graph_pin
local_rr_graph->rr_node[cur_rr_node_index].prev_edge = OPEN;
local_rr_graph->rr_node[cur_rr_node_index].capacity = 1;
local_rr_graph->rr_node[cur_rr_node_index].occ = 0;
local_rr_graph->rr_node[cur_rr_node_index].type = rr_node_type;
local_rr_graph->rr_node[cur_rr_node_index].cost_index = 0;
return;
}
@ -270,6 +276,7 @@ void connect_one_rr_node_for_phy_pb_graph_node(INP t_pb_graph_pin* cur_pb_graph_
/* Recursively configure all the rr_nodes in the rr_graph
* Initialize the routing cost, fan-in rr_nodes and fan-out rr_nodes, and switches
*/
static
void rec_init_rr_graph_for_phy_pb_graph_node(INP t_pb_graph_node* cur_pb_graph_node,
INOUTP t_rr_graph* local_rr_graph,
int* cur_rr_node_index) {
@ -418,6 +425,7 @@ void rec_init_rr_graph_for_phy_pb_graph_node(INP t_pb_graph_node* cur_pb_graph_n
/* Recursively connect all the rr_nodes in the rr_graph
* output_edges, output_switches
*/
static
void rec_connect_rr_graph_for_phy_pb_graph_node(INP t_pb_graph_node* cur_pb_graph_node,
INOUTP t_rr_graph* local_rr_graph,
int* cur_rr_node_index) {
@ -598,6 +606,7 @@ void alloc_and_load_rr_graph_for_phy_pb_graph_node(INP t_pb_graph_node* top_pb_g
/* Check the vpack_net_num of a rr_node mapped to a pb_graph_pin and
* mark the used vpack_net_num in the list
*/
static
void mark_vpack_net_used_in_pb_pin(t_pb* cur_op_pb, t_pb_graph_pin* cur_pb_graph_pin,
int L_num_vpack_nets, boolean* vpack_net_used_in_pb) {
int inode;
@ -621,6 +630,7 @@ void mark_vpack_net_used_in_pb_pin(t_pb* cur_op_pb, t_pb_graph_pin* cur_pb_graph
/* Recursively visit all the child pbs and
* mark the used vpack_net_num in the list
*/
static
void mark_vpack_net_used_in_pb(t_pb* cur_op_pb,
int L_num_vpack_nets, boolean* vpack_net_used_in_pb) {
int mode_index, ipb, jpb;
@ -718,6 +728,7 @@ void alloc_and_load_phy_pb_rr_graph_nets(INP t_pb* cur_op_pb,
}
/* Find the rr_node in the primitive node of a pb_rr_graph*/
static
void sync_pb_graph_pin_vpack_net_num_to_phy_pb(t_rr_node* cur_op_pb_rr_graph,
t_pb_graph_pin* cur_pb_graph_pin,
t_rr_graph* local_rr_graph) {
@ -787,6 +798,7 @@ void sync_pb_graph_pin_vpack_net_num_to_phy_pb(t_rr_node* cur_op_pb_rr_graph,
return;
}
static
void rec_sync_wired_pb_vpack_net_num_to_phy_pb_rr_graph(t_pb_graph_node* cur_pb_graph_node,
t_rr_node* op_pb_rr_graph,
t_rr_graph* local_rr_graph) {
@ -851,6 +863,7 @@ void rec_sync_wired_pb_vpack_net_num_to_phy_pb_rr_graph(t_pb_graph_node* cur_pb_
* synchronize the vpack_net_num of the top-level/primitive pb_graph_pin
* to the physical pb rr_nodes
*/
static
void rec_sync_pb_vpack_net_num_to_phy_pb_rr_graph(t_pb* cur_op_pb,
t_rr_graph* local_rr_graph) {
int mode_index, ipb, jpb;
@ -921,6 +934,7 @@ void rec_sync_pb_vpack_net_num_to_phy_pb_rr_graph(t_pb* cur_op_pb,
* 3. Find the SOURCE and SINK rr_nodes related to the pb_graph_pin
* 4. Configure the net_rr_terminals with the SINK/SOURCE rr_nodes
*/
static
void alloc_and_load_phy_pb_rr_graph_net_rr_terminals(INP t_pb* cur_op_pb,
t_rr_graph* local_rr_graph) {
int inet, inode, rr_node_net_name;
@ -1013,10 +1027,11 @@ void alloc_and_load_phy_pb_rr_graph_net_rr_terminals(INP t_pb* cur_op_pb,
return;
}
static
void alloc_pb_rr_graph_rr_indexed_data(t_rr_graph* local_rr_graph) {
/* Inside a cluster, rr_indexed_data cost is not considered; set it to 1 since other costs are multiplied by it */
alloc_rr_graph_rr_indexed_data(local_rr_graph, 1);
local_rr_graph->rr_indexed_data[0].base_cost = 1;
local_rr_graph->rr_indexed_data[0].base_cost = 1.;
return;
}
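
Most of the other hunks in this file, and in the power-estimation code later in the commit, simply add the static qualifier to helpers that are only called from within their own source file. static gives a function internal linkage: its name is not exported, so it cannot clash with an identically named function in another translation unit at link time. A minimal sketch of the pattern, using a hypothetical helper name:

/* example.cpp -- hypothetical translation unit */
#include <stdio.h>

/* Without 'static' this helper would have external linkage and could collide
 * with a same-named function defined in another source file. */
static void report_status(void) {
    printf("local helper, visible only inside example.cpp\n");
}

int main(void) {
    report_status();
    return 0;
}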
@ -1025,8 +1040,8 @@ void alloc_pb_rr_graph_rr_indexed_data(t_rr_graph* local_rr_graph) {
* Add an output edge to the rr_node of the used input
* connect it to the rr_node of the used LUT output
*/
static
void add_rr_node_edge_to_one_wired_lut(t_pb_graph_node* cur_pb_graph_node,
t_pb_type* cur_pb_type,
t_rr_node* op_pb_rr_graph,
t_rr_graph* local_rr_graph) {
int iport, ipin;
@ -1140,6 +1155,7 @@ void add_rr_node_edge_to_one_wired_lut(t_pb_graph_node* cur_pb_graph_node,
/* Add rr edges connecting an input of a LUT to its output
* IMPORTANT: this only applies to a LUT operating in wire mode (as a buffer)
*/
static
void rec_add_unused_rr_graph_wired_lut_rr_edges(INP t_pb_graph_node* cur_op_pb_graph_node,
INP t_rr_node* cur_op_pb_rr_graph,
INOUTP t_rr_graph* local_rr_graph) {
@ -1162,7 +1178,6 @@ void rec_add_unused_rr_graph_wired_lut_rr_edges(INP t_pb_graph_node* cur_op_pb_g
* connect it to the rr_node of the used LUT output
*/
add_rr_node_edge_to_one_wired_lut(cur_op_pb_graph_node,
cur_pb_type,
cur_op_pb_rr_graph,
local_rr_graph);
}
@ -1194,6 +1209,7 @@ void rec_add_unused_rr_graph_wired_lut_rr_edges(INP t_pb_graph_node* cur_op_pb_g
/* Add rr edges connecting an input of a LUT to its output
* IMPORTANT: this only applies to a LUT operating in wire mode (as a buffer)
*/
static
void rec_add_rr_graph_wired_lut_rr_edges(INP t_pb* cur_op_pb,
INOUTP t_rr_graph* local_rr_graph) {
int mode_index, ipb, jpb, imode;
@ -1214,7 +1230,6 @@ void rec_add_rr_graph_wired_lut_rr_edges(INP t_pb* cur_op_pb,
* connect it to the rr_node of the used LUT output
*/
add_rr_node_edge_to_one_wired_lut(cur_op_pb->pb_graph_node,
cur_pb_type,
cur_op_pb->rr_graph,
local_rr_graph);
}
@ -1253,6 +1268,7 @@ void rec_add_rr_graph_wired_lut_rr_edges(INP t_pb* cur_op_pb,
* For each multi-source net, add a new source as the unique source for routing purposes.
* Accordingly, edges have to be added to the descendants of the sources
*/
static
int add_virtual_sources_to_rr_graph_multi_sources(t_rr_graph* local_rr_graph) {
int inet, isrc;
int unique_src_node;
@ -1271,6 +1287,8 @@ int add_virtual_sources_to_rr_graph_multi_sources(t_rr_graph* local_rr_graph) {
unique_src_node = local_rr_graph->num_rr_nodes - 1;
local_rr_graph->rr_node[unique_src_node].type = SOURCE;
local_rr_graph->rr_node[unique_src_node].capacity = 1;
local_rr_graph->rr_node[unique_src_node].occ = 0;
local_rr_graph->rr_node[unique_src_node].cost_index = 0;
local_rr_graph->rr_node[unique_src_node].fan_in = 0;
local_rr_graph->rr_node[unique_src_node].num_drive_rr_nodes = 0;
local_rr_graph->rr_node[unique_src_node].drive_rr_nodes = NULL;
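
The assignments to occ and cost_index added above make sure the freshly appended virtual SOURCE node is fully initialized. Below is a simplified sketch of the overall pattern described in the comment; ex_graph/ex_node and add_virtual_source are toy stand-ins, not the real t_rr_graph/t_rr_node types, and the virtual source is simply appended and then wired to the net's original source nodes:

#include <stdlib.h>

/* Toy stand-ins for the real t_rr_graph / t_rr_node */
typedef enum { EX_SOURCE, EX_SINK, EX_PIN } ex_node_type;

typedef struct {
    ex_node_type type;
    int capacity;
    int occ;
    int cost_index;
    int fan_in;
    int num_edges;
    int *edges;            /* indices of downstream nodes */
} ex_node;

typedef struct {
    int num_nodes;
    ex_node *nodes;
} ex_graph;

/* Append one virtual SOURCE node and wire it to the net's original sources,
 * so the router sees a single unique source for the net. Returns the new
 * node index, or -1 on allocation failure. */
static int add_virtual_source(ex_graph *g, const int *net_srcs, int num_srcs) {
    ex_node *grown = (ex_node *) realloc(g->nodes, (g->num_nodes + 1) * sizeof(ex_node));
    if (grown == NULL) {
        return -1;
    }
    g->nodes = grown;
    int new_src = g->num_nodes++;

    /* Initialize every field of the new node explicitly -- including occ and
     * cost_index, the two fields the hunk above starts setting. */
    g->nodes[new_src].type = EX_SOURCE;
    g->nodes[new_src].capacity = 1;
    g->nodes[new_src].occ = 0;
    g->nodes[new_src].cost_index = 0;
    g->nodes[new_src].fan_in = 0;

    /* Fan out from the virtual source to each original source of the net */
    g->nodes[new_src].edges = (int *) malloc(num_srcs * sizeof(int));
    g->nodes[new_src].num_edges = (g->nodes[new_src].edges != NULL) ? num_srcs : 0;
    for (int i = 0; i < g->nodes[new_src].num_edges; i++) {
        g->nodes[new_src].edges[i] = net_srcs[i];
        g->nodes[net_srcs[i]].fan_in++;
    }
    return new_src;
}

int main(void) {
    ex_graph g;
    g.num_nodes = 2;
    g.nodes = (ex_node *) calloc(g.num_nodes, sizeof(ex_node));
    if (g.nodes == NULL) {
        return 1;
    }
    g.nodes[0].type = EX_SOURCE;   /* two original sources of one net */
    g.nodes[1].type = EX_SOURCE;

    int srcs[2] = { 0, 1 };
    int virt = add_virtual_source(&g, srcs, 2);

    if (virt >= 0) {
        free(g.nodes[virt].edges);
    }
    free(g.nodes);
    return (virt >= 0) ? 0 : 1;
}

The real code of course operates on t_rr_graph and also updates the net's terminal bookkeeping; the sketch only shows the append-and-wire step.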
@ -1314,6 +1332,9 @@ void alloc_and_load_rr_graph_for_phy_pb(INP t_pb* cur_op_pb,
/* Allocate rr_graph*/
cur_phy_pb->rr_graph = (t_rr_graph*) my_calloc(1, sizeof(t_rr_graph));
/* Allocate and initialize cost index */
alloc_pb_rr_graph_rr_indexed_data(cur_phy_pb->rr_graph);
/* Create rr_graph */
alloc_and_load_rr_graph_for_phy_pb_graph_node(cur_phy_pb->pb_graph_node, cur_phy_pb->rr_graph);
@ -1332,8 +1353,6 @@ void alloc_and_load_rr_graph_for_phy_pb(INP t_pb* cur_op_pb,
/* Allocate trace in rr_graph */
alloc_rr_graph_route_static_structs(cur_phy_pb->rr_graph, nx * ny); /* TODO: nx * ny should be reduced for pb-only routing */
alloc_pb_rr_graph_rr_indexed_data(cur_phy_pb->rr_graph);
/* Fill the net_rr_terminals with
* 1. pin-to-pin mapping in pb_graph_node in cur_op_pb
* 2. rr_graph in the cur_op_pb

View File

@ -201,6 +201,7 @@ static void power_usage_primitive(t_power_usage * power_usage, t_pb * pb,
}
}
static
void power_usage_local_pin_toggle(t_power_usage * power_usage, t_pb * pb,
t_pb_graph_pin * pin) {
float scale_factor;
@ -222,6 +223,7 @@ void power_usage_local_pin_toggle(t_power_usage * power_usage, t_pb * pb,
/ g_solution_inf.T_crit;
}
static
void power_usage_local_pin_buffer_and_wire(t_power_usage * power_usage,
t_pb * pb, t_pb_graph_pin * pin) {
t_power_usage sub_power_usage;
@ -1027,6 +1029,7 @@ static void power_usage_routing(t_power_usage * power_usage,
}
}
static
void power_alloc_and_init_pb_pin(t_pb_graph_pin * pin) {
int port_idx;
t_port * port_to_find;
@ -1085,6 +1088,7 @@ void power_alloc_and_init_pb_pin(t_pb_graph_pin * pin) {
}
}
static
void power_init_pb_pins_rec(t_pb_graph_node * pb_node) {
int mode;
int type;
@ -1131,6 +1135,7 @@ void power_init_pb_pins_rec(t_pb_graph_node * pb_node) {
}
}
static
void power_pb_pins_init() {
int type_idx;
@ -1141,6 +1146,7 @@ void power_pb_pins_init() {
}
}
static
void power_routing_init(t_det_routing_arch * routing_arch) {
int net_idx;
int rr_node_idx;
@ -1367,7 +1373,7 @@ boolean power_uninit(void) {
}
delete mux_info;
}
free(g_power_commonly_used);
delete g_power_commonly_used;
if (g_power_output->out) {
fclose(g_power_output->out);
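
The free()-to-delete change above matters because deallocation must match the allocator. Assuming g_power_commonly_used is created with C++ new (the allocation site is not shown in this diff), releasing it with free() skips the destructor and is undefined behaviour; delete runs the destructor, which is where members such as the per-mux data get cleaned up. A hedged sketch with a hypothetical stand-in type:

/* Hypothetical stand-in for the real t_power_commonly_used */
struct commonly_used_t {
    float *nmos_table = nullptr;
    ~commonly_used_t() { delete[] nmos_table; }   /* member cleanup lives here */
};

int main() {
    commonly_used_t *g = new commonly_used_t();
    g->nmos_table = new float[16]();

    /* free(g) would bypass ~commonly_used_t(), leaking nmos_table, and calling
     * free on memory obtained with new is undefined behaviour anyway. */
    delete g;   /* runs the destructor, then releases the object */
    return 0;
}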

View File

@ -32,5 +32,5 @@ cd -
# Run VPR
#valgrind
./vpr $arch_xml_file $blif_file --full_stats --nodisp --activity_file $act_file --fpga_verilog --fpga_verilog_dir $verilog_output_dirpath/$verilog_output_dirname --fpga_x2p_rename_illegal_port --fpga_bitstream_generator --fpga_verilog_print_top_testbench --fpga_verilog_print_input_blif_testbench --fpga_verilog_include_timing --fpga_verilog_include_signal_init --fpga_verilog_print_formal_verification_top_netlist --fpga_verilog_print_autocheck_top_testbench $verilog_reference --fpga_verilog_print_user_defined_template --route_chan_width $vpr_route_chan_width --fpga_verilog_include_icarus_simulator --fpga_verilog_print_report_timing_tcl --power --tech_properties $tech_file --fpga_verilog_print_sdc_pnr --fpga_verilog_print_sdc_analysis --fpga_x2p_compact_routing_hierarchy
./vpr $arch_xml_file $blif_file --full_stats --nodisp --activity_file $act_file --fpga_verilog --fpga_verilog_dir $verilog_output_dirpath/$verilog_output_dirname --fpga_x2p_rename_illegal_port --fpga_bitstream_generator --fpga_verilog_print_top_testbench --fpga_verilog_print_input_blif_testbench --fpga_verilog_include_timing --fpga_verilog_include_signal_init --fpga_verilog_print_formal_verification_top_netlist --fpga_verilog_print_autocheck_top_testbench $verilog_reference --fpga_verilog_print_user_defined_template --route_chan_width $vpr_route_chan_width --fpga_verilog_include_icarus_simulator --fpga_verilog_print_report_timing_tcl --power --tech_properties $tech_file --fpga_verilog_print_sdc_pnr --fpga_verilog_print_sdc_analysis --fpga_x2p_compact_routing_hierarchy
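
Since the commit is about plugging memory leaks, the commented-out valgrind line above is the natural way to verify them; one hedged example of re-enabling it (the flag choice is illustrative, and the options are the same as on the line above):

valgrind --leak-check=full ./vpr $arch_xml_file $blif_file <remaining options as above>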