/* NOTE(review): the four bare #include directives below have lost their
 * header names in this copy of the file (presumably stripped together with
 * their angle brackets) -- restore them from version control. */
/*#include */
#include
#include
#include
#include "util.h"
#include "vpr_types.h"
#include "globals.h"
#include "place.h"
#include "read_place.h"
#include "draw.h"
#include "place_and_route.h"
#include "net_delay.h"
#include "path_delay.h"
#include "timing_place_lookup.h"
#include "timing_place.h"
#include "place_stats.h"
#include "read_xml_arch_file.h"
#include "ReadOptions.h"
#include "vpr_utils.h"
#include "place_macro.h"

/************** Types and defines local to place.c ***************************/

/* Cut off for incremental bounding box updates.
 * 4 is fastest -- I checked.
 * To turn off incremental bounding box updates, set this to a huge value. */
#define SMALL_NET 4

/* This defines the error tolerance for floating point variables used in
 * cost computation. 0.01 means that there is a 1% error tolerance. */
#define ERROR_TOL .01

/* This defines the maximum number of swap attempts before invoking the
 * once-in-a-while placement legality check as well as the floating point
 * round-off check. In try_place, once the accumulated number of moves
 * exceeds this value the costs are recomputed from scratch and compared
 * with the incrementally maintained ones. */
#define MAX_MOVES_BEFORE_RECOMPUTE 50000

/* The maximum number of tries when trying to place a carry chain at a
 * random location before trying exhaustive placement - find the first
 * legal position and place it during initial placement. */
#define MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY 4

/* Flags for the states of the bounding box.
 * Stored as char for memory efficiency. */
#define NOT_UPDATED_YET 'N'
#define UPDATED_ONCE 'U'
#define GOT_FROM_SCRATCH 'S'

/* For comp_cost. NORMAL means use the method that generates updateable
 * bounding boxes for speed. CHECK means compute all bounding boxes from
 * scratch using a very simple routine to allow checks of the other
 * costs. */
enum cost_methods {
    NORMAL, CHECK
};

/* This is for the placement swap routines. A swap attempt could be
 * rejected, accepted or aborted (due to the limitations placed on the
 * carry chain support at this point). */
enum swap_result {
    REJECTED, ACCEPTED, ABORTED
};

/* Stops inverse timing cost from going to infinity with very lax timing
 * constraints, which avoids multiplying by a gigantic
 * inverse_prev_timing_cost when auto-normalizing. The exact value of this
 * cost has relatively little impact, but should not be large enough to be
 * on the order of timing costs for normal constraints. */
#define MAX_INV_TIMING_COST 1.e9

/********************** Data Structure Definition ***************************/

/* Stores the information of the move for a block that is
 * moved during placement.
 * block_num: the index of the moved block
 * xold: the x_coord that the block is moved from
 * xnew: the x_coord that the block is moved to
 * yold: the y_coord that the block is moved from
 * ynew: the y_coord that the block is moved to
 * zold, znew, swapped_to_empty: not described by the original comment.
 *   NOTE(review): presumably the z position before/after the move and a
 *   flag for a move into an empty location -- confirm against try_swap. */
typedef struct s_pl_moved_block {
    int block_num;
    int xold;
    int xnew;
    int yold;
    int ynew;
    int zold;
    int znew;
    int swapped_to_empty;
}t_pl_moved_block;

/* Stores the list of blocks to be moved in a swap during
 * placement.
 * num_moved_blocks: total number of blocks moved when
 *                   swapping two blocks.
 * moved_blocks: a list of moved blocks data structure with
 *               information on the move.
 *               [0...num_moved_blocks-1] */
typedef struct s_pl_blocks_to_be_moved {
    int num_moved_blocks;
    t_pl_moved_block * moved_blocks;
}t_pl_blocks_to_be_moved;

/********************** Variables local to place.c ***************************/

/* Cost of a net, and a temporary cost of a net used during move assessment. */
static float *net_cost = NULL, *temp_net_cost = NULL; /* [0..num_nets-1] */

/* Legal positions for a type: one (x, y, z) location per entry. */
typedef struct s_legal_pos {
    int x;
    int y;
    int z;
}t_legal_pos;

static t_legal_pos **legal_pos = NULL; /* [0..num_types-1][0..type_tsize - 1] */
/* NOTE(review): the original comment gave the range as [0..num_legal_pos-1];
 * presumably this holds one count per block type, i.e. [0..num_types-1] --
 * confirm against alloc_legal_placements. */
static int *num_legal_pos = NULL; /* [0..num_legal_pos-1] */

/* [0...num_nets-1]
 * A flag array to indicate whether the specific bounding box has been updated
 * in this particular swap or not. If it has been updated before, the code
 * must use the updated data, instead of the out-of-date data passed into the
 * subroutine, particularly used in try_swap(). The value NOT_UPDATED_YET
 * indicates that the net has not been updated before, UPDATED_ONCE indicates
 * that the net has been updated once; if it is going to be updated again, the
 * values from the previous update must be used. GOT_FROM_SCRATCH is only
 * applicable for nets larger than SMALL_NET and it indicates that the
 * particular bounding box could not be updated incrementally, hence the
 * bounding box was computed from scratch, so the bounding box is definitely
 * right: DO NOT update it again.
 * [0...num_nets-1] */
static char * bb_updated_before = NULL;

/* [0..num_nets-1][1..num_pins-1]. What is the value of the timing
 * driven portion of the cost function. These arrays will be set to
 * (criticality * delay) for each point to point connection. */
static float **point_to_point_timing_cost = NULL;
static float **temp_point_to_point_timing_cost = NULL;

/* [0..num_nets-1][1..num_pins-1]. What is the value of the delay
 * for each connection in the circuit. */
static float **point_to_point_delay_cost = NULL;
static float **temp_point_to_point_delay_cost = NULL;

/* [0..num_blocks-1][0..pins_per_clb-1]. Indicates which pin on the net
 * this block corresponds to; this is only required during timing-driven
 * placement. It is used to allow us to update individual connections on
 * each net. */
static int **net_pin_index = NULL;

/* [0..num_nets-1]. Store the bounding box coordinates and the number of
 * blocks on each of a net's bounding box edges (to allow efficient
 * updates), respectively. */
static struct s_bb *bb_coords = NULL, *bb_num_on_edges = NULL;

/* Store the information on the blocks to be moved in a swap during
 * placement, in the form of array of structs instead of struct with
 * arrays for cache efficiency. */
static t_pl_blocks_to_be_moved blocks_affected;

/* The arrays below are used to precompute the inverse of the average
 * number of tracks per channel between [subhigh] and [sublow]. Access
 * them as chan?_place_cost_fac[subhigh][sublow]. They are used to
 * speed up the computation of the cost function that takes the length
 * of the net bounding box in each dimension, divided by the average
 * number of tracks in that direction; for other cost functions they
 * will never be used.
 * [0...ny] [0...nx] */
static float **chanx_place_cost_fac, **chany_place_cost_fac;

/* The following arrays are used by the try_swap function for speed. */
/* [0...num_nets-1] */
static struct s_bb *ts_bb_coord_new = NULL;
static struct s_bb *ts_bb_edge_new = NULL;
static int *ts_nets_to_update = NULL;

/* The pl_macros array stores all the carry chains placement macros.
 * [0...num_pl_macros-1] */
static t_pl_macro * pl_macros = NULL;
static int num_pl_macros;

/* These file-scoped variables keep track of the number of swaps
 * rejected, accepted or aborted. The total number of swap attempts
 * is the sum of the three numbers. try_place resets all four counters
 * (including num_ts_called) to zero on entry. */
static int num_swap_rejected = 0;
static int num_swap_accepted = 0;
static int num_swap_aborted = 0;
static int num_ts_called = 0;

/* Expected crossing counts for nets with different #'s of pins. From
 * ICCAD 94 pp. 690 - 695 (with linear interpolation applied by me).
 * Multiplied to bounding box of a net to better estimate wire length
 * for higher fanout nets. Each entry is the correction factor for the
 * fanout index-1. */
static const float cross_count[50] = { /* [0..49] */1.0, 1.0, 1.0, 1.0828,
        1.1536, 1.2206, 1.2823, 1.3385, 1.3991, 1.4493, 1.4974, 1.5455, 1.5937,
        1.6418, 1.6899, 1.7304, 1.7709, 1.8114, 1.8519, 1.8924, 1.9288, 1.9652,
        2.0015, 2.0379, 2.0743, 2.1061, 2.1379, 2.1698, 2.2016, 2.2334, 2.2646,
        2.2958, 2.3271, 2.3583, 2.3895, 2.4187, 2.4479, 2.4772, 2.5064, 2.5356,
        2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410, 2.7671,
        2.7933 };

/********************* Static subroutines local to place.c *******************/
#ifdef VERBOSE
static void print_clb_placement(const char *fname);
#endif

/* Allocation / teardown of the placer's file-scope data structures. */
static void alloc_and_load_placement_structs(
        float place_cost_exp, float ***old_region_occ_x,
        float ***old_region_occ_y, struct s_placer_opts placer_opts,
        t_direct_inf *directs, int num_directs);

static void alloc_and_load_try_swap_structs();

static void free_placement_structs(
        float **old_region_occ_x, float **old_region_occ_y,
        struct s_placer_opts placer_opts);

static void alloc_and_load_for_fast_cost_update(float place_cost_exp);

static void free_fast_cost_update(void);

static void alloc_legal_placements();
static void load_legal_placements();

static void free_legal_placements();

/* Initial placement, including placement macros (carry chains). */
static int check_macro_can_be_placed(int imacro, int itype, int x, int y, int z);

static int try_place_macro(int itype, int ichoice, int imacro, int * free_locations);

static void initial_placement_pl_macros(int macros_max_num_tries, int * free_locations);

static void initial_placement_blocks(int * free_locations, enum e_pad_loc_type pad_loc_type);

static void initial_placement(enum e_pad_loc_type pad_loc_type,
        char *pad_loc_file);

/* Cost computation and swap evaluation. */
static float comp_bb_cost(enum cost_methods method);

static int setup_blocks_affected(int b_from, int x_to, int y_to, int z_to);

static int find_affected_blocks(int b_from, int x_to, int y_to, int z_to);

static enum swap_result try_swap(float t, float *cost, float *bb_cost, float *timing_cost,
        float rlim, float **old_region_occ_x,
        float **old_region_occ_y,
        enum e_place_algorithm place_algorithm, float timing_tradeoff,
        float inverse_prev_bb_cost, float inverse_prev_timing_cost,
        float *delay_cost);

static void check_place(float bb_cost, float timing_cost,
        enum e_place_algorithm place_algorithm,
        float delay_cost);

/* Annealing schedule helpers. */
static float starting_t(float *cost_ptr, float *bb_cost_ptr,
        float *timing_cost_ptr, float **old_region_occ_x,
        float **old_region_occ_y,
        struct s_annealing_sched annealing_sched, int max_moves, float rlim,
        enum e_place_algorithm place_algorithm, float timing_tradeoff,
        float inverse_prev_bb_cost, float inverse_prev_timing_cost,
        float *delay_cost_ptr);

static void update_t(float *t, float std_dev, float rlim, float success_rat,
        struct s_annealing_sched annealing_sched);

static void update_rlim(float *rlim, float success_rat);

static int exit_crit(float t, float cost,
        struct s_annealing_sched annealing_sched);

static int count_connections(void);

static double get_std_dev(int n, double sum_x_squared, double av_x);

static float recompute_bb_cost(void);

/* Timing-driven cost helpers. */
static float comp_td_point_to_point_delay(int inet, int ipin);

static void update_td_cost(void);

static void comp_delta_td_cost(float *delta_timing, float *delta_delay);

static void comp_td_costs(float *timing_cost, float *connection_delay_sum);

static enum swap_result assess_swap(float delta_c, float t);

static boolean find_to(int x_from, int y_from, t_type_ptr type, float rlim, int *x_to, int *y_to);

/* Bounding box maintenance. */
static void get_non_updateable_bb(int inet, struct s_bb *bb_coord_new);

static void update_bb(int inet, struct s_bb *bb_coord_new,
        struct s_bb *bb_edge_new, int xold, int yold, int xnew, int ynew);

static int find_affected_nets(int *nets_to_update);

static float get_net_cost(int inet, struct s_bb *bb_ptr);

static void get_bb_from_scratch(int inet, struct s_bb *coords,
        struct s_bb *num_on_edges);

static double get_net_wirelength_estimate(int inet, struct s_bb *bbptr);

static void free_try_swap_arrays(void);
/*****************************************************************************/ /* RESEARCH TODO: Bounding Box and rlim need to be redone for heterogeneous to prevent a QoR penalty */ void try_place(struct s_placer_opts placer_opts, struct s_annealing_sched annealing_sched, t_chan_width_dist chan_width_dist, struct s_router_opts router_opts, struct s_det_routing_arch det_routing_arch, t_segment_inf * segment_inf, t_timing_inf timing_inf, t_direct_inf *directs, int num_directs) { /* Does almost all the work of placing a circuit. Width_fac gives the * * width of the widest channel. Place_cost_exp says what exponent the * * width should be taken to when calculating costs. This allows a * * greater bias for anisotropic architectures. */ int tot_iter, inner_iter, success_sum, move_lim, moves_since_cost_recompute, width_fac, num_connections, inet, ipin, outer_crit_iter_count, inner_crit_iter_count, inner_recompute_limit, swap_result; float t, success_rat, rlim, cost, timing_cost, bb_cost, new_bb_cost, new_timing_cost, delay_cost, new_delay_cost, place_delay_value, inverse_prev_bb_cost, inverse_prev_timing_cost, oldt, **old_region_occ_x, **old_region_occ_y, **net_delay = NULL, crit_exponent, first_rlim, final_rlim, inverse_delta_rlim, critical_path_delay = UNDEFINED, **remember_net_delay_original_ptr; /*used to free net_delay if it is re-assigned */ double av_cost, av_bb_cost, av_timing_cost, av_delay_cost, sum_of_squares, std_dev; int total_swap_attempts; float reject_rate; float accept_rate; float abort_rate; char msg[BUFSIZE]; t_slack * slacks = NULL; /* Allocated here because it goes into timing critical code where each memory allocation is expensive */ remember_net_delay_original_ptr = NULL; /*prevents compiler warning */ /* init file scope variables */ num_swap_rejected = 0; num_swap_accepted = 0; num_swap_aborted = 0; num_ts_called = 0; if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || 
placer_opts.enable_timing_computations) { /*do this before the initial placement to avoid messing up the initial placement */ slacks = alloc_lookups_and_criticalities(chan_width_dist, router_opts, det_routing_arch, segment_inf, timing_inf, &net_delay, directs, num_directs); remember_net_delay_original_ptr = net_delay; /*#define PRINT_LOWER_BOUND */ #ifdef PRINT_LOWER_BOUND /*print the crit_path, assuming delay between blocks that are* *block_dist apart*/ if (placer_opts.block_dist <= nx) place_delay_value = delta_clb_to_clb[placer_opts.block_dist][0]; else if (placer_opts.block_dist <= ny) place_delay_value = delta_clb_to_clb[0][placer_opts.block_dist]; else place_delay_value = delta_clb_to_clb[nx][ny]; vpr_printf(TIO_MESSAGE_INFO, "\n"); vpr_printf(TIO_MESSAGE_INFO, "Lower bound assuming delay of %g\n", place_delay_value); load_constant_net_delay(net_delay, place_delay_value); load_timing_graph_net_delays(net_delay); do_timing_analysis(slacks, FALSE, FALSE, TRUE); if (getEchoEnabled()) { if(isEchoFileEnabled(E_ECHO_PLACEMENT_CRITICAL_PATH)) print_critical_path(getEchoFileName(E_ECHO_PLACEMENT_CRITICAL_PATH)); if(isEchoFileEnabled(E_ECHO_PLACEMENT_LOWER_BOUND_SINK_DELAYS)) print_sink_delays(getEchoFileName(E_ECHO_PLACEMENT_LOWER_BOUND_SINK_DELAYS)); if(isEchoFileEnabled(E_ECHO_PLACEMENT_LOGIC_SINK_DELAYS)) print_sink_delays(getEchoFileName(E_ECHO_PLACEMENT_LOGIC_SINK_DELAYS)); } /*also print sink delays assuming 0 delay between blocks, * this tells us how much logic delay is on each path */ load_constant_net_delay(net_delay, 0); load_timing_graph_net_delays(net_delay); do_timing_analysis(slacks, FALSE, FALSE, TRUE); #endif } width_fac = placer_opts.place_chan_width; init_chan(width_fac, chan_width_dist); alloc_and_load_placement_structs( placer_opts.place_cost_exp, &old_region_occ_x, &old_region_occ_y, placer_opts, directs, num_directs); initial_placement(placer_opts.pad_loc_type, placer_opts.pad_loc_file); init_draw_coords((float) width_fac); /* Storing the number 
of pins on each type of block makes the swap routine * * slightly more efficient. */ /* Gets initial cost and loads bounding boxes. */ if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { bb_cost = comp_bb_cost(NORMAL); crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */ num_connections = count_connections(); vpr_printf(TIO_MESSAGE_INFO, "\n"); vpr_printf(TIO_MESSAGE_INFO, "There are %d point to point connections in this circuit.\n", num_connections); vpr_printf(TIO_MESSAGE_INFO, "\n"); if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE) { for (inet = 0; inet < num_nets; inet++) for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) timing_place_crit[inet][ipin] = 0; /*dummy crit values */ comp_td_costs(&timing_cost, &delay_cost); /*first pass gets delay_cost, which is used * in criticality computations in the next call * to comp_td_costs. */ place_delay_value = delay_cost / num_connections; /*used for computing criticalities */ load_constant_net_delay(net_delay, place_delay_value, clb_net, num_nets); } else place_delay_value = 0; if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { net_delay = point_to_point_delay_cost; /*this keeps net_delay up to date with * * *the same values that the placer is using * * *point_to_point_delay_cost is computed each* * *time that comp_td_costs is called, and is * * *also updated after any swap is accepted */ } load_timing_graph_net_delays(net_delay); do_timing_analysis(slacks, FALSE, FALSE, FALSE); load_criticalities(slacks, crit_exponent); if (getEchoEnabled()) { if(isEchoFileEnabled(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH)) print_timing_graph(getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH)); if(isEchoFileEnabled(E_ECHO_INITIAL_PLACEMENT_SLACK)) print_slack(slacks->slack, FALSE, getEchoFileName(E_ECHO_INITIAL_PLACEMENT_SLACK)); if(isEchoFileEnabled(E_ECHO_INITIAL_PLACEMENT_CRITICALITY)) 
print_criticality(slacks, FALSE, getEchoFileName(E_ECHO_INITIAL_PLACEMENT_CRITICALITY)); } outer_crit_iter_count = 1; /*now we can properly compute costs */ comp_td_costs(&timing_cost, &delay_cost); /*also vpr_printf proper values into point_to_point_delay_cost */ inverse_prev_timing_cost = 1 / timing_cost; inverse_prev_bb_cost = 1 / bb_cost; cost = 1; /*our new cost function uses normalized values of */ /*bb_cost and timing_cost, the value of cost will be reset */ /*to 1 at each temperature when *_TIMING_DRIVEN_PLACE is true */ } else { /*BOUNDING_BOX_PLACE */ cost = bb_cost = comp_bb_cost(NORMAL); timing_cost = 0; delay_cost = 0; place_delay_value = 0; outer_crit_iter_count = 0; num_connections = 0; crit_exponent = 0; inverse_prev_timing_cost = 0; /*inverses not used */ inverse_prev_bb_cost = 0; } move_lim = (int) (annealing_sched.inner_num * pow(num_blocks, 1.3333)); if (placer_opts.inner_loop_recompute_divider != 0) inner_recompute_limit = (int) (0.5 + (float) move_lim / (float) placer_opts.inner_loop_recompute_divider); else /*don't do an inner recompute */ inner_recompute_limit = move_lim + 1; /* Sometimes I want to run the router with a random placement. Avoid * * using 0 moves to stop division by 0 and 0 length vector problems, * * by setting move_lim to 1 (which is still too small to do any * * significant optimization). 
*/ if (move_lim <= 0) move_lim = 1; rlim = (float) std::max(nx + 1, ny + 1); first_rlim = rlim; /*used in timing-driven placement for exponent computation */ final_rlim = 1; inverse_delta_rlim = 1 / (first_rlim - final_rlim); t = starting_t(&cost, &bb_cost, &timing_cost, old_region_occ_x, old_region_occ_y, annealing_sched, move_lim, rlim, placer_opts.place_algorithm, placer_opts.timing_tradeoff, inverse_prev_bb_cost, inverse_prev_timing_cost, &delay_cost); tot_iter = 0; moves_since_cost_recompute = 0; vpr_printf(TIO_MESSAGE_INFO, "Initial placement cost: %g bb_cost: %g td_cost: %g delay_cost: %g\n", cost, bb_cost, timing_cost, delay_cost); vpr_printf(TIO_MESSAGE_INFO, "\n"); #ifndef SPEC vpr_printf(TIO_MESSAGE_INFO, "%9s %9s %11s %11s %11s %11s %8s %8s %7s %7s %7s %9s %7s\n", "---------", "---------", "-----------", "-----------", "-----------", "-----------", "--------", "--------", "-------", "-------", "-------", "---------", "-------"); vpr_printf(TIO_MESSAGE_INFO, "%9s %9s %11s %11s %11s %11s %8s %8s %7s %7s %7s %9s %7s\n", "T", "Cost", "Av BB Cost", "Av TD Cost", "Av Tot Del", "P to P Del", "d_max", "Ac Rate", "Std Dev", "R limit", "Exp", "Tot Moves", "Alpha"); vpr_printf(TIO_MESSAGE_INFO, "%9s %9s %11s %11s %11s %11s %8s %8s %7s %7s %7s %9s %7s\n", "---------", "---------", "-----------", "-----------", "-----------", "-----------", "--------", "--------", "-------", "-------", "-------", "---------", "-------"); #endif sprintf(msg, "Initial Placement. 
Cost: %g BB Cost: %g TD Cost %g Delay Cost: %g \t Channel Factor: %d", cost, bb_cost, timing_cost, delay_cost, width_fac); update_screen(MAJOR, msg, PLACEMENT, FALSE); while (exit_crit(t, cost, annealing_sched) == 0) { if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { cost = 1; } av_cost = 0.; av_bb_cost = 0.; av_delay_cost = 0.; av_timing_cost = 0.; sum_of_squares = 0.; success_sum = 0; if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { if (outer_crit_iter_count >= placer_opts.recompute_crit_iter || placer_opts.inner_loop_recompute_divider != 0) { #ifdef VERBOSE vpr_printf(TIO_MESSAGE_INFO, "Outer loop recompute criticalities\n"); #endif place_delay_value = delay_cost / num_connections; if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE) load_constant_net_delay(net_delay, place_delay_value, clb_net, num_nets); /*note, for path_based, the net delay is not updated since it is current, *because it accesses point_to_point_delay array */ load_timing_graph_net_delays(net_delay); do_timing_analysis(slacks, FALSE, FALSE, FALSE); load_criticalities(slacks, crit_exponent); /*recompute costs from scratch, based on new criticalities */ comp_td_costs(&timing_cost, &delay_cost); outer_crit_iter_count = 0; } outer_crit_iter_count++; /*at each temperature change we update these values to be used */ /*for normalizing the tradeoff between timing and wirelength (bb) */ inverse_prev_bb_cost = 1 / bb_cost; /*Prevent inverse timing cost from going to infinity */ inverse_prev_timing_cost = std::min(1 / timing_cost, (float)MAX_INV_TIMING_COST); } inner_crit_iter_count = 1; for (inner_iter = 0; inner_iter < move_lim; inner_iter++) { swap_result = try_swap(t, &cost, &bb_cost, &timing_cost, rlim, old_region_occ_x, old_region_occ_y, placer_opts.place_algorithm, placer_opts.timing_tradeoff, inverse_prev_bb_cost, inverse_prev_timing_cost, 
&delay_cost); if (swap_result == ACCEPTED) { /* Move was accepted. Update statistics that are useful for the annealing schedule. */ success_sum++; av_cost += cost; av_bb_cost += bb_cost; av_timing_cost += timing_cost; av_delay_cost += delay_cost; sum_of_squares += cost * cost; num_swap_accepted++; } else if (swap_result == ABORTED) { num_swap_aborted++; } else { // swap_result == REJECTED num_swap_rejected++; } if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { /* Do we want to re-timing analyze the circuit to get updated slack and criticality values? * We do this only once in a while, since it is expensive. */ if (inner_crit_iter_count >= inner_recompute_limit && inner_iter != move_lim - 1) { /*on last iteration don't recompute */ inner_crit_iter_count = 0; #ifdef VERBOSE vpr_printf(TIO_MESSAGE_TRACE, "Inner loop recompute criticalities\n"); #endif if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE) { /* Use a constant delay per connection as the delay estimate, rather than * estimating based on the current placement. Not a great idea, but not the * default. */ place_delay_value = delay_cost / num_connections; load_constant_net_delay(net_delay, place_delay_value, clb_net, num_nets); } /* Using the delays in net_delay, do a timing analysis to update slacks and * criticalities; then update the timing cost since it will change. */ load_timing_graph_net_delays(net_delay); do_timing_analysis(slacks, FALSE, FALSE, FALSE); load_criticalities(slacks, crit_exponent); comp_td_costs(&timing_cost, &delay_cost); } inner_crit_iter_count++; } #ifdef VERBOSE vpr_printf(TIO_MESSAGE_TRACE, "t = %g cost = %g bb_cost = %g timing_cost = %g move = %d dmax = %g\n", t, cost, bb_cost, timing_cost, inner_iter, delay_cost); if (fabs(bb_cost - comp_bb_cost(CHECK)) > bb_cost * ERROR_TOL) exit(1); #endif } /* Lines below prevent too much round-off error from accumulating * * in the cost over many iterations. 
This round-off can lead to * * error checks failing because the cost is different from what * * you get when you recompute from scratch. */ moves_since_cost_recompute += move_lim; if (moves_since_cost_recompute > MAX_MOVES_BEFORE_RECOMPUTE) { new_bb_cost = recompute_bb_cost(); if (fabs(new_bb_cost - bb_cost) > bb_cost * ERROR_TOL) { vpr_printf(TIO_MESSAGE_ERROR, "in try_place: new_bb_cost = %g, old bb_cost = %g\n", new_bb_cost, bb_cost); exit(1); } bb_cost = new_bb_cost; if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { comp_td_costs(&new_timing_cost, &new_delay_cost); if (fabs(new_timing_cost - timing_cost) > timing_cost * ERROR_TOL) { vpr_printf(TIO_MESSAGE_ERROR, "in try_place: new_timing_cost = %g, old timing_cost = %g\n", new_timing_cost, timing_cost); exit(1); } if (fabs(new_delay_cost - delay_cost) > delay_cost * ERROR_TOL) { vpr_printf(TIO_MESSAGE_ERROR, "in try_place: new_delay_cost = %g, old delay_cost = %g\n", new_delay_cost, delay_cost); exit(1); } timing_cost = new_timing_cost; } if (placer_opts.place_algorithm == BOUNDING_BOX_PLACE) { cost = new_bb_cost; } moves_since_cost_recompute = 0; } tot_iter += move_lim; success_rat = ((float) success_sum) / move_lim; if (success_sum == 0) { av_cost = cost; av_bb_cost = bb_cost; av_timing_cost = timing_cost; av_delay_cost = delay_cost; } else { av_cost /= success_sum; av_bb_cost /= success_sum; av_timing_cost /= success_sum; av_delay_cost /= success_sum; } std_dev = get_std_dev(success_sum, sum_of_squares, av_cost); oldt = t; /* for finding and printing alpha. 
*/ update_t(&t, std_dev, rlim, success_rat, annealing_sched); #ifndef SPEC critical_path_delay = get_critical_path_delay(); vpr_printf(TIO_MESSAGE_INFO, "%9.5f %9.5g %11.6g %11.6g %11.6g %11.6g %8.4f %8.4f %7.4f %7.4f %7.4f %9d %7.4f\n", oldt, av_cost, av_bb_cost, av_timing_cost, av_delay_cost, place_delay_value, critical_path_delay, success_rat, std_dev, rlim, crit_exponent, tot_iter, t / oldt); #endif sprintf(msg, "Cost: %g BB Cost %g TD Cost %g Temperature: %g", cost, bb_cost, timing_cost, t); update_screen(MINOR, msg, PLACEMENT, FALSE); update_rlim(&rlim, success_rat); if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { crit_exponent = (1 - (rlim - final_rlim) * inverse_delta_rlim) * (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first) + placer_opts.td_place_exp_first; } #ifdef VERBOSE if (getEchoEnabled()) { print_clb_placement("first_iteration_clb_placement.echo"); } #endif } t = 0; /* freeze out */ av_cost = 0.; av_bb_cost = 0.; av_timing_cost = 0.; sum_of_squares = 0.; av_delay_cost = 0.; success_sum = 0; if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { /*at each temperature change we update these values to be used */ /*for normalizing the tradeoff between timing and wirelength (bb) */ if (outer_crit_iter_count >= placer_opts.recompute_crit_iter || placer_opts.inner_loop_recompute_divider != 0) { #ifdef VERBOSE vpr_printf(TIO_MESSAGE_INFO, "Outer loop recompute criticalities\n"); #endif place_delay_value = delay_cost / num_connections; if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE) load_constant_net_delay(net_delay, place_delay_value, clb_net, num_nets); load_timing_graph_net_delays(net_delay); do_timing_analysis(slacks, FALSE, FALSE, FALSE); load_criticalities(slacks, crit_exponent); /*recompute criticaliies */ comp_td_costs(&timing_cost, &delay_cost); outer_crit_iter_count = 0; } 
outer_crit_iter_count++; inverse_prev_bb_cost = 1 / (bb_cost); /*Prevent inverse timing cost from going to infinity */ inverse_prev_timing_cost = std::min(1 / timing_cost, (float)MAX_INV_TIMING_COST); } inner_crit_iter_count = 1; for (inner_iter = 0; inner_iter < move_lim; inner_iter++) { swap_result = try_swap(t, &cost, &bb_cost, &timing_cost, rlim, old_region_occ_x, old_region_occ_y, placer_opts.place_algorithm, placer_opts.timing_tradeoff, inverse_prev_bb_cost, inverse_prev_timing_cost, &delay_cost); if (swap_result == ACCEPTED) { success_sum++; av_cost += cost; av_bb_cost += bb_cost; av_delay_cost += delay_cost; av_timing_cost += timing_cost; sum_of_squares += cost * cost; if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { if (inner_crit_iter_count >= inner_recompute_limit && inner_iter != move_lim - 1) { inner_crit_iter_count = 0; #ifdef VERBOSE vpr_printf(TIO_MESSAGE_TRACE, "Inner loop recompute criticalities\n"); #endif if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE) { place_delay_value = delay_cost / num_connections; load_constant_net_delay(net_delay, place_delay_value, clb_net, num_nets); } load_timing_graph_net_delays(net_delay); do_timing_analysis(slacks, FALSE, FALSE, FALSE); load_criticalities(slacks, crit_exponent); comp_td_costs(&timing_cost, &delay_cost); } inner_crit_iter_count++; } num_swap_accepted++; } else if (swap_result == ABORTED) { num_swap_aborted++; } else { num_swap_rejected++; } #ifdef VERBOSE vpr_printf(TIO_MESSAGE_INFO, "t = %g, cost = %g, move = %d\n", t, cost, tot_iter); #endif } tot_iter += move_lim; success_rat = ((float) success_sum) / move_lim; if (success_sum == 0) { av_cost = cost; av_bb_cost = bb_cost; av_delay_cost = delay_cost; av_timing_cost = timing_cost; } else { av_cost /= success_sum; av_bb_cost /= success_sum; av_delay_cost /= success_sum; av_timing_cost /= success_sum; } std_dev = get_std_dev(success_sum, sum_of_squares, av_cost); 
#ifndef SPEC vpr_printf(TIO_MESSAGE_INFO, "%9.5f %9.5g %11.6g %11.6g %11.6g %11.6g %8s %8.4f %7.4f %7.4f %7.4f %9d\n", t, av_cost, av_bb_cost, av_timing_cost, av_delay_cost, place_delay_value, " ", success_rat, std_dev, rlim, crit_exponent, tot_iter); #endif // TODO: // 1. print a message about number of aborted moves. // 2. add some subroutine hierarchy! Too big! // 3. put statistics counters (av_cost, success_sum, etc.) in a struct so a // pointer to it can be passed around. #ifdef VERBOSE if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_END_CLB_PLACEMENT)) { print_clb_placement(getEchoFileName(E_ECHO_END_CLB_PLACEMENT)); } #endif check_place(bb_cost, timing_cost, placer_opts.place_algorithm, delay_cost); if (placer_opts.enable_timing_computations && placer_opts.place_algorithm == BOUNDING_BOX_PLACE) { /*need this done since the timing data has not been kept up to date* *in bounding_box mode */ for (inet = 0; inet < num_nets; inet++) for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) timing_place_crit[inet][ipin] = 0; /*dummy crit values */ comp_td_costs(&timing_cost, &delay_cost); /*computes point_to_point_delay_cost */ } if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || placer_opts.enable_timing_computations) { net_delay = point_to_point_delay_cost; /*this makes net_delay up to date with * *the same values that the placer is using*/ load_timing_graph_net_delays(net_delay); do_timing_analysis(slacks, FALSE, FALSE, FALSE); if (getEchoEnabled()) { if(isEchoFileEnabled(E_ECHO_PLACEMENT_SINK_DELAYS)) print_sink_delays(getEchoFileName(E_ECHO_PLACEMENT_SINK_DELAYS)); if(isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_SLACK)) print_slack(slacks->slack, FALSE, getEchoFileName(E_ECHO_FINAL_PLACEMENT_SLACK)); if(isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_CRITICALITY)) print_criticality(slacks, FALSE, getEchoFileName(E_ECHO_FINAL_PLACEMENT_CRITICALITY)); 
if(isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH)) print_timing_graph(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH)); if(isEchoFileEnabled(E_ECHO_PLACEMENT_CRIT_PATH)) print_critical_path(getEchoFileName(E_ECHO_PLACEMENT_CRIT_PATH)); } /* Print critical path delay. */ critical_path_delay = get_critical_path_delay(); vpr_printf(TIO_MESSAGE_INFO, "\n"); vpr_printf(TIO_MESSAGE_INFO, "Placement estimated critical path delay: %g ns\n", critical_path_delay); } sprintf(msg, "Placement. Cost: %g bb_cost: %g td_cost: %g Channel Factor: %d", cost, bb_cost, timing_cost, width_fac); vpr_printf(TIO_MESSAGE_INFO, "Placement cost: %g, bb_cost: %g, td_cost: %g, delay_cost: %g\n", cost, bb_cost, timing_cost, delay_cost); update_screen(MAJOR, msg, PLACEMENT, FALSE); // Print out swap statistics total_swap_attempts = num_swap_rejected + num_swap_accepted + num_swap_aborted; reject_rate = num_swap_rejected / total_swap_attempts; accept_rate = num_swap_accepted / total_swap_attempts; abort_rate = num_swap_aborted / total_swap_attempts; vpr_printf(TIO_MESSAGE_INFO, "Placement total # of swap attempts: %d\n", total_swap_attempts); vpr_printf(TIO_MESSAGE_INFO, "\tSwap reject rate: %g\n", reject_rate); vpr_printf(TIO_MESSAGE_INFO, "\tSwap accept rate: %g\n", accept_rate); vpr_printf(TIO_MESSAGE_INFO, "\tSwap abort rate: %g\n", abort_rate); #ifdef SPEC vpr_printf(TIO_MESSAGE_INFO, "Total moves attempted: %d.0\n", tot_iter); #endif free_placement_structs( old_region_occ_x, old_region_occ_y, placer_opts); if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || placer_opts.enable_timing_computations) { net_delay = remember_net_delay_original_ptr; free_lookups_and_criticalities(&net_delay, slacks); } free_try_swap_arrays(); } static int count_connections() { /*only count non-global connections */ int count, inet; count = 0; for (inet = 0; inet < num_nets; inet++) { if (clb_net[inet].is_global) continue; count += 
clb_net[inet].num_sinks; } return (count); } static double get_std_dev(int n, double sum_x_squared, double av_x) { /* Returns the standard deviation of data set x. There are n sample points, * * sum_x_squared is the summation over n of x^2 and av_x is the average x. * * All operations are done in double precision, since round off error can be * * a problem in the initial temp. std_dev calculation for big circuits. */ double std_dev; if (n <= 1) std_dev = 0.; else std_dev = (sum_x_squared - n * av_x * av_x) / (double) (n - 1); if (std_dev > 0.) /* Very small variances sometimes round negative */ std_dev = sqrt(std_dev); else std_dev = 0.; return (std_dev); } static void update_rlim(float *rlim, float success_rat) { /* Update the range limited to keep acceptance prob. near 0.44. Use * * a floating point rlim to allow gradual transitions at low temps. */ float upper_lim; *rlim = (*rlim) * (1. - 0.44 + success_rat); upper_lim = std::max(nx + 1, ny + 1); *rlim = std::min(*rlim, upper_lim); *rlim = std::max(*rlim, (float)1.); } /* Update the temperature according to the annealing schedule selected. */ static void update_t(float *t, float std_dev, float rlim, float success_rat, struct s_annealing_sched annealing_sched) { /* float fac; */ if (annealing_sched.type == USER_SCHED) { *t = annealing_sched.alpha_t * (*t); } /* Old standard deviation based stuff is below. This bogs down horribly * for big circuits (alu4 and especially bigkey_mod). */ /* #define LAMBDA .7 */ /* ------------------------------------ */ #if 0 else if (std_dev == 0.) { *t = 0.; } else { fac = exp(-LAMBDA * (*t) / std_dev); fac = max(0.5, fac); *t = (*t) * fac; } #endif /* ------------------------------------- */ else { /* AUTO_SCHED */ if (success_rat > 0.96) { *t = (*t) * 0.5; } else if (success_rat > 0.8) { *t = (*t) * 0.9; } else if (success_rat > 0.15 || rlim > 1.) 
{ *t = (*t) * 0.95; } else { *t = (*t) * 0.8; } } } static int exit_crit(float t, float cost, struct s_annealing_sched annealing_sched) { /* Return 1 when the exit criterion is met. */ if (annealing_sched.type == USER_SCHED) { if (t < annealing_sched.exit_t) { return (1); } else { return (0); } } /* Automatic annealing schedule */ if (t < 0.005 * cost / num_nets) { return (1); } else { return (0); } } static float starting_t(float *cost_ptr, float *bb_cost_ptr, float *timing_cost_ptr, float **old_region_occ_x, float **old_region_occ_y, struct s_annealing_sched annealing_sched, int max_moves, float rlim, enum e_place_algorithm place_algorithm, float timing_tradeoff, float inverse_prev_bb_cost, float inverse_prev_timing_cost, float *delay_cost_ptr) { /* Finds the starting temperature (hot condition). */ int i, num_accepted, move_lim, swap_result; double std_dev, av, sum_of_squares; /* Double important to avoid round off */ if (annealing_sched.type == USER_SCHED) return (annealing_sched.init_t); move_lim = std::min(max_moves, num_blocks); num_accepted = 0; av = 0.; sum_of_squares = 0.; /* Try one move per block. Set t high so essentially all accepted. 
*/ for (i = 0; i < move_lim; i++) { swap_result = try_swap(HUGE_POSITIVE_FLOAT, cost_ptr, bb_cost_ptr, timing_cost_ptr, rlim, old_region_occ_x, old_region_occ_y, place_algorithm, timing_tradeoff, inverse_prev_bb_cost, inverse_prev_timing_cost, delay_cost_ptr); if (swap_result == ACCEPTED) { num_accepted++; av += *cost_ptr; sum_of_squares += *cost_ptr * (*cost_ptr); num_swap_accepted++; } else if (swap_result == ABORTED) { num_swap_aborted++; } else { num_swap_rejected++; } } if (num_accepted != 0) av /= num_accepted; else av = 0.; std_dev = get_std_dev(num_accepted, sum_of_squares, av); #ifdef DEBUG if (num_accepted != move_lim) { vpr_printf(TIO_MESSAGE_WARNING, "Starting t: %d of %d configurations accepted.\n", num_accepted, move_lim); } #endif #ifdef VERBOSE vpr_printf(TIO_MESSAGE_INFO, "std_dev: %g, average cost: %g, starting temp: %g\n", std_dev, av, 20. * std_dev); #endif /* Set the initial temperature to 20 times the standard of deviation */ /* so that the initial temperature adjusts according to the circuit */ return (20. * std_dev); } static int setup_blocks_affected(int b_from, int x_to, int y_to, int z_to) { /* Find all the blocks affected when b_from is swapped with b_to. * Returns abort_swap. 
*/ int imoved_blk, imacro; int x_from, y_from, z_from, b_to; int abort_swap = FALSE; /* Xifan TANG: support swap between macros */ /* int from_macro; */ x_from = block[b_from].x; y_from = block[b_from].y; z_from = block[b_from].z; b_to = grid[x_to][y_to].blocks[z_to]; // Check whether the to_location is empty if (b_to == EMPTY) { // Swap the block, dont swap the nets yet block[b_from].x = x_to; block[b_from].y = y_to; block[b_from].z = z_to; // Sets up the blocks moved imoved_blk = blocks_affected.num_moved_blocks; blocks_affected.moved_blocks[imoved_blk].block_num = b_from; blocks_affected.moved_blocks[imoved_blk].xold = x_from; blocks_affected.moved_blocks[imoved_blk].xnew = x_to; blocks_affected.moved_blocks[imoved_blk].yold = y_from; blocks_affected.moved_blocks[imoved_blk].ynew = y_to; blocks_affected.moved_blocks[imoved_blk].zold = z_from; blocks_affected.moved_blocks[imoved_blk].znew = z_to; blocks_affected.moved_blocks[imoved_blk].swapped_to_empty = TRUE; blocks_affected.num_moved_blocks ++; } else { // Does not allow a swap with a macro yet /* Xifan TANG: allow macro swapping...*/ get_imacro_from_iblk(&imacro, b_to, pl_macros, num_pl_macros); /* get_imacro_from_iblk(&from_macro, b_from, pl_macros, num_pl_macros); if (((-1 != from_macro)||(imacro != -1)) &&(!((-1 != from_macro)&&(imacro != -1)))) { */ if (imacro != -1) { abort_swap = TRUE; return (abort_swap); } // Swap the block, dont swap the nets yet block[b_to].x = x_from; block[b_to].y = y_from; block[b_to].z = z_from; block[b_from].x = x_to; block[b_from].y = y_to; block[b_from].z = z_to; // Sets up the blocks moved imoved_blk = blocks_affected.num_moved_blocks; blocks_affected.moved_blocks[imoved_blk].block_num = b_from; blocks_affected.moved_blocks[imoved_blk].xold = x_from; blocks_affected.moved_blocks[imoved_blk].xnew = x_to; blocks_affected.moved_blocks[imoved_blk].yold = y_from; blocks_affected.moved_blocks[imoved_blk].ynew = y_to; blocks_affected.moved_blocks[imoved_blk].zold = z_from; 
blocks_affected.moved_blocks[imoved_blk].znew = z_to; blocks_affected.moved_blocks[imoved_blk].swapped_to_empty = FALSE; blocks_affected.num_moved_blocks ++; imoved_blk = blocks_affected.num_moved_blocks; blocks_affected.moved_blocks[imoved_blk].block_num = b_to; blocks_affected.moved_blocks[imoved_blk].xold = x_to; blocks_affected.moved_blocks[imoved_blk].xnew = x_from; blocks_affected.moved_blocks[imoved_blk].yold = y_to; blocks_affected.moved_blocks[imoved_blk].ynew = y_from; blocks_affected.moved_blocks[imoved_blk].zold = z_to; blocks_affected.moved_blocks[imoved_blk].znew = z_from; blocks_affected.moved_blocks[imoved_blk].swapped_to_empty = FALSE; blocks_affected.num_moved_blocks ++; } // Finish swapping the blocks and setting up blocks_affected return (abort_swap); } static int find_affected_blocks(int b_from, int x_to, int y_to, int z_to) { /* Finds and set ups the affected_blocks array. * Returns abort_swap. */ int imacro, imember; int x_swap_offset, y_swap_offset, z_swap_offset, x_from, y_from, z_from; int curr_b_from, curr_x_from, curr_y_from, curr_z_from, curr_x_to, curr_y_to, curr_z_to; int abort_swap = FALSE; /* int to_imacro;*/ /* Xifan TANG: for more checking */ x_from = block[b_from].x; y_from = block[b_from].y; z_from = block[b_from].z; get_imacro_from_iblk(&imacro, b_from, pl_macros, num_pl_macros); if ( imacro != -1) { // b_from is part of a macro, I need to swap the whole macro // Record down the relative position of the swap x_swap_offset = x_to - x_from; y_swap_offset = y_to - y_from; z_swap_offset = z_to - z_from; for (imember = 0; imember < pl_macros[imacro].num_blocks && abort_swap == FALSE; imember++) { // Gets the new from and to info for every block in the macro // cannot use the old from and to info curr_b_from = pl_macros[imacro].members[imember].blk_index; curr_x_from = block[curr_b_from].x; curr_y_from = block[curr_b_from].y; curr_z_from = block[curr_b_from].z; curr_x_to = curr_x_from + x_swap_offset; curr_y_to = curr_y_from + 
y_swap_offset; curr_z_to = curr_z_from + z_swap_offset; /* Xifan TANG: double check*/ assert(block[curr_b_from].type == grid[curr_x_from][curr_y_from].type); // Make sure that the swap_to location is still on the chip if (curr_x_to < 1 || curr_x_to > nx || curr_y_to < 1 || curr_y_to > ny || curr_z_to < 0) { abort_swap = TRUE; /* Xifan TANG: We need to check if the swap_to location has the same type! */ /* } else if (grid[curr_x_from][curr_y_from].type != grid[curr_x_to][curr_y_to].type) { abort_swap = TRUE; */ } else { /* Xifan TANG: Check if the to_x, to_y is also a marco... * If the follow cases are true then we should abort the swap * 1. length of to_macro is larger than this macro * 2. length of to_macro is the same as this macro, but its starting point is not align with this macro. * 2. length of to_macro is less this macro, but its starting/ending point is out of the range of this macro. */ /* curr_b_to = grid[curr_x_to][curr_y_to].blocks[curr_z_to]; if (OPEN != curr_b_to) { get_imacro_from_iblk(&to_imacro, curr_b_to, pl_macros, num_pl_macros); } if (OPEN != to_imacro) { if (pl_macros[imacro].num_blocks < pl_macros[to_imacro].num_blocks) { abort_swap = TRUE; } else if ((pl_macros[imacro].num_blocks == pl_macros[to_imacro].num_blocks) && (imember != spot_blk_position_in_a_macro(pl_macros[to_imacro],curr_b_to))) { abort_swap = TRUE; } else if ((pl_macros[imacro].num_blocks > pl_macros[to_imacro].num_blocks) && (0 == check_macros_contained(pl_macros[imacro], pl_macros[to_imacro]))) { abort_swap = TRUE; } } } } */ /* Xifan TANG: Only all the memebers in the macro pass the check, we can proceed to setup swap */ /* if (FALSE == abort_swap) { for (imember = 0; imember < pl_macros[imacro].num_blocks && abort_swap == FALSE; imember++) { // Gets the new from and to info for every block in the macro // cannot use the old from and to info curr_b_from = pl_macros[imacro].members[imember].blk_index; curr_x_from = block[curr_b_from].x; curr_y_from = block[curr_b_from].y; 
curr_z_from = block[curr_b_from].z; curr_x_to = curr_x_from + x_swap_offset; curr_y_to = curr_y_from + y_swap_offset; curr_z_to = curr_z_from + z_swap_offset; */ abort_swap = setup_blocks_affected(curr_b_from, curr_x_to, curr_y_to, curr_z_to); } // Finish going through all the blocks in the macro } } else { // This is not a macro - I could use the from and to info from before abort_swap = setup_blocks_affected(b_from, x_to, y_to, z_to); } // Finish handling cases for blocks in macro and otherwise return (abort_swap); } static enum swap_result try_swap(float t, float *cost, float *bb_cost, float *timing_cost, float rlim, float **old_region_occ_x, float **old_region_occ_y, enum e_place_algorithm place_algorithm, float timing_tradeoff, float inverse_prev_bb_cost, float inverse_prev_timing_cost, float *delay_cost) { /* Picks some block and moves it to another spot. If this spot is * * occupied, switch the blocks. Assess the change in cost function * * and accept or reject the move. If rejected, return 0. If * * accepted return 1. Pass back the new value of the cost function. * * rlim is the range limiter. */ enum swap_result keep_switch; int b_from, x_from, y_from, z_from, x_to, y_to, z_to; int num_nets_affected; float delta_c, bb_delta_c, timing_delta_c, delay_delta_c; int inet, iblk, bnum, iblk_pin, inet_affected; int abort_swap = FALSE; num_ts_called ++; /* I'm using negative values of temp_net_cost as a flag, so DO NOT * * use cost functions that can go negative. */ delta_c = 0; /* Change in cost due to this swap. */ bb_delta_c = 0; timing_delta_c = 0; delay_delta_c = 0.0; /* Pick a random block to be swapped with another random block */ b_from = my_irand(num_blocks - 1); /* If the pins are fixed we never move them from their initial * * random locations. 
The code below could be made more efficient * * by using the fact that pins appear first in the block list, * * but this shouldn't cause any significant slowdown and won't be * * broken if I ever change the parser so that the pins aren't * * necessarily at the start of the block list. */ while (block[b_from].isFixed == TRUE) { b_from = my_irand(num_blocks - 1); } x_from = block[b_from].x; y_from = block[b_from].y; z_from = block[b_from].z; if (!find_to(x_from, y_from, block[b_from].type, rlim, &x_to, &y_to)) { return REJECTED; } z_to = 0; if (grid[x_to][y_to].type->capacity > 1) { z_to = my_irand(grid[x_to][y_to].type->capacity - 1); } /* Make the switch in order to make computing the new bounding * * box simpler. If the cost increase is too high, switch them * * back. (block data structures switched, clbs not switched * * until success of move is determined.) * * Also check that whether those are the only 2 blocks * * to be moved - check for carry chains and other placement * * macros. */ /* Check whether the from_block is part of a macro first. * * If it is, the whole macro has to be moved. Calculate the * * x, y, z offsets of the swap to maintain relative placements * * of the blocks. Abort the swap if the to_block is part of a * * macro (not supported yet). */ abort_swap = find_affected_blocks(b_from, x_to, y_to, z_to); if (abort_swap == FALSE) { // Find all the nets affected by this swap num_nets_affected = find_affected_nets(ts_nets_to_update); /* Go through all the pins in all the blocks moved and update the bounding boxes. 
* * Do not update the net cost here since it should only be updated once per net, * * not once per pin */ for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { bnum = blocks_affected.moved_blocks[iblk].block_num; /* Go through all the pins in the moved block */ for (iblk_pin = 0; iblk_pin < block[bnum].type->num_pins; iblk_pin++) { inet = block[bnum].nets[iblk_pin]; if (inet == OPEN) continue; if (clb_net[inet].is_global) continue; if (clb_net[inet].num_sinks < SMALL_NET) { if(bb_updated_before[inet] == NOT_UPDATED_YET) /* Brute force bounding box recomputation, once only for speed. */ get_non_updateable_bb(inet, &ts_bb_coord_new[inet]); } else { update_bb(inet, &ts_bb_coord_new[inet], &ts_bb_edge_new[inet], blocks_affected.moved_blocks[iblk].xold, blocks_affected.moved_blocks[iblk].yold + block[bnum].type->pin_height[iblk_pin], blocks_affected.moved_blocks[iblk].xnew, blocks_affected.moved_blocks[iblk].ynew + block[bnum].type->pin_height[iblk_pin]); } } } /* Now update the cost function. The cost is only updated once for every net * * May have to do major optimizations here later. */ for (inet_affected = 0; inet_affected < num_nets_affected; inet_affected++) { inet = ts_nets_to_update[inet_affected]; temp_net_cost[inet] = get_net_cost(inet, &ts_bb_coord_new[inet]); bb_delta_c += temp_net_cost[inet] - net_cost[inet]; } if (place_algorithm == NET_TIMING_DRIVEN_PLACE || place_algorithm == PATH_TIMING_DRIVEN_PLACE) { /*in this case we redefine delta_c as a combination of timing and bb. * *additionally, we normalize all values, therefore delta_c is in * *relation to 1*/ comp_delta_td_cost(&timing_delta_c, &delay_delta_c); delta_c = (1 - timing_tradeoff) * bb_delta_c * inverse_prev_bb_cost + timing_tradeoff * timing_delta_c * inverse_prev_timing_cost; } else { delta_c = bb_delta_c; } /* 1 -> move accepted, 0 -> rejected. 
*/ keep_switch = assess_swap(delta_c, t); if (keep_switch == ACCEPTED) { *cost = *cost + delta_c; *bb_cost = *bb_cost + bb_delta_c; if (place_algorithm == NET_TIMING_DRIVEN_PLACE || place_algorithm == PATH_TIMING_DRIVEN_PLACE) { /*update the point_to_point_timing_cost and point_to_point_delay_cost * values from the temporary values */ *timing_cost = *timing_cost + timing_delta_c; *delay_cost = *delay_cost + delay_delta_c; update_td_cost(); } /* update net cost functions and reset flags. */ for (inet_affected = 0; inet_affected < num_nets_affected; inet_affected++) { inet = ts_nets_to_update[inet_affected]; bb_coords[inet] = ts_bb_coord_new[inet]; if (clb_net[inet].num_sinks >= SMALL_NET) bb_num_on_edges[inet] = ts_bb_edge_new[inet]; net_cost[inet] = temp_net_cost[inet]; /* negative temp_net_cost value is acting as a flag. */ temp_net_cost[inet] = -1; bb_updated_before[inet] = NOT_UPDATED_YET; } /* Update clb data structures since we kept the move. */ /* Swap physical location */ for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { x_to = blocks_affected.moved_blocks[iblk].xnew; y_to = blocks_affected.moved_blocks[iblk].ynew; z_to = blocks_affected.moved_blocks[iblk].znew; /* Xifan TANG: not sure if this is needed */ //b_to = grid[x_to][y_to].blocks[z_to]; x_from = blocks_affected.moved_blocks[iblk].xold; y_from = blocks_affected.moved_blocks[iblk].yold; z_from = blocks_affected.moved_blocks[iblk].zold; b_from = blocks_affected.moved_blocks[iblk].block_num; grid[x_to][y_to].blocks[z_to] = b_from; /* Xifan TANG: not sure if this is needed */ //grid[x_from][y_from].blocks[z_from] = b_to; if (blocks_affected.moved_blocks[iblk].swapped_to_empty == TRUE) { grid[x_to][y_to].usage++; grid[x_from][y_from].usage--; grid[x_from][y_from].blocks[z_from] = -1; } } // Finish updating clb for all blocks } else { /* Move was rejected. */ /* Reset the net cost function flags first. 
*/ for (inet_affected = 0; inet_affected < num_nets_affected; inet_affected++) { inet = ts_nets_to_update[inet_affected]; temp_net_cost[inet] = -1; bb_updated_before[inet] = NOT_UPDATED_YET; } /* Restore the block data structures to their state before the move. */ for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { b_from = blocks_affected.moved_blocks[iblk].block_num; block[b_from].x = blocks_affected.moved_blocks[iblk].xold; block[b_from].y = blocks_affected.moved_blocks[iblk].yold; block[b_from].z = blocks_affected.moved_blocks[iblk].zold; } } /* Resets the num_moved_blocks, but do not free blocks_moved array. Defensive Coding */ blocks_affected.num_moved_blocks = 0; //check_place(*bb_cost, *timing_cost, place_algorithm, *delay_cost); return (keep_switch); } else { /* Restore the block data structures to their state before the move. */ for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { b_from = blocks_affected.moved_blocks[iblk].block_num; block[b_from].x = blocks_affected.moved_blocks[iblk].xold; block[b_from].y = blocks_affected.moved_blocks[iblk].yold; block[b_from].z = blocks_affected.moved_blocks[iblk].zold; } /* Resets the num_moved_blocks, but do not free blocks_moved array. Defensive Coding */ blocks_affected.num_moved_blocks = 0; return ABORTED; } } static int find_affected_nets(int *nets_to_update) { /* Puts a list of all the nets that are changed by the swap into * * nets_to_update. Returns the number of affected nets. 
*/ int iblk, iblk_pin, inet, bnum, num_affected_nets; num_affected_nets = 0; /* Go through all the blocks moved */ for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { bnum = blocks_affected.moved_blocks[iblk].block_num; /* Go through all the pins in the moved block */ for (iblk_pin = 0; iblk_pin < block[bnum].type->num_pins; iblk_pin++) { /* Updates the pins_to_nets array, set to -1 if * * that pin is not connected to any net or it is a * * global pin that does not need to be updated */ inet = block[bnum].nets[iblk_pin]; if (inet == OPEN) continue; if (clb_net[inet].is_global) continue; if (temp_net_cost[inet] < 0.) { /* Net not marked yet. */ nets_to_update[num_affected_nets] = inet; num_affected_nets++; /* Flag to say we've marked this net. */ temp_net_cost[inet] = 1.; } } } return num_affected_nets; } static boolean find_to(int x_from, int y_from, t_type_ptr type, float rlim, int *x_to, int *y_to) { /* Returns the point to which I want to swap, properly range limited. * rlim must always be between 1 and nx (inclusive) for this routine * to work. Assumes that a column only contains blocks of the same type. */ int x_rel, y_rel, rlx, rly, min_x, max_x, min_y, max_y; int num_tries; int active_area; boolean is_legal; int block_index, ipos; if (type != grid[x_from][y_from].type) { assert(type == grid[x_from][y_from].type); } rlx = (int)std::min((float)nx + 1, rlim); rly = (int)std::min((float)ny + 1, rlim); /* Added rly for aspect_ratio != 1 case. 
*/ active_area = 4 * rlx * rly; min_x = std::max(0, x_from - rlx); max_x = std::min(nx + 1, x_from + rlx); min_y = std::max(0, y_from - rly); max_y = std::min(ny + 1, y_from + rly); #ifdef DEBUG if (rlx < 1 || rlx > nx + 1) { vpr_printf(TIO_MESSAGE_ERROR, "in find_to: rlx = %d\n", rlx); exit(1); } #endif num_tries = 0; block_index = type->index; do { /* Until legal */ is_legal = TRUE; /* Limit the number of tries when searching for an alternative position */ if(num_tries >= 2 * std::min(active_area / type->height, num_legal_pos[block_index]) + 10) { /* Tried randomly searching for a suitable position */ return FALSE; } else { num_tries++; } if(nx / 4 < rlx || ny / 4 < rly || num_legal_pos[block_index] < active_area) { ipos = my_irand(num_legal_pos[block_index] - 1); *x_to = legal_pos[block_index][ipos].x; *y_to = legal_pos[block_index][ipos].y; } else { x_rel = my_irand(std::max(0, max_x - min_x)); *x_to = min_x + x_rel; y_rel = my_irand(std::max(0, max_y - min_y)); *y_to = min_y + y_rel; *y_to = (*y_to) - grid[*x_to][*y_to].offset; /* align it */ } if((x_from == *x_to) && (y_from == *y_to)) { is_legal = FALSE; } else if(*x_to > max_x || *x_to < min_x || *y_to > max_y || *y_to < min_y) { is_legal = FALSE; } else if(grid[*x_to][*y_to].type != grid[x_from][y_from].type) { is_legal = FALSE; } assert(*x_to >= 0 && *x_to <= nx + 1); assert(*y_to >= 0 && *y_to <= ny + 1); } while (is_legal == FALSE); #ifdef DEBUG if (*x_to < 0 || *x_to > nx + 1 || *y_to < 0 || *y_to > ny + 1) { vpr_printf(TIO_MESSAGE_ERROR, "in routine find_to: (x_to,y_to) = (%d,%d)\n", *x_to, *y_to); exit(1); } #endif assert(type == grid[*x_to][*y_to].type); return TRUE; } static enum swap_result assess_swap(float delta_c, float t) { /* Returns: 1 -> move accepted, 0 -> rejected. */ enum swap_result accept; float prob_fac, fnum; if (delta_c <= 0) { #ifdef SPEC /* Reduce variation in final solution due to round off */ fnum = my_frand(); #endif accept = ACCEPTED; return (accept); } if (t == 0.) 
return (REJECTED); fnum = my_frand(); prob_fac = exp(-delta_c / t); if (prob_fac > fnum) { accept = ACCEPTED; } else { accept = REJECTED; } return (accept); } static float recompute_bb_cost(void) { /* Recomputes the cost to eliminate roundoff that may have accrued. * * This routine does as little work as possible to compute this new * * cost. */ int inet; float cost; cost = 0; for (inet = 0; inet < num_nets; inet++) { /* for each net ... */ if (clb_net[inet].is_global == FALSE) { /* Do only if not global. */ /* Bounding boxes don't have to be recomputed; they're correct. */ cost += net_cost[inet]; } } return (cost); } static float comp_td_point_to_point_delay(int inet, int ipin) { /*returns the delay of one point to point connection */ int source_block, sink_block; int delta_x, delta_y; t_type_ptr source_type, sink_type; float delay_source_to_sink; delay_source_to_sink = 0.; source_block = clb_net[inet].node_block[0]; source_type = block[source_block].type; sink_block = clb_net[inet].node_block[ipin]; sink_type = block[sink_block].type; assert(source_type != NULL); assert(sink_type != NULL); delta_x = abs(block[sink_block].x - block[source_block].x); delta_y = abs(block[sink_block].y - block[source_block].y); /* TODO low priority: Could be merged into one look-up table */ /* Note: This heuristic is terrible on Quality of Results. * A much better heuristic is to create a more comprehensive lookup table but * it's too late in the release cycle to do this. 
Pushing until the next release */ if (source_type == IO_TYPE) { if (sink_type == IO_TYPE) delay_source_to_sink = delta_io_to_io[delta_x][delta_y]; else delay_source_to_sink = delta_io_to_clb[delta_x][delta_y]; } else { if (sink_type == IO_TYPE) delay_source_to_sink = delta_clb_to_io[delta_x][delta_y]; else delay_source_to_sink = delta_clb_to_clb[delta_x][delta_y]; } if (delay_source_to_sink < 0) { vpr_printf(TIO_MESSAGE_ERROR, "in comp_td_point_to_point_delay: Bad delay_source_to_sink value delta(%d, %d) delay of %g\n", delta_x, delta_y, delay_source_to_sink); vpr_printf(TIO_MESSAGE_ERROR, "in comp_td_point_to_point_delay: Delay is less than 0\n"); exit(1); } return (delay_source_to_sink); } static void update_td_cost(void) { /* Update the point_to_point_timing_cost values from the temporary * * values for all connections that have changed. */ int iblk_pin, net_pin, inet, ipin; int iblk, iblk2, bnum, driven_by_moved_block; /* Go through all the blocks moved. */ for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { bnum = blocks_affected.moved_blocks[iblk].block_num; for (iblk_pin = 0; iblk_pin < block[bnum].type->num_pins; iblk_pin++) { inet = block[bnum].nets[iblk_pin]; if (inet == OPEN) continue; if (clb_net[inet].is_global) continue; net_pin = net_pin_index[bnum][iblk_pin]; if (net_pin != 0) { driven_by_moved_block = FALSE; for (iblk2 = 0; iblk2 < blocks_affected.num_moved_blocks; iblk2++) { if (clb_net[inet].node_block[0] == blocks_affected.moved_blocks[iblk2].block_num) driven_by_moved_block = TRUE; } /* The following "if" prevents the value from being updated twice. 
*/ if (driven_by_moved_block == FALSE) { point_to_point_delay_cost[inet][net_pin] = temp_point_to_point_delay_cost[inet][net_pin]; temp_point_to_point_delay_cost[inet][net_pin] = -1; point_to_point_timing_cost[inet][net_pin] = temp_point_to_point_timing_cost[inet][net_pin]; temp_point_to_point_timing_cost[inet][net_pin] = -1; } } else { /* This net is being driven by a moved block, recompute */ /* All point to point connections on this net. */ for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) { point_to_point_delay_cost[inet][ipin] = temp_point_to_point_delay_cost[inet][ipin]; temp_point_to_point_delay_cost[inet][ipin] = -1; point_to_point_timing_cost[inet][ipin] = temp_point_to_point_timing_cost[inet][ipin]; temp_point_to_point_timing_cost[inet][ipin] = -1; } /* Finished updating the pin */ } } /* Finished going through all the pins in the moved block */ } /* Finished going through all the blocks moved */ } static void comp_delta_td_cost(float *delta_timing, float *delta_delay) { /*a net that is being driven by a moved block must have all of its */ /*sink timing costs recomputed. 
A net that is driving a moved block */ /*must only have the timing cost on the connection driving the input */ /*pin computed */ int inet, net_pin, ipin; float delta_timing_cost, delta_delay_cost, temp_delay; int iblk, iblk2, bnum, iblk_pin, driven_by_moved_block; delta_timing_cost = 0.; delta_delay_cost = 0.; /* Go through all the blocks moved */ for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { bnum = blocks_affected.moved_blocks[iblk].block_num; /* Go through all the pins in the moved block */ for (iblk_pin = 0; iblk_pin < block[bnum].type->num_pins; iblk_pin++) { inet = block[bnum].nets[iblk_pin]; if (inet == OPEN) continue; if (clb_net[inet].is_global) continue; net_pin = net_pin_index[bnum][iblk_pin]; if (net_pin != 0) { /* If this net is being driven by a block that has moved, we do not * * need to compute the change in the timing cost (here) since it will * * be computed in the fanout of the net on the driving block, also * * computing it here would double count the change, and mess up the * * delta_timing_cost value. */ driven_by_moved_block = FALSE; for (iblk2 = 0; iblk2 < blocks_affected.num_moved_blocks; iblk2++) { if (clb_net[inet].node_block[0] == blocks_affected.moved_blocks[iblk2].block_num) driven_by_moved_block = TRUE; } if (driven_by_moved_block == FALSE) { temp_delay = comp_td_point_to_point_delay(inet, net_pin); temp_point_to_point_delay_cost[inet][net_pin] = temp_delay; temp_point_to_point_timing_cost[inet][net_pin] = timing_place_crit[inet][net_pin] * temp_delay; delta_timing_cost += temp_point_to_point_timing_cost[inet][net_pin] - point_to_point_timing_cost[inet][net_pin]; delta_delay_cost += temp_point_to_point_delay_cost[inet][net_pin] - point_to_point_delay_cost[inet][net_pin]; } } else { /* This net is being driven by a moved block, recompute */ /* All point to point connections on this net. 
*/ for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) { temp_delay = comp_td_point_to_point_delay(inet, ipin); temp_point_to_point_delay_cost[inet][ipin] = temp_delay; temp_point_to_point_timing_cost[inet][ipin] = timing_place_crit[inet][ipin] * temp_delay; delta_timing_cost += temp_point_to_point_timing_cost[inet][ipin] - point_to_point_timing_cost[inet][ipin]; delta_delay_cost += temp_point_to_point_delay_cost[inet][ipin] - point_to_point_delay_cost[inet][ipin]; } /* Finished updating the pin */ } } /* Finished going through all the pins in the moved block */ } /* Finished going through all the blocks moved */ *delta_timing = delta_timing_cost; *delta_delay = delta_delay_cost; } static void comp_td_costs(float *timing_cost, float *connection_delay_sum) { /* Computes the cost (from scratch) due to the delays and criticalities * * on all point to point connections, we define the timing cost of * * each connection as criticality*delay. */ int inet, ipin; float loc_timing_cost, loc_connection_delay_sum, temp_delay_cost, temp_timing_cost; loc_timing_cost = 0.; loc_connection_delay_sum = 0.; for (inet = 0; inet < num_nets; inet++) { /* For each net ... */ if (clb_net[inet].is_global == FALSE) { /* Do only if not global. */ for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) { temp_delay_cost = comp_td_point_to_point_delay(inet, ipin); temp_timing_cost = temp_delay_cost * timing_place_crit[inet][ipin]; loc_connection_delay_sum += temp_delay_cost; point_to_point_delay_cost[inet][ipin] = temp_delay_cost; temp_point_to_point_delay_cost[inet][ipin] = -1; /* Undefined */ point_to_point_timing_cost[inet][ipin] = temp_timing_cost; temp_point_to_point_timing_cost[inet][ipin] = -1; /* Undefined */ loc_timing_cost += temp_timing_cost; } } } /* Make sure timing cost does not go above MIN_TIMING_COST. */ *timing_cost = loc_timing_cost; *connection_delay_sum = loc_connection_delay_sum; } static float comp_bb_cost(enum cost_methods method) { /* Finds the cost from scratch. 
Done only when the placement * * has been radically changed (i.e. after initial placement). * * Otherwise find the cost change incrementally. If method * * check is NORMAL, we find bounding boxes that are updateable * * for the larger nets. If method is CHECK, all bounding boxes * * are found via the non_updateable_bb routine, to provide a * * cost which can be used to check the correctness of the * * other routine. */ int inet; float cost; double expected_wirelength; cost = 0; expected_wirelength = 0.0; for (inet = 0; inet < num_nets; inet++) { /* for each net ... */ if (clb_net[inet].is_global == FALSE) { /* Do only if not global. */ /* Small nets don't use incremental updating on their bounding boxes, * * so they can use a fast bounding box calculator. */ if (clb_net[inet].num_sinks >= SMALL_NET && method == NORMAL) { get_bb_from_scratch(inet, &bb_coords[inet], &bb_num_on_edges[inet]); } else { get_non_updateable_bb(inet, &bb_coords[inet]); } net_cost[inet] = get_net_cost(inet, &bb_coords[inet]); cost += net_cost[inet]; if (method == CHECK) expected_wirelength += get_net_wirelength_estimate(inet, &bb_coords[inet]); } } if (method == CHECK) { vpr_printf(TIO_MESSAGE_INFO, "\n"); vpr_printf(TIO_MESSAGE_INFO, "BB estimate of min-dist (placement) wirelength: %.0f\n", expected_wirelength); } return (cost); } static void free_placement_structs( float **old_region_occ_x, float **old_region_occ_y, struct s_placer_opts placer_opts) { /* Frees the major structures needed by the placer (and not needed * * elsewhere). 
*/ int inet, imacro; free_legal_placements(); free_fast_cost_update(); if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || placer_opts.enable_timing_computations) { for (inet = 0; inet < num_nets; inet++) { /*add one to the address since it is indexed from 1 not 0 */ point_to_point_delay_cost[inet]++; free(point_to_point_delay_cost[inet]); point_to_point_timing_cost[inet]++; free(point_to_point_timing_cost[inet]); temp_point_to_point_delay_cost[inet]++; free(temp_point_to_point_delay_cost[inet]); temp_point_to_point_timing_cost[inet]++; free(temp_point_to_point_timing_cost[inet]); } free(point_to_point_delay_cost); free(temp_point_to_point_delay_cost); free(point_to_point_timing_cost); free(temp_point_to_point_timing_cost); free_matrix(net_pin_index, 0, num_blocks - 1, 0, sizeof(int)); } free(net_cost); free(temp_net_cost); free(bb_num_on_edges); free(bb_coords); free_placement_macros_structs(); for (imacro = 0; imacro < num_pl_macros; imacro ++) free(pl_macros[imacro].members); free(pl_macros); net_cost = NULL; /* Defensive coding. */ temp_net_cost = NULL; bb_num_on_edges = NULL; bb_coords = NULL; pl_macros = NULL; /* Frees up all the data structure used in vpr_utils. */ free_port_pin_from_blk_pin(); free_blk_pin_from_port_pin(); } static void alloc_and_load_placement_structs( float place_cost_exp, float ***old_region_occ_x, float ***old_region_occ_y, struct s_placer_opts placer_opts, t_direct_inf *directs, int num_directs) { /* Allocates the major structures needed only by the placer, primarily for * * computing costs quickly and such. 
*/ int inet, ipin, max_pins_per_clb, i; alloc_legal_placements(); load_legal_placements(); max_pins_per_clb = 0; for (i = 0; i < num_types; i++) { max_pins_per_clb = std::max(max_pins_per_clb, type_descriptors[i].num_pins); } if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || placer_opts.enable_timing_computations) { /* Allocate structures associated with timing driven placement */ /* [0..num_nets-1][1..num_pins-1] */ point_to_point_delay_cost = (float **) my_malloc( num_nets * sizeof(float *)); temp_point_to_point_delay_cost = (float **) my_malloc( num_nets * sizeof(float *)); point_to_point_timing_cost = (float **) my_malloc( num_nets * sizeof(float *)); temp_point_to_point_timing_cost = (float **) my_malloc( num_nets * sizeof(float *)); for (inet = 0; inet < num_nets; inet++) { /* In the following, subract one so index starts at * * 1 instead of 0 */ point_to_point_delay_cost[inet] = (float *) my_malloc( clb_net[inet].num_sinks * sizeof(float)); point_to_point_delay_cost[inet]--; temp_point_to_point_delay_cost[inet] = (float *) my_malloc( clb_net[inet].num_sinks * sizeof(float)); temp_point_to_point_delay_cost[inet]--; point_to_point_timing_cost[inet] = (float *) my_malloc( clb_net[inet].num_sinks * sizeof(float)); point_to_point_timing_cost[inet]--; temp_point_to_point_timing_cost[inet] = (float *) my_malloc( clb_net[inet].num_sinks * sizeof(float)); temp_point_to_point_timing_cost[inet]--; } for (inet = 0; inet < num_nets; inet++) { for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) { point_to_point_delay_cost[inet][ipin] = 0; temp_point_to_point_delay_cost[inet][ipin] = 0; } } } net_cost = (float *) my_malloc(num_nets * sizeof(float)); temp_net_cost = (float *) my_malloc(num_nets * sizeof(float)); bb_updated_before = (char*)my_calloc(num_nets, sizeof(char)); /* Used to store costs for moves not yet made and to indicate when a net's * * cost has been recomputed. 
temp_net_cost[inet] < 0 means net's cost hasn't * * been recomputed. */ for (inet = 0; inet < num_nets; inet++){ bb_updated_before[inet] = NOT_UPDATED_YET; temp_net_cost[inet] = -1.; } bb_coords = (struct s_bb *) my_malloc(num_nets * sizeof(struct s_bb)); bb_num_on_edges = (struct s_bb *) my_malloc(num_nets * sizeof(struct s_bb)); /* Shouldn't use them; crash hard if I do! */ *old_region_occ_x = NULL; *old_region_occ_y = NULL; alloc_and_load_for_fast_cost_update(place_cost_exp); net_pin_index = alloc_and_load_net_pin_index(); alloc_and_load_try_swap_structs(); num_pl_macros = alloc_and_load_placement_macros(directs, num_directs, &pl_macros); } static void alloc_and_load_try_swap_structs() { /* Allocate the local bb_coordinate storage, etc. only once. */ /* Allocate with size num_nets for any number of nets affected. */ ts_bb_coord_new = (struct s_bb *) my_calloc( num_nets, sizeof(struct s_bb)); ts_bb_edge_new = (struct s_bb *) my_calloc( num_nets, sizeof(struct s_bb)); ts_nets_to_update = (int *) my_calloc(num_nets, sizeof(int)); /* Allocate with size num_blocks for any number of moved block. */ blocks_affected.moved_blocks = (t_pl_moved_block*)my_calloc( num_blocks, sizeof(t_pl_moved_block) ); blocks_affected.num_moved_blocks = 0; } static void get_bb_from_scratch(int inet, struct s_bb *coords, struct s_bb *num_on_edges) { /* This routine finds the bounding box of each net from scratch (i.e. * * from only the block location information). It updates both the * * coordinate and number of pins on each edge information. It * * should only be called when the bounding box information is not valid. 
*/ int ipin, bnum, pnum, x, y, xmin, xmax, ymin, ymax; int xmin_edge, xmax_edge, ymin_edge, ymax_edge; int n_pins; n_pins = clb_net[inet].num_sinks + 1; bnum = clb_net[inet].node_block[0]; pnum = clb_net[inet].node_block_pin[0]; x = block[bnum].x; y = block[bnum].y + block[bnum].type->pin_height[pnum]; x = std::max(std::min(x, nx), 1); y = std::max(std::min(y, ny), 1); xmin = x; ymin = y; xmax = x; ymax = y; xmin_edge = 1; ymin_edge = 1; xmax_edge = 1; ymax_edge = 1; for (ipin = 1; ipin < n_pins; ipin++) { bnum = clb_net[inet].node_block[ipin]; pnum = clb_net[inet].node_block_pin[ipin]; x = block[bnum].x; y = block[bnum].y + block[bnum].type->pin_height[pnum]; /* Code below counts IO blocks as being within the 1..nx, 1..ny clb array. * * This is because channels do not go out of the 0..nx, 0..ny range, and * * I always take all channels impinging on the bounding box to be within * * that bounding box. Hence, this "movement" of IO blocks does not affect * * the which channels are included within the bounding box, and it * * simplifies the code a lot. */ x = std::max(std::min(x, nx), 1); y = std::max(std::min(y, ny), 1); if (x == xmin) { xmin_edge++; } if (x == xmax) { /* Recall that xmin could equal xmax -- don't use else */ xmax_edge++; } else if (x < xmin) { xmin = x; xmin_edge = 1; } else if (x > xmax) { xmax = x; xmax_edge = 1; } if (y == ymin) { ymin_edge++; } if (y == ymax) { ymax_edge++; } else if (y < ymin) { ymin = y; ymin_edge = 1; } else if (y > ymax) { ymax = y; ymax_edge = 1; } } /* Copy the coordinates and number on edges information into the proper * * structures. */ coords->xmin = xmin; coords->xmax = xmax; coords->ymin = ymin; coords->ymax = ymax; num_on_edges->xmin = xmin_edge; num_on_edges->xmax = xmax_edge; num_on_edges->ymin = ymin_edge; num_on_edges->ymax = ymax_edge; } static double get_net_wirelength_estimate(int inet, struct s_bb *bbptr) { /* WMF: Finds the estimate of wirelength due to one net by looking at * * its coordinate bounding box. 
*/ double ncost, crossing; /* Get the expected "crossing count" of a net, based on its number * * of pins. Extrapolate for very large nets. */ if (((clb_net[inet].num_sinks + 1) > 50) && ((clb_net[inet].num_sinks + 1) < 85)) { crossing = 2.7933 + 0.02616 * ((clb_net[inet].num_sinks + 1) - 50); } else if ((clb_net[inet].num_sinks + 1) >= 85) { crossing = 2.7933 + 0.011 * (clb_net[inet].num_sinks + 1) - 0.0000018 * (clb_net[inet].num_sinks + 1) * (clb_net[inet].num_sinks + 1); } else { crossing = cross_count[(clb_net[inet].num_sinks + 1) - 1]; } /* Could insert a check for xmin == xmax. In that case, assume * * connection will be made with no bends and hence no x-cost. * * Same thing for y-cost. */ /* Cost = wire length along channel * cross_count / average * * channel capacity. Do this for x, then y direction and add. */ ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing; ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing; return (ncost); } static float get_net_cost(int inet, struct s_bb *bbptr) { /* Finds the cost due to one net by looking at its coordinate bounding * * box. */ float ncost, crossing; /* Get the expected "crossing count" of a net, based on its number * * of pins. Extrapolate for very large nets. */ if ((clb_net[inet].num_sinks + 1) > 50) { crossing = 2.7933 + 0.02616 * ((clb_net[inet].num_sinks + 1) - 50); /* crossing = 3.0; Old value */ } else { crossing = cross_count[(clb_net[inet].num_sinks + 1) - 1]; } /* Could insert a check for xmin == xmax. In that case, assume * * connection will be made with no bends and hence no x-cost. * * Same thing for y-cost. */ /* Cost = wire length along channel * cross_count / average * * channel capacity. Do this for x, then y direction and add. 
*/ ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing * chanx_place_cost_fac[bbptr->ymax][bbptr->ymin - 1]; ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing * chany_place_cost_fac[bbptr->xmax][bbptr->xmin - 1]; return (ncost); } static void get_non_updateable_bb(int inet, struct s_bb *bb_coord_new) { /* Finds the bounding box of a net and stores its coordinates in the * * bb_coord_new data structure. This routine should only be called * * for small nets, since it does not determine enough information for * * the bounding box to be updated incrementally later. * * Currently assumes channels on both sides of the CLBs forming the * * edges of the bounding box can be used. Essentially, I am assuming * * the pins always lie on the outside of the bounding box. */ int k, xmax, ymax, xmin, ymin, x, y; int bnum, pnum; bnum = clb_net[inet].node_block[0]; pnum = clb_net[inet].node_block_pin[0]; x = block[bnum].x; y = block[bnum].y + block[bnum].type->pin_height[pnum]; xmin = x; ymin = y; xmax = x; ymax = y; for (k = 1; k < (clb_net[inet].num_sinks + 1); k++) { bnum = clb_net[inet].node_block[k]; pnum = clb_net[inet].node_block_pin[k]; x = block[bnum].x; y = block[bnum].y + block[bnum].type->pin_height[pnum]; if (x < xmin) { xmin = x; } else if (x > xmax) { xmax = x; } if (y < ymin) { ymin = y; } else if (y > ymax) { ymax = y; } } /* Now I've found the coordinates of the bounding box. There are no * * channels beyond nx and ny, so I want to clip to that. As well, * * since I'll always include the channel immediately below and the * * channel immediately to the left of the bounding box, I want to * * clip to 1 in both directions as well (since minimum channel index * * is 0). See route.c for a channel diagram. 
*/ bb_coord_new->xmin = std::max(std::min(xmin, nx), 1); bb_coord_new->ymin = std::max(std::min(ymin, ny), 1); bb_coord_new->xmax = std::max(std::min(xmax, nx), 1); bb_coord_new->ymax = std::max(std::min(ymax, ny), 1); } static void update_bb(int inet, struct s_bb *bb_coord_new, struct s_bb *bb_edge_new, int xold, int yold, int xnew, int ynew) { /* Updates the bounding box of a net by storing its coordinates in * * the bb_coord_new data structure and the number of blocks on each * * edge in the bb_edge_new data structure. This routine should only * * be called for large nets, since it has some overhead relative to * * just doing a brute force bounding box calculation. The bounding * * box coordinate and edge information for inet must be valid before * * this routine is called. * * Currently assumes channels on both sides of the CLBs forming the * * edges of the bounding box can be used. Essentially, I am assuming * * the pins always lie on the outside of the bounding box. * * The x and y coordinates are the pin's x and y coordinates. */ /* IO blocks are considered to be one cell in for simplicity. */ struct s_bb *curr_bb_edge, *curr_bb_coord; xnew = std::max(std::min(xnew, nx), 1); ynew = std::max(std::min(ynew, ny), 1); xold = std::max(std::min(xold, nx), 1); yold = std::max(std::min(yold, ny), 1); /* Check if the net had been updated before. */ if (bb_updated_before[inet] == GOT_FROM_SCRATCH) { /* The net had been updated from scratch, DO NOT update again! */ return; } else if (bb_updated_before[inet] == NOT_UPDATED_YET) { /* The net had NOT been updated before, could use the old values */ curr_bb_coord = &bb_coords[inet]; curr_bb_edge = &bb_num_on_edges[inet]; bb_updated_before[inet] = UPDATED_ONCE; } else { /* The net had been updated before, must use the new values */ curr_bb_coord = bb_coord_new; curr_bb_edge = bb_edge_new; } /* Check if I can update the bounding box incrementally. */ if (xnew < xold) { /* Move to left. 
*/ /* Update the xmax fields for coordinates and number of edges first. */ if (xold == curr_bb_coord->xmax) { /* Old position at xmax. */ if (curr_bb_edge->xmax == 1) { get_bb_from_scratch(inet, bb_coord_new, bb_edge_new); bb_updated_before[inet] = GOT_FROM_SCRATCH; return; } else { bb_edge_new->xmax = curr_bb_edge->xmax - 1; bb_coord_new->xmax = curr_bb_coord->xmax; } } else { /* Move to left, old postion was not at xmax. */ bb_coord_new->xmax = curr_bb_coord->xmax; bb_edge_new->xmax = curr_bb_edge->xmax; } /* Now do the xmin fields for coordinates and number of edges. */ if (xnew < curr_bb_coord->xmin) { /* Moved past xmin */ bb_coord_new->xmin = xnew; bb_edge_new->xmin = 1; } else if (xnew == curr_bb_coord->xmin) { /* Moved to xmin */ bb_coord_new->xmin = xnew; bb_edge_new->xmin = curr_bb_edge->xmin + 1; } else { /* Xmin unchanged. */ bb_coord_new->xmin = curr_bb_coord->xmin; bb_edge_new->xmin = curr_bb_edge->xmin; } } /* End of move to left case. */ else if (xnew > xold) { /* Move to right. */ /* Update the xmin fields for coordinates and number of edges first. */ if (xold == curr_bb_coord->xmin) { /* Old position at xmin. */ if (curr_bb_edge->xmin == 1) { get_bb_from_scratch(inet, bb_coord_new, bb_edge_new); bb_updated_before[inet] = GOT_FROM_SCRATCH; return; } else { bb_edge_new->xmin = curr_bb_edge->xmin - 1; bb_coord_new->xmin = curr_bb_coord->xmin; } } else { /* Move to right, old position was not at xmin. */ bb_coord_new->xmin = curr_bb_coord->xmin; bb_edge_new->xmin = curr_bb_edge->xmin; } /* Now do the xmax fields for coordinates and number of edges. */ if (xnew > curr_bb_coord->xmax) { /* Moved past xmax. */ bb_coord_new->xmax = xnew; bb_edge_new->xmax = 1; } else if (xnew == curr_bb_coord->xmax) { /* Moved to xmax */ bb_coord_new->xmax = xnew; bb_edge_new->xmax = curr_bb_edge->xmax + 1; } else { /* Xmax unchanged. */ bb_coord_new->xmax = curr_bb_coord->xmax; bb_edge_new->xmax = curr_bb_edge->xmax; } } /* End of move to right case. 
*/ else { /* xnew == xold -- no x motion. */ bb_coord_new->xmin = curr_bb_coord->xmin; bb_coord_new->xmax = curr_bb_coord->xmax; bb_edge_new->xmin = curr_bb_edge->xmin; bb_edge_new->xmax = curr_bb_edge->xmax; } /* Now account for the y-direction motion. */ if (ynew < yold) { /* Move down. */ /* Update the ymax fields for coordinates and number of edges first. */ if (yold == curr_bb_coord->ymax) { /* Old position at ymax. */ if (curr_bb_edge->ymax == 1) { get_bb_from_scratch(inet, bb_coord_new, bb_edge_new); bb_updated_before[inet] = GOT_FROM_SCRATCH; return; } else { bb_edge_new->ymax = curr_bb_edge->ymax - 1; bb_coord_new->ymax = curr_bb_coord->ymax; } } else { /* Move down, old postion was not at ymax. */ bb_coord_new->ymax = curr_bb_coord->ymax; bb_edge_new->ymax = curr_bb_edge->ymax; } /* Now do the ymin fields for coordinates and number of edges. */ if (ynew < curr_bb_coord->ymin) { /* Moved past ymin */ bb_coord_new->ymin = ynew; bb_edge_new->ymin = 1; } else if (ynew == curr_bb_coord->ymin) { /* Moved to ymin */ bb_coord_new->ymin = ynew; bb_edge_new->ymin = curr_bb_edge->ymin + 1; } else { /* ymin unchanged. */ bb_coord_new->ymin = curr_bb_coord->ymin; bb_edge_new->ymin = curr_bb_edge->ymin; } } /* End of move down case. */ else if (ynew > yold) { /* Moved up. */ /* Update the ymin fields for coordinates and number of edges first. */ if (yold == curr_bb_coord->ymin) { /* Old position at ymin. */ if (curr_bb_edge->ymin == 1) { get_bb_from_scratch(inet, bb_coord_new, bb_edge_new); bb_updated_before[inet] = GOT_FROM_SCRATCH; return; } else { bb_edge_new->ymin = curr_bb_edge->ymin - 1; bb_coord_new->ymin = curr_bb_coord->ymin; } } else { /* Moved up, old position was not at ymin. */ bb_coord_new->ymin = curr_bb_coord->ymin; bb_edge_new->ymin = curr_bb_edge->ymin; } /* Now do the ymax fields for coordinates and number of edges. */ if (ynew > curr_bb_coord->ymax) { /* Moved past ymax. 
*/ bb_coord_new->ymax = ynew; bb_edge_new->ymax = 1; } else if (ynew == curr_bb_coord->ymax) { /* Moved to ymax */ bb_coord_new->ymax = ynew; bb_edge_new->ymax = curr_bb_edge->ymax + 1; } else { /* ymax unchanged. */ bb_coord_new->ymax = curr_bb_coord->ymax; bb_edge_new->ymax = curr_bb_edge->ymax; } } /* End of move up case. */ else { /* ynew == yold -- no y motion. */ bb_coord_new->ymin = curr_bb_coord->ymin; bb_coord_new->ymax = curr_bb_coord->ymax; bb_edge_new->ymin = curr_bb_edge->ymin; bb_edge_new->ymax = curr_bb_edge->ymax; } if (bb_updated_before[inet] == NOT_UPDATED_YET) bb_updated_before[inet] = UPDATED_ONCE; } static void alloc_legal_placements() { int i, j, k; legal_pos = (t_legal_pos **) my_malloc(num_types * sizeof(t_legal_pos *)); num_legal_pos = (int *) my_calloc(num_types, sizeof(int)); /* Initialize all occupancy to zero. */ for (i = 0; i <= nx + 1; i++) { for (j = 0; j <= ny + 1; j++) { grid[i][j].usage = 0; for (k = 0; k < grid[i][j].type->capacity; k++) { grid[i][j].blocks[k] = EMPTY; if (grid[i][j].offset == 0) { num_legal_pos[grid[i][j].type->index]++; } } } } for (i = 0; i < num_types; i++) { legal_pos[i] = (t_legal_pos *) my_malloc(num_legal_pos[i] * sizeof(t_legal_pos)); } } static void load_legal_placements() { int i, j, k, itype; int *index; index = (int *) my_calloc(num_types, sizeof(int)); for (i = 0; i <= nx + 1; i++) { for (j = 0; j <= ny + 1; j++) { for (k = 0; k < grid[i][j].type->capacity; k++) { if (grid[i][j].offset == 0) { itype = grid[i][j].type->index; legal_pos[itype][index[itype]].x = i; legal_pos[itype][index[itype]].y = j; legal_pos[itype][index[itype]].z = k; index[itype]++; } } } } free(index); } static void free_legal_placements() { int i; for (i = 0; i < num_types; i++) { free(legal_pos[i]); } free(legal_pos); /* Free the mapping list */ free(num_legal_pos); } static int check_macro_can_be_placed(int imacro, int itype, int x, int y, int z) { int imember; int member_x, member_y, member_z; // Every macro can be placed 
until proven otherwise int macro_can_be_placed = TRUE; // Check whether all the members can be placed for (imember = 0; imember < pl_macros[imacro].num_blocks; imember++) { member_x = x + pl_macros[imacro].members[imember].x_offset; member_y = y + pl_macros[imacro].members[imember].y_offset; member_z = z + pl_macros[imacro].members[imember].z_offset; // Check whether the location could accept block of this type // Then check whether the location could still accomodate more blocks // Also check whether the member position is valid, that is the member's location // still within the chip's dimemsion and the member_z is allowed at that location on the grid if (member_x <= nx+1 && member_y <= ny+1 && grid[member_x][member_y].type->index == itype && grid[member_x][member_y].blocks[member_z] == OPEN) { // Can still accomodate blocks here, check the next position continue; } else { // Cant be placed here - skip to the next try macro_can_be_placed = FALSE; break; } } return (macro_can_be_placed); } static int try_place_macro(int itype, int ichoice, int imacro, int * free_locations){ int x, y, z, member_x, member_y, member_z, imember; int macro_placed = FALSE; // Choose a random position for the head x = legal_pos[itype][ichoice].x; y = legal_pos[itype][ichoice].y; z = legal_pos[itype][ichoice].z; // If that location is occupied, do nothing. 
if (grid[x][y].blocks[z] != OPEN) { return (macro_placed); } int macro_can_be_placed = check_macro_can_be_placed(imacro, itype, x, y, z); if (macro_can_be_placed == TRUE) { // Place down the macro macro_placed = TRUE; for (imember = 0; imember < pl_macros[imacro].num_blocks; imember++) { member_x = x + pl_macros[imacro].members[imember].x_offset; member_y = y + pl_macros[imacro].members[imember].y_offset; member_z = z + pl_macros[imacro].members[imember].z_offset; block[pl_macros[imacro].members[imember].blk_index].x = member_x; block[pl_macros[imacro].members[imember].blk_index].y = member_y; block[pl_macros[imacro].members[imember].blk_index].z = member_z; grid[member_x][member_y].blocks[member_z] = pl_macros[imacro].members[imember].blk_index; grid[member_x][member_y].usage++; // Could not ensure that the randomiser would not pick this location again // So, would have to do a lazy removal - whenever I come across a block that could not be placed, // go ahead and remove it from the legal_pos[][] array } // Finish placing all the members in the macro } // End of this choice of legal_pos return (macro_placed); } static void initial_placement_pl_macros(int macros_max_num_tries, int * free_locations) { int macro_placed; int imacro, iblk, itype, itry, ichoice; /* Macros are harder to place. 
Do them first */ for (imacro = 0; imacro < num_pl_macros; imacro++) { // Every macro are not placed in the beginnning macro_placed = FALSE; // Assume that all the blocks in the macro are of the same type iblk = pl_macros[imacro].members[0].blk_index; itype = block[iblk].type->index; if (free_locations[itype] < pl_macros[imacro].num_blocks) { vpr_printf (TIO_MESSAGE_ERROR, "Initial placement failed.\n"); vpr_printf (TIO_MESSAGE_ERROR, "Could not place macro length %d with head block %s (#%d); not enough free locations of type %s (#%d).\n", pl_macros[imacro].num_blocks, block[iblk].name, iblk, type_descriptors[itype].name, itype); vpr_printf (TIO_MESSAGE_INFO, "VPR cannot auto-size for your circuit, please resize the FPGA manually.\n"); exit(1); } // Try to place the macro first, if can be placed - place them, otherwise try again for (itry = 0; itry < macros_max_num_tries && macro_placed == FALSE; itry++) { // Choose a random position for the head ichoice = my_irand(free_locations[itype] - 1); // Try to place the macro macro_placed = try_place_macro(itype, ichoice, imacro, free_locations); } // Finished all tries if (macro_placed == FALSE){ // if a macro still could not be placed after macros_max_num_tries times, // go through the chip exhaustively to find a legal placement for the macro // place the macro on the first location that is legal // then set macro_placed = TRUE; // if there are no legal positions, error out // Exhaustive placement of carry macros for (ichoice = 0; ichoice < free_locations[itype] && macro_placed == FALSE; ichoice++) { // Try to place the macro macro_placed = try_place_macro(itype, ichoice, imacro, free_locations); } // Exhausted all the legal placement position for this macro // If macro could not be placed after exhaustive placement, error out if (macro_placed == FALSE) { // Error out vpr_printf (TIO_MESSAGE_ERROR, "Initial placement failed.\n"); vpr_printf (TIO_MESSAGE_ERROR, "Could not place macro length %d with head block %s (#%d); not 
enough free locations of type %s (#%d).\n", pl_macros[imacro].num_blocks, block[iblk].name, iblk, type_descriptors[itype].name, itype); vpr_printf (TIO_MESSAGE_INFO, "Please manually size the FPGA because VPR can't do this yet.\n"); exit(1); } } else { // This macro has been placed successfully, proceed to place the next macro continue; } } // Finish placing all the pl_macros successfully } static void initial_placement_blocks(int * free_locations, enum e_pad_loc_type pad_loc_type) { /* Place blocks that are NOT a part of any macro. * We'll randomly place each block in the clustered netlist, one by one. */ int iblk, itype; int ichoice, x, y, z; for (iblk = 0; iblk < num_blocks; iblk++) { if (block[iblk].x != -1) { // block placed. continue; } /* Don't do IOs if the user specifies IOs; we'll read those locations later. */ if (!(block[iblk].type == IO_TYPE && pad_loc_type == USER)) { /* Randomly select a free location of the appropriate type * for iblk. We have a linearized list of all the free locations * that can accomodate a block of that type in free_locations[itype]. * Choose one randomly and put iblk there. Then we don't want to pick that * location again, so remove it from the free_locations array. */ itype = block[iblk].type->index; if (free_locations[itype] <= 0) { vpr_printf (TIO_MESSAGE_ERROR, "Initial placement failed.\n"); vpr_printf (TIO_MESSAGE_ERROR, "Could not place block %s (#%d); no free locations of type %s (#%d).\n", block[iblk].name, iblk, type_descriptors[itype].name, itype); exit(1); } ichoice = my_irand(free_locations[itype] - 1); x = legal_pos[itype][ichoice].x; y = legal_pos[itype][ichoice].y; z = legal_pos[itype][ichoice].z; // Make sure that the position is OPEN before placing the block down assert (grid[x][y].blocks[z] == OPEN); grid[x][y].blocks[z] = iblk; grid[x][y].usage++; block[iblk].x = x; block[iblk].y = y; block[iblk].z = z; /* Ensure randomizer doesn't pick this location again, since it's occupied. 
Could shift all the * legal positions in legal_pos to remove the entry (choice) we just used, but faster to * just move the last entry in legal_pos to the spot we just used and decrement the * count of free_locations. */ legal_pos[itype][ichoice] = legal_pos[itype][free_locations[itype] - 1]; /* overwrite used block position */ free_locations[itype]--; } } } static void initial_placement(enum e_pad_loc_type pad_loc_type, char *pad_loc_file) { /* Randomly places the blocks to create an initial placement. We rely on * the legal_pos array already being loaded. That legal_pos[itype] is an * array that gives every legal value of (x,y,z) that can accomodate a block. * The number of such locations is given by num_legal_pos[itype]. */ int i, j, k, iblk, itype, x, y, z, ichoice; int *free_locations; /* [0..num_types-1]. * Stores how many locations there are for this type that *might* still be free. * That is, this stores the number of entries in legal_pos[itype] that are worth considering * as you look for a free location. */ free_locations = (int *) my_malloc(num_types * sizeof(int)); for (itype = 0; itype < num_types; itype++) { free_locations[itype] = num_legal_pos[itype]; } /* We'll use the grid to record where everything goes. Initialize to the grid has no * blocks placed anywhere. */ for (i = 0; i <= nx + 1; i++) { for (j = 0; j <= ny + 1; j++) { grid[i][j].usage = 0; itype = grid[i][j].type->index; for (k = 0; k < type_descriptors[itype].capacity; k++) { grid[i][j].blocks[k] = OPEN; } } } /* Similarly, mark all blocks as not being placed yet. 
*/ for (iblk = 0; iblk < num_blocks; iblk++) { block[iblk].x = -1; block[iblk].y = -1; block[iblk].z = -1; } initial_placement_pl_macros(MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY, free_locations); // All the macros are placed, update the legal_pos[][] array for (itype = 0; itype < num_types; itype++) { assert (free_locations[itype] >= 0); for (ichoice = 0; ichoice < free_locations[itype]; ichoice++) { x = legal_pos[itype][ichoice].x; y = legal_pos[itype][ichoice].y; z = legal_pos[itype][ichoice].z; // Check if that location is occupied. If it is, remove from legal_pos if (grid[x][y].blocks[z] != OPEN) { legal_pos[itype][ichoice] = legal_pos[itype][free_locations[itype] - 1]; free_locations[itype]--; // After the move, I need to check this particular entry again ichoice--; continue; } } } // Finish updating the legal_pos[][] and free_locations[] array initial_placement_blocks(free_locations, pad_loc_type); if (pad_loc_type == USER) { read_user_pad_loc(pad_loc_file); } /* Restore legal_pos */ load_legal_placements(); #ifdef VERBOSE vpr_printf(TIO_MESSAGE_INFO, "At end of initial_placement.\n"); if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_INITIAL_CLB_PLACEMENT)) { print_clb_placement(getEchoFileName(E_ECHO_INITIAL_CLB_PLACEMENT)); } #endif free(free_locations); } static void free_fast_cost_update(void) { int i; for (i = 0; i <= ny; i++) free(chanx_place_cost_fac[i]); free(chanx_place_cost_fac); chanx_place_cost_fac = NULL; for (i = 0; i <= nx; i++) free(chany_place_cost_fac[i]); free(chany_place_cost_fac); chany_place_cost_fac = NULL; } static void alloc_and_load_for_fast_cost_update(float place_cost_exp) { /* Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac * * arrays with the inverse of the average number of tracks per channel * * between [subhigh] and [sublow]. This is only useful for the cost * * function that takes the length of the net bounding box in each * * dimension divided by the average number of tracks in that direction. 
* * For other cost functions, you don't have to bother calling this * * routine; when using the cost function described above, however, you * * must always call this routine after you call init_chan and before * * you do any placement cost determination. The place_cost_exp factor * * specifies to what power the width of the channel should be taken -- * * larger numbers make narrower channels more expensive. */ int low, high, i; /* Access arrays below as chan?_place_cost_fac[subhigh][sublow]. Since * * subhigh must be greater than or equal to sublow, we only need to * * allocate storage for the lower half of a matrix. */ chanx_place_cost_fac = (float **) my_malloc((ny + 1) * sizeof(float *)); for (i = 0; i <= ny; i++) chanx_place_cost_fac[i] = (float *) my_malloc((i + 1) * sizeof(float)); chany_place_cost_fac = (float **) my_malloc((nx + 1) * sizeof(float *)); for (i = 0; i <= nx; i++) chany_place_cost_fac[i] = (float *) my_malloc((i + 1) * sizeof(float)); /* First compute the number of tracks between channel high and channel * * low, inclusive, in an efficient manner. */ chanx_place_cost_fac[0][0] = chan_width_x[0]; for (high = 1; high <= ny; high++) { chanx_place_cost_fac[high][high] = chan_width_x[high]; for (low = 0; low < high; low++) { chanx_place_cost_fac[high][low] = chanx_place_cost_fac[high - 1][low] + chan_width_x[high]; } } /* Now compute the inverse of the average number of tracks per channel * * between high and low. The cost function divides by the average * * number of tracks per channel, so by storing the inverse I convert * * this to a faster multiplication. Take this final number to the * * place_cost_exp power -- numbers other than one mean this is no * * longer a simple "average number of tracks"; it is some power of * * that, allowing greater penalization of narrow channels. */ for (high = 0; high <= ny; high++) for (low = 0; low <= high; low++) { chanx_place_cost_fac[high][low] = (high - low + 1.) 
/ chanx_place_cost_fac[high][low]; chanx_place_cost_fac[high][low] = pow( (double) chanx_place_cost_fac[high][low], (double) place_cost_exp); } /* Now do the same thing for the y-directed channels. First get the * * number of tracks between channel high and channel low, inclusive. */ chany_place_cost_fac[0][0] = chan_width_y[0]; for (high = 1; high <= nx; high++) { chany_place_cost_fac[high][high] = chan_width_y[high]; for (low = 0; low < high; low++) { chany_place_cost_fac[high][low] = chany_place_cost_fac[high - 1][low] + chan_width_y[high]; } } /* Now compute the inverse of the average number of tracks per channel * * between high and low. Take to specified power. */ for (high = 0; high <= nx; high++) for (low = 0; low <= high; low++) { chany_place_cost_fac[high][low] = (high - low + 1.) / chany_place_cost_fac[high][low]; chany_place_cost_fac[high][low] = pow( (double) chany_place_cost_fac[high][low], (double) place_cost_exp); } } static void check_place(float bb_cost, float timing_cost, enum e_place_algorithm place_algorithm, float delay_cost) { /* Checks that the placement has not confused our data structures. * * i.e. the clb and block structures agree about the locations of * * every block, blocks are in legal spots, etc. Also recomputes * * the final placement cost from scratch and makes sure it is * * within roundoff of what we think the cost is. 
*/ static int *bdone; int i, j, k, error = 0, bnum; float bb_cost_check; int usage_check; float timing_cost_check, delay_cost_check; int imacro, imember, head_iblk, member_iblk, member_x, member_y, member_z; bb_cost_check = comp_bb_cost(CHECK); vpr_printf(TIO_MESSAGE_INFO, "bb_cost recomputed from scratch: %g\n", bb_cost_check); if (fabs(bb_cost_check - bb_cost) > bb_cost * ERROR_TOL) { vpr_printf(TIO_MESSAGE_ERROR, "bb_cost_check: %g and bb_cost: %g differ in check_place.\n", bb_cost_check, bb_cost); error++; } if (place_algorithm == NET_TIMING_DRIVEN_PLACE || place_algorithm == PATH_TIMING_DRIVEN_PLACE) { comp_td_costs(&timing_cost_check, &delay_cost_check); vpr_printf(TIO_MESSAGE_INFO, "timing_cost recomputed from scratch: %g\n", timing_cost_check); if (fabs(timing_cost_check - timing_cost) > timing_cost * ERROR_TOL) { vpr_printf(TIO_MESSAGE_ERROR, "timing_cost_check: %g and timing_cost: %g differ in check_place.\n", timing_cost_check, timing_cost); error++; } vpr_printf(TIO_MESSAGE_INFO, "delay_cost recomputed from scratch: %g\n", delay_cost_check); if (fabs(delay_cost_check - delay_cost) > delay_cost * ERROR_TOL) { vpr_printf(TIO_MESSAGE_ERROR, "delay_cost_check: %g and delay_cost: %g differ in check_place.\n", delay_cost_check, delay_cost); error++; } } bdone = (int *) my_malloc(num_blocks * sizeof(int)); for (i = 0; i < num_blocks; i++) bdone[i] = 0; /* Step through grid array. Check it against block array. */ for (i = 0; i <= (nx + 1); i++) for (j = 0; j <= (ny + 1); j++) { if (grid[i][j].usage > grid[i][j].type->capacity) { vpr_printf(TIO_MESSAGE_ERROR, "Block at grid location (%d,%d) overused. 
Usage is %d.\n", i, j, grid[i][j].usage); error++; } usage_check = 0; for (k = 0; k < grid[i][j].type->capacity; k++) { bnum = grid[i][j].blocks[k]; if (EMPTY == bnum) continue; if (block[bnum].type != grid[i][j].type) { vpr_printf(TIO_MESSAGE_ERROR, "Block %d type does not match grid location (%d,%d) type.\n", bnum, i, j); error++; } if ((block[bnum].x != i) || (block[bnum].y != j)) { vpr_printf(TIO_MESSAGE_ERROR, "Block %d location conflicts with grid(%d,%d) data.\n", bnum, i, j); error++; } ++usage_check; bdone[bnum]++; } if (usage_check != grid[i][j].usage) { vpr_printf(TIO_MESSAGE_ERROR, "Location (%d,%d) usage is %d, but has actual usage %d.\n", i, j, grid[i][j].usage, usage_check); error++; } } /* Check that every block exists in the grid and block arrays somewhere. */ for (i = 0; i < num_blocks; i++) if (bdone[i] != 1) { vpr_printf(TIO_MESSAGE_ERROR, "Block %d listed %d times in data structures.\n", i, bdone[i]); error++; } free(bdone); /* Check the pl_macro placement are legal - blocks are in the proper relative position. 
*/ for (imacro = 0; imacro < num_pl_macros; imacro++) { head_iblk = pl_macros[imacro].members[0].blk_index; for (imember = 0; imember < pl_macros[imacro].num_blocks; imember++) { member_iblk = pl_macros[imacro].members[imember].blk_index; // Compute the suppossed member's x,y,z location member_x = block[head_iblk].x + pl_macros[imacro].members[imember].x_offset; member_y = block[head_iblk].y + pl_macros[imacro].members[imember].y_offset; member_z = block[head_iblk].z + pl_macros[imacro].members[imember].z_offset; // Check the block data structure first if (block[member_iblk].x != member_x || block[member_iblk].y != member_y || block[member_iblk].z != member_z) { vpr_printf(TIO_MESSAGE_ERROR, "Block %d in pl_macro #%d is not placed in the proper orientation.\n", member_iblk, imacro); error++; } // Then check the grid data structure if (grid[member_x][member_y].blocks[member_z] != member_iblk) { vpr_printf(TIO_MESSAGE_ERROR, "Block %d in pl_macro #%d is not placed in the proper orientation.\n", member_iblk, imacro); error++; } } // Finish going through all the members } // Finish going through all the macros if (error == 0) { vpr_printf(TIO_MESSAGE_INFO, "\n"); vpr_printf(TIO_MESSAGE_INFO, "Completed placement consistency check successfully.\n"); vpr_printf(TIO_MESSAGE_INFO, "\n"); vpr_printf(TIO_MESSAGE_INFO, "Swaps called: %d\n", num_ts_called); #ifdef PRINT_REL_POS_DISTR print_relative_pos_distr(void); #endif } else { vpr_printf(TIO_MESSAGE_INFO, "\n"); vpr_printf(TIO_MESSAGE_ERROR, "Completed placement consistency check, %d errors found.\n", error); vpr_printf(TIO_MESSAGE_INFO, "Aborting program.\n"); exit(1); } } #ifdef VERBOSE static void print_clb_placement(const char *fname) { /* Prints out the clb placements to a file. 
*/ FILE *fp; int i; fp = my_fopen(fname, "w", 0); fprintf(fp, "Complex block placements:\n\n"); fprintf(fp, "Block #\tName\t(X, Y, Z).\n"); for(i = 0; i < num_blocks; i++) { fprintf(fp, "#%d\t%s\t(%d, %d, %d).\n", i, block[i].name, block[i].x, block[i].y, block[i].z); } fclose(fp); } #endif static void free_try_swap_arrays(void) { if(ts_bb_coord_new != NULL) { free(ts_bb_coord_new); free(ts_bb_edge_new); free(ts_nets_to_update); free(blocks_affected.moved_blocks); free(bb_updated_before); ts_bb_coord_new = NULL; ts_bb_edge_new = NULL; ts_nets_to_update = NULL; blocks_affected.moved_blocks = NULL; blocks_affected.num_moved_blocks = 0; bb_updated_before = NULL; } }