3186 lines
113 KiB
C
Executable File
3186 lines
113 KiB
C
Executable File
/*#include <stdlib.h> */
|
|
#include <stdio.h>
|
|
#include <math.h>
|
|
#include <assert.h>
|
|
#include "util.h"
|
|
#include "vpr_types.h"
|
|
#include "globals.h"
|
|
#include "place.h"
|
|
#include "read_place.h"
|
|
#include "draw.h"
|
|
#include "place_and_route.h"
|
|
#include "net_delay.h"
|
|
#include "path_delay.h"
|
|
#include "timing_place_lookup.h"
|
|
#include "timing_place.h"
|
|
#include "place_stats.h"
|
|
#include "read_xml_arch_file.h"
|
|
#include "ReadOptions.h"
|
|
#include "vpr_utils.h"
|
|
#include "place_macro.h"
|
|
|
|
/************** Types and defines local to place.c ***************************/
|
|
|
|
/* Cut off for incremental bounding box updates.                          *
 * 4 is fastest -- I checked.                                             *
 * To turn off incremental bounding box updates, set this to a huge value */
#define SMALL_NET 4

/* This defines the error tolerance for floating point variables used in  *
 * cost computation. 0.01 means that there is a 1% error tolerance.       */
#define ERROR_TOL .01

/* This defines the maximum number of swap attempts before invoking the   *
 * once-in-a-while placement legality check as well as the floating point *
 * round-off check.                                                       */
#define MAX_MOVES_BEFORE_RECOMPUTE 50000

/* The maximum number of tries when trying to place a carry chain at a    *
 * random location before trying exhaustive placement - find the first    *
 * legal position and place it during initial placement.                  */
#define MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY 4
|
|
|
|
/* Flags for the states of the bounding box.                              *
 * Stored as char for memory efficiency.                                  */
#define NOT_UPDATED_YET 'N'
#define UPDATED_ONCE 'U'
#define GOT_FROM_SCRATCH 'S'
|
|
|
|
/* Selects how comp_bb_cost computes the bounding box cost.               *
 *   NORMAL: use the method that generates updateable bounding boxes,     *
 *           for speed.                                                   *
 *   CHECK:  compute all bounding boxes from scratch using a very simple  *
 *           routine, to allow checks of the other costs.                 */
enum cost_methods {
	NORMAL, CHECK
};
|
|
|
|
/* Outcome of a single placement swap attempt. A swap attempt could be    *
 * rejected, accepted or aborted (due to the limitations placed on the    *
 * carry chain support at this point).                                    */
enum swap_result {
	REJECTED, ACCEPTED, ABORTED
};
|
|
|
|
/* Stops inverse timing cost from going to infinity with very lax timing constraints,
 which avoids multiplying by a gigantic inverse_prev_timing_cost when auto-normalizing.
 The exact value of this cost has relatively little impact, but should not be
 large enough to be on the order of timing costs for normal constraints. */
#define MAX_INV_TIMING_COST 1.e9
|
|
|
|
/********************** Data Structure Definition ***************************/
|
|
/* Stores the information of the move for a block that is       *
 * moved during placement.                                      *
 * block_num: the index of the moved block                      *
 * xold: the x_coord that the block is moved from               *
 * xnew: the x_coord that the block is moved to                 *
 * yold: the y_coord that the block is moved from               *
 * ynew: the y_coord that the block is moved to                 *
 * zold: the z_coord that the block is moved from               *
 * znew: the z_coord that the block is moved to                 *
 * swapped_to_empty: flag recorded with the move.               *
 *       NOTE(review): presumably non-zero when the destination *
 *       location held no block -- confirm against the code     *
 *       that fills this struct.                                *
 */
typedef struct s_pl_moved_block {
	int block_num;
	int xold;
	int xnew;
	int yold;
	int ynew;
	int zold;
	int znew;
	int swapped_to_empty;
} t_pl_moved_block;
|
|
|
|
/* Stores the list of blocks to be moved in a swap during       *
 * placement.                                                   *
 * num_moved_blocks: total number of blocks moved when          *
 *                   swapping two blocks.                       *
 * moved_blocks: a list of moved blocks data structure with     *
 *               information on the move.                       *
 *               [0...num_moved_blocks-1]                       *
 */
typedef struct s_pl_blocks_to_be_moved {
	int num_moved_blocks;
	/* Spelled with the struct tag; this is the same type as     *
	 * t_pl_moved_block *.                                       */
	struct s_pl_moved_block *moved_blocks;
} t_pl_blocks_to_be_moved;
|
|
|
|
|
|
/********************** Variables local to place.c ***************************/
|
|
|
|
/* Cost of a net, and a temporary cost of a net used during move assessment. */
/* Both arrays are indexed [0..num_nets-1]. */
static float *net_cost = NULL;
static float *temp_net_cost = NULL;
|
|
|
|
/* A legal position for a block type, given as x, y and z coordinates. */
typedef struct s_legal_pos {
	int x;
	int y;
	int z;
} t_legal_pos;

/* Legal positions for each type: [0..num_types-1][0..type_tsize - 1] */
static t_legal_pos **legal_pos = NULL;

/* NOTE(review): presumably [0..num_types-1], holding the number of legal *
 * positions for each type -- confirm against the allocation code.        */
static int *num_legal_pos = NULL;
|
|
|
|
/* [0...num_nets-1]                                                            *
 * A flag array to indicate whether the specific bounding box has been updated *
 * in this particular swap or not. If it has been updated before, the code     *
 * must use the updated data, instead of the out-of-date data passed into the  *
 * subroutine, particularly used in try_swap(). The value NOT_UPDATED_YET      *
 * indicates that the net has not been updated before; UPDATED_ONCE indicates  *
 * that the net has been updated once, so if it is going to be updated again   *
 * the values from the previous update must be used. GOT_FROM_SCRATCH is only  *
 * applicable for nets larger than SMALL_NET and it indicates that the         *
 * particular bounding box could not be updated incrementally, hence the       *
 * bounding box was got from scratch, so the bounding box is definitely        *
 * right; DO NOT update it again.                                              */
static char *bb_updated_before = NULL;
|
|
|
|
/* [0..num_nets-1][1..num_pins-1]. What is the value of the timing   */
/* driven portion of the cost function. These arrays will be set to  */
/* (criticality * delay) for each point to point connection.         */
static float **point_to_point_timing_cost = NULL;
static float **temp_point_to_point_timing_cost = NULL;

/* [0..num_nets-1][1..num_pins-1]. What is the value of the delay    */
/* for each connection in the circuit.                               */
static float **point_to_point_delay_cost = NULL;
static float **temp_point_to_point_delay_cost = NULL;
|
|
|
|
/* [0..num_blocks-1][0..pins_per_clb-1]. Indicates which pin on the net   */
/* this block corresponds to. This is only required during timing-driven  */
/* placement. It is used to allow us to update individual connections on  */
/* each net.                                                              */
static int **net_pin_index = NULL;
|
|
|
|
/* [0..num_nets-1].  Store the bounding box coordinates and the number of *
 * blocks on each of a net's bounding box (to allow efficient updates),   *
 * respectively.                                                          */
static struct s_bb *bb_coords = NULL;
static struct s_bb *bb_num_on_edges = NULL;
|
|
|
|
/* Store the information on the blocks to be moved in a swap during *
|
|
* placement, in the form of array of structs instead of struct with *
|
|
* arrays for cache effifiency *
|
|
*/
|
|
static t_pl_blocks_to_be_moved blocks_affected;
|
|
|
|
/* The arrays below are used to precompute the inverse of the average   *
 * number of tracks per channel between [subhigh] and [sublow].  Access *
 * them as chan?_place_cost_fac[subhigh][sublow].  They are used to     *
 * speed up the computation of the cost function that takes the length  *
 * of the net bounding box in each dimension, divided by the average    *
 * number of tracks in that direction; for other cost functions they    *
 * will never be used.                                                  *
 * [0...ny] [0...nx]                                                    */
static float **chanx_place_cost_fac = NULL;
static float **chany_place_cost_fac = NULL;
|
|
|
|
/* The following arrays are used by the try_swap function for speed.   */
/* [0...num_nets-1]                                                    */
static struct s_bb *ts_bb_coord_new = NULL;
static struct s_bb *ts_bb_edge_new = NULL;
static int *ts_nets_to_update = NULL;
|
|
|
|
/* The pl_macros array stores all the carry chains placement macros. *
|
|
* [0...num_pl_macros-1] */
|
|
static t_pl_macro * pl_macros = NULL;
|
|
static int num_pl_macros;
|
|
|
|
/* These file-scoped variables keep track of the number of swaps       *
 * rejected, accepted or aborted. The total number of swap attempts    *
 * is the sum of the three numbers.                                    */
static int num_swap_rejected = 0;
static int num_swap_accepted = 0;
static int num_swap_aborted = 0;

/* NOTE(review): presumably counts calls to try_swap -- confirm at the *
 * site that increments it. Reset to 0 together with the counters      *
 * above at the start of try_place.                                    */
static int num_ts_called = 0;
|
|
|
|
/* Expected crossing counts for nets with different #'s of pins.  From *
 * ICCAD 94 pp. 690 - 695 (with linear interpolation applied by me).   *
 * Multiplied to bounding box of a net to better estimate wire length  *
 * for higher fanout nets. Each entry is the correction factor for the *
 * fanout index-1.                                                     */
static const float cross_count[50] = {
	/* [ 0.. 9] */ 1.0, 1.0, 1.0, 1.0828, 1.1536, 1.2206, 1.2823, 1.3385, 1.3991, 1.4493,
	/* [10..19] */ 1.4974, 1.5455, 1.5937, 1.6418, 1.6899, 1.7304, 1.7709, 1.8114, 1.8519, 1.8924,
	/* [20..29] */ 1.9288, 1.9652, 2.0015, 2.0379, 2.0743, 2.1061, 2.1379, 2.1698, 2.2016, 2.2334,
	/* [30..39] */ 2.2646, 2.2958, 2.3271, 2.3583, 2.3895, 2.4187, 2.4479, 2.4772, 2.5064, 2.5356,
	/* [40..49] */ 2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410, 2.7671, 2.7933
};
|
|
|
|
/********************* Static subroutines local to place.c *******************/
|
|
#ifdef VERBOSE
|
|
static void print_clb_placement(const char *fname);
|
|
#endif
|
|
|
|
static void alloc_and_load_placement_structs(
|
|
float place_cost_exp, float ***old_region_occ_x,
|
|
float ***old_region_occ_y, struct s_placer_opts placer_opts,
|
|
t_direct_inf *directs, int num_directs);
|
|
|
|
static void alloc_and_load_try_swap_structs();
|
|
|
|
static void free_placement_structs(
|
|
float **old_region_occ_x, float **old_region_occ_y,
|
|
struct s_placer_opts placer_opts);
|
|
|
|
static void alloc_and_load_for_fast_cost_update(float place_cost_exp);
|
|
|
|
static void free_fast_cost_update(void);
|
|
|
|
static void alloc_legal_placements();
|
|
static void load_legal_placements();
|
|
|
|
static void free_legal_placements();
|
|
|
|
static int check_macro_can_be_placed(int imacro, int itype, int x, int y, int z);
|
|
|
|
static int try_place_macro(int itype, int ichoice, int imacro, int * free_locations);
|
|
|
|
static void initial_placement_pl_macros(int macros_max_num_tries, int * free_locations);
|
|
|
|
static void initial_placement_blocks(int * free_locations, enum e_pad_loc_type pad_loc_type);
|
|
|
|
static void initial_placement(enum e_pad_loc_type pad_loc_type,
|
|
char *pad_loc_file);
|
|
|
|
static float comp_bb_cost(enum cost_methods method);
|
|
|
|
static int setup_blocks_affected(int b_from, int x_to, int y_to, int z_to);
|
|
|
|
static int find_affected_blocks(int b_from, int x_to, int y_to, int z_to);
|
|
|
|
static enum swap_result try_swap(float t, float *cost, float *bb_cost, float *timing_cost,
|
|
float rlim, float **old_region_occ_x,
|
|
float **old_region_occ_y,
|
|
enum e_place_algorithm place_algorithm, float timing_tradeoff,
|
|
float inverse_prev_bb_cost, float inverse_prev_timing_cost,
|
|
float *delay_cost);
|
|
|
|
static void check_place(float bb_cost, float timing_cost,
|
|
enum e_place_algorithm place_algorithm,
|
|
float delay_cost);
|
|
|
|
static float starting_t(float *cost_ptr, float *bb_cost_ptr,
|
|
float *timing_cost_ptr, float **old_region_occ_x,
|
|
float **old_region_occ_y,
|
|
struct s_annealing_sched annealing_sched, int max_moves, float rlim,
|
|
enum e_place_algorithm place_algorithm, float timing_tradeoff,
|
|
float inverse_prev_bb_cost, float inverse_prev_timing_cost,
|
|
float *delay_cost_ptr);
|
|
|
|
static void update_t(float *t, float std_dev, float rlim, float success_rat,
|
|
struct s_annealing_sched annealing_sched);
|
|
|
|
static void update_rlim(float *rlim, float success_rat);
|
|
|
|
static int exit_crit(float t, float cost,
|
|
struct s_annealing_sched annealing_sched);
|
|
|
|
static int count_connections(void);
|
|
|
|
static double get_std_dev(int n, double sum_x_squared, double av_x);
|
|
|
|
static float recompute_bb_cost(void);
|
|
|
|
static float comp_td_point_to_point_delay(int inet, int ipin);
|
|
|
|
static void update_td_cost(void);
|
|
|
|
static void comp_delta_td_cost(float *delta_timing, float *delta_delay);
|
|
|
|
static void comp_td_costs(float *timing_cost, float *connection_delay_sum);
|
|
|
|
static enum swap_result assess_swap(float delta_c, float t);
|
|
|
|
static boolean find_to(int x_from, int y_from, t_type_ptr type, float rlim, int *x_to, int *y_to);
|
|
|
|
static void get_non_updateable_bb(int inet, struct s_bb *bb_coord_new);
|
|
|
|
static void update_bb(int inet, struct s_bb *bb_coord_new,
|
|
struct s_bb *bb_edge_new, int xold, int yold, int xnew, int ynew);
|
|
|
|
static int find_affected_nets(int *nets_to_update);
|
|
|
|
static float get_net_cost(int inet, struct s_bb *bb_ptr);
|
|
|
|
static void get_bb_from_scratch(int inet, struct s_bb *coords,
|
|
struct s_bb *num_on_edges);
|
|
|
|
static double get_net_wirelength_estimate(int inet, struct s_bb *bbptr);
|
|
|
|
static void free_try_swap_arrays(void);
|
|
|
|
|
|
/*****************************************************************************/
|
|
/* RESEARCH TODO: Bounding Box and rlim need to be redone for heterogeneous to prevent a QoR penalty */
|
|
void try_place(struct s_placer_opts placer_opts,
|
|
struct s_annealing_sched annealing_sched,
|
|
t_chan_width_dist chan_width_dist, struct s_router_opts router_opts,
|
|
struct s_det_routing_arch det_routing_arch, t_segment_inf * segment_inf,
|
|
t_timing_inf timing_inf, t_direct_inf *directs, int num_directs) {
|
|
|
|
/* Does almost all the work of placing a circuit. Width_fac gives the *
|
|
* width of the widest channel. Place_cost_exp says what exponent the *
|
|
* width should be taken to when calculating costs. This allows a *
|
|
* greater bias for anisotropic architectures. */
|
|
|
|
int tot_iter, inner_iter, success_sum, move_lim, moves_since_cost_recompute, width_fac,
|
|
num_connections, inet, ipin, outer_crit_iter_count, inner_crit_iter_count,
|
|
inner_recompute_limit, swap_result;
|
|
float t, success_rat, rlim, cost, timing_cost, bb_cost, new_bb_cost, new_timing_cost,
|
|
delay_cost, new_delay_cost, place_delay_value, inverse_prev_bb_cost, inverse_prev_timing_cost,
|
|
oldt, **old_region_occ_x, **old_region_occ_y, **net_delay = NULL, crit_exponent,
|
|
first_rlim, final_rlim, inverse_delta_rlim, critical_path_delay = UNDEFINED,
|
|
**remember_net_delay_original_ptr; /*used to free net_delay if it is re-assigned */
|
|
double av_cost, av_bb_cost, av_timing_cost, av_delay_cost, sum_of_squares, std_dev;
|
|
int total_swap_attempts;
|
|
float reject_rate;
|
|
float accept_rate;
|
|
float abort_rate;
|
|
char msg[BUFSIZE];
|
|
t_slack * slacks = NULL;
|
|
|
|
/* Allocated here because it goes into timing critical code where each memory allocation is expensive */
|
|
|
|
remember_net_delay_original_ptr = NULL; /*prevents compiler warning */
|
|
|
|
/* init file scope variables */
|
|
num_swap_rejected = 0;
|
|
num_swap_accepted = 0;
|
|
num_swap_aborted = 0;
|
|
num_ts_called = 0;
|
|
|
|
if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
|
|
|| placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE
|
|
|| placer_opts.enable_timing_computations) {
|
|
/*do this before the initial placement to avoid messing up the initial placement */
|
|
slacks = alloc_lookups_and_criticalities(chan_width_dist, router_opts,
|
|
det_routing_arch, segment_inf, timing_inf, &net_delay, directs, num_directs);
|
|
|
|
remember_net_delay_original_ptr = net_delay;
|
|
|
|
/*#define PRINT_LOWER_BOUND */
|
|
#ifdef PRINT_LOWER_BOUND
|
|
/*print the crit_path, assuming delay between blocks that are*
|
|
*block_dist apart*/
|
|
|
|
if (placer_opts.block_dist <= nx)
|
|
place_delay_value =
|
|
delta_clb_to_clb[placer_opts.block_dist][0];
|
|
else if (placer_opts.block_dist <= ny)
|
|
place_delay_value =
|
|
delta_clb_to_clb[0][placer_opts.block_dist];
|
|
else
|
|
place_delay_value = delta_clb_to_clb[nx][ny];
|
|
|
|
vpr_printf(TIO_MESSAGE_INFO, "\n");
|
|
vpr_printf(TIO_MESSAGE_INFO, "Lower bound assuming delay of %g\n", place_delay_value);
|
|
|
|
load_constant_net_delay(net_delay, place_delay_value);
|
|
load_timing_graph_net_delays(net_delay);
|
|
do_timing_analysis(slacks, FALSE, FALSE, TRUE);
|
|
|
|
if (getEchoEnabled()) {
|
|
if(isEchoFileEnabled(E_ECHO_PLACEMENT_CRITICAL_PATH))
|
|
print_critical_path(getEchoFileName(E_ECHO_PLACEMENT_CRITICAL_PATH));
|
|
if(isEchoFileEnabled(E_ECHO_PLACEMENT_LOWER_BOUND_SINK_DELAYS))
|
|
print_sink_delays(getEchoFileName(E_ECHO_PLACEMENT_LOWER_BOUND_SINK_DELAYS));
|
|
if(isEchoFileEnabled(E_ECHO_PLACEMENT_LOGIC_SINK_DELAYS))
|
|
print_sink_delays(getEchoFileName(E_ECHO_PLACEMENT_LOGIC_SINK_DELAYS));
|
|
}
|
|
|
|
/*also print sink delays assuming 0 delay between blocks,
|
|
* this tells us how much logic delay is on each path */
|
|
|
|
load_constant_net_delay(net_delay, 0);
|
|
load_timing_graph_net_delays(net_delay);
|
|
do_timing_analysis(slacks, FALSE, FALSE, TRUE);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
width_fac = placer_opts.place_chan_width;
|
|
|
|
init_chan(width_fac, chan_width_dist);
|
|
|
|
alloc_and_load_placement_structs(
|
|
placer_opts.place_cost_exp,
|
|
&old_region_occ_x, &old_region_occ_y, placer_opts,
|
|
directs, num_directs);
|
|
|
|
initial_placement(placer_opts.pad_loc_type, placer_opts.pad_loc_file);
|
|
init_draw_coords((float) width_fac);
|
|
|
|
/* Storing the number of pins on each type of block makes the swap routine *
|
|
* slightly more efficient. */
|
|
|
|
/* Gets initial cost and loads bounding boxes. */
|
|
|
|
if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
|
|
|| placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
|
|
bb_cost = comp_bb_cost(NORMAL);
|
|
|
|
crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */
|
|
|
|
num_connections = count_connections();
|
|
vpr_printf(TIO_MESSAGE_INFO, "\n");
|
|
vpr_printf(TIO_MESSAGE_INFO, "There are %d point to point connections in this circuit.\n", num_connections);
|
|
vpr_printf(TIO_MESSAGE_INFO, "\n");
|
|
|
|
if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE) {
|
|
for (inet = 0; inet < num_nets; inet++)
|
|
for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++)
|
|
timing_place_crit[inet][ipin] = 0; /*dummy crit values */
|
|
|
|
comp_td_costs(&timing_cost, &delay_cost); /*first pass gets delay_cost, which is used
|
|
* in criticality computations in the next call
|
|
* to comp_td_costs. */
|
|
place_delay_value = delay_cost / num_connections; /*used for computing criticalities */
|
|
load_constant_net_delay(net_delay, place_delay_value, clb_net,
|
|
num_nets);
|
|
|
|
} else
|
|
place_delay_value = 0;
|
|
|
|
if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
|
|
net_delay = point_to_point_delay_cost; /*this keeps net_delay up to date with *
|
|
* *the same values that the placer is using *
|
|
* *point_to_point_delay_cost is computed each*
|
|
* *time that comp_td_costs is called, and is *
|
|
* *also updated after any swap is accepted */
|
|
}
|
|
|
|
load_timing_graph_net_delays(net_delay);
|
|
do_timing_analysis(slacks, FALSE, FALSE, FALSE);
|
|
load_criticalities(slacks, crit_exponent);
|
|
if (getEchoEnabled()) {
|
|
if(isEchoFileEnabled(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH))
|
|
print_timing_graph(getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH));
|
|
if(isEchoFileEnabled(E_ECHO_INITIAL_PLACEMENT_SLACK))
|
|
print_slack(slacks->slack, FALSE, getEchoFileName(E_ECHO_INITIAL_PLACEMENT_SLACK));
|
|
if(isEchoFileEnabled(E_ECHO_INITIAL_PLACEMENT_CRITICALITY))
|
|
print_criticality(slacks, FALSE, getEchoFileName(E_ECHO_INITIAL_PLACEMENT_CRITICALITY));
|
|
}
|
|
outer_crit_iter_count = 1;
|
|
|
|
/*now we can properly compute costs */
|
|
comp_td_costs(&timing_cost, &delay_cost); /*also vpr_printf proper values into point_to_point_delay_cost */
|
|
|
|
inverse_prev_timing_cost = 1 / timing_cost;
|
|
inverse_prev_bb_cost = 1 / bb_cost;
|
|
cost = 1; /*our new cost function uses normalized values of */
|
|
/*bb_cost and timing_cost, the value of cost will be reset */
|
|
/*to 1 at each temperature when *_TIMING_DRIVEN_PLACE is true */
|
|
} else { /*BOUNDING_BOX_PLACE */
|
|
cost = bb_cost = comp_bb_cost(NORMAL);
|
|
timing_cost = 0;
|
|
delay_cost = 0;
|
|
place_delay_value = 0;
|
|
outer_crit_iter_count = 0;
|
|
num_connections = 0;
|
|
crit_exponent = 0;
|
|
|
|
inverse_prev_timing_cost = 0; /*inverses not used */
|
|
inverse_prev_bb_cost = 0;
|
|
}
|
|
|
|
move_lim = (int) (annealing_sched.inner_num * pow(num_blocks, 1.3333));
|
|
|
|
if (placer_opts.inner_loop_recompute_divider != 0)
|
|
inner_recompute_limit = (int) (0.5
|
|
+ (float) move_lim
|
|
/ (float) placer_opts.inner_loop_recompute_divider);
|
|
else
|
|
/*don't do an inner recompute */
|
|
inner_recompute_limit = move_lim + 1;
|
|
|
|
/* Sometimes I want to run the router with a random placement. Avoid *
|
|
* using 0 moves to stop division by 0 and 0 length vector problems, *
|
|
* by setting move_lim to 1 (which is still too small to do any *
|
|
* significant optimization). */
|
|
|
|
if (move_lim <= 0)
|
|
move_lim = 1;
|
|
|
|
rlim = (float) std::max(nx + 1, ny + 1);
|
|
|
|
first_rlim = rlim; /*used in timing-driven placement for exponent computation */
|
|
final_rlim = 1;
|
|
inverse_delta_rlim = 1 / (first_rlim - final_rlim);
|
|
|
|
t = starting_t(&cost, &bb_cost, &timing_cost,
|
|
old_region_occ_x, old_region_occ_y,
|
|
annealing_sched, move_lim, rlim,
|
|
placer_opts.place_algorithm, placer_opts.timing_tradeoff,
|
|
inverse_prev_bb_cost, inverse_prev_timing_cost, &delay_cost);
|
|
tot_iter = 0;
|
|
moves_since_cost_recompute = 0;
|
|
vpr_printf(TIO_MESSAGE_INFO, "Initial placement cost: %g bb_cost: %g td_cost: %g delay_cost: %g\n",
|
|
cost, bb_cost, timing_cost, delay_cost);
|
|
vpr_printf(TIO_MESSAGE_INFO, "\n");
|
|
|
|
#ifndef SPEC
|
|
vpr_printf(TIO_MESSAGE_INFO, "%9s %9s %11s %11s %11s %11s %8s %8s %7s %7s %7s %9s %7s\n",
|
|
"---------", "---------", "-----------", "-----------", "-----------", "-----------",
|
|
"--------", "--------", "-------", "-------", "-------", "---------", "-------");
|
|
vpr_printf(TIO_MESSAGE_INFO, "%9s %9s %11s %11s %11s %11s %8s %8s %7s %7s %7s %9s %7s\n",
|
|
"T", "Cost", "Av BB Cost", "Av TD Cost", "Av Tot Del",
|
|
"P to P Del", "d_max", "Ac Rate", "Std Dev", "R limit", "Exp",
|
|
"Tot Moves", "Alpha");
|
|
vpr_printf(TIO_MESSAGE_INFO, "%9s %9s %11s %11s %11s %11s %8s %8s %7s %7s %7s %9s %7s\n",
|
|
"---------", "---------", "-----------", "-----------", "-----------", "-----------",
|
|
"--------", "--------", "-------", "-------", "-------", "---------", "-------");
|
|
#endif
|
|
|
|
sprintf(msg, "Initial Placement. Cost: %g BB Cost: %g TD Cost %g Delay Cost: %g \t Channel Factor: %d",
|
|
cost, bb_cost, timing_cost, delay_cost, width_fac);
|
|
update_screen(MAJOR, msg, PLACEMENT, FALSE);
|
|
|
|
while (exit_crit(t, cost, annealing_sched) == 0) {
|
|
|
|
if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
|
|
|| placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
|
|
cost = 1;
|
|
}
|
|
|
|
av_cost = 0.;
|
|
av_bb_cost = 0.;
|
|
av_delay_cost = 0.;
|
|
av_timing_cost = 0.;
|
|
sum_of_squares = 0.;
|
|
success_sum = 0;
|
|
|
|
if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
|
|
|| placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
|
|
|
|
if (outer_crit_iter_count >= placer_opts.recompute_crit_iter
|
|
|| placer_opts.inner_loop_recompute_divider != 0) {
|
|
#ifdef VERBOSE
|
|
vpr_printf(TIO_MESSAGE_INFO, "Outer loop recompute criticalities\n");
|
|
#endif
|
|
place_delay_value = delay_cost / num_connections;
|
|
|
|
if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE)
|
|
load_constant_net_delay(net_delay, place_delay_value,
|
|
clb_net, num_nets);
|
|
/*note, for path_based, the net delay is not updated since it is current,
|
|
*because it accesses point_to_point_delay array */
|
|
|
|
load_timing_graph_net_delays(net_delay);
|
|
do_timing_analysis(slacks, FALSE, FALSE, FALSE);
|
|
load_criticalities(slacks, crit_exponent);
|
|
/*recompute costs from scratch, based on new criticalities */
|
|
comp_td_costs(&timing_cost, &delay_cost);
|
|
outer_crit_iter_count = 0;
|
|
}
|
|
outer_crit_iter_count++;
|
|
|
|
/*at each temperature change we update these values to be used */
|
|
/*for normalizing the tradeoff between timing and wirelength (bb) */
|
|
inverse_prev_bb_cost = 1 / bb_cost;
|
|
/*Prevent inverse timing cost from going to infinity */
|
|
inverse_prev_timing_cost = std::min(1 / timing_cost, (float)MAX_INV_TIMING_COST);
|
|
}
|
|
|
|
inner_crit_iter_count = 1;
|
|
|
|
for (inner_iter = 0; inner_iter < move_lim; inner_iter++) {
|
|
swap_result = try_swap(t, &cost, &bb_cost, &timing_cost, rlim,
|
|
old_region_occ_x,
|
|
old_region_occ_y,
|
|
placer_opts.place_algorithm, placer_opts.timing_tradeoff,
|
|
inverse_prev_bb_cost, inverse_prev_timing_cost, &delay_cost);
|
|
if (swap_result == ACCEPTED) {
|
|
|
|
/* Move was accepted. Update statistics that are useful for the annealing schedule. */
|
|
success_sum++;
|
|
av_cost += cost;
|
|
av_bb_cost += bb_cost;
|
|
av_timing_cost += timing_cost;
|
|
av_delay_cost += delay_cost;
|
|
sum_of_squares += cost * cost;
|
|
num_swap_accepted++;
|
|
} else if (swap_result == ABORTED) {
|
|
num_swap_aborted++;
|
|
} else { // swap_result == REJECTED
|
|
num_swap_rejected++;
|
|
}
|
|
|
|
|
|
if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
|
|
|| placer_opts.place_algorithm
|
|
== PATH_TIMING_DRIVEN_PLACE) {
|
|
|
|
/* Do we want to re-timing analyze the circuit to get updated slack and criticality values?
|
|
* We do this only once in a while, since it is expensive.
|
|
*/
|
|
if (inner_crit_iter_count >= inner_recompute_limit
|
|
&& inner_iter != move_lim - 1) { /*on last iteration don't recompute */
|
|
|
|
inner_crit_iter_count = 0;
|
|
#ifdef VERBOSE
|
|
vpr_printf(TIO_MESSAGE_TRACE, "Inner loop recompute criticalities\n");
|
|
#endif
|
|
if (placer_opts.place_algorithm
|
|
== NET_TIMING_DRIVEN_PLACE) {
|
|
/* Use a constant delay per connection as the delay estimate, rather than
|
|
* estimating based on the current placement. Not a great idea, but not the
|
|
* default.
|
|
*/
|
|
place_delay_value = delay_cost / num_connections;
|
|
load_constant_net_delay(net_delay, place_delay_value,
|
|
clb_net, num_nets);
|
|
}
|
|
|
|
/* Using the delays in net_delay, do a timing analysis to update slacks and
|
|
* criticalities; then update the timing cost since it will change.
|
|
*/
|
|
load_timing_graph_net_delays(net_delay);
|
|
do_timing_analysis(slacks, FALSE, FALSE, FALSE);
|
|
load_criticalities(slacks, crit_exponent);
|
|
comp_td_costs(&timing_cost, &delay_cost);
|
|
}
|
|
inner_crit_iter_count++;
|
|
}
|
|
#ifdef VERBOSE
|
|
vpr_printf(TIO_MESSAGE_TRACE, "t = %g cost = %g bb_cost = %g timing_cost = %g move = %d dmax = %g\n",
|
|
t, cost, bb_cost, timing_cost, inner_iter, delay_cost);
|
|
if (fabs(bb_cost - comp_bb_cost(CHECK)) > bb_cost * ERROR_TOL)
|
|
exit(1);
|
|
#endif
|
|
}
|
|
|
|
/* Lines below prevent too much round-off error from accumulating *
|
|
* in the cost over many iterations. This round-off can lead to *
|
|
* error checks failing because the cost is different from what *
|
|
* you get when you recompute from scratch. */
|
|
|
|
moves_since_cost_recompute += move_lim;
|
|
if (moves_since_cost_recompute > MAX_MOVES_BEFORE_RECOMPUTE) {
|
|
new_bb_cost = recompute_bb_cost();
|
|
if (fabs(new_bb_cost - bb_cost) > bb_cost * ERROR_TOL) {
|
|
vpr_printf(TIO_MESSAGE_ERROR, "in try_place: new_bb_cost = %g, old bb_cost = %g\n",
|
|
new_bb_cost, bb_cost);
|
|
exit(1);
|
|
}
|
|
bb_cost = new_bb_cost;
|
|
|
|
if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
|
|
|| placer_opts.place_algorithm
|
|
== PATH_TIMING_DRIVEN_PLACE) {
|
|
comp_td_costs(&new_timing_cost, &new_delay_cost);
|
|
if (fabs(new_timing_cost - timing_cost) > timing_cost * ERROR_TOL) {
|
|
vpr_printf(TIO_MESSAGE_ERROR, "in try_place: new_timing_cost = %g, old timing_cost = %g\n",
|
|
new_timing_cost, timing_cost);
|
|
exit(1);
|
|
}
|
|
if (fabs(new_delay_cost - delay_cost) > delay_cost * ERROR_TOL) {
|
|
vpr_printf(TIO_MESSAGE_ERROR, "in try_place: new_delay_cost = %g, old delay_cost = %g\n",
|
|
new_delay_cost, delay_cost);
|
|
exit(1);
|
|
}
|
|
timing_cost = new_timing_cost;
|
|
}
|
|
|
|
if (placer_opts.place_algorithm == BOUNDING_BOX_PLACE) {
|
|
cost = new_bb_cost;
|
|
}
|
|
moves_since_cost_recompute = 0;
|
|
}
|
|
|
|
tot_iter += move_lim;
|
|
success_rat = ((float) success_sum) / move_lim;
|
|
if (success_sum == 0) {
|
|
av_cost = cost;
|
|
av_bb_cost = bb_cost;
|
|
av_timing_cost = timing_cost;
|
|
av_delay_cost = delay_cost;
|
|
} else {
|
|
av_cost /= success_sum;
|
|
av_bb_cost /= success_sum;
|
|
av_timing_cost /= success_sum;
|
|
av_delay_cost /= success_sum;
|
|
}
|
|
std_dev = get_std_dev(success_sum, sum_of_squares, av_cost);
|
|
|
|
oldt = t; /* for finding and printing alpha. */
|
|
update_t(&t, std_dev, rlim, success_rat, annealing_sched);
|
|
|
|
#ifndef SPEC
|
|
critical_path_delay = get_critical_path_delay();
|
|
vpr_printf(TIO_MESSAGE_INFO, "%9.5f %9.5g %11.6g %11.6g %11.6g %11.6g %8.4f %8.4f %7.4f %7.4f %7.4f %9d %7.4f\n",
|
|
oldt, av_cost, av_bb_cost, av_timing_cost, av_delay_cost, place_delay_value,
|
|
critical_path_delay, success_rat, std_dev, rlim, crit_exponent, tot_iter, t / oldt);
|
|
#endif
|
|
|
|
sprintf(msg, "Cost: %g BB Cost %g TD Cost %g Temperature: %g",
|
|
cost, bb_cost, timing_cost, t);
|
|
update_screen(MINOR, msg, PLACEMENT, FALSE);
|
|
update_rlim(&rlim, success_rat);
|
|
|
|
if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
|
|
|| placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
|
|
crit_exponent = (1 - (rlim - final_rlim) * inverse_delta_rlim)
|
|
* (placer_opts.td_place_exp_last
|
|
- placer_opts.td_place_exp_first)
|
|
+ placer_opts.td_place_exp_first;
|
|
}
|
|
#ifdef VERBOSE
|
|
if (getEchoEnabled()) {
|
|
print_clb_placement("first_iteration_clb_placement.echo");
|
|
}
|
|
#endif
|
|
}
|
|
|
|
t = 0; /* freeze out */
|
|
av_cost = 0.;
|
|
av_bb_cost = 0.;
|
|
av_timing_cost = 0.;
|
|
sum_of_squares = 0.;
|
|
av_delay_cost = 0.;
|
|
success_sum = 0;
|
|
|
|
if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
|
|
|| placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
|
|
/*at each temperature change we update these values to be used */
|
|
/*for normalizing the tradeoff between timing and wirelength (bb) */
|
|
if (outer_crit_iter_count >= placer_opts.recompute_crit_iter
|
|
|| placer_opts.inner_loop_recompute_divider != 0) {
|
|
|
|
#ifdef VERBOSE
|
|
vpr_printf(TIO_MESSAGE_INFO, "Outer loop recompute criticalities\n");
|
|
#endif
|
|
place_delay_value = delay_cost / num_connections;
|
|
|
|
if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE)
|
|
load_constant_net_delay(net_delay, place_delay_value, clb_net,
|
|
num_nets);
|
|
|
|
load_timing_graph_net_delays(net_delay);
|
|
do_timing_analysis(slacks, FALSE, FALSE, FALSE);
|
|
load_criticalities(slacks, crit_exponent);
|
|
/*recompute costs from scratch, based on new criticalities */
|
|
comp_td_costs(&timing_cost, &delay_cost);
|
|
outer_crit_iter_count = 0;
|
|
}
|
|
outer_crit_iter_count++;
|
|
|
|
inverse_prev_bb_cost = 1 / (bb_cost);
|
|
/*Prevent inverse timing cost from going to infinity */
|
|
inverse_prev_timing_cost = std::min(1 / timing_cost, (float)MAX_INV_TIMING_COST);
|
|
}
|
|
|
|
inner_crit_iter_count = 1;
|
|
|
|
for (inner_iter = 0; inner_iter < move_lim; inner_iter++) {
|
|
swap_result = try_swap(t, &cost, &bb_cost, &timing_cost, rlim,
|
|
old_region_occ_x, old_region_occ_y,
|
|
placer_opts.place_algorithm, placer_opts.timing_tradeoff,
|
|
inverse_prev_bb_cost, inverse_prev_timing_cost, &delay_cost);
|
|
|
|
if (swap_result == ACCEPTED) {
|
|
success_sum++;
|
|
av_cost += cost;
|
|
av_bb_cost += bb_cost;
|
|
av_delay_cost += delay_cost;
|
|
av_timing_cost += timing_cost;
|
|
sum_of_squares += cost * cost;
|
|
|
|
if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
|
|
|| placer_opts.place_algorithm
|
|
== PATH_TIMING_DRIVEN_PLACE) {
|
|
|
|
if (inner_crit_iter_count >= inner_recompute_limit
|
|
&& inner_iter != move_lim - 1) {
|
|
|
|
inner_crit_iter_count = 0;
|
|
#ifdef VERBOSE
|
|
vpr_printf(TIO_MESSAGE_TRACE, "Inner loop recompute criticalities\n");
|
|
#endif
|
|
if (placer_opts.place_algorithm
|
|
== NET_TIMING_DRIVEN_PLACE) {
|
|
place_delay_value = delay_cost / num_connections;
|
|
load_constant_net_delay(net_delay, place_delay_value,
|
|
clb_net, num_nets);
|
|
}
|
|
|
|
load_timing_graph_net_delays(net_delay);
|
|
do_timing_analysis(slacks, FALSE, FALSE, FALSE);
|
|
load_criticalities(slacks, crit_exponent);
|
|
comp_td_costs(&timing_cost, &delay_cost);
|
|
}
|
|
inner_crit_iter_count++;
|
|
}
|
|
num_swap_accepted++;
|
|
} else if (swap_result == ABORTED) {
|
|
num_swap_aborted++;
|
|
} else {
|
|
num_swap_rejected++;
|
|
}
|
|
|
|
#ifdef VERBOSE
|
|
vpr_printf(TIO_MESSAGE_INFO, "t = %g, cost = %g, move = %d\n", t, cost, tot_iter);
|
|
#endif
|
|
}
|
|
tot_iter += move_lim;
|
|
success_rat = ((float) success_sum) / move_lim;
|
|
if (success_sum == 0) {
|
|
av_cost = cost;
|
|
av_bb_cost = bb_cost;
|
|
av_delay_cost = delay_cost;
|
|
av_timing_cost = timing_cost;
|
|
} else {
|
|
av_cost /= success_sum;
|
|
av_bb_cost /= success_sum;
|
|
av_delay_cost /= success_sum;
|
|
av_timing_cost /= success_sum;
|
|
}
|
|
|
|
std_dev = get_std_dev(success_sum, sum_of_squares, av_cost);
|
|
|
|
#ifndef SPEC
|
|
vpr_printf(TIO_MESSAGE_INFO, "%9.5f %9.5g %11.6g %11.6g %11.6g %11.6g %8s %8.4f %7.4f %7.4f %7.4f %9d\n",
|
|
t, av_cost, av_bb_cost, av_timing_cost, av_delay_cost, place_delay_value,
|
|
" ", success_rat, std_dev, rlim, crit_exponent, tot_iter);
|
|
#endif
|
|
|
|
// TODO:
|
|
// 1. print a message about number of aborted moves.
|
|
// 2. add some subroutine hierarchy! Too big!
|
|
// 3. put statistics counters (av_cost, success_sum, etc.) in a struct so a
|
|
// pointer to it can be passed around.
|
|
|
|
#ifdef VERBOSE
|
|
if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_END_CLB_PLACEMENT)) {
|
|
print_clb_placement(getEchoFileName(E_ECHO_END_CLB_PLACEMENT));
|
|
}
|
|
#endif
|
|
|
|
check_place(bb_cost, timing_cost,
|
|
placer_opts.place_algorithm, delay_cost);
|
|
|
|
if (placer_opts.enable_timing_computations
|
|
&& placer_opts.place_algorithm == BOUNDING_BOX_PLACE) {
|
|
/*need this done since the timing data has not been kept up to date*
|
|
*in bounding_box mode */
|
|
for (inet = 0; inet < num_nets; inet++)
|
|
for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++)
|
|
timing_place_crit[inet][ipin] = 0; /*dummy crit values */
|
|
comp_td_costs(&timing_cost, &delay_cost); /*computes point_to_point_delay_cost */
|
|
}
|
|
|
|
if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
|
|
|| placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE
|
|
|| placer_opts.enable_timing_computations) {
|
|
net_delay = point_to_point_delay_cost; /*this makes net_delay up to date with *
|
|
*the same values that the placer is using*/
|
|
load_timing_graph_net_delays(net_delay);
|
|
|
|
do_timing_analysis(slacks, FALSE, FALSE, FALSE);
|
|
|
|
if (getEchoEnabled()) {
|
|
if(isEchoFileEnabled(E_ECHO_PLACEMENT_SINK_DELAYS))
|
|
print_sink_delays(getEchoFileName(E_ECHO_PLACEMENT_SINK_DELAYS));
|
|
if(isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_SLACK))
|
|
print_slack(slacks->slack, FALSE, getEchoFileName(E_ECHO_FINAL_PLACEMENT_SLACK));
|
|
if(isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_CRITICALITY))
|
|
print_criticality(slacks, FALSE, getEchoFileName(E_ECHO_FINAL_PLACEMENT_CRITICALITY));
|
|
if(isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH))
|
|
print_timing_graph(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH));
|
|
if(isEchoFileEnabled(E_ECHO_PLACEMENT_CRIT_PATH))
|
|
print_critical_path(getEchoFileName(E_ECHO_PLACEMENT_CRIT_PATH));
|
|
}
|
|
|
|
/* Print critical path delay. */
|
|
critical_path_delay = get_critical_path_delay();
|
|
vpr_printf(TIO_MESSAGE_INFO, "\n");
|
|
vpr_printf(TIO_MESSAGE_INFO, "Placement estimated critical path delay: %g ns\n", critical_path_delay);
|
|
}
|
|
|
|
sprintf(msg, "Placement. Cost: %g bb_cost: %g td_cost: %g Channel Factor: %d",
|
|
cost, bb_cost, timing_cost, width_fac);
|
|
vpr_printf(TIO_MESSAGE_INFO, "Placement cost: %g, bb_cost: %g, td_cost: %g, delay_cost: %g\n",
|
|
cost, bb_cost, timing_cost, delay_cost);
|
|
update_screen(MAJOR, msg, PLACEMENT, FALSE);
|
|
|
|
// Print out swap statistics
|
|
total_swap_attempts = num_swap_rejected + num_swap_accepted + num_swap_aborted;
|
|
reject_rate = num_swap_rejected / total_swap_attempts;
|
|
accept_rate = num_swap_accepted / total_swap_attempts;
|
|
abort_rate = num_swap_aborted / total_swap_attempts;
|
|
vpr_printf(TIO_MESSAGE_INFO, "Placement total # of swap attempts: %d\n", total_swap_attempts);
|
|
vpr_printf(TIO_MESSAGE_INFO, "\tSwap reject rate: %g\n", reject_rate);
|
|
vpr_printf(TIO_MESSAGE_INFO, "\tSwap accept rate: %g\n", accept_rate);
|
|
vpr_printf(TIO_MESSAGE_INFO, "\tSwap abort rate: %g\n", abort_rate);
|
|
|
|
|
|
#ifdef SPEC
|
|
vpr_printf(TIO_MESSAGE_INFO, "Total moves attempted: %d.0\n", tot_iter);
|
|
#endif
|
|
|
|
free_placement_structs(
|
|
old_region_occ_x, old_region_occ_y,
|
|
placer_opts);
|
|
if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
|
|
|| placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE
|
|
|| placer_opts.enable_timing_computations) {
|
|
|
|
net_delay = remember_net_delay_original_ptr;
|
|
free_lookups_and_criticalities(&net_delay, slacks);
|
|
}
|
|
|
|
free_try_swap_arrays();
|
|
}
|
|
|
|
static int count_connections() {
|
|
/*only count non-global connections */
|
|
|
|
int count, inet;
|
|
|
|
count = 0;
|
|
|
|
for (inet = 0; inet < num_nets; inet++) {
|
|
|
|
if (clb_net[inet].is_global)
|
|
continue;
|
|
|
|
count += clb_net[inet].num_sinks;
|
|
}
|
|
return (count);
|
|
}
|
|
|
|
static double get_std_dev(int n, double sum_x_squared, double av_x) {

	/* Sample standard deviation of a data set x described by its summary  *
	 * statistics: n is the number of samples, sum_x_squared is the sum of *
	 * x^2 over the samples and av_x is the mean of x.                     *
	 * Returns 0 when n <= 1 or when the computed variance is not strictly *
	 * positive (tiny variances can round to a negative value).            *
	 * Everything is kept in double precision because round off matters    *
	 * in the initial temperature calculation for big circuits.            */

	double variance;

	if (n <= 1) {
		return 0.;
	}

	variance = (sum_x_squared - n * av_x * av_x) / (double) (n - 1);

	if (variance > 0.) {
		return sqrt(variance);
	}

	return 0.;
}
|
|
|
|
static void update_rlim(float *rlim, float success_rat) {

	/* Rescales the range limit in place so that the acceptance rate is   *
	 * steered towards 0.44: the limit grows when success_rat is above    *
	 * 0.44 and shrinks when it is below. The result is clamped to the    *
	 * interval [1, max(nx, ny) + 1]. rlim is a float so that it can      *
	 * change gradually at low temperatures.                              */

	const float largest_rlim = (nx > ny ? nx : ny) + 1;

	*rlim = (*rlim) * (1. - 0.44 + success_rat);

	if (largest_rlim < *rlim) {
		*rlim = largest_rlim;
	}
	if (*rlim < (float) 1.) {
		*rlim = (float) 1.;
	}
}
|
|
|
|
/* Update the temperature according to the annealing schedule selected. */
|
|
static void update_t(float *t, float std_dev, float rlim, float success_rat,
|
|
struct s_annealing_sched annealing_sched) {
|
|
|
|
/* float fac; */
|
|
|
|
if (annealing_sched.type == USER_SCHED) {
|
|
*t = annealing_sched.alpha_t * (*t);
|
|
}
|
|
|
|
/* Old standard deviation based stuff is below. This bogs down horribly
|
|
* for big circuits (alu4 and especially bigkey_mod). */
|
|
/* #define LAMBDA .7 */
|
|
/* ------------------------------------ */
|
|
#if 0
|
|
else if (std_dev == 0.)
|
|
{
|
|
*t = 0.;
|
|
}
|
|
else
|
|
{
|
|
fac = exp(-LAMBDA * (*t) / std_dev);
|
|
fac = max(0.5, fac);
|
|
*t = (*t) * fac;
|
|
}
|
|
#endif
|
|
/* ------------------------------------- */
|
|
|
|
else { /* AUTO_SCHED */
|
|
if (success_rat > 0.96) {
|
|
*t = (*t) * 0.5;
|
|
} else if (success_rat > 0.8) {
|
|
*t = (*t) * 0.9;
|
|
} else if (success_rat > 0.15 || rlim > 1.) {
|
|
*t = (*t) * 0.95;
|
|
} else {
|
|
*t = (*t) * 0.8;
|
|
}
|
|
}
|
|
}
|
|
|
|
static int exit_crit(float t, float cost,
|
|
struct s_annealing_sched annealing_sched) {
|
|
|
|
/* Return 1 when the exit criterion is met. */
|
|
|
|
if (annealing_sched.type == USER_SCHED) {
|
|
if (t < annealing_sched.exit_t) {
|
|
return (1);
|
|
} else {
|
|
return (0);
|
|
}
|
|
}
|
|
|
|
/* Automatic annealing schedule */
|
|
|
|
if (t < 0.005 * cost / num_nets) {
|
|
return (1);
|
|
} else {
|
|
return (0);
|
|
}
|
|
}
|
|
|
|
static float starting_t(float *cost_ptr, float *bb_cost_ptr,
|
|
float *timing_cost_ptr, float **old_region_occ_x,
|
|
float **old_region_occ_y,
|
|
struct s_annealing_sched annealing_sched, int max_moves, float rlim,
|
|
enum e_place_algorithm place_algorithm, float timing_tradeoff,
|
|
float inverse_prev_bb_cost, float inverse_prev_timing_cost,
|
|
float *delay_cost_ptr) {
|
|
|
|
/* Finds the starting temperature (hot condition). */
|
|
|
|
int i, num_accepted, move_lim, swap_result;
|
|
double std_dev, av, sum_of_squares; /* Double important to avoid round off */
|
|
|
|
if (annealing_sched.type == USER_SCHED)
|
|
return (annealing_sched.init_t);
|
|
|
|
move_lim = std::min(max_moves, num_blocks);
|
|
|
|
num_accepted = 0;
|
|
av = 0.;
|
|
sum_of_squares = 0.;
|
|
|
|
/* Try one move per block. Set t high so essentially all accepted. */
|
|
|
|
for (i = 0; i < move_lim; i++) {
|
|
swap_result = try_swap(HUGE_POSITIVE_FLOAT, cost_ptr, bb_cost_ptr, timing_cost_ptr, rlim,
|
|
old_region_occ_x, old_region_occ_y,
|
|
place_algorithm, timing_tradeoff,
|
|
inverse_prev_bb_cost, inverse_prev_timing_cost, delay_cost_ptr);
|
|
|
|
if (swap_result == ACCEPTED) {
|
|
num_accepted++;
|
|
av += *cost_ptr;
|
|
sum_of_squares += *cost_ptr * (*cost_ptr);
|
|
num_swap_accepted++;
|
|
} else if (swap_result == ABORTED) {
|
|
num_swap_aborted++;
|
|
} else {
|
|
num_swap_rejected++;
|
|
}
|
|
}
|
|
|
|
if (num_accepted != 0)
|
|
av /= num_accepted;
|
|
else
|
|
av = 0.;
|
|
|
|
std_dev = get_std_dev(num_accepted, sum_of_squares, av);
|
|
|
|
#ifdef DEBUG
|
|
if (num_accepted != move_lim) {
|
|
vpr_printf(TIO_MESSAGE_WARNING, "Starting t: %d of %d configurations accepted.\n", num_accepted, move_lim);
|
|
}
|
|
#endif
|
|
|
|
#ifdef VERBOSE
|
|
vpr_printf(TIO_MESSAGE_INFO, "std_dev: %g, average cost: %g, starting temp: %g\n", std_dev, av, 20. * std_dev);
|
|
#endif
|
|
|
|
/* Set the initial temperature to 20 times the standard of deviation */
|
|
/* so that the initial temperature adjusts according to the circuit */
|
|
return (20. * std_dev);
|
|
}
|
|
|
|
|
|
static int setup_blocks_affected(int b_from, int x_to, int y_to, int z_to) {
|
|
|
|
/* Find all the blocks affected when b_from is swapped with b_to.
|
|
* Returns abort_swap. */
|
|
|
|
int imoved_blk, imacro;
|
|
int x_from, y_from, z_from, b_to;
|
|
int abort_swap = FALSE;
|
|
|
|
/* Xifan TANG: support swap between macros */
|
|
/* int from_macro; */
|
|
|
|
x_from = block[b_from].x;
|
|
y_from = block[b_from].y;
|
|
z_from = block[b_from].z;
|
|
|
|
b_to = grid[x_to][y_to].blocks[z_to];
|
|
|
|
// Check whether the to_location is empty
|
|
if (b_to == EMPTY) {
|
|
|
|
// Swap the block, dont swap the nets yet
|
|
block[b_from].x = x_to;
|
|
block[b_from].y = y_to;
|
|
block[b_from].z = z_to;
|
|
|
|
// Sets up the blocks moved
|
|
imoved_blk = blocks_affected.num_moved_blocks;
|
|
blocks_affected.moved_blocks[imoved_blk].block_num = b_from;
|
|
blocks_affected.moved_blocks[imoved_blk].xold = x_from;
|
|
blocks_affected.moved_blocks[imoved_blk].xnew = x_to;
|
|
blocks_affected.moved_blocks[imoved_blk].yold = y_from;
|
|
blocks_affected.moved_blocks[imoved_blk].ynew = y_to;
|
|
blocks_affected.moved_blocks[imoved_blk].zold = z_from;
|
|
blocks_affected.moved_blocks[imoved_blk].znew = z_to;
|
|
blocks_affected.moved_blocks[imoved_blk].swapped_to_empty = TRUE;
|
|
blocks_affected.num_moved_blocks ++;
|
|
|
|
} else {
|
|
|
|
// Does not allow a swap with a macro yet
|
|
/* Xifan TANG: allow macro swapping...*/
|
|
get_imacro_from_iblk(&imacro, b_to, pl_macros, num_pl_macros);
|
|
/* get_imacro_from_iblk(&from_macro, b_from, pl_macros, num_pl_macros);
|
|
if (((-1 != from_macro)||(imacro != -1))
|
|
&&(!((-1 != from_macro)&&(imacro != -1)))) {
|
|
*/
|
|
if (imacro != -1) {
|
|
abort_swap = TRUE;
|
|
return (abort_swap);
|
|
}
|
|
|
|
// Swap the block, dont swap the nets yet
|
|
block[b_to].x = x_from;
|
|
block[b_to].y = y_from;
|
|
block[b_to].z = z_from;
|
|
|
|
block[b_from].x = x_to;
|
|
block[b_from].y = y_to;
|
|
block[b_from].z = z_to;
|
|
|
|
// Sets up the blocks moved
|
|
imoved_blk = blocks_affected.num_moved_blocks;
|
|
blocks_affected.moved_blocks[imoved_blk].block_num = b_from;
|
|
blocks_affected.moved_blocks[imoved_blk].xold = x_from;
|
|
blocks_affected.moved_blocks[imoved_blk].xnew = x_to;
|
|
blocks_affected.moved_blocks[imoved_blk].yold = y_from;
|
|
blocks_affected.moved_blocks[imoved_blk].ynew = y_to;
|
|
blocks_affected.moved_blocks[imoved_blk].zold = z_from;
|
|
blocks_affected.moved_blocks[imoved_blk].znew = z_to;
|
|
blocks_affected.moved_blocks[imoved_blk].swapped_to_empty = FALSE;
|
|
blocks_affected.num_moved_blocks ++;
|
|
|
|
imoved_blk = blocks_affected.num_moved_blocks;
|
|
blocks_affected.moved_blocks[imoved_blk].block_num = b_to;
|
|
blocks_affected.moved_blocks[imoved_blk].xold = x_to;
|
|
blocks_affected.moved_blocks[imoved_blk].xnew = x_from;
|
|
blocks_affected.moved_blocks[imoved_blk].yold = y_to;
|
|
blocks_affected.moved_blocks[imoved_blk].ynew = y_from;
|
|
blocks_affected.moved_blocks[imoved_blk].zold = z_to;
|
|
blocks_affected.moved_blocks[imoved_blk].znew = z_from;
|
|
blocks_affected.moved_blocks[imoved_blk].swapped_to_empty = FALSE;
|
|
blocks_affected.num_moved_blocks ++;
|
|
|
|
} // Finish swapping the blocks and setting up blocks_affected
|
|
|
|
return (abort_swap);
|
|
|
|
}
|
|
|
|
static int find_affected_blocks(int b_from, int x_to, int y_to, int z_to) {
|
|
|
|
/* Finds and set ups the affected_blocks array.
|
|
* Returns abort_swap. */
|
|
|
|
int imacro, imember;
|
|
int x_swap_offset, y_swap_offset, z_swap_offset, x_from, y_from, z_from;
|
|
int curr_b_from, curr_x_from, curr_y_from, curr_z_from, curr_x_to, curr_y_to, curr_z_to;
|
|
int abort_swap = FALSE;
|
|
|
|
/* int to_imacro;*/ /* Xifan TANG: for more checking */
|
|
|
|
x_from = block[b_from].x;
|
|
y_from = block[b_from].y;
|
|
z_from = block[b_from].z;
|
|
|
|
get_imacro_from_iblk(&imacro, b_from, pl_macros, num_pl_macros);
|
|
if ( imacro != -1) {
|
|
// b_from is part of a macro, I need to swap the whole macro
|
|
|
|
// Record down the relative position of the swap
|
|
x_swap_offset = x_to - x_from;
|
|
y_swap_offset = y_to - y_from;
|
|
z_swap_offset = z_to - z_from;
|
|
|
|
for (imember = 0; imember < pl_macros[imacro].num_blocks && abort_swap == FALSE; imember++) {
|
|
|
|
// Gets the new from and to info for every block in the macro
|
|
// cannot use the old from and to info
|
|
curr_b_from = pl_macros[imacro].members[imember].blk_index;
|
|
|
|
curr_x_from = block[curr_b_from].x;
|
|
curr_y_from = block[curr_b_from].y;
|
|
curr_z_from = block[curr_b_from].z;
|
|
|
|
curr_x_to = curr_x_from + x_swap_offset;
|
|
curr_y_to = curr_y_from + y_swap_offset;
|
|
curr_z_to = curr_z_from + z_swap_offset;
|
|
|
|
/* Xifan TANG: double check*/
|
|
assert(block[curr_b_from].type == grid[curr_x_from][curr_y_from].type);
|
|
|
|
// Make sure that the swap_to location is still on the chip
|
|
if (curr_x_to < 1 || curr_x_to > nx || curr_y_to < 1 || curr_y_to > ny || curr_z_to < 0) {
|
|
abort_swap = TRUE;
|
|
/* Xifan TANG: We need to check if the swap_to location has the same type! */
|
|
/*
|
|
} else if (grid[curr_x_from][curr_y_from].type != grid[curr_x_to][curr_y_to].type) {
|
|
abort_swap = TRUE;
|
|
*/
|
|
} else {
|
|
/* Xifan TANG: Check if the to_x, to_y is also a marco...
|
|
* If the follow cases are true then we should abort the swap
|
|
* 1. length of to_macro is larger than this macro
|
|
* 2. length of to_macro is the same as this macro, but its starting point is not align with this macro.
|
|
* 2. length of to_macro is less this macro, but its starting/ending point is out of the range of this macro.
|
|
*/
|
|
/*
|
|
curr_b_to = grid[curr_x_to][curr_y_to].blocks[curr_z_to];
|
|
if (OPEN != curr_b_to) {
|
|
get_imacro_from_iblk(&to_imacro, curr_b_to, pl_macros, num_pl_macros);
|
|
}
|
|
if (OPEN != to_imacro) {
|
|
if (pl_macros[imacro].num_blocks < pl_macros[to_imacro].num_blocks) {
|
|
abort_swap = TRUE;
|
|
} else if ((pl_macros[imacro].num_blocks == pl_macros[to_imacro].num_blocks)
|
|
&& (imember != spot_blk_position_in_a_macro(pl_macros[to_imacro],curr_b_to))) {
|
|
abort_swap = TRUE;
|
|
} else if ((pl_macros[imacro].num_blocks > pl_macros[to_imacro].num_blocks)
|
|
&& (0 == check_macros_contained(pl_macros[imacro], pl_macros[to_imacro]))) {
|
|
abort_swap = TRUE;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
*/
|
|
/* Xifan TANG: Only all the memebers in the macro pass the check, we can proceed to setup swap */
|
|
/*
|
|
if (FALSE == abort_swap) {
|
|
for (imember = 0; imember < pl_macros[imacro].num_blocks && abort_swap == FALSE; imember++) {
|
|
// Gets the new from and to info for every block in the macro
|
|
// cannot use the old from and to info
|
|
curr_b_from = pl_macros[imacro].members[imember].blk_index;
|
|
|
|
curr_x_from = block[curr_b_from].x;
|
|
curr_y_from = block[curr_b_from].y;
|
|
curr_z_from = block[curr_b_from].z;
|
|
|
|
curr_x_to = curr_x_from + x_swap_offset;
|
|
curr_y_to = curr_y_from + y_swap_offset;
|
|
curr_z_to = curr_z_from + z_swap_offset;
|
|
*/
|
|
abort_swap = setup_blocks_affected(curr_b_from, curr_x_to, curr_y_to, curr_z_to);
|
|
} // Finish going through all the blocks in the macro
|
|
}
|
|
} else {
|
|
|
|
// This is not a macro - I could use the from and to info from before
|
|
abort_swap = setup_blocks_affected(b_from, x_to, y_to, z_to);
|
|
|
|
} // Finish handling cases for blocks in macro and otherwise
|
|
|
|
return (abort_swap);
|
|
|
|
}
|
|
|
|
static enum swap_result try_swap(float t, float *cost, float *bb_cost, float *timing_cost,
|
|
float rlim, float **old_region_occ_x,
|
|
float **old_region_occ_y,
|
|
enum e_place_algorithm place_algorithm, float timing_tradeoff,
|
|
float inverse_prev_bb_cost, float inverse_prev_timing_cost,
|
|
float *delay_cost) {
|
|
|
|
/* Picks some block and moves it to another spot. If this spot is *
|
|
* occupied, switch the blocks. Assess the change in cost function *
|
|
* and accept or reject the move. If rejected, return 0. If *
|
|
* accepted return 1. Pass back the new value of the cost function. *
|
|
* rlim is the range limiter. */
|
|
|
|
enum swap_result keep_switch;
|
|
int b_from, x_from, y_from, z_from, x_to, y_to, z_to;
|
|
int num_nets_affected;
|
|
float delta_c, bb_delta_c, timing_delta_c, delay_delta_c;
|
|
int inet, iblk, bnum, iblk_pin, inet_affected;
|
|
int abort_swap = FALSE;
|
|
|
|
num_ts_called ++;
|
|
|
|
/* I'm using negative values of temp_net_cost as a flag, so DO NOT *
|
|
* use cost functions that can go negative. */
|
|
|
|
delta_c = 0; /* Change in cost due to this swap. */
|
|
bb_delta_c = 0;
|
|
timing_delta_c = 0;
|
|
delay_delta_c = 0.0;
|
|
|
|
/* Pick a random block to be swapped with another random block */
|
|
b_from = my_irand(num_blocks - 1);
|
|
|
|
/* If the pins are fixed we never move them from their initial *
|
|
* random locations. The code below could be made more efficient *
|
|
* by using the fact that pins appear first in the block list, *
|
|
* but this shouldn't cause any significant slowdown and won't be *
|
|
* broken if I ever change the parser so that the pins aren't *
|
|
* necessarily at the start of the block list. */
|
|
while (block[b_from].isFixed == TRUE) {
|
|
b_from = my_irand(num_blocks - 1);
|
|
}
|
|
|
|
x_from = block[b_from].x;
|
|
y_from = block[b_from].y;
|
|
z_from = block[b_from].z;
|
|
|
|
if (!find_to(x_from, y_from, block[b_from].type, rlim, &x_to,
|
|
&y_to)) {
|
|
return REJECTED;
|
|
}
|
|
|
|
z_to = 0;
|
|
if (grid[x_to][y_to].type->capacity > 1) {
|
|
z_to = my_irand(grid[x_to][y_to].type->capacity - 1);
|
|
}
|
|
|
|
/* Make the switch in order to make computing the new bounding *
|
|
* box simpler. If the cost increase is too high, switch them *
|
|
* back. (block data structures switched, clbs not switched *
|
|
* until success of move is determined.) *
|
|
* Also check that whether those are the only 2 blocks *
|
|
* to be moved - check for carry chains and other placement *
|
|
* macros. */
|
|
|
|
/* Check whether the from_block is part of a macro first. *
|
|
* If it is, the whole macro has to be moved. Calculate the *
|
|
* x, y, z offsets of the swap to maintain relative placements *
|
|
* of the blocks. Abort the swap if the to_block is part of a *
|
|
* macro (not supported yet). */
|
|
|
|
abort_swap = find_affected_blocks(b_from, x_to, y_to, z_to);
|
|
|
|
if (abort_swap == FALSE) {
|
|
|
|
// Find all the nets affected by this swap
|
|
num_nets_affected = find_affected_nets(ts_nets_to_update);
|
|
|
|
/* Go through all the pins in all the blocks moved and update the bounding boxes. *
|
|
* Do not update the net cost here since it should only be updated once per net, *
|
|
* not once per pin */
|
|
for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++)
|
|
{
|
|
bnum = blocks_affected.moved_blocks[iblk].block_num;
|
|
|
|
/* Go through all the pins in the moved block */
|
|
for (iblk_pin = 0; iblk_pin < block[bnum].type->num_pins; iblk_pin++)
|
|
{
|
|
inet = block[bnum].nets[iblk_pin];
|
|
if (inet == OPEN)
|
|
continue;
|
|
if (clb_net[inet].is_global)
|
|
continue;
|
|
|
|
if (clb_net[inet].num_sinks < SMALL_NET) {
|
|
if(bb_updated_before[inet] == NOT_UPDATED_YET)
|
|
/* Brute force bounding box recomputation, once only for speed. */
|
|
get_non_updateable_bb(inet, &ts_bb_coord_new[inet]);
|
|
} else {
|
|
update_bb(inet, &ts_bb_coord_new[inet],
|
|
&ts_bb_edge_new[inet],
|
|
blocks_affected.moved_blocks[iblk].xold,
|
|
blocks_affected.moved_blocks[iblk].yold + block[bnum].type->pin_height[iblk_pin],
|
|
blocks_affected.moved_blocks[iblk].xnew,
|
|
blocks_affected.moved_blocks[iblk].ynew + block[bnum].type->pin_height[iblk_pin]);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Now update the cost function. The cost is only updated once for every net *
|
|
* May have to do major optimizations here later. */
|
|
for (inet_affected = 0; inet_affected < num_nets_affected; inet_affected++) {
|
|
inet = ts_nets_to_update[inet_affected];
|
|
|
|
temp_net_cost[inet] = get_net_cost(inet, &ts_bb_coord_new[inet]);
|
|
bb_delta_c += temp_net_cost[inet] - net_cost[inet];
|
|
}
|
|
|
|
if (place_algorithm == NET_TIMING_DRIVEN_PLACE
|
|
|| place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
|
|
/*in this case we redefine delta_c as a combination of timing and bb. *
|
|
*additionally, we normalize all values, therefore delta_c is in *
|
|
*relation to 1*/
|
|
|
|
comp_delta_td_cost(&timing_delta_c, &delay_delta_c);
|
|
|
|
delta_c = (1 - timing_tradeoff) * bb_delta_c * inverse_prev_bb_cost
|
|
+ timing_tradeoff * timing_delta_c * inverse_prev_timing_cost;
|
|
} else {
|
|
delta_c = bb_delta_c;
|
|
}
|
|
|
|
/* 1 -> move accepted, 0 -> rejected. */
|
|
keep_switch = assess_swap(delta_c, t);
|
|
|
|
if (keep_switch == ACCEPTED) {
|
|
*cost = *cost + delta_c;
|
|
*bb_cost = *bb_cost + bb_delta_c;
|
|
|
|
if (place_algorithm == NET_TIMING_DRIVEN_PLACE
|
|
|| place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
|
|
/*update the point_to_point_timing_cost and point_to_point_delay_cost
|
|
* values from the temporary values */
|
|
*timing_cost = *timing_cost + timing_delta_c;
|
|
*delay_cost = *delay_cost + delay_delta_c;
|
|
|
|
update_td_cost();
|
|
}
|
|
|
|
/* update net cost functions and reset flags. */
|
|
for (inet_affected = 0; inet_affected < num_nets_affected; inet_affected++) {
|
|
inet = ts_nets_to_update[inet_affected];
|
|
|
|
bb_coords[inet] = ts_bb_coord_new[inet];
|
|
if (clb_net[inet].num_sinks >= SMALL_NET)
|
|
bb_num_on_edges[inet] = ts_bb_edge_new[inet];
|
|
|
|
net_cost[inet] = temp_net_cost[inet];
|
|
|
|
/* negative temp_net_cost value is acting as a flag. */
|
|
temp_net_cost[inet] = -1;
|
|
bb_updated_before[inet] = NOT_UPDATED_YET;
|
|
}
|
|
|
|
/* Update clb data structures since we kept the move. */
|
|
/* Swap physical location */
|
|
for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) {
|
|
|
|
x_to = blocks_affected.moved_blocks[iblk].xnew;
|
|
y_to = blocks_affected.moved_blocks[iblk].ynew;
|
|
z_to = blocks_affected.moved_blocks[iblk].znew;
|
|
/* Xifan TANG: not sure if this is needed */
|
|
//b_to = grid[x_to][y_to].blocks[z_to];
|
|
|
|
x_from = blocks_affected.moved_blocks[iblk].xold;
|
|
y_from = blocks_affected.moved_blocks[iblk].yold;
|
|
z_from = blocks_affected.moved_blocks[iblk].zold;
|
|
|
|
b_from = blocks_affected.moved_blocks[iblk].block_num;
|
|
|
|
grid[x_to][y_to].blocks[z_to] = b_from;
|
|
/* Xifan TANG: not sure if this is needed */
|
|
//grid[x_from][y_from].blocks[z_from] = b_to;
|
|
|
|
if (blocks_affected.moved_blocks[iblk].swapped_to_empty == TRUE) {
|
|
grid[x_to][y_to].usage++;
|
|
grid[x_from][y_from].usage--;
|
|
grid[x_from][y_from].blocks[z_from] = -1;
|
|
}
|
|
|
|
} // Finish updating clb for all blocks
|
|
|
|
} else { /* Move was rejected. */
|
|
|
|
/* Reset the net cost function flags first. */
|
|
for (inet_affected = 0; inet_affected < num_nets_affected; inet_affected++) {
|
|
inet = ts_nets_to_update[inet_affected];
|
|
temp_net_cost[inet] = -1;
|
|
bb_updated_before[inet] = NOT_UPDATED_YET;
|
|
}
|
|
|
|
/* Restore the block data structures to their state before the move. */
|
|
for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) {
|
|
b_from = blocks_affected.moved_blocks[iblk].block_num;
|
|
|
|
block[b_from].x = blocks_affected.moved_blocks[iblk].xold;
|
|
block[b_from].y = blocks_affected.moved_blocks[iblk].yold;
|
|
block[b_from].z = blocks_affected.moved_blocks[iblk].zold;
|
|
}
|
|
}
|
|
|
|
/* Resets the num_moved_blocks, but do not free blocks_moved array. Defensive Coding */
|
|
blocks_affected.num_moved_blocks = 0;
|
|
|
|
//check_place(*bb_cost, *timing_cost, place_algorithm, *delay_cost);
|
|
|
|
return (keep_switch);
|
|
} else {
|
|
|
|
/* Restore the block data structures to their state before the move. */
|
|
for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) {
|
|
b_from = blocks_affected.moved_blocks[iblk].block_num;
|
|
|
|
block[b_from].x = blocks_affected.moved_blocks[iblk].xold;
|
|
block[b_from].y = blocks_affected.moved_blocks[iblk].yold;
|
|
block[b_from].z = blocks_affected.moved_blocks[iblk].zold;
|
|
}
|
|
|
|
/* Resets the num_moved_blocks, but do not free blocks_moved array. Defensive Coding */
|
|
blocks_affected.num_moved_blocks = 0;
|
|
|
|
return ABORTED;
|
|
}
|
|
}
|
|
|
|
static int find_affected_nets(int *nets_to_update) {
|
|
|
|
/* Puts a list of all the nets that are changed by the swap into *
|
|
* nets_to_update. Returns the number of affected nets. */
|
|
|
|
int iblk, iblk_pin, inet, bnum, num_affected_nets;
|
|
|
|
num_affected_nets = 0;
|
|
/* Go through all the blocks moved */
|
|
for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++)
|
|
{
|
|
bnum = blocks_affected.moved_blocks[iblk].block_num;
|
|
|
|
/* Go through all the pins in the moved block */
|
|
for (iblk_pin = 0; iblk_pin < block[bnum].type->num_pins; iblk_pin++)
|
|
{
|
|
/* Updates the pins_to_nets array, set to -1 if *
|
|
* that pin is not connected to any net or it is a *
|
|
* global pin that does not need to be updated */
|
|
inet = block[bnum].nets[iblk_pin];
|
|
if (inet == OPEN)
|
|
continue;
|
|
if (clb_net[inet].is_global)
|
|
continue;
|
|
|
|
if (temp_net_cost[inet] < 0.) {
|
|
/* Net not marked yet. */
|
|
nets_to_update[num_affected_nets] = inet;
|
|
num_affected_nets++;
|
|
|
|
/* Flag to say we've marked this net. */
|
|
temp_net_cost[inet] = 1.;
|
|
}
|
|
}
|
|
}
|
|
return num_affected_nets;
|
|
}
|
|
|
|
static boolean find_to(int x_from, int y_from, t_type_ptr type, float rlim, int *x_to, int *y_to) {

	/* Chooses a random destination (*x_to, *y_to) for a block of the given *
	 * type currently at (x_from, y_from), honouring the range limit rlim.  *
	 * rlim must always be between 1 and nx (inclusive) for this routine    *
	 * to work. Assumes that a column only contains blocks of the same type.*
	 *                                                                      *
	 * A candidate is legal when it differs from the source location, lies  *
	 * inside the range-limited window and has the same tile type as the    *
	 * source. Candidates are drawn either from the list of legal positions *
	 * for this type or uniformly from the window, depending on how large   *
	 * the window is compared with the chip and the number of legal         *
	 * positions.                                                           *
	 *                                                                      *
	 * Returns TRUE with the destination filled in, or FALSE when no legal  *
	 * candidate turned up within the allowed number of tries.              */

	int rlx, rly, min_x, max_x, min_y, max_y;
	int num_tries, max_tries;
	int active_area;
	boolean is_legal;
	int block_index, pos_idx;

	assert(type == grid[x_from][y_from].type);

	rlx = (int)std::min((float)nx + 1, rlim);
	rly = (int)std::min((float)ny + 1, rlim); /* Separate rly for aspect_ratio != 1. */
	active_area = 4 * rlx * rly;

	/* Range-limited window around the source, clipped to the grid. */
	min_x = std::max(0, x_from - rlx);
	max_x = std::min(nx + 1, x_from + rlx);
	min_y = std::max(0, y_from - rly);
	max_y = std::min(ny + 1, y_from + rly);

#ifdef DEBUG
	if (rlx < 1 || rlx > nx + 1) {
		vpr_printf(TIO_MESSAGE_ERROR, "in find_to: rlx = %d\n", rlx);
		exit(1);
	}
#endif

	block_index = type->index;

	/* Upper bound on the number of candidates drawn before giving up. */
	max_tries = 2 * std::min(active_area / type->height, num_legal_pos[block_index]) + 10;

	is_legal = FALSE;
	for (num_tries = 0; num_tries < max_tries && is_legal == FALSE; num_tries++) {

		if (nx / 4 < rlx ||
			ny / 4 < rly ||
			num_legal_pos[block_index] < active_area) {
			/* Draw from the precomputed legal positions for this type. */
			pos_idx = my_irand(num_legal_pos[block_index] - 1);
			*x_to = legal_pos[block_index][pos_idx].x;
			*y_to = legal_pos[block_index][pos_idx].y;
		} else {
			/* Draw uniformly from the window (x first, then y). */
			*x_to = min_x + my_irand(std::max(0, max_x - min_x));
			*y_to = min_y + my_irand(std::max(0, max_y - min_y));
			*y_to = (*y_to) - grid[*x_to][*y_to].offset; /* align it */
		}

		if (((x_from == *x_to) && (y_from == *y_to))
				|| (*x_to > max_x || *x_to < min_x || *y_to > max_y || *y_to < min_y)
				|| (grid[*x_to][*y_to].type != grid[x_from][y_from].type)) {
			is_legal = FALSE;
		} else {
			is_legal = TRUE;
		}

		assert(*x_to >= 0 && *x_to <= nx + 1);
		assert(*y_to >= 0 && *y_to <= ny + 1);
	}

	if (is_legal == FALSE) {
		/* Tried randomly searching for a suitable position */
		return FALSE;
	}

#ifdef DEBUG
	if (*x_to < 0 || *x_to > nx + 1 || *y_to < 0 || *y_to > ny + 1) {
		vpr_printf(TIO_MESSAGE_ERROR, "in routine find_to: (x_to,y_to) = (%d,%d)\n", *x_to, *y_to);
		exit(1);
	}
#endif
	assert(type == grid[*x_to][*y_to].type);
	return TRUE;
}
|
|
|
|
static enum swap_result assess_swap(float delta_c, float t) {
|
|
|
|
/* Returns: 1 -> move accepted, 0 -> rejected. */
|
|
|
|
enum swap_result accept;
|
|
float prob_fac, fnum;
|
|
|
|
if (delta_c <= 0) {
|
|
|
|
#ifdef SPEC /* Reduce variation in final solution due to round off */
|
|
fnum = my_frand();
|
|
#endif
|
|
|
|
accept = ACCEPTED;
|
|
return (accept);
|
|
}
|
|
|
|
if (t == 0.)
|
|
return (REJECTED);
|
|
|
|
fnum = my_frand();
|
|
prob_fac = exp(-delta_c / t);
|
|
if (prob_fac > fnum) {
|
|
accept = ACCEPTED;
|
|
} else {
|
|
accept = REJECTED;
|
|
}
|
|
return (accept);
|
|
}
|
|
|
|
static float recompute_bb_cost(void) {
|
|
|
|
/* Recomputes the cost to eliminate roundoff that may have accrued. *
|
|
* This routine does as little work as possible to compute this new *
|
|
* cost. */
|
|
|
|
int inet;
|
|
float cost;
|
|
|
|
cost = 0;
|
|
|
|
for (inet = 0; inet < num_nets; inet++) { /* for each net ... */
|
|
if (clb_net[inet].is_global == FALSE) { /* Do only if not global. */
|
|
|
|
/* Bounding boxes don't have to be recomputed; they're correct. */
|
|
cost += net_cost[inet];
|
|
}
|
|
}
|
|
|
|
return (cost);
|
|
}
|
|
|
|
static float comp_td_point_to_point_delay(int inet, int ipin) {
|
|
|
|
/*returns the delay of one point to point connection */
|
|
|
|
int source_block, sink_block;
|
|
int delta_x, delta_y;
|
|
t_type_ptr source_type, sink_type;
|
|
float delay_source_to_sink;
|
|
|
|
delay_source_to_sink = 0.;
|
|
|
|
source_block = clb_net[inet].node_block[0];
|
|
source_type = block[source_block].type;
|
|
|
|
sink_block = clb_net[inet].node_block[ipin];
|
|
sink_type = block[sink_block].type;
|
|
|
|
assert(source_type != NULL);
|
|
assert(sink_type != NULL);
|
|
|
|
delta_x = abs(block[sink_block].x - block[source_block].x);
|
|
delta_y = abs(block[sink_block].y - block[source_block].y);
|
|
|
|
/* TODO low priority: Could be merged into one look-up table */
|
|
/* Note: This heuristic is terrible on Quality of Results.
|
|
* A much better heuristic is to create a more comprehensive lookup table but
|
|
* it's too late in the release cycle to do this. Pushing until the next release */
|
|
if (source_type == IO_TYPE) {
|
|
if (sink_type == IO_TYPE)
|
|
delay_source_to_sink = delta_io_to_io[delta_x][delta_y];
|
|
else
|
|
delay_source_to_sink = delta_io_to_clb[delta_x][delta_y];
|
|
} else {
|
|
if (sink_type == IO_TYPE)
|
|
delay_source_to_sink = delta_clb_to_io[delta_x][delta_y];
|
|
else
|
|
delay_source_to_sink = delta_clb_to_clb[delta_x][delta_y];
|
|
}
|
|
if (delay_source_to_sink < 0) {
|
|
vpr_printf(TIO_MESSAGE_ERROR, "in comp_td_point_to_point_delay: Bad delay_source_to_sink value delta(%d, %d) delay of %g\n", delta_x, delta_y, delay_source_to_sink);
|
|
vpr_printf(TIO_MESSAGE_ERROR, "in comp_td_point_to_point_delay: Delay is less than 0\n");
|
|
exit(1);
|
|
}
|
|
|
|
return (delay_source_to_sink);
|
|
}
|
|
|
|
static void update_td_cost(void) {
|
|
/* Update the point_to_point_timing_cost values from the temporary *
|
|
* values for all connections that have changed. */
|
|
|
|
int iblk_pin, net_pin, inet, ipin;
|
|
int iblk, iblk2, bnum, driven_by_moved_block;
|
|
|
|
/* Go through all the blocks moved. */
|
|
for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++)
|
|
{
|
|
bnum = blocks_affected.moved_blocks[iblk].block_num;
|
|
for (iblk_pin = 0; iblk_pin < block[bnum].type->num_pins; iblk_pin++) {
|
|
|
|
inet = block[bnum].nets[iblk_pin];
|
|
|
|
if (inet == OPEN)
|
|
continue;
|
|
|
|
if (clb_net[inet].is_global)
|
|
continue;
|
|
|
|
net_pin = net_pin_index[bnum][iblk_pin];
|
|
|
|
if (net_pin != 0) {
|
|
|
|
driven_by_moved_block = FALSE;
|
|
for (iblk2 = 0; iblk2 < blocks_affected.num_moved_blocks; iblk2++)
|
|
{ if (clb_net[inet].node_block[0] == blocks_affected.moved_blocks[iblk2].block_num)
|
|
driven_by_moved_block = TRUE;
|
|
}
|
|
|
|
/* The following "if" prevents the value from being updated twice. */
|
|
if (driven_by_moved_block == FALSE) {
|
|
point_to_point_delay_cost[inet][net_pin] =
|
|
temp_point_to_point_delay_cost[inet][net_pin];
|
|
temp_point_to_point_delay_cost[inet][net_pin] = -1;
|
|
point_to_point_timing_cost[inet][net_pin] =
|
|
temp_point_to_point_timing_cost[inet][net_pin];
|
|
temp_point_to_point_timing_cost[inet][net_pin] = -1;
|
|
}
|
|
} else { /* This net is being driven by a moved block, recompute */
|
|
/* All point to point connections on this net. */
|
|
for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {
|
|
point_to_point_delay_cost[inet][ipin] =
|
|
temp_point_to_point_delay_cost[inet][ipin];
|
|
temp_point_to_point_delay_cost[inet][ipin] = -1;
|
|
point_to_point_timing_cost[inet][ipin] =
|
|
temp_point_to_point_timing_cost[inet][ipin];
|
|
temp_point_to_point_timing_cost[inet][ipin] = -1;
|
|
} /* Finished updating the pin */
|
|
}
|
|
} /* Finished going through all the pins in the moved block */
|
|
} /* Finished going through all the blocks moved */
|
|
}
|
|
|
|
static void comp_delta_td_cost(float *delta_timing, float *delta_delay) {
|
|
|
|
/*a net that is being driven by a moved block must have all of its */
|
|
/*sink timing costs recomputed. A net that is driving a moved block */
|
|
/*must only have the timing cost on the connection driving the input */
|
|
/*pin computed */
|
|
|
|
int inet, net_pin, ipin;
|
|
float delta_timing_cost, delta_delay_cost, temp_delay;
|
|
int iblk, iblk2, bnum, iblk_pin, driven_by_moved_block;
|
|
|
|
delta_timing_cost = 0.;
|
|
delta_delay_cost = 0.;
|
|
|
|
/* Go through all the blocks moved */
|
|
for (iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++)
|
|
{
|
|
bnum = blocks_affected.moved_blocks[iblk].block_num;
|
|
/* Go through all the pins in the moved block */
|
|
for (iblk_pin = 0; iblk_pin < block[bnum].type->num_pins; iblk_pin++) {
|
|
inet = block[bnum].nets[iblk_pin];
|
|
|
|
if (inet == OPEN)
|
|
continue;
|
|
|
|
if (clb_net[inet].is_global)
|
|
continue;
|
|
|
|
net_pin = net_pin_index[bnum][iblk_pin];
|
|
|
|
if (net_pin != 0) {
|
|
/* If this net is being driven by a block that has moved, we do not *
|
|
* need to compute the change in the timing cost (here) since it will *
|
|
* be computed in the fanout of the net on the driving block, also *
|
|
* computing it here would double count the change, and mess up the *
|
|
* delta_timing_cost value. */
|
|
driven_by_moved_block = FALSE;
|
|
for (iblk2 = 0; iblk2 < blocks_affected.num_moved_blocks; iblk2++)
|
|
{ if (clb_net[inet].node_block[0] == blocks_affected.moved_blocks[iblk2].block_num)
|
|
driven_by_moved_block = TRUE;
|
|
}
|
|
|
|
if (driven_by_moved_block == FALSE) {
|
|
temp_delay = comp_td_point_to_point_delay(inet, net_pin);
|
|
temp_point_to_point_delay_cost[inet][net_pin] = temp_delay;
|
|
|
|
temp_point_to_point_timing_cost[inet][net_pin] =
|
|
timing_place_crit[inet][net_pin] * temp_delay;
|
|
delta_timing_cost += temp_point_to_point_timing_cost[inet][net_pin]
|
|
- point_to_point_timing_cost[inet][net_pin];
|
|
delta_delay_cost += temp_point_to_point_delay_cost[inet][net_pin]
|
|
- point_to_point_delay_cost[inet][net_pin];
|
|
}
|
|
} else { /* This net is being driven by a moved block, recompute */
|
|
/* All point to point connections on this net. */
|
|
for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {
|
|
temp_delay = comp_td_point_to_point_delay(inet, ipin);
|
|
temp_point_to_point_delay_cost[inet][ipin] = temp_delay;
|
|
|
|
temp_point_to_point_timing_cost[inet][ipin] =
|
|
timing_place_crit[inet][ipin] * temp_delay;
|
|
delta_timing_cost += temp_point_to_point_timing_cost[inet][ipin]
|
|
- point_to_point_timing_cost[inet][ipin];
|
|
delta_delay_cost += temp_point_to_point_delay_cost[inet][ipin]
|
|
- point_to_point_delay_cost[inet][ipin];
|
|
|
|
} /* Finished updating the pin */
|
|
}
|
|
} /* Finished going through all the pins in the moved block */
|
|
} /* Finished going through all the blocks moved */
|
|
|
|
*delta_timing = delta_timing_cost;
|
|
*delta_delay = delta_delay_cost;
|
|
}
|
|
|
|
static void comp_td_costs(float *timing_cost, float *connection_delay_sum) {
|
|
/* Computes the cost (from scratch) due to the delays and criticalities *
|
|
* on all point to point connections, we define the timing cost of *
|
|
* each connection as criticality*delay. */
|
|
|
|
int inet, ipin;
|
|
float loc_timing_cost, loc_connection_delay_sum, temp_delay_cost,
|
|
temp_timing_cost;
|
|
|
|
loc_timing_cost = 0.;
|
|
loc_connection_delay_sum = 0.;
|
|
|
|
for (inet = 0; inet < num_nets; inet++) { /* For each net ... */
|
|
if (clb_net[inet].is_global == FALSE) { /* Do only if not global. */
|
|
|
|
for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {
|
|
|
|
temp_delay_cost = comp_td_point_to_point_delay(inet, ipin);
|
|
temp_timing_cost = temp_delay_cost * timing_place_crit[inet][ipin];
|
|
|
|
loc_connection_delay_sum += temp_delay_cost;
|
|
point_to_point_delay_cost[inet][ipin] = temp_delay_cost;
|
|
temp_point_to_point_delay_cost[inet][ipin] = -1; /* Undefined */
|
|
|
|
point_to_point_timing_cost[inet][ipin] = temp_timing_cost;
|
|
temp_point_to_point_timing_cost[inet][ipin] = -1; /* Undefined */
|
|
loc_timing_cost += temp_timing_cost;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Make sure timing cost does not go above MIN_TIMING_COST. */
|
|
*timing_cost = loc_timing_cost;
|
|
|
|
*connection_delay_sum = loc_connection_delay_sum;
|
|
}
|
|
|
|
static float comp_bb_cost(enum cost_methods method) {
|
|
|
|
/* Finds the cost from scratch. Done only when the placement *
|
|
* has been radically changed (i.e. after initial placement). *
|
|
* Otherwise find the cost change incrementally. If method *
|
|
* check is NORMAL, we find bounding boxes that are updateable *
|
|
* for the larger nets. If method is CHECK, all bounding boxes *
|
|
* are found via the non_updateable_bb routine, to provide a *
|
|
* cost which can be used to check the correctness of the *
|
|
* other routine. */
|
|
|
|
int inet;
|
|
float cost;
|
|
double expected_wirelength;
|
|
|
|
cost = 0;
|
|
expected_wirelength = 0.0;
|
|
|
|
for (inet = 0; inet < num_nets; inet++) { /* for each net ... */
|
|
|
|
if (clb_net[inet].is_global == FALSE) { /* Do only if not global. */
|
|
|
|
/* Small nets don't use incremental updating on their bounding boxes, *
|
|
* so they can use a fast bounding box calculator. */
|
|
|
|
if (clb_net[inet].num_sinks >= SMALL_NET && method == NORMAL) {
|
|
get_bb_from_scratch(inet, &bb_coords[inet],
|
|
&bb_num_on_edges[inet]);
|
|
} else {
|
|
get_non_updateable_bb(inet, &bb_coords[inet]);
|
|
}
|
|
|
|
net_cost[inet] = get_net_cost(inet, &bb_coords[inet]);
|
|
cost += net_cost[inet];
|
|
if (method == CHECK)
|
|
expected_wirelength += get_net_wirelength_estimate(inet,
|
|
&bb_coords[inet]);
|
|
}
|
|
}
|
|
|
|
if (method == CHECK) {
|
|
vpr_printf(TIO_MESSAGE_INFO, "\n");
|
|
vpr_printf(TIO_MESSAGE_INFO, "BB estimate of min-dist (placement) wirelength: %.0f\n", expected_wirelength);
|
|
}
|
|
return (cost);
|
|
}
|
|
|
|
static void free_placement_structs(
|
|
float **old_region_occ_x, float **old_region_occ_y,
|
|
struct s_placer_opts placer_opts) {
|
|
|
|
/* Frees the major structures needed by the placer (and not needed *
|
|
* elsewhere). */
|
|
|
|
int inet, imacro;
|
|
|
|
free_legal_placements();
|
|
free_fast_cost_update();
|
|
|
|
if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
|
|
|| placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE
|
|
|| placer_opts.enable_timing_computations) {
|
|
for (inet = 0; inet < num_nets; inet++) {
|
|
/*add one to the address since it is indexed from 1 not 0 */
|
|
|
|
point_to_point_delay_cost[inet]++;
|
|
free(point_to_point_delay_cost[inet]);
|
|
|
|
point_to_point_timing_cost[inet]++;
|
|
free(point_to_point_timing_cost[inet]);
|
|
|
|
temp_point_to_point_delay_cost[inet]++;
|
|
free(temp_point_to_point_delay_cost[inet]);
|
|
|
|
temp_point_to_point_timing_cost[inet]++;
|
|
free(temp_point_to_point_timing_cost[inet]);
|
|
}
|
|
free(point_to_point_delay_cost);
|
|
free(temp_point_to_point_delay_cost);
|
|
|
|
free(point_to_point_timing_cost);
|
|
free(temp_point_to_point_timing_cost);
|
|
|
|
free_matrix(net_pin_index, 0, num_blocks - 1, 0, sizeof(int));
|
|
}
|
|
|
|
free(net_cost);
|
|
free(temp_net_cost);
|
|
free(bb_num_on_edges);
|
|
free(bb_coords);
|
|
|
|
free_placement_macros_structs();
|
|
|
|
for (imacro = 0; imacro < num_pl_macros; imacro ++)
|
|
free(pl_macros[imacro].members);
|
|
free(pl_macros);
|
|
|
|
net_cost = NULL; /* Defensive coding. */
|
|
temp_net_cost = NULL;
|
|
bb_num_on_edges = NULL;
|
|
bb_coords = NULL;
|
|
pl_macros = NULL;
|
|
|
|
/* Frees up all the data structure used in vpr_utils. */
|
|
free_port_pin_from_blk_pin();
|
|
free_blk_pin_from_port_pin();
|
|
|
|
}
|
|
|
|
static void alloc_and_load_placement_structs(
|
|
float place_cost_exp, float ***old_region_occ_x,
|
|
float ***old_region_occ_y, struct s_placer_opts placer_opts,
|
|
t_direct_inf *directs, int num_directs) {
|
|
|
|
/* Allocates the major structures needed only by the placer, primarily for *
|
|
* computing costs quickly and such. */
|
|
|
|
int inet, ipin, max_pins_per_clb, i;
|
|
|
|
alloc_legal_placements();
|
|
load_legal_placements();
|
|
|
|
max_pins_per_clb = 0;
|
|
for (i = 0; i < num_types; i++) {
|
|
max_pins_per_clb = std::max(max_pins_per_clb, type_descriptors[i].num_pins);
|
|
}
|
|
|
|
if (placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
|
|
|| placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE
|
|
|| placer_opts.enable_timing_computations) {
|
|
/* Allocate structures associated with timing driven placement */
|
|
/* [0..num_nets-1][1..num_pins-1] */
|
|
point_to_point_delay_cost = (float **) my_malloc(
|
|
num_nets * sizeof(float *));
|
|
temp_point_to_point_delay_cost = (float **) my_malloc(
|
|
num_nets * sizeof(float *));
|
|
|
|
point_to_point_timing_cost = (float **) my_malloc(
|
|
num_nets * sizeof(float *));
|
|
temp_point_to_point_timing_cost = (float **) my_malloc(
|
|
num_nets * sizeof(float *));
|
|
|
|
for (inet = 0; inet < num_nets; inet++) {
|
|
|
|
/* In the following, subract one so index starts at *
|
|
* 1 instead of 0 */
|
|
point_to_point_delay_cost[inet] = (float *) my_malloc(
|
|
clb_net[inet].num_sinks * sizeof(float));
|
|
point_to_point_delay_cost[inet]--;
|
|
|
|
temp_point_to_point_delay_cost[inet] = (float *) my_malloc(
|
|
clb_net[inet].num_sinks * sizeof(float));
|
|
temp_point_to_point_delay_cost[inet]--;
|
|
|
|
point_to_point_timing_cost[inet] = (float *) my_malloc(
|
|
clb_net[inet].num_sinks * sizeof(float));
|
|
point_to_point_timing_cost[inet]--;
|
|
|
|
temp_point_to_point_timing_cost[inet] = (float *) my_malloc(
|
|
clb_net[inet].num_sinks * sizeof(float));
|
|
temp_point_to_point_timing_cost[inet]--;
|
|
}
|
|
for (inet = 0; inet < num_nets; inet++) {
|
|
for (ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) {
|
|
point_to_point_delay_cost[inet][ipin] = 0;
|
|
temp_point_to_point_delay_cost[inet][ipin] = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
net_cost = (float *) my_malloc(num_nets * sizeof(float));
|
|
temp_net_cost = (float *) my_malloc(num_nets * sizeof(float));
|
|
bb_updated_before = (char*)my_calloc(num_nets, sizeof(char));
|
|
|
|
/* Used to store costs for moves not yet made and to indicate when a net's *
|
|
* cost has been recomputed. temp_net_cost[inet] < 0 means net's cost hasn't *
|
|
* been recomputed. */
|
|
|
|
for (inet = 0; inet < num_nets; inet++){
|
|
bb_updated_before[inet] = NOT_UPDATED_YET;
|
|
temp_net_cost[inet] = -1.;
|
|
}
|
|
|
|
bb_coords = (struct s_bb *) my_malloc(num_nets * sizeof(struct s_bb));
|
|
bb_num_on_edges = (struct s_bb *) my_malloc(num_nets * sizeof(struct s_bb));
|
|
|
|
/* Shouldn't use them; crash hard if I do! */
|
|
*old_region_occ_x = NULL;
|
|
*old_region_occ_y = NULL;
|
|
|
|
alloc_and_load_for_fast_cost_update(place_cost_exp);
|
|
|
|
net_pin_index = alloc_and_load_net_pin_index();
|
|
|
|
alloc_and_load_try_swap_structs();
|
|
|
|
num_pl_macros = alloc_and_load_placement_macros(directs, num_directs, &pl_macros);
|
|
}
|
|
|
|
static void alloc_and_load_try_swap_structs() {
|
|
/* Allocate the local bb_coordinate storage, etc. only once. */
|
|
/* Allocate with size num_nets for any number of nets affected. */
|
|
ts_bb_coord_new = (struct s_bb *) my_calloc(
|
|
num_nets, sizeof(struct s_bb));
|
|
ts_bb_edge_new = (struct s_bb *) my_calloc(
|
|
num_nets, sizeof(struct s_bb));
|
|
ts_nets_to_update = (int *) my_calloc(num_nets, sizeof(int));
|
|
|
|
/* Allocate with size num_blocks for any number of moved block. */
|
|
blocks_affected.moved_blocks = (t_pl_moved_block*)my_calloc(
|
|
num_blocks, sizeof(t_pl_moved_block) );
|
|
blocks_affected.num_moved_blocks = 0;
|
|
|
|
}
|
|
|
|
static void get_bb_from_scratch(int inet, struct s_bb *coords,
|
|
struct s_bb *num_on_edges) {
|
|
|
|
/* This routine finds the bounding box of each net from scratch (i.e. *
|
|
* from only the block location information). It updates both the *
|
|
* coordinate and number of pins on each edge information. It *
|
|
* should only be called when the bounding box information is not valid. */
|
|
|
|
int ipin, bnum, pnum, x, y, xmin, xmax, ymin, ymax;
|
|
int xmin_edge, xmax_edge, ymin_edge, ymax_edge;
|
|
int n_pins;
|
|
|
|
n_pins = clb_net[inet].num_sinks + 1;
|
|
|
|
bnum = clb_net[inet].node_block[0];
|
|
pnum = clb_net[inet].node_block_pin[0];
|
|
|
|
x = block[bnum].x;
|
|
y = block[bnum].y + block[bnum].type->pin_height[pnum];
|
|
|
|
x = std::max(std::min(x, nx), 1);
|
|
y = std::max(std::min(y, ny), 1);
|
|
|
|
xmin = x;
|
|
ymin = y;
|
|
xmax = x;
|
|
ymax = y;
|
|
xmin_edge = 1;
|
|
ymin_edge = 1;
|
|
xmax_edge = 1;
|
|
ymax_edge = 1;
|
|
|
|
for (ipin = 1; ipin < n_pins; ipin++) {
|
|
bnum = clb_net[inet].node_block[ipin];
|
|
pnum = clb_net[inet].node_block_pin[ipin];
|
|
x = block[bnum].x;
|
|
y = block[bnum].y + block[bnum].type->pin_height[pnum];
|
|
|
|
/* Code below counts IO blocks as being within the 1..nx, 1..ny clb array. *
|
|
* This is because channels do not go out of the 0..nx, 0..ny range, and *
|
|
* I always take all channels impinging on the bounding box to be within *
|
|
* that bounding box. Hence, this "movement" of IO blocks does not affect *
|
|
* the which channels are included within the bounding box, and it *
|
|
* simplifies the code a lot. */
|
|
|
|
x = std::max(std::min(x, nx), 1);
|
|
y = std::max(std::min(y, ny), 1);
|
|
|
|
if (x == xmin) {
|
|
xmin_edge++;
|
|
}
|
|
if (x == xmax) { /* Recall that xmin could equal xmax -- don't use else */
|
|
xmax_edge++;
|
|
} else if (x < xmin) {
|
|
xmin = x;
|
|
xmin_edge = 1;
|
|
} else if (x > xmax) {
|
|
xmax = x;
|
|
xmax_edge = 1;
|
|
}
|
|
|
|
if (y == ymin) {
|
|
ymin_edge++;
|
|
}
|
|
if (y == ymax) {
|
|
ymax_edge++;
|
|
} else if (y < ymin) {
|
|
ymin = y;
|
|
ymin_edge = 1;
|
|
} else if (y > ymax) {
|
|
ymax = y;
|
|
ymax_edge = 1;
|
|
}
|
|
}
|
|
|
|
/* Copy the coordinates and number on edges information into the proper *
|
|
* structures. */
|
|
coords->xmin = xmin;
|
|
coords->xmax = xmax;
|
|
coords->ymin = ymin;
|
|
coords->ymax = ymax;
|
|
|
|
num_on_edges->xmin = xmin_edge;
|
|
num_on_edges->xmax = xmax_edge;
|
|
num_on_edges->ymin = ymin_edge;
|
|
num_on_edges->ymax = ymax_edge;
|
|
}
|
|
|
|
static double get_net_wirelength_estimate(int inet, struct s_bb *bbptr) {

	/* WMF: Estimates the wirelength of net inet from its coordinate         *
	 * bounding box: (width + height of the box, each counted inclusively)   *
	 * times the expected "crossing count" for a net with this many pins.    */

	const int num_pins = clb_net[inet].num_sinks + 1;
	double crossing, estimate;

	/* Crossing count: tabulated up to 50 pins, extrapolated beyond that     *
	 * (linearly below 85 pins, with a quadratic expression from 85 up).     */
	if (num_pins >= 85) {
		crossing = 2.7933 + 0.011 * num_pins
				- 0.0000018 * num_pins * num_pins;
	} else if (num_pins > 50) {
		crossing = 2.7933 + 0.02616 * (num_pins - 50);
	} else {
		crossing = cross_count[num_pins - 1];
	}

	/* Could insert a check for xmin == xmax.  In that case, assume the      *
	 * connection is made with no bends and hence no x-cost.  Same thing     *
	 * for y-cost.                                                           */

	/* x extent first, then add the y extent. */
	estimate = (bbptr->xmax - bbptr->xmin + 1) * crossing;
	estimate += (bbptr->ymax - bbptr->ymin + 1) * crossing;

	return (estimate);
}
|
|
|
|
static float get_net_cost(int inet, struct s_bb *bbptr) {

	/* Returns the cost of net inet, derived from its coordinate bounding    *
	 * box.  Each direction contributes (inclusive extent) * crossing count  *
	 * * the place cost factor looked up for the box's span in the other     *
	 * direction.                                                            */

	const int num_pins = clb_net[inet].num_sinks + 1;
	float crossing, net_total;

	/* Crossing count: tabulated up to 50 pins, linearly extrapolated for    *
	 * larger nets.                                                          */
	if (num_pins <= 50) {
		crossing = cross_count[num_pins - 1];
	} else {
		crossing = 2.7933 + 0.02616 * (num_pins - 50);
	}

	/* Could insert a check for xmin == xmax.  In that case, assume the      *
	 * connection is made with no bends and hence no x-cost.  Same thing     *
	 * for y-cost.                                                           */

	/* x-direction contribution, weighted by the chanx factor. */
	net_total = (bbptr->xmax - bbptr->xmin + 1) * crossing
			* chanx_place_cost_fac[bbptr->ymax][bbptr->ymin - 1];

	/* y-direction contribution, weighted by the chany factor. */
	net_total += (bbptr->ymax - bbptr->ymin + 1) * crossing
			* chany_place_cost_fac[bbptr->xmax][bbptr->xmin - 1];

	return (net_total);
}
|
|
|
|
static void get_non_updateable_bb(int inet, struct s_bb *bb_coord_new) {
|
|
|
|
/* Finds the bounding box of a net and stores its coordinates in the *
|
|
* bb_coord_new data structure. This routine should only be called *
|
|
* for small nets, since it does not determine enough information for *
|
|
* the bounding box to be updated incrementally later. *
|
|
* Currently assumes channels on both sides of the CLBs forming the *
|
|
* edges of the bounding box can be used. Essentially, I am assuming *
|
|
* the pins always lie on the outside of the bounding box. */
|
|
|
|
int k, xmax, ymax, xmin, ymin, x, y;
|
|
int bnum, pnum;
|
|
|
|
bnum = clb_net[inet].node_block[0];
|
|
pnum = clb_net[inet].node_block_pin[0];
|
|
x = block[bnum].x;
|
|
y = block[bnum].y + block[bnum].type->pin_height[pnum];
|
|
|
|
xmin = x;
|
|
ymin = y;
|
|
xmax = x;
|
|
ymax = y;
|
|
|
|
for (k = 1; k < (clb_net[inet].num_sinks + 1); k++) {
|
|
bnum = clb_net[inet].node_block[k];
|
|
pnum = clb_net[inet].node_block_pin[k];
|
|
x = block[bnum].x;
|
|
y = block[bnum].y + block[bnum].type->pin_height[pnum];
|
|
|
|
if (x < xmin) {
|
|
xmin = x;
|
|
} else if (x > xmax) {
|
|
xmax = x;
|
|
}
|
|
|
|
if (y < ymin) {
|
|
ymin = y;
|
|
} else if (y > ymax) {
|
|
ymax = y;
|
|
}
|
|
}
|
|
|
|
/* Now I've found the coordinates of the bounding box. There are no *
|
|
* channels beyond nx and ny, so I want to clip to that. As well, *
|
|
* since I'll always include the channel immediately below and the *
|
|
* channel immediately to the left of the bounding box, I want to *
|
|
* clip to 1 in both directions as well (since minimum channel index *
|
|
* is 0). See route.c for a channel diagram. */
|
|
|
|
bb_coord_new->xmin = std::max(std::min(xmin, nx), 1);
|
|
bb_coord_new->ymin = std::max(std::min(ymin, ny), 1);
|
|
bb_coord_new->xmax = std::max(std::min(xmax, nx), 1);
|
|
bb_coord_new->ymax = std::max(std::min(ymax, ny), 1);
|
|
}
|
|
|
|
static void update_bb(int inet, struct s_bb *bb_coord_new,
|
|
struct s_bb *bb_edge_new, int xold, int yold, int xnew, int ynew) {
|
|
|
|
/* Updates the bounding box of a net by storing its coordinates in *
|
|
* the bb_coord_new data structure and the number of blocks on each *
|
|
* edge in the bb_edge_new data structure. This routine should only *
|
|
* be called for large nets, since it has some overhead relative to *
|
|
* just doing a brute force bounding box calculation. The bounding *
|
|
* box coordinate and edge information for inet must be valid before *
|
|
* this routine is called. *
|
|
* Currently assumes channels on both sides of the CLBs forming the *
|
|
* edges of the bounding box can be used. Essentially, I am assuming *
|
|
* the pins always lie on the outside of the bounding box. *
|
|
* The x and y coordinates are the pin's x and y coordinates. */
|
|
/* IO blocks are considered to be one cell in for simplicity. */
|
|
|
|
struct s_bb *curr_bb_edge, *curr_bb_coord;
|
|
|
|
xnew = std::max(std::min(xnew, nx), 1);
|
|
ynew = std::max(std::min(ynew, ny), 1);
|
|
xold = std::max(std::min(xold, nx), 1);
|
|
yold = std::max(std::min(yold, ny), 1);
|
|
|
|
/* Check if the net had been updated before. */
|
|
if (bb_updated_before[inet] == GOT_FROM_SCRATCH)
|
|
{ /* The net had been updated from scratch, DO NOT update again! */
|
|
return;
|
|
}
|
|
else if (bb_updated_before[inet] == NOT_UPDATED_YET)
|
|
{ /* The net had NOT been updated before, could use the old values */
|
|
curr_bb_coord = &bb_coords[inet];
|
|
curr_bb_edge = &bb_num_on_edges[inet];
|
|
bb_updated_before[inet] = UPDATED_ONCE;
|
|
}
|
|
else
|
|
{ /* The net had been updated before, must use the new values */
|
|
curr_bb_coord = bb_coord_new;
|
|
curr_bb_edge = bb_edge_new;
|
|
}
|
|
|
|
/* Check if I can update the bounding box incrementally. */
|
|
|
|
if (xnew < xold) { /* Move to left. */
|
|
|
|
/* Update the xmax fields for coordinates and number of edges first. */
|
|
|
|
if (xold == curr_bb_coord->xmax) { /* Old position at xmax. */
|
|
if (curr_bb_edge->xmax == 1) {
|
|
get_bb_from_scratch(inet, bb_coord_new, bb_edge_new);
|
|
bb_updated_before[inet] = GOT_FROM_SCRATCH;
|
|
return;
|
|
} else {
|
|
bb_edge_new->xmax = curr_bb_edge->xmax - 1;
|
|
bb_coord_new->xmax = curr_bb_coord->xmax;
|
|
}
|
|
}
|
|
|
|
else { /* Move to left, old postion was not at xmax. */
|
|
bb_coord_new->xmax = curr_bb_coord->xmax;
|
|
bb_edge_new->xmax = curr_bb_edge->xmax;
|
|
}
|
|
|
|
/* Now do the xmin fields for coordinates and number of edges. */
|
|
|
|
if (xnew < curr_bb_coord->xmin) { /* Moved past xmin */
|
|
bb_coord_new->xmin = xnew;
|
|
bb_edge_new->xmin = 1;
|
|
}
|
|
|
|
else if (xnew == curr_bb_coord->xmin) { /* Moved to xmin */
|
|
bb_coord_new->xmin = xnew;
|
|
bb_edge_new->xmin = curr_bb_edge->xmin + 1;
|
|
}
|
|
|
|
else { /* Xmin unchanged. */
|
|
bb_coord_new->xmin = curr_bb_coord->xmin;
|
|
bb_edge_new->xmin = curr_bb_edge->xmin;
|
|
}
|
|
}
|
|
|
|
/* End of move to left case. */
|
|
else if (xnew > xold) { /* Move to right. */
|
|
|
|
/* Update the xmin fields for coordinates and number of edges first. */
|
|
|
|
if (xold == curr_bb_coord->xmin) { /* Old position at xmin. */
|
|
if (curr_bb_edge->xmin == 1) {
|
|
get_bb_from_scratch(inet, bb_coord_new, bb_edge_new);
|
|
bb_updated_before[inet] = GOT_FROM_SCRATCH;
|
|
return;
|
|
} else {
|
|
bb_edge_new->xmin = curr_bb_edge->xmin - 1;
|
|
bb_coord_new->xmin = curr_bb_coord->xmin;
|
|
}
|
|
}
|
|
|
|
else { /* Move to right, old position was not at xmin. */
|
|
bb_coord_new->xmin = curr_bb_coord->xmin;
|
|
bb_edge_new->xmin = curr_bb_edge->xmin;
|
|
}
|
|
|
|
/* Now do the xmax fields for coordinates and number of edges. */
|
|
|
|
if (xnew > curr_bb_coord->xmax) { /* Moved past xmax. */
|
|
bb_coord_new->xmax = xnew;
|
|
bb_edge_new->xmax = 1;
|
|
}
|
|
|
|
else if (xnew == curr_bb_coord->xmax) { /* Moved to xmax */
|
|
bb_coord_new->xmax = xnew;
|
|
bb_edge_new->xmax = curr_bb_edge->xmax + 1;
|
|
}
|
|
|
|
else { /* Xmax unchanged. */
|
|
bb_coord_new->xmax = curr_bb_coord->xmax;
|
|
bb_edge_new->xmax = curr_bb_edge->xmax;
|
|
}
|
|
}
|
|
/* End of move to right case. */
|
|
else { /* xnew == xold -- no x motion. */
|
|
bb_coord_new->xmin = curr_bb_coord->xmin;
|
|
bb_coord_new->xmax = curr_bb_coord->xmax;
|
|
bb_edge_new->xmin = curr_bb_edge->xmin;
|
|
bb_edge_new->xmax = curr_bb_edge->xmax;
|
|
}
|
|
|
|
/* Now account for the y-direction motion. */
|
|
|
|
if (ynew < yold) { /* Move down. */
|
|
|
|
/* Update the ymax fields for coordinates and number of edges first. */
|
|
|
|
if (yold == curr_bb_coord->ymax) { /* Old position at ymax. */
|
|
if (curr_bb_edge->ymax == 1) {
|
|
get_bb_from_scratch(inet, bb_coord_new, bb_edge_new);
|
|
bb_updated_before[inet] = GOT_FROM_SCRATCH;
|
|
return;
|
|
} else {
|
|
bb_edge_new->ymax = curr_bb_edge->ymax - 1;
|
|
bb_coord_new->ymax = curr_bb_coord->ymax;
|
|
}
|
|
}
|
|
|
|
else { /* Move down, old postion was not at ymax. */
|
|
bb_coord_new->ymax = curr_bb_coord->ymax;
|
|
bb_edge_new->ymax = curr_bb_edge->ymax;
|
|
}
|
|
|
|
/* Now do the ymin fields for coordinates and number of edges. */
|
|
|
|
if (ynew < curr_bb_coord->ymin) { /* Moved past ymin */
|
|
bb_coord_new->ymin = ynew;
|
|
bb_edge_new->ymin = 1;
|
|
}
|
|
|
|
else if (ynew == curr_bb_coord->ymin) { /* Moved to ymin */
|
|
bb_coord_new->ymin = ynew;
|
|
bb_edge_new->ymin = curr_bb_edge->ymin + 1;
|
|
}
|
|
|
|
else { /* ymin unchanged. */
|
|
bb_coord_new->ymin = curr_bb_coord->ymin;
|
|
bb_edge_new->ymin = curr_bb_edge->ymin;
|
|
}
|
|
}
|
|
/* End of move down case. */
|
|
else if (ynew > yold) { /* Moved up. */
|
|
|
|
/* Update the ymin fields for coordinates and number of edges first. */
|
|
|
|
if (yold == curr_bb_coord->ymin) { /* Old position at ymin. */
|
|
if (curr_bb_edge->ymin == 1) {
|
|
get_bb_from_scratch(inet, bb_coord_new, bb_edge_new);
|
|
bb_updated_before[inet] = GOT_FROM_SCRATCH;
|
|
return;
|
|
} else {
|
|
bb_edge_new->ymin = curr_bb_edge->ymin - 1;
|
|
bb_coord_new->ymin = curr_bb_coord->ymin;
|
|
}
|
|
}
|
|
|
|
else { /* Moved up, old position was not at ymin. */
|
|
bb_coord_new->ymin = curr_bb_coord->ymin;
|
|
bb_edge_new->ymin = curr_bb_edge->ymin;
|
|
}
|
|
|
|
/* Now do the ymax fields for coordinates and number of edges. */
|
|
|
|
if (ynew > curr_bb_coord->ymax) { /* Moved past ymax. */
|
|
bb_coord_new->ymax = ynew;
|
|
bb_edge_new->ymax = 1;
|
|
}
|
|
|
|
else if (ynew == curr_bb_coord->ymax) { /* Moved to ymax */
|
|
bb_coord_new->ymax = ynew;
|
|
bb_edge_new->ymax = curr_bb_edge->ymax + 1;
|
|
}
|
|
|
|
else { /* ymax unchanged. */
|
|
bb_coord_new->ymax = curr_bb_coord->ymax;
|
|
bb_edge_new->ymax = curr_bb_edge->ymax;
|
|
}
|
|
}
|
|
/* End of move up case. */
|
|
else { /* ynew == yold -- no y motion. */
|
|
bb_coord_new->ymin = curr_bb_coord->ymin;
|
|
bb_coord_new->ymax = curr_bb_coord->ymax;
|
|
bb_edge_new->ymin = curr_bb_edge->ymin;
|
|
bb_edge_new->ymax = curr_bb_edge->ymax;
|
|
}
|
|
|
|
if (bb_updated_before[inet] == NOT_UPDATED_YET)
|
|
bb_updated_before[inet] = UPDATED_ONCE;
|
|
}
|
|
|
|
static void alloc_legal_placements() {
|
|
int i, j, k;
|
|
|
|
legal_pos = (t_legal_pos **) my_malloc(num_types * sizeof(t_legal_pos *));
|
|
num_legal_pos = (int *) my_calloc(num_types, sizeof(int));
|
|
|
|
/* Initialize all occupancy to zero. */
|
|
|
|
for (i = 0; i <= nx + 1; i++) {
|
|
for (j = 0; j <= ny + 1; j++) {
|
|
grid[i][j].usage = 0;
|
|
for (k = 0; k < grid[i][j].type->capacity; k++) {
|
|
grid[i][j].blocks[k] = EMPTY;
|
|
if (grid[i][j].offset == 0) {
|
|
num_legal_pos[grid[i][j].type->index]++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < num_types; i++) {
|
|
legal_pos[i] = (t_legal_pos *) my_malloc(num_legal_pos[i] * sizeof(t_legal_pos));
|
|
}
|
|
}
|
|
|
|
static void load_legal_placements() {
|
|
int i, j, k, itype;
|
|
int *index;
|
|
|
|
index = (int *) my_calloc(num_types, sizeof(int));
|
|
|
|
for (i = 0; i <= nx + 1; i++) {
|
|
for (j = 0; j <= ny + 1; j++) {
|
|
for (k = 0; k < grid[i][j].type->capacity; k++) {
|
|
if (grid[i][j].offset == 0) {
|
|
itype = grid[i][j].type->index;
|
|
legal_pos[itype][index[itype]].x = i;
|
|
legal_pos[itype][index[itype]].y = j;
|
|
legal_pos[itype][index[itype]].z = k;
|
|
index[itype]++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
free(index);
|
|
}
|
|
|
|
static void free_legal_placements() {

	/* Releases the per-type legal position lists, the table that holds      *
	 * them, and the per-type position counts.                               */

	int itype = 0;

	while (itype < num_types) {
		free(legal_pos[itype]);
		itype++;
	}

	free(legal_pos); /* The table of per-type lists */
	free(num_legal_pos);
}
|
|
|
|
|
|
|
|
static int check_macro_can_be_placed(int imacro, int itype, int x, int y, int z) {
|
|
|
|
int imember;
|
|
int member_x, member_y, member_z;
|
|
|
|
// Every macro can be placed until proven otherwise
|
|
int macro_can_be_placed = TRUE;
|
|
|
|
// Check whether all the members can be placed
|
|
for (imember = 0; imember < pl_macros[imacro].num_blocks; imember++) {
|
|
member_x = x + pl_macros[imacro].members[imember].x_offset;
|
|
member_y = y + pl_macros[imacro].members[imember].y_offset;
|
|
member_z = z + pl_macros[imacro].members[imember].z_offset;
|
|
|
|
// Check whether the location could accept block of this type
|
|
// Then check whether the location could still accomodate more blocks
|
|
// Also check whether the member position is valid, that is the member's location
|
|
// still within the chip's dimemsion and the member_z is allowed at that location on the grid
|
|
if (member_x <= nx+1 && member_y <= ny+1
|
|
&& grid[member_x][member_y].type->index == itype
|
|
&& grid[member_x][member_y].blocks[member_z] == OPEN) {
|
|
// Can still accomodate blocks here, check the next position
|
|
continue;
|
|
} else {
|
|
// Cant be placed here - skip to the next try
|
|
macro_can_be_placed = FALSE;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return (macro_can_be_placed);
|
|
}
|
|
|
|
|
|
static int try_place_macro(int itype, int ichoice, int imacro, int * free_locations){
|
|
|
|
int x, y, z, member_x, member_y, member_z, imember;
|
|
|
|
int macro_placed = FALSE;
|
|
|
|
// Choose a random position for the head
|
|
x = legal_pos[itype][ichoice].x;
|
|
y = legal_pos[itype][ichoice].y;
|
|
z = legal_pos[itype][ichoice].z;
|
|
|
|
// If that location is occupied, do nothing.
|
|
if (grid[x][y].blocks[z] != OPEN) {
|
|
return (macro_placed);
|
|
}
|
|
|
|
int macro_can_be_placed = check_macro_can_be_placed(imacro, itype, x, y, z);
|
|
|
|
if (macro_can_be_placed == TRUE) {
|
|
|
|
// Place down the macro
|
|
macro_placed = TRUE;
|
|
for (imember = 0; imember < pl_macros[imacro].num_blocks; imember++) {
|
|
|
|
member_x = x + pl_macros[imacro].members[imember].x_offset;
|
|
member_y = y + pl_macros[imacro].members[imember].y_offset;
|
|
member_z = z + pl_macros[imacro].members[imember].z_offset;
|
|
|
|
block[pl_macros[imacro].members[imember].blk_index].x = member_x;
|
|
block[pl_macros[imacro].members[imember].blk_index].y = member_y;
|
|
block[pl_macros[imacro].members[imember].blk_index].z = member_z;
|
|
|
|
grid[member_x][member_y].blocks[member_z] = pl_macros[imacro].members[imember].blk_index;
|
|
grid[member_x][member_y].usage++;
|
|
|
|
// Could not ensure that the randomiser would not pick this location again
|
|
// So, would have to do a lazy removal - whenever I come across a block that could not be placed,
|
|
// go ahead and remove it from the legal_pos[][] array
|
|
|
|
} // Finish placing all the members in the macro
|
|
|
|
} // End of this choice of legal_pos
|
|
|
|
return (macro_placed);
|
|
|
|
}
|
|
|
|
|
|
static void initial_placement_pl_macros(int macros_max_num_tries, int * free_locations) {
|
|
|
|
int macro_placed;
|
|
int imacro, iblk, itype, itry, ichoice;
|
|
|
|
/* Macros are harder to place. Do them first */
|
|
for (imacro = 0; imacro < num_pl_macros; imacro++) {
|
|
|
|
// Every macro are not placed in the beginnning
|
|
macro_placed = FALSE;
|
|
|
|
// Assume that all the blocks in the macro are of the same type
|
|
iblk = pl_macros[imacro].members[0].blk_index;
|
|
itype = block[iblk].type->index;
|
|
if (free_locations[itype] < pl_macros[imacro].num_blocks) {
|
|
vpr_printf (TIO_MESSAGE_ERROR, "Initial placement failed.\n");
|
|
vpr_printf (TIO_MESSAGE_ERROR, "Could not place macro length %d with head block %s (#%d); not enough free locations of type %s (#%d).\n",
|
|
pl_macros[imacro].num_blocks, block[iblk].name, iblk, type_descriptors[itype].name, itype);
|
|
vpr_printf (TIO_MESSAGE_INFO, "VPR cannot auto-size for your circuit, please resize the FPGA manually.\n");
|
|
exit(1);
|
|
}
|
|
|
|
// Try to place the macro first, if can be placed - place them, otherwise try again
|
|
for (itry = 0; itry < macros_max_num_tries && macro_placed == FALSE; itry++) {
|
|
|
|
// Choose a random position for the head
|
|
ichoice = my_irand(free_locations[itype] - 1);
|
|
|
|
// Try to place the macro
|
|
macro_placed = try_place_macro(itype, ichoice, imacro, free_locations);
|
|
|
|
} // Finished all tries
|
|
|
|
|
|
if (macro_placed == FALSE){
|
|
// if a macro still could not be placed after macros_max_num_tries times,
|
|
// go through the chip exhaustively to find a legal placement for the macro
|
|
// place the macro on the first location that is legal
|
|
// then set macro_placed = TRUE;
|
|
// if there are no legal positions, error out
|
|
|
|
// Exhaustive placement of carry macros
|
|
for (ichoice = 0; ichoice < free_locations[itype] && macro_placed == FALSE; ichoice++) {
|
|
|
|
// Try to place the macro
|
|
macro_placed = try_place_macro(itype, ichoice, imacro, free_locations);
|
|
|
|
} // Exhausted all the legal placement position for this macro
|
|
|
|
// If macro could not be placed after exhaustive placement, error out
|
|
if (macro_placed == FALSE) {
|
|
// Error out
|
|
vpr_printf (TIO_MESSAGE_ERROR, "Initial placement failed.\n");
|
|
vpr_printf (TIO_MESSAGE_ERROR, "Could not place macro length %d with head block %s (#%d); not enough free locations of type %s (#%d).\n",
|
|
pl_macros[imacro].num_blocks, block[iblk].name, iblk, type_descriptors[itype].name, itype);
|
|
vpr_printf (TIO_MESSAGE_INFO, "Please manually size the FPGA because VPR can't do this yet.\n");
|
|
exit(1);
|
|
}
|
|
|
|
} else {
|
|
// This macro has been placed successfully, proceed to place the next macro
|
|
continue;
|
|
}
|
|
} // Finish placing all the pl_macros successfully
|
|
}
|
|
|
|
static void initial_placement_blocks(int * free_locations, enum e_pad_loc_type pad_loc_type) {
|
|
|
|
/* Place blocks that are NOT a part of any macro.
|
|
* We'll randomly place each block in the clustered netlist, one by one.
|
|
*/
|
|
|
|
int iblk, itype;
|
|
int ichoice, x, y, z;
|
|
|
|
for (iblk = 0; iblk < num_blocks; iblk++) {
|
|
|
|
if (block[iblk].x != -1) {
|
|
// block placed.
|
|
continue;
|
|
}
|
|
/* Don't do IOs if the user specifies IOs; we'll read those locations later. */
|
|
if (!(block[iblk].type == IO_TYPE && pad_loc_type == USER)) {
|
|
|
|
/* Randomly select a free location of the appropriate type
|
|
* for iblk. We have a linearized list of all the free locations
|
|
* that can accomodate a block of that type in free_locations[itype].
|
|
* Choose one randomly and put iblk there. Then we don't want to pick that
|
|
* location again, so remove it from the free_locations array.
|
|
*/
|
|
itype = block[iblk].type->index;
|
|
if (free_locations[itype] <= 0) {
|
|
vpr_printf (TIO_MESSAGE_ERROR, "Initial placement failed.\n");
|
|
vpr_printf (TIO_MESSAGE_ERROR, "Could not place block %s (#%d); no free locations of type %s (#%d).\n",
|
|
block[iblk].name, iblk, type_descriptors[itype].name, itype);
|
|
exit(1);
|
|
}
|
|
|
|
ichoice = my_irand(free_locations[itype] - 1);
|
|
x = legal_pos[itype][ichoice].x;
|
|
y = legal_pos[itype][ichoice].y;
|
|
z = legal_pos[itype][ichoice].z;
|
|
|
|
// Make sure that the position is OPEN before placing the block down
|
|
assert (grid[x][y].blocks[z] == OPEN);
|
|
|
|
grid[x][y].blocks[z] = iblk;
|
|
grid[x][y].usage++;
|
|
|
|
block[iblk].x = x;
|
|
block[iblk].y = y;
|
|
block[iblk].z = z;
|
|
|
|
/* Ensure randomizer doesn't pick this location again, since it's occupied. Could shift all the
|
|
* legal positions in legal_pos to remove the entry (choice) we just used, but faster to
|
|
* just move the last entry in legal_pos to the spot we just used and decrement the
|
|
* count of free_locations.
|
|
*/
|
|
legal_pos[itype][ichoice] = legal_pos[itype][free_locations[itype] - 1]; /* overwrite used block position */
|
|
free_locations[itype]--;
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
static void initial_placement(enum e_pad_loc_type pad_loc_type,
|
|
char *pad_loc_file) {
|
|
|
|
/* Randomly places the blocks to create an initial placement. We rely on
|
|
* the legal_pos array already being loaded. That legal_pos[itype] is an
|
|
* array that gives every legal value of (x,y,z) that can accomodate a block.
|
|
* The number of such locations is given by num_legal_pos[itype].
|
|
*/
|
|
int i, j, k, iblk, itype, x, y, z, ichoice;
|
|
int *free_locations; /* [0..num_types-1].
|
|
* Stores how many locations there are for this type that *might* still be free.
|
|
* That is, this stores the number of entries in legal_pos[itype] that are worth considering
|
|
* as you look for a free location.
|
|
*/
|
|
|
|
free_locations = (int *) my_malloc(num_types * sizeof(int));
|
|
for (itype = 0; itype < num_types; itype++) {
|
|
free_locations[itype] = num_legal_pos[itype];
|
|
}
|
|
|
|
/* We'll use the grid to record where everything goes. Initialize to the grid has no
|
|
* blocks placed anywhere.
|
|
*/
|
|
for (i = 0; i <= nx + 1; i++) {
|
|
for (j = 0; j <= ny + 1; j++) {
|
|
grid[i][j].usage = 0;
|
|
itype = grid[i][j].type->index;
|
|
for (k = 0; k < type_descriptors[itype].capacity; k++) {
|
|
grid[i][j].blocks[k] = OPEN;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Similarly, mark all blocks as not being placed yet. */
|
|
for (iblk = 0; iblk < num_blocks; iblk++) {
|
|
block[iblk].x = -1;
|
|
block[iblk].y = -1;
|
|
block[iblk].z = -1;
|
|
}
|
|
|
|
initial_placement_pl_macros(MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY, free_locations);
|
|
|
|
// All the macros are placed, update the legal_pos[][] array
|
|
for (itype = 0; itype < num_types; itype++) {
|
|
assert (free_locations[itype] >= 0);
|
|
for (ichoice = 0; ichoice < free_locations[itype]; ichoice++) {
|
|
x = legal_pos[itype][ichoice].x;
|
|
y = legal_pos[itype][ichoice].y;
|
|
z = legal_pos[itype][ichoice].z;
|
|
|
|
// Check if that location is occupied. If it is, remove from legal_pos
|
|
if (grid[x][y].blocks[z] != OPEN) {
|
|
legal_pos[itype][ichoice] = legal_pos[itype][free_locations[itype] - 1];
|
|
free_locations[itype]--;
|
|
|
|
// After the move, I need to check this particular entry again
|
|
ichoice--;
|
|
continue;
|
|
}
|
|
}
|
|
} // Finish updating the legal_pos[][] and free_locations[] array
|
|
|
|
initial_placement_blocks(free_locations, pad_loc_type);
|
|
|
|
if (pad_loc_type == USER) {
|
|
read_user_pad_loc(pad_loc_file);
|
|
}
|
|
|
|
/* Restore legal_pos */
|
|
load_legal_placements();
|
|
|
|
#ifdef VERBOSE
|
|
vpr_printf(TIO_MESSAGE_INFO, "At end of initial_placement.\n");
|
|
if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_INITIAL_CLB_PLACEMENT)) {
|
|
print_clb_placement(getEchoFileName(E_ECHO_INITIAL_CLB_PLACEMENT));
|
|
}
|
|
#endif
|
|
free(free_locations);
|
|
}
|
|
|
|
static void free_fast_cost_update(void) {
|
|
int i;
|
|
|
|
for (i = 0; i <= ny; i++)
|
|
free(chanx_place_cost_fac[i]);
|
|
free(chanx_place_cost_fac);
|
|
chanx_place_cost_fac = NULL;
|
|
|
|
for (i = 0; i <= nx; i++)
|
|
free(chany_place_cost_fac[i]);
|
|
free(chany_place_cost_fac);
|
|
chany_place_cost_fac = NULL;
|
|
}
|
|
|
|
static void alloc_and_load_for_fast_cost_update(float place_cost_exp) {
|
|
|
|
/* Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac *
|
|
* arrays with the inverse of the average number of tracks per channel *
|
|
* between [subhigh] and [sublow]. This is only useful for the cost *
|
|
* function that takes the length of the net bounding box in each *
|
|
* dimension divided by the average number of tracks in that direction. *
|
|
* For other cost functions, you don't have to bother calling this *
|
|
* routine; when using the cost function described above, however, you *
|
|
* must always call this routine after you call init_chan and before *
|
|
* you do any placement cost determination. The place_cost_exp factor *
|
|
* specifies to what power the width of the channel should be taken -- *
|
|
* larger numbers make narrower channels more expensive. */
|
|
|
|
int low, high, i;
|
|
|
|
/* Access arrays below as chan?_place_cost_fac[subhigh][sublow]. Since *
|
|
* subhigh must be greater than or equal to sublow, we only need to *
|
|
* allocate storage for the lower half of a matrix. */
|
|
|
|
chanx_place_cost_fac = (float **) my_malloc((ny + 1) * sizeof(float *));
|
|
for (i = 0; i <= ny; i++)
|
|
chanx_place_cost_fac[i] = (float *) my_malloc((i + 1) * sizeof(float));
|
|
|
|
chany_place_cost_fac = (float **) my_malloc((nx + 1) * sizeof(float *));
|
|
for (i = 0; i <= nx; i++)
|
|
chany_place_cost_fac[i] = (float *) my_malloc((i + 1) * sizeof(float));
|
|
|
|
/* First compute the number of tracks between channel high and channel *
|
|
* low, inclusive, in an efficient manner. */
|
|
|
|
chanx_place_cost_fac[0][0] = chan_width_x[0];
|
|
|
|
for (high = 1; high <= ny; high++) {
|
|
chanx_place_cost_fac[high][high] = chan_width_x[high];
|
|
for (low = 0; low < high; low++) {
|
|
chanx_place_cost_fac[high][low] =
|
|
chanx_place_cost_fac[high - 1][low] + chan_width_x[high];
|
|
}
|
|
}
|
|
|
|
/* Now compute the inverse of the average number of tracks per channel *
|
|
* between high and low. The cost function divides by the average *
|
|
* number of tracks per channel, so by storing the inverse I convert *
|
|
* this to a faster multiplication. Take this final number to the *
|
|
* place_cost_exp power -- numbers other than one mean this is no *
|
|
* longer a simple "average number of tracks"; it is some power of *
|
|
* that, allowing greater penalization of narrow channels. */
|
|
|
|
for (high = 0; high <= ny; high++)
|
|
for (low = 0; low <= high; low++) {
|
|
chanx_place_cost_fac[high][low] = (high - low + 1.)
|
|
/ chanx_place_cost_fac[high][low];
|
|
chanx_place_cost_fac[high][low] = pow(
|
|
(double) chanx_place_cost_fac[high][low],
|
|
(double) place_cost_exp);
|
|
}
|
|
|
|
/* Now do the same thing for the y-directed channels. First get the *
|
|
* number of tracks between channel high and channel low, inclusive. */
|
|
|
|
chany_place_cost_fac[0][0] = chan_width_y[0];
|
|
|
|
for (high = 1; high <= nx; high++) {
|
|
chany_place_cost_fac[high][high] = chan_width_y[high];
|
|
for (low = 0; low < high; low++) {
|
|
chany_place_cost_fac[high][low] =
|
|
chany_place_cost_fac[high - 1][low] + chan_width_y[high];
|
|
}
|
|
}
|
|
|
|
/* Now compute the inverse of the average number of tracks per channel *
|
|
* between high and low. Take to specified power. */
|
|
|
|
for (high = 0; high <= nx; high++)
|
|
for (low = 0; low <= high; low++) {
|
|
chany_place_cost_fac[high][low] = (high - low + 1.)
|
|
/ chany_place_cost_fac[high][low];
|
|
chany_place_cost_fac[high][low] = pow(
|
|
(double) chany_place_cost_fac[high][low],
|
|
(double) place_cost_exp);
|
|
}
|
|
}
|
|
|
|
static void check_place(float bb_cost, float timing_cost,
|
|
enum e_place_algorithm place_algorithm,
|
|
float delay_cost) {
|
|
|
|
/* Checks that the placement has not confused our data structures. *
|
|
* i.e. the clb and block structures agree about the locations of *
|
|
* every block, blocks are in legal spots, etc. Also recomputes *
|
|
* the final placement cost from scratch and makes sure it is *
|
|
* within roundoff of what we think the cost is. */
|
|
|
|
static int *bdone;
|
|
int i, j, k, error = 0, bnum;
|
|
float bb_cost_check;
|
|
int usage_check;
|
|
float timing_cost_check, delay_cost_check;
|
|
int imacro, imember, head_iblk, member_iblk, member_x, member_y, member_z;
|
|
|
|
bb_cost_check = comp_bb_cost(CHECK);
|
|
vpr_printf(TIO_MESSAGE_INFO, "bb_cost recomputed from scratch: %g\n", bb_cost_check);
|
|
if (fabs(bb_cost_check - bb_cost) > bb_cost * ERROR_TOL) {
|
|
vpr_printf(TIO_MESSAGE_ERROR, "bb_cost_check: %g and bb_cost: %g differ in check_place.\n", bb_cost_check, bb_cost);
|
|
error++;
|
|
}
|
|
|
|
if (place_algorithm == NET_TIMING_DRIVEN_PLACE
|
|
|| place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
|
|
comp_td_costs(&timing_cost_check, &delay_cost_check);
|
|
vpr_printf(TIO_MESSAGE_INFO, "timing_cost recomputed from scratch: %g\n", timing_cost_check);
|
|
if (fabs(timing_cost_check - timing_cost) > timing_cost * ERROR_TOL) {
|
|
vpr_printf(TIO_MESSAGE_ERROR, "timing_cost_check: %g and timing_cost: %g differ in check_place.\n",
|
|
timing_cost_check, timing_cost);
|
|
error++;
|
|
}
|
|
vpr_printf(TIO_MESSAGE_INFO, "delay_cost recomputed from scratch: %g\n", delay_cost_check);
|
|
if (fabs(delay_cost_check - delay_cost) > delay_cost * ERROR_TOL) {
|
|
vpr_printf(TIO_MESSAGE_ERROR, "delay_cost_check: %g and delay_cost: %g differ in check_place.\n",
|
|
delay_cost_check, delay_cost);
|
|
error++;
|
|
}
|
|
}
|
|
|
|
bdone = (int *) my_malloc(num_blocks * sizeof(int));
|
|
for (i = 0; i < num_blocks; i++)
|
|
bdone[i] = 0;
|
|
|
|
/* Step through grid array. Check it against block array. */
|
|
for (i = 0; i <= (nx + 1); i++)
|
|
for (j = 0; j <= (ny + 1); j++) {
|
|
if (grid[i][j].usage > grid[i][j].type->capacity) {
|
|
vpr_printf(TIO_MESSAGE_ERROR, "Block at grid location (%d,%d) overused. Usage is %d.\n",
|
|
i, j, grid[i][j].usage);
|
|
error++;
|
|
}
|
|
usage_check = 0;
|
|
for (k = 0; k < grid[i][j].type->capacity; k++) {
|
|
bnum = grid[i][j].blocks[k];
|
|
if (EMPTY == bnum)
|
|
continue;
|
|
|
|
if (block[bnum].type != grid[i][j].type) {
|
|
vpr_printf(TIO_MESSAGE_ERROR, "Block %d type does not match grid location (%d,%d) type.\n",
|
|
bnum, i, j);
|
|
error++;
|
|
}
|
|
if ((block[bnum].x != i) || (block[bnum].y != j)) {
|
|
vpr_printf(TIO_MESSAGE_ERROR, "Block %d location conflicts with grid(%d,%d) data.\n",
|
|
bnum, i, j);
|
|
error++;
|
|
}
|
|
++usage_check;
|
|
bdone[bnum]++;
|
|
}
|
|
if (usage_check != grid[i][j].usage) {
|
|
vpr_printf(TIO_MESSAGE_ERROR, "Location (%d,%d) usage is %d, but has actual usage %d.\n",
|
|
i, j, grid[i][j].usage, usage_check);
|
|
error++;
|
|
}
|
|
}
|
|
|
|
/* Check that every block exists in the grid and block arrays somewhere. */
|
|
for (i = 0; i < num_blocks; i++)
|
|
if (bdone[i] != 1) {
|
|
vpr_printf(TIO_MESSAGE_ERROR, "Block %d listed %d times in data structures.\n",
|
|
i, bdone[i]);
|
|
error++;
|
|
}
|
|
free(bdone);
|
|
|
|
/* Check the pl_macro placement are legal - blocks are in the proper relative position. */
|
|
for (imacro = 0; imacro < num_pl_macros; imacro++) {
|
|
|
|
head_iblk = pl_macros[imacro].members[0].blk_index;
|
|
|
|
for (imember = 0; imember < pl_macros[imacro].num_blocks; imember++) {
|
|
|
|
member_iblk = pl_macros[imacro].members[imember].blk_index;
|
|
|
|
// Compute the suppossed member's x,y,z location
|
|
member_x = block[head_iblk].x + pl_macros[imacro].members[imember].x_offset;
|
|
member_y = block[head_iblk].y + pl_macros[imacro].members[imember].y_offset;
|
|
member_z = block[head_iblk].z + pl_macros[imacro].members[imember].z_offset;
|
|
|
|
// Check the block data structure first
|
|
if (block[member_iblk].x != member_x
|
|
|| block[member_iblk].y != member_y
|
|
|| block[member_iblk].z != member_z) {
|
|
vpr_printf(TIO_MESSAGE_ERROR, "Block %d in pl_macro #%d is not placed in the proper orientation.\n",
|
|
member_iblk, imacro);
|
|
error++;
|
|
}
|
|
|
|
// Then check the grid data structure
|
|
if (grid[member_x][member_y].blocks[member_z] != member_iblk) {
|
|
vpr_printf(TIO_MESSAGE_ERROR, "Block %d in pl_macro #%d is not placed in the proper orientation.\n",
|
|
member_iblk, imacro);
|
|
error++;
|
|
}
|
|
} // Finish going through all the members
|
|
} // Finish going through all the macros
|
|
|
|
if (error == 0) {
|
|
vpr_printf(TIO_MESSAGE_INFO, "\n");
|
|
vpr_printf(TIO_MESSAGE_INFO, "Completed placement consistency check successfully.\n");
|
|
vpr_printf(TIO_MESSAGE_INFO, "\n");
|
|
vpr_printf(TIO_MESSAGE_INFO, "Swaps called: %d\n", num_ts_called);
|
|
|
|
#ifdef PRINT_REL_POS_DISTR
|
|
print_relative_pos_distr(void);
|
|
#endif
|
|
} else {
|
|
vpr_printf(TIO_MESSAGE_INFO, "\n");
|
|
vpr_printf(TIO_MESSAGE_ERROR, "Completed placement consistency check, %d errors found.\n", error);
|
|
vpr_printf(TIO_MESSAGE_INFO, "Aborting program.\n");
|
|
exit(1);
|
|
}
|
|
|
|
}
|
|
|
|
#ifdef VERBOSE
|
|
static void print_clb_placement(const char *fname) {
|
|
|
|
/* Prints out the clb placements to a file. */
|
|
|
|
FILE *fp;
|
|
int i;
|
|
|
|
fp = my_fopen(fname, "w", 0);
|
|
fprintf(fp, "Complex block placements:\n\n");
|
|
|
|
fprintf(fp, "Block #\tName\t(X, Y, Z).\n");
|
|
for(i = 0; i < num_blocks; i++) {
|
|
fprintf(fp, "#%d\t%s\t(%d, %d, %d).\n", i, block[i].name, block[i].x, block[i].y, block[i].z);
|
|
}
|
|
|
|
fclose(fp);
|
|
}
|
|
#endif
|
|
|
|
static void free_try_swap_arrays(void) {
|
|
if(ts_bb_coord_new != NULL) {
|
|
free(ts_bb_coord_new);
|
|
free(ts_bb_edge_new);
|
|
free(ts_nets_to_update);
|
|
free(blocks_affected.moved_blocks);
|
|
free(bb_updated_before);
|
|
|
|
ts_bb_coord_new = NULL;
|
|
ts_bb_edge_new = NULL;
|
|
ts_nets_to_update = NULL;
|
|
blocks_affected.moved_blocks = NULL;
|
|
blocks_affected.num_moved_blocks = 0;
|
|
bb_updated_before = NULL;
|
|
}
|
|
}
|
|
|