/* OpenFPGA/vpr7_x2p/vpr/SRC/route/rr_graph.c */

#include <stdio.h>
#include <math.h>
#include <assert.h>
#include <string.h>
#include "util.h"
#include "vpr_types.h"
#include "globals.h"
#include "rr_graph_util.h"
#include "rr_graph.h"
#include "rr_graph2.h"
#include "rr_graph_sbox.h"
#include "check_rr_graph.h"
#include "rr_graph_timing_params.h"
#include "rr_graph_indexed_data.h"
#include "vpr_utils.h"
#include "read_xml_arch_file.h"
#include "ReadOptions.h"
/* Xifan TANG: SWSEG SUPPORT */
#include "rr_graph_swseg.h"
/* end */
/* Xifan TANG: opin_to_cb support */
#include "rr_graph_opincb.h"
/* end */
/* mrFPGA: Xifan TANG */
#include "mrfpga_globals.h"
/* end */
/* #define ENABLE_DUMP */
/* #define MUX_SIZE_DIST_DISPLAY */
/* mux size statistic data structures */
typedef struct s_mux {
int size;
struct s_mux *next;
} t_mux;
typedef struct s_mux_size_distribution {
int mux_count;
int max_index;
int *distr;
struct s_mux_size_distribution *next;
} t_mux_size_distribution;
/*
typedef struct s_clb_to_clb_directs {
t_type_descriptor *from_clb_type;
int from_clb_pin_start_index;
int from_clb_pin_end_index;
t_type_descriptor *to_clb_type;
int to_clb_pin_start_index;
int to_clb_pin_end_index;
} t_clb_to_clb_directs;
*/
/* Xifan TANG: opin_to_cb support */
#include "pb_pin_eq_auto_detect.h"
/* end */
/* UDSD Modifications by WMF End */
/******************* Variables local to this module. ***********************/
/* Used to free "chunked" memory. If NULL, no rr_graph exists right now. */
static t_chunk rr_mem_ch = {NULL, 0, NULL};
/* Status of current chunk being dished out by calls to my_chunk_malloc. */
/********************* Subroutines local to this module. *******************/
static void build_bidir_rr_opins(INP int i, INP int j,
INOUTP t_rr_node * L_rr_node, INP t_ivec *** L_rr_node_indices,
INP int *****opin_to_track_map, INP int **Fc_out,
INP boolean * L_rr_edge_done, INP t_seg_details * seg_details,
INP struct s_grid_tile **L_grid, INP int delayless_switch,
INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs);
static void build_unidir_rr_opins(INP int i, INP int j,
INP struct s_grid_tile **L_grid, INP int **Fc_out,
INP int nodes_per_chan, INP t_seg_details * seg_details,
INOUTP int **Fc_xofs, INOUTP int **Fc_yofs,
INOUTP t_rr_node * L_rr_node, INOUTP boolean * L_rr_edge_done,
OUTP boolean * Fc_clipped, INP t_ivec *** L_rr_node_indices, INP int delayless_switch,
INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs);
static int get_opin_direct_connecions(int x, int y, int opin, INOUTP t_linked_edge ** edge_list_ptr, INP t_ivec *** L_rr_node_indices,
INP int delayless_switch, INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs);
static void alloc_and_load_rr_graph(INP int num_nodes,
INP t_rr_node * L_rr_node, INP int num_seg_types,
INP t_seg_details * seg_details, INP boolean * L_rr_edge_done,
INP struct s_ivec ****track_to_ipin_lookup,
INP int *****opin_to_track_map, INP struct s_ivec ***switch_block_conn,
INP struct s_grid_tile **L_grid, INP int L_nx, INP int L_ny, INP int Fs,
INP short *****sblock_pattern, INP int **Fc_out, INP int **Fc_xofs,
INP int **Fc_yofs, INP t_ivec *** L_rr_node_indices,
INP int nodes_per_chan, INP enum e_switch_block_type sb_type,
INP int delayless_switch, INP enum e_directionality directionality,
INP int wire_to_ipin_switch, OUTP boolean * Fc_clipped, INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs);
static void load_uniform_switch_pattern(INP t_type_ptr type,
INOUTP int ****tracks_connected_to_pin, INP int num_phys_pins,
INP int *pin_num_ordering, INP int *side_ordering,
INP int *offset_ordering, INP int nodes_per_chan, INP int Fc,
INP enum e_directionality directionality);
static void load_perturbed_switch_pattern(INP t_type_ptr type,
INOUTP int ****tracks_connected_to_pin, INP int num_phys_pins,
INP int *pin_num_ordering, INP int *side_ordering,
INP int *offset_ordering, INP int nodes_per_chan, INP int Fc,
INP enum e_directionality directionality);
static void check_all_tracks_reach_pins(t_type_ptr type,
int ****tracks_connected_to_pin, int nodes_per_chan, int Fc,
enum e_pin_type ipin_or_opin);
static void build_rr_sinks_sources(INP int i, INP int j,
INP t_rr_node * L_rr_node, INP t_ivec *** L_rr_node_indices,
INP int delayless_switch, INP struct s_grid_tile **L_grid);
static void build_rr_xchan(INP int i, INP int j,
INP struct s_ivec ****track_to_ipin_lookup,
INP struct s_ivec ***switch_block_conn, INP int cost_index_offset,
INP int nodes_per_chan, INP int *opin_mux_size,
INP short *****sblock_pattern, INP int Fs_per_side,
INP t_seg_details * seg_details, INP t_ivec *** L_rr_node_indices,
INP boolean * L_rr_edge_done, INOUTP t_rr_node * L_rr_node,
INP int wire_to_ipin_switch, INP enum e_directionality directionality);
static void build_rr_ychan(INP int i, INP int j,
INP struct s_ivec ****track_to_ipin_lookup,
INP struct s_ivec ***switch_block_conn, INP int cost_index_offset,
INP int nodes_per_chan, INP int *opin_mux_size,
INP short *****sblock_pattern, INP int Fs_per_side,
INP t_seg_details * seg_details, INP t_ivec *** L_rr_node_indices,
INP boolean * L_rr_edge_done, INOUTP t_rr_node * L_rr_node,
INP int wire_to_ipin_switch, INP enum e_directionality directionality);
void alloc_and_load_edges_and_switches(INP t_rr_node * L_rr_node, INP int inode,
INP int num_edges, INP boolean * L_rr_edge_done,
INP t_linked_edge * edge_list_head);
static void alloc_net_rr_terminals(void);
static void alloc_and_load_rr_clb_source(t_ivec *** L_rr_node_indices);
/*
static t_clb_to_clb_directs *alloc_and_load_clb_to_clb_directs(INP t_direct_inf *directs, INP int num_directs);
*/
#if 0
static void load_uniform_opin_switch_pattern_paired(INP int *Fc_out,
INP int num_pins,
INP int *pins_in_chan_seg,
INP int num_wire_inc_muxes,
INP int num_wire_dec_muxes,
INP int *wire_inc_muxes,
INP int *wire_dec_muxes,
INOUTP t_rr_node * L_rr_node,
INOUTP boolean *
L_rr_edge_done,
INP t_seg_details *
seg_details,
OUTP boolean * Fc_clipped);
#endif
void watch_edges(int inode, t_linked_edge * edge_list_head);
#ifdef MUX_SIZE_DIST_DISPLAY
static void view_mux_size_distribution(t_ivec *** L_rr_node_indices,
int nodes_per_chan,
t_seg_details * seg_details_x,
t_seg_details * seg_details_y);
static void print_distribution(FILE * fptr,
t_mux_size_distribution * distr_struct);
#endif
static t_seg_details *alloc_and_load_global_route_seg_details(
INP int nodes_per_chan, INP int global_route_switch);
/* UDSD Modifications by WMF End */
/******************* Subroutine definitions *******************************/
void build_rr_graph(INP t_graph_type graph_type, INP int L_num_types,
INP t_type_ptr types, INP int L_nx, INP int L_ny,
INP struct s_grid_tile **L_grid, INP int chan_width,
INP struct s_chan_width_dist *chan_capacity_inf,
INP enum e_switch_block_type sb_type, INP int Fs, INP int num_seg_types,
INP int num_switches, INP t_segment_inf * segment_inf,
INP int global_route_switch, INP int delayless_switch,
INP t_timing_inf timing_inf, INP int wire_to_ipin_switch,
INP enum e_base_cost_type base_cost_type, INP t_direct_inf *directs,
INP int num_directs, INP boolean ignore_Fc_0, OUTP int *Warnings,
/*Xifan TANG: Switch Segment Pattern Support*/
INP int num_swseg_pattern, INP t_swseg_pattern_inf* swseg_patterns,
INP boolean opin_to_cb_fast_edges, INP boolean opin_logic_eq_edges) {
/* Temp structures used to build graph */
int nodes_per_chan, i, j;
t_seg_details *seg_details = NULL;
int **Fc_in = NULL; /* [0..num_types-1][0..num_pins-1] */
int **Fc_out = NULL; /* [0..num_types-1][0..num_pins-1] */
int *****opin_to_track_map = NULL; /* [0..num_types-1][0..num_pins-1][0..height][0..3][0..Fc-1] */
int *****ipin_to_track_map = NULL; /* [0..num_types-1][0..num_pins-1][0..height][0..3][0..Fc-1] */
t_ivec ****track_to_ipin_lookup = NULL; /* [0..num_types-1][0..nodes_per_chan-1][0..height][0..3] */
t_ivec ***switch_block_conn = NULL;
short *****unidir_sb_pattern = NULL;
boolean *L_rr_edge_done = NULL;
boolean is_global_graph;
boolean Fc_clipped;
boolean use_full_seg_groups;
boolean *perturb_ipins = NULL;
enum e_directionality directionality;
int **Fc_xofs = NULL; /* [0..ny-1][0..nx-1] */
int **Fc_yofs = NULL; /* [0..nx-1][0..ny-1] */
t_clb_to_clb_directs *clb_to_clb_directs;
rr_node_indices = NULL;
rr_node = NULL;
num_rr_nodes = 0;
/* Reset warning flag */
*Warnings = RR_GRAPH_NO_WARN;
/* Decode the graph_type */
is_global_graph = FALSE;
if (GRAPH_GLOBAL == graph_type) {
is_global_graph = TRUE;
}
use_full_seg_groups = FALSE;
if (GRAPH_UNIDIR_TILEABLE == graph_type) {
use_full_seg_groups = TRUE;
}
directionality = UNI_DIRECTIONAL;
if (GRAPH_BIDIR == graph_type) {
directionality = BI_DIRECTIONAL;
}
if (is_global_graph) {
directionality = BI_DIRECTIONAL;
}
/* Global routing uses a single longwire track */
nodes_per_chan = (is_global_graph ? 1 : chan_width);
assert(nodes_per_chan > 0);
clb_to_clb_directs = NULL;
if(num_directs > 0) {
clb_to_clb_directs = alloc_and_load_clb_to_clb_directs(directs, num_directs);
}
/* START SEG_DETAILS */
if (is_global_graph) {
/* Sets up a single unit length segment type for global routing. */
seg_details = alloc_and_load_global_route_seg_details(nodes_per_chan,
global_route_switch);
} else {
/* Set up segments, including distributing tracks and staggering.
 * If use_full_seg_groups is specified, nodes_per_chan may be
 * changed; a warning is signaled to the caller if this happens. */
seg_details = alloc_and_load_seg_details(&nodes_per_chan,
/* std::max(L_nx, L_ny), */ /* Original VPR */
std::max(L_nx, L_ny) + ( is_stack ? 1 : 0 ), /* mrFPGA: Xifan TANG */
num_seg_types, segment_inf,
use_full_seg_groups, is_global_graph, directionality);
if ((is_global_graph ? 1 : chan_width) != nodes_per_chan) {
*Warnings |= RR_GRAPH_WARN_CHAN_WIDTH_CHANGED;
}
if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_SEG_DETAILS)) {
dump_seg_details(seg_details, nodes_per_chan,
getEchoFileName(E_ECHO_SEG_DETAILS));
}
}
/* END SEG_DETAILS */
/* START FC */
/* Determine the actual value of Fc */
if (is_global_graph) {
/* Allocate Fc_in/Fc_out as full matrices (mirroring
 * alloc_and_load_actual_fc) so that indexing [i][j] is valid and the
 * free_matrix() calls at the end of this routine match. */
int max_pins = types[0].num_pins;
for (i = 1; i < L_num_types; ++i) {
if (types[i].num_pins > max_pins) {
max_pins = types[i].num_pins;
}
}
Fc_in = (int **) alloc_matrix(0, L_num_types, 0, max_pins, sizeof(int));
Fc_out = (int **) alloc_matrix(0, L_num_types, 0, max_pins, sizeof(int));
for (i = 0; i < L_num_types; ++i) {
for (j = 0; j < types[i].num_pins; ++j) {
Fc_in[i][j] = 1;
Fc_out[i][j] = 1;
}
}
} else {
Fc_clipped = FALSE;
Fc_in = alloc_and_load_actual_fc(L_num_types, types, nodes_per_chan,
FALSE, directionality, &Fc_clipped, ignore_Fc_0);
if (Fc_clipped) {
*Warnings |= RR_GRAPH_WARN_FC_CLIPPED;
}
Fc_clipped = FALSE;
Fc_out = alloc_and_load_actual_fc(L_num_types, types, nodes_per_chan,
TRUE, directionality, &Fc_clipped, ignore_Fc_0);
if (Fc_clipped) {
*Warnings |= RR_GRAPH_WARN_FC_CLIPPED;
}
#ifdef VERBOSE
for (i = 1; i < L_num_types; ++i) { /* Skip "<EMPTY>" */
for (j = 0; j < type_descriptors[i].num_pins; ++j) {
if (type_descriptors[i].is_Fc_full_flex[j]) {
vpr_printf(TIO_MESSAGE_INFO, "Fc Actual Values: type = %s, Fc_out = full, Fc_in = %d.\n",
type_descriptors[i].name, Fc_in[i][j]);
}
else {
vpr_printf(TIO_MESSAGE_INFO, "Fc Actual Values: type = %s, Fc_out = %d, Fc_in = %d.\n",
type_descriptors[i].name, Fc_out[i][j], Fc_in[i][j]);
}
}
}
#endif /* VERBOSE */
}
perturb_ipins = alloc_and_load_perturb_ipins(nodes_per_chan, L_num_types,
Fc_in, Fc_out, directionality);
/* END FC */
/* Alloc node lookups, count nodes, alloc rr nodes */
num_rr_nodes = 0;
rr_node_indices = alloc_and_load_rr_node_indices(nodes_per_chan, L_nx, L_ny,
&num_rr_nodes, seg_details);
rr_node = (t_rr_node *) my_malloc(sizeof(t_rr_node) * num_rr_nodes);
memset(rr_node, 0, sizeof(t_rr_node) * num_rr_nodes);
L_rr_edge_done = (boolean *) my_malloc(sizeof(boolean) * num_rr_nodes);
memset(L_rr_edge_done, 0, sizeof(boolean) * num_rr_nodes);
/* These are data structures used by the unidir opin mapping. */
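/* Each entry holds a running count of OPIN connections already assigned
 * to the channel segment at that location, so successive OPINs start at
 * staggered track offsets and the output muxes fill up evenly. */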
if (UNI_DIRECTIONAL == directionality) {
Fc_xofs = (int **) alloc_matrix(0, L_ny, 0, L_nx, sizeof(int));
Fc_yofs = (int **) alloc_matrix(0, L_nx, 0, L_ny, sizeof(int));
for (i = 0; i <= L_nx; ++i) {
for (j = 0; j <= L_ny; ++j) {
Fc_xofs[j][i] = 0;
Fc_yofs[i][j] = 0;
}
}
}
/* START SB LOOKUP */
/* Alloc and load the switch block lookup */
if (is_global_graph) {
assert(nodes_per_chan == 1);
switch_block_conn = alloc_and_load_switch_block_conn(1, SUBSET, 3);
} else if (BI_DIRECTIONAL == directionality) {
switch_block_conn = alloc_and_load_switch_block_conn(nodes_per_chan,
sb_type, Fs);
} else {
assert(UNI_DIRECTIONAL == directionality);
unidir_sb_pattern = alloc_sblock_pattern_lookup(L_nx, L_ny,
nodes_per_chan);
for (i = 0; i <= L_nx; i++) {
for (j = 0; j <= L_ny; j++) {
load_sblock_pattern_lookup(i, j, nodes_per_chan, seg_details,
Fs, sb_type, unidir_sb_pattern);
}
}
}
/* END SB LOOKUP */
/* START IPIN MAP */
/* Create ipin map lookups */
ipin_to_track_map = (int *****) my_malloc(sizeof(int ****) * L_num_types);
track_to_ipin_lookup = (struct s_ivec ****) my_malloc(
sizeof(struct s_ivec ***) * L_num_types);
for (i = 0; i < L_num_types; ++i) {
ipin_to_track_map[i] = alloc_and_load_pin_to_track_map(RECEIVER,
nodes_per_chan, Fc_in[i], &types[i], perturb_ipins[i],
directionality);
track_to_ipin_lookup[i] = alloc_and_load_track_to_pin_lookup(
ipin_to_track_map[i], Fc_in[i], types[i].height,
types[i].num_pins, nodes_per_chan);
}
/* END IPIN MAP */
/* START OPIN MAP */
/* Create opin map lookups */
if (BI_DIRECTIONAL == directionality) {
opin_to_track_map = (int *****) my_malloc(
sizeof(int ****) * L_num_types);
for (i = 0; i < L_num_types; ++i) {
opin_to_track_map[i] = alloc_and_load_pin_to_track_map(DRIVER,
nodes_per_chan, Fc_out[i], &types[i], FALSE, directionality);
}
}
/* END OPIN MAP */
/* UDSD Modifications by WMF begin */
/* I'm adding 2 new fields to t_rr_node, and I want them initialized to 0. */
for (i = 0; i < num_rr_nodes; i++) {
rr_node[i].num_wire_drivers = 0;
rr_node[i].num_opin_drivers = 0;
}
alloc_and_load_rr_graph(num_rr_nodes, rr_node, num_seg_types, seg_details,
L_rr_edge_done, track_to_ipin_lookup, opin_to_track_map,
switch_block_conn, L_grid, L_nx, L_ny, Fs, unidir_sb_pattern,
Fc_out, Fc_xofs, Fc_yofs, rr_node_indices, nodes_per_chan, sb_type,
delayless_switch, directionality, wire_to_ipin_switch, &Fc_clipped, directs, num_directs, clb_to_clb_directs);
#ifdef MUX_SIZE_DIST_DISPLAY
if (UNI_DIRECTIONAL == directionality)
{
view_mux_size_distribution(rr_node_indices, nodes_per_chan,
seg_details, seg_details);
}
#endif
/* Update rr_nodes capacities if global routing */
if (graph_type == GRAPH_GLOBAL) {
for (i = 0; i < num_rr_nodes; i++) {
if (rr_node[i].type == CHANX || rr_node[i].type == CHANY) {
rr_node[i].capacity = chan_width;
}
}
}
/* Xifan TANG: Add Fast Interconnection from LB OPINs to adjacent LB IPINs*/
if (TRUE == opin_to_cb_fast_edges) { // Only needed when a detailed rr_graph is built
vpr_printf(TIO_MESSAGE_INFO,"Adding %d fast edges from logic block OPIN to logic block IPIN ...\n",
add_rr_graph_fast_edge_opin_to_cb(rr_node_indices));
}
/*END*/
/*Xifan TANG: Switch Segment Pattern Support*/
if (NULL != swseg_patterns) { // Only apply when a pattern list is given
vpr_printf(TIO_MESSAGE_INFO,"Applying Switch Segment Pattern...\n");
if (UNI_DIRECTIONAL == directionality) {
add_rr_graph_switch_segment_pattern(directionality,nodes_per_chan, num_swseg_pattern, swseg_patterns, rr_node_indices, seg_details, seg_details);
} else {
vpr_printf(TIO_MESSAGE_ERROR,"Switch Segment Pattern is only applicable to uni-directional routing architecture!\n");
exit(1);
}
}
/*END*/
/* Xifan TANG: Check logic equivalence of LB OPINs and IPINs. Then modify the associated rr_graph */
/* use net_rr_terminal array to find SOURCE rr_node for each net*/
if (TRUE == opin_logic_eq_edges) { // Only needed when a detailed rr_graph is built
vpr_printf(TIO_MESSAGE_INFO,"Adding %d logic equivalent edges for logic block OPIN ...\n",
// alloc_and_add_grids_fully_capacity_sb_rr_edges(rr_node_indices, num_directs, clb_to_clb_directs));
alloc_and_add_grids_fully_capacity_rr_edges(rr_node_indices, num_directs, clb_to_clb_directs));
}
/*END*/
rr_graph_externals(timing_inf, segment_inf, num_seg_types, nodes_per_chan,
wire_to_ipin_switch, base_cost_type);
if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_RR_GRAPH)) {
dump_rr_graph(getEchoFileName(E_ECHO_RR_GRAPH));
}
check_rr_graph(graph_type, types, L_nx, L_ny, nodes_per_chan, Fs,
num_seg_types, num_switches, segment_inf, global_route_switch,
delayless_switch, wire_to_ipin_switch, seg_details, Fc_in, Fc_out,
opin_to_track_map, ipin_to_track_map, track_to_ipin_lookup,
switch_block_conn, perturb_ipins);
/* Free all temp structs */
if (seg_details) {
free_seg_details(seg_details, nodes_per_chan);
seg_details = NULL;
}
if (Fc_in) {
free_matrix(Fc_in,0, L_num_types, 0, sizeof(int));
Fc_in = NULL;
}
if (Fc_out) {
free_matrix(Fc_out,0, L_num_types, 0, sizeof(int));
Fc_out = NULL;
}
if (perturb_ipins) {
free(perturb_ipins);
perturb_ipins = NULL;
}
if (switch_block_conn) {
free_switch_block_conn(switch_block_conn, nodes_per_chan);
switch_block_conn = NULL;
}
if (L_rr_edge_done) {
free(L_rr_edge_done);
L_rr_edge_done = NULL;
}
if (Fc_xofs) {
free_matrix(Fc_xofs, 0, L_ny, 0, sizeof(int));
Fc_xofs = NULL;
}
if (Fc_yofs) {
free_matrix(Fc_yofs, 0, L_nx, 0, sizeof(int));
Fc_yofs = NULL;
}
if (unidir_sb_pattern) {
free_sblock_pattern_lookup(unidir_sb_pattern);
unidir_sb_pattern = NULL;
}
if (opin_to_track_map) {
for (i = 0; i < L_num_types; ++i) {
free_matrix4(opin_to_track_map[i], 0, types[i].num_pins - 1, 0,
types[i].height - 1, 0, 3, 0, sizeof(int));
}
free(opin_to_track_map);
}
free_type_pin_to_track_map(ipin_to_track_map, types);
free_type_track_to_ipin_map(track_to_ipin_lookup, types, nodes_per_chan);
if(clb_to_clb_directs != NULL) {
free(clb_to_clb_directs);
}
}
void rr_graph_externals(t_timing_inf timing_inf,
t_segment_inf * segment_inf, int num_seg_types, int nodes_per_chan,
int wire_to_ipin_switch, enum e_base_cost_type base_cost_type) {
add_rr_graph_C_from_switches(timing_inf.C_ipin_cblock);
alloc_and_load_rr_indexed_data(segment_inf, num_seg_types, rr_node_indices,
nodes_per_chan, wire_to_ipin_switch, base_cost_type);
alloc_net_rr_terminals();
load_net_rr_terminals(rr_node_indices);
alloc_and_load_rr_clb_source(rr_node_indices);
}
boolean *
alloc_and_load_perturb_ipins(INP int nodes_per_chan, INP int L_num_types,
INP int **Fc_in, INP int **Fc_out, INP enum e_directionality directionality) {
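/* Perturb the ipin switch pattern for a type when, in a bidirectional
 * architecture, Fc_in and Fc_out are (nearly) integer multiples of each
 * other and Fc_in leaves at least two tracks free: aligned input and
 * output patterns would otherwise create "pin domains" (see
 * load_perturbed_switch_pattern). */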
int i;
float Fc_ratio;
boolean *result = NULL;
result = (boolean *) my_malloc(L_num_types * sizeof(boolean));
if (BI_DIRECTIONAL == directionality) {
result[0] = FALSE;
for (i = 1; i < L_num_types; ++i) {
result[i] = FALSE;
if (Fc_in[i][0] > Fc_out[i][0]) {
Fc_ratio = (float) Fc_in[i][0] / (float) Fc_out[i][0];
} else {
Fc_ratio = (float) Fc_out[i][0] / (float) Fc_in[i][0];
}
if ((Fc_in[i][0] <= nodes_per_chan - 2)
&& (fabs(Fc_ratio - nint(Fc_ratio))
< (0.5 / (float) nodes_per_chan))) {
result[i] = TRUE;
}
}
} else {
/* Unidirectional routing uses mux balancing patterns and
* thus shouldn't need perturbation. */
assert(UNI_DIRECTIONAL == directionality);
for (i = 0; i < L_num_types; ++i) {
result[i] = FALSE;
}
}
return result;
}
static t_seg_details *
alloc_and_load_global_route_seg_details(INP int nodes_per_chan,
INP int global_route_switch) {
t_seg_details *result = NULL;
assert(nodes_per_chan == 1);
result = (t_seg_details *) my_malloc(sizeof(t_seg_details));
result->index = 0;
result->length = 1;
result->wire_switch = global_route_switch;
result->opin_switch = global_route_switch;
result->longline = FALSE;
result->direction = BI_DIRECTION;
result->Cmetal = 0.0;
result->Rmetal = 0.0;
result->start = 1;
result->drivers = MULTI_BUFFERED;
result->cb = (boolean *) my_malloc(sizeof(boolean) * 1);
result->cb[0] = TRUE;
result->sb = (boolean *) my_malloc(sizeof(boolean) * 2);
result->sb[0] = TRUE;
result->sb[1] = TRUE;
result->group_size = 1;
result->group_start = 0;
return result;
}
/* Calculates the actual Fc values for the given nodes_per_chan value */
int **
alloc_and_load_actual_fc(INP int L_num_types, INP t_type_ptr types,
INP int nodes_per_chan, INP boolean is_Fc_out,
INP enum e_directionality directionality, OUTP boolean * Fc_clipped, INP boolean ignore_Fc_0) {
int i, j;
int **Result = NULL;
int fac, num_sets;
*Fc_clipped = FALSE;
/* Unidir tracks formed in pairs, otherwise no effect. */
fac = 1;
if (UNI_DIRECTIONAL == directionality) {
fac = 2;
}
assert((nodes_per_chan % fac) == 0);
num_sets = nodes_per_chan / fac;
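/* Example: with nodes_per_chan = 8 and unidirectional routing, fac = 2
 * and num_sets = 4; a fractional Fc of 0.5 then yields
 * fac * nint(num_sets * 0.5) = 4 tracks, i.e. a whole number of pairs. */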
int max_pins = types[0].num_pins;
for (i = 1; i < L_num_types; ++i) {
if (types[i].num_pins > max_pins) {
max_pins = types[i].num_pins;
}
}
Result = (int **) alloc_matrix(0, L_num_types, 0, max_pins, sizeof(int));
for (i = 1; i < L_num_types; ++i) {
float *Fc = (float *) my_malloc(sizeof(float) * types[i].num_pins); /* [0..num_pins-1] */
for (j = 0; j < types[i].num_pins; ++j) {
Fc[j] = types[i].Fc[j];
if(Fc[j] == 0 && ignore_Fc_0 == FALSE) {
/* Special case indicating that this pin does not connect to general-purpose routing */
Result[i][j] = 0;
} else {
/* General case indicating that this pin connects to general-purpose routing */
if (types[i].is_Fc_frac[j]) {
Result[i][j] = fac * nint(num_sets * Fc[j]);
} else {
Result[i][j] = (int)Fc[j];
}
if (is_Fc_out && types[i].is_Fc_full_flex[j]) {
Result[i][j] = nodes_per_chan;
}
Result[i][j] = std::max(Result[i][j], fac);
if (Result[i][j] > nodes_per_chan) {
*Fc_clipped = TRUE;
Result[i][j] = nodes_per_chan;
}
}
assert(Result[i][j] % fac == 0);
}
free(Fc);
}
return Result;
}
/* frees the track to ipin mapping for each physical grid type */
void free_type_track_to_ipin_map(struct s_ivec**** track_to_pin_map,
t_type_ptr types, int nodes_per_chan) {
int i, itrack, ioff, iside;
for (i = 0; i < num_types; i++) {
if (track_to_pin_map[i] != NULL) {
for (itrack = 0; itrack < nodes_per_chan; itrack++) {
for (ioff = 0; ioff < types[i].height; ioff++) {
for (iside = 0; iside < 4; iside++) {
if (track_to_pin_map[i][itrack][ioff][iside].list
!= NULL) {
free(track_to_pin_map[i][itrack][ioff][iside].list);
}
}
}
}
free_matrix3(track_to_pin_map[i], 0, nodes_per_chan - 1, 0,
types[i].height - 1, 0, sizeof(struct s_ivec));
}
}
free(track_to_pin_map);
}
/* frees the ipin to track mapping for each physical grid type */
void free_type_pin_to_track_map(int***** ipin_to_track_map,
t_type_ptr types) {
int i;
for (i = 0; i < num_types; i++) {
free_matrix4(ipin_to_track_map[i], 0, types[i].num_pins - 1, 0,
types[i].height - 1, 0, 3, 0, sizeof(int));
}
free(ipin_to_track_map);
}
/* Does the actual work of allocating the rr_graph and filling all the *
* appropriate values. Everything up to this was just a prelude! */
static void alloc_and_load_rr_graph(INP int num_nodes,
INP t_rr_node * L_rr_node, INP int num_seg_types,
INP t_seg_details * seg_details, INP boolean * L_rr_edge_done,
INP struct s_ivec ****track_to_ipin_lookup,
INP int *****opin_to_track_map, INP struct s_ivec ***switch_block_conn,
INP struct s_grid_tile **L_grid, INP int L_nx, INP int L_ny, INP int Fs,
INP short *****sblock_pattern, INP int **Fc_out, INP int **Fc_xofs,
INP int **Fc_yofs, INP t_ivec *** L_rr_node_indices,
INP int nodes_per_chan, INP enum e_switch_block_type sb_type,
INP int delayless_switch, INP enum e_directionality directionality,
INP int wire_to_ipin_switch, OUTP boolean * Fc_clipped,
INP t_direct_inf *directs, INP int num_directs,
INP t_clb_to_clb_directs *clb_to_clb_directs) {
int i, j;
boolean clipped;
int *opin_mux_size = NULL;
/* If Fc gets clipped, this will be flagged to true */
*Fc_clipped = FALSE;
/* Connect SINKS and SOURCES to their pins. */
for (i = 0; i <= (L_nx + 1); i++) {
for (j = 0; j <= (L_ny + 1); j++) {
build_rr_sinks_sources(i, j, L_rr_node, L_rr_node_indices,
delayless_switch, L_grid);
}
}
/* Build opins */
for (i = 0; i <= (L_nx + 1); ++i) {
for (j = 0; j <= (L_ny + 1); ++j) {
if (BI_DIRECTIONAL == directionality) {
build_bidir_rr_opins(i, j, L_rr_node, L_rr_node_indices,
opin_to_track_map, Fc_out, L_rr_edge_done, seg_details,
L_grid, delayless_switch,
directs, num_directs, clb_to_clb_directs);
} else {
assert(UNI_DIRECTIONAL == directionality);
build_unidir_rr_opins(i, j, L_grid, Fc_out, nodes_per_chan,
seg_details, Fc_xofs, Fc_yofs, L_rr_node,
L_rr_edge_done, &clipped, L_rr_node_indices, delayless_switch,
directs, num_directs, clb_to_clb_directs);
if (clipped) {
*Fc_clipped = TRUE;
}
}
}
}
/* We make a copy of the current fanin values for the nodes to
* know the number of OPINs driving each mux presently */
opin_mux_size = (int *) my_malloc(sizeof(int) * num_nodes);
for (i = 0; i < num_nodes; ++i) {
opin_mux_size[i] = L_rr_node[i].fan_in;
}
/* Build channels */
assert(Fs % 3 == 0);
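/* Fs is the switch-block flexibility: each wire entering a switch block
 * connects to Fs other wires, spread across the three other sides, hence
 * the Fs / 3 per side passed below. */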
for (i = 0; i <= L_nx; i++) {
for (j = 0; j <= L_ny; j++) {
if (i > 0) {
build_rr_xchan(i, j, track_to_ipin_lookup, switch_block_conn,
CHANX_COST_INDEX_START, nodes_per_chan, opin_mux_size,
sblock_pattern, Fs / 3, seg_details, L_rr_node_indices,
L_rr_edge_done, L_rr_node, wire_to_ipin_switch,
directionality);
}
if (j > 0) {
build_rr_ychan(i, j, track_to_ipin_lookup, switch_block_conn,
CHANX_COST_INDEX_START + num_seg_types, nodes_per_chan,
opin_mux_size, sblock_pattern, Fs / 3, seg_details,
L_rr_node_indices, L_rr_edge_done, L_rr_node,
wire_to_ipin_switch, directionality);
}
}
}
free(opin_mux_size);
}
static void build_bidir_rr_opins(INP int i, INP int j,
INOUTP t_rr_node * L_rr_node, INP t_ivec *** L_rr_node_indices,
INP int *****opin_to_track_map, INP int **Fc_out,
INP boolean * L_rr_edge_done, INP t_seg_details * seg_details,
INP struct s_grid_tile **L_grid, INP int delayless_switch,
INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs) {
int ipin, inode, num_edges, *Fc, ofs;
t_type_ptr type;
struct s_linked_edge *edge_list, *next;
/* OPIN edges need to be done all at once, so let the offset-0
 * block do the work. */
if (L_grid[i][j].offset > 0) {
return;
}
type = L_grid[i][j].type;
Fc = Fc_out[type->index];
for (ipin = 0; ipin < type->num_pins; ++ipin) {
/* We are only working with OPINs, so skip non-drivers */
if (type->class_inf[type->pin_class[ipin]].type != DRIVER) {
continue;
}
num_edges = 0;
edge_list = NULL;
if(Fc[ipin] != 0) {
for (ofs = 0; ofs < type->height; ++ofs) {
num_edges += get_bidir_opin_connections(i, j + ofs, ipin,
&edge_list, opin_to_track_map, Fc[ipin], L_rr_edge_done,
L_rr_node_indices, seg_details);
}
}
/* Add in direct connections */
num_edges += get_opin_direct_connecions(i, j, ipin, &edge_list, L_rr_node_indices, delayless_switch, directs, num_directs, clb_to_clb_directs);
inode = get_rr_node_index(i, j, OPIN, ipin, L_rr_node_indices);
alloc_and_load_edges_and_switches(L_rr_node, inode, num_edges,
L_rr_edge_done, edge_list);
while (edge_list != NULL) {
next = edge_list->next;
free(edge_list);
edge_list = next;
}
}
}
void free_rr_graph(void) {
int i;
/* Frees all the routing graph data structures, if they have been *
 * allocated. I use rr_mem_ch.chunk_ptr_head as a flag to indicate *
 * whether or not the graph has been allocated -- if it is not NULL, *
 * a routing graph exists and can be freed. Hence, you can call this *
 * routine even if you're not sure of whether a rr_graph exists or not. */
if (rr_mem_ch.chunk_ptr_head == NULL) /* Nothing to free. */
return;
free_chunk_memory(&rr_mem_ch); /* Frees ALL "chunked" data */
/* Before adding any more free calls here, be sure the data is NOT chunk *
* allocated, as ALL the chunk allocated data is already free! */
if(net_rr_terminals != NULL) {
free(net_rr_terminals);
}
for (i = 0; i < num_rr_nodes; i++) {
if (rr_node[i].edges != NULL) {
free(rr_node[i].edges);
}
if (rr_node[i].switches != NULL) {
free(rr_node[i].switches);
}
}
assert(rr_node_indices);
free_rr_node_indices(rr_node_indices);
free(rr_node);
free(rr_indexed_data);
for (i = 0; i < num_blocks; i++) {
free(rr_blk_source[i]);
}
free(rr_blk_source);
rr_blk_source = NULL;
net_rr_terminals = NULL;
rr_node = NULL;
rr_node_indices = NULL;
rr_indexed_data = NULL;
num_rr_nodes = 0;
}
static void alloc_net_rr_terminals(void) {
int inet;
net_rr_terminals = (int **) my_malloc(num_nets * sizeof(int *));
for (inet = 0; inet < num_nets; inet++) {
net_rr_terminals[inet] = (int *) my_chunk_malloc(
(clb_net[inet].num_sinks + 1) * sizeof(int),
&rr_mem_ch);
}
}
void load_net_rr_terminals(t_ivec *** L_rr_node_indices) {
/* Loads the net_rr_terminals data structure (allocated by alloc_net_rr_terminals). For each net *
* it stores the rr_node index of the SOURCE of the net and all the SINKs *
* of the net. [0..num_nets-1][0..num_pins-1]. Entry [inet][pnum] stores *
* the rr index corresponding to the SOURCE (opin) or SINK (ipin) of pnum. */
int inet, ipin, inode, iblk, i, j, node_block_pin, iclass;
t_type_ptr type;
for (inet = 0; inet < num_nets; inet++) {
for (ipin = 0; ipin <= clb_net[inet].num_sinks; ipin++) {
iblk = clb_net[inet].node_block[ipin];
i = block[iblk].x;
j = block[iblk].y;
type = block[iblk].type;
/* In the routing graph, each (x, y) location has unique pins on it
* so when there is capacity, blocks are packed and their pin numbers
* are offset to get their actual rr_node */
node_block_pin = clb_net[inet].node_block_pin[ipin];
iclass = type->pin_class[node_block_pin];
inode = get_rr_node_index(i, j, (ipin == 0 ? SOURCE : SINK), /* First pin is driver */
iclass, L_rr_node_indices);
net_rr_terminals[inet][ipin] = inode;
}
}
}
static void alloc_and_load_rr_clb_source(t_ivec *** L_rr_node_indices) {
/* Saves the rr_node corresponding to each SOURCE and SINK in each CLB *
* in the FPGA. Currently only the SOURCE rr_node values are used, and *
* they are used only to reserve pins for locally used OPINs in the router. *
* [0..num_blocks-1][0..num_class-1]. The values for blocks that are pads *
* are NOT valid. */
int iblk, i, j, iclass, inode;
int class_low, class_high;
t_rr_type rr_type;
t_type_ptr type;
rr_blk_source = (int **) my_malloc(num_blocks * sizeof(int *));
for (iblk = 0; iblk < num_blocks; iblk++) {
type = block[iblk].type;
get_class_range_for_block(iblk, &class_low, &class_high);
rr_blk_source[iblk] = (int *) my_malloc(type->num_class * sizeof(int));
for (iclass = 0; iclass < type->num_class; iclass++) {
if (iclass >= class_low && iclass <= class_high) {
i = block[iblk].x;
j = block[iblk].y;
if (type->class_inf[iclass].type == DRIVER)
rr_type = SOURCE;
else
rr_type = SINK;
inode = get_rr_node_index(i, j, rr_type, iclass,
L_rr_node_indices);
rr_blk_source[iblk][iclass] = inode;
} else {
rr_blk_source[iblk][iclass] = OPEN;
}
}
}
}
static void build_rr_sinks_sources(INP int i, INP int j,
INP t_rr_node * L_rr_node, INP t_ivec *** L_rr_node_indices,
INP int delayless_switch, INP struct s_grid_tile **L_grid) {
/* Loads IPIN, SINK, SOURCE, and OPIN nodes.
 * Loads IPIN-to-SINK edges and SOURCE-to-OPIN edges. */
int ipin, iclass, inode, pin_num, to_node, num_edges;
int num_class, num_pins;
t_type_ptr type;
struct s_class *class_inf;
int *pin_class;
const t_pb_graph_node *pb_graph_node;
int iport, ipb_pin, iporttype, z;
/* Since we share nodes within a large block, only the
 * start tile can initialize sinks, sources, and pins */
if (L_grid[i][j].offset > 0)
return;
type = L_grid[i][j].type;
num_class = type->num_class;
class_inf = type->class_inf;
num_pins = type->num_pins;
pin_class = type->pin_class;
z = 0;
/* SINKs, and SOURCE-to-OPIN edges */
for (iclass = 0; iclass < num_class; iclass++) {
if (class_inf[iclass].type == DRIVER) { /* SOURCE */
inode = get_rr_node_index(i, j, SOURCE, iclass, L_rr_node_indices);
num_edges = class_inf[iclass].num_pins;
L_rr_node[inode].num_edges = num_edges;
L_rr_node[inode].edges = (int *) my_malloc(num_edges * sizeof(int));
L_rr_node[inode].switches = (short *) my_malloc(
num_edges * sizeof(short));
for (ipin = 0; ipin < class_inf[iclass].num_pins; ipin++) {
pin_num = class_inf[iclass].pinlist[ipin];
to_node = get_rr_node_index(i, j, OPIN, pin_num,
L_rr_node_indices);
L_rr_node[inode].edges[ipin] = to_node;
L_rr_node[inode].switches[ipin] = delayless_switch;
++L_rr_node[to_node].fan_in;
}
L_rr_node[inode].cost_index = SOURCE_COST_INDEX;
L_rr_node[inode].type = SOURCE;
} else { /* SINK */
assert(class_inf[iclass].type == RECEIVER);
inode = get_rr_node_index(i, j, SINK, iclass, L_rr_node_indices);
/* NOTE: To allow route throughs through clbs, change the lines below to *
* make an edge from the input SINK to the output SOURCE. Do for just the *
* special case of INPUTS = class 0 and OUTPUTS = class 1 and see what it *
* leads to. If route throughs are allowed, you may want to increase the *
* base cost of OPINs and/or SOURCES so they aren't used excessively. */
/* Initialize to unconnected to fix values */
L_rr_node[inode].num_edges = 0;
L_rr_node[inode].edges = NULL;
L_rr_node[inode].switches = NULL;
L_rr_node[inode].cost_index = SINK_COST_INDEX;
L_rr_node[inode].type = SINK;
}
/* Things common to both SOURCEs and SINKs. */
L_rr_node[inode].capacity = class_inf[iclass].num_pins;
L_rr_node[inode].occ = 0;
L_rr_node[inode].xlow = i;
L_rr_node[inode].xhigh = i;
L_rr_node[inode].ylow = j;
L_rr_node[inode].yhigh = j + type->height - 1;
L_rr_node[inode].R = 0;
L_rr_node[inode].C = 0;
L_rr_node[inode].ptc_num = iclass;
L_rr_node[inode].direction = (enum e_direction)OPEN;
L_rr_node[inode].drivers = (enum e_drivers)OPEN;
}
iporttype = iport = ipb_pin = 0;
pb_graph_node = type->pb_graph_head;
if(pb_graph_node != NULL && pb_graph_node->num_input_ports == 0) {
iporttype = 1;
}
/* Connect IPINs to SINKs; OPINs are initialized but get no edges here */
for (ipin = 0; ipin < num_pins; ipin++) {
iclass = pin_class[ipin];
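/* z is the capacity-instance index within this tile: pins of stacked
 * blocks are numbered consecutively, so dividing by the per-instance
 * pin count recovers which instance owns ipin. */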
z = ipin / (type->pb_type->num_clock_pins + type->pb_type->num_output_pins + type->pb_type->num_input_pins);
if (class_inf[iclass].type == RECEIVER) {
inode = get_rr_node_index(i, j, IPIN, ipin, L_rr_node_indices);
to_node = get_rr_node_index(i, j, SINK, iclass, L_rr_node_indices);
L_rr_node[inode].num_edges = 1;
L_rr_node[inode].edges = (int *) my_malloc(sizeof(int));
L_rr_node[inode].switches = (short *) my_malloc(sizeof(short));
L_rr_node[inode].edges[0] = to_node;
L_rr_node[inode].switches[0] = delayless_switch;
++L_rr_node[to_node].fan_in;
L_rr_node[inode].cost_index = IPIN_COST_INDEX;
L_rr_node[inode].type = IPIN;
/* Add in information so that I can identify which cluster pin this rr_node connects to later */
L_rr_node[inode].z = z;
if(iporttype == 0) {
L_rr_node[inode].pb_graph_pin = &pb_graph_node->input_pins[iport][ipb_pin];
ipb_pin++;
if(ipb_pin >= pb_graph_node->num_input_pins[iport]) {
iport++;
ipb_pin = 0;
if(iport >= pb_graph_node->num_input_ports) {
iporttype++;
iport = 0;
if(pb_graph_node->num_clock_ports == 0) {
iporttype = 0;
}
}
}
} else {
assert(iporttype == 1);
L_rr_node[inode].pb_graph_pin = &pb_graph_node->clock_pins[iport][ipb_pin];
ipb_pin++; /* Xifan TANG: Original VPR does not have this increment!!! */
if(ipb_pin >= pb_graph_node->num_clock_pins[iport]) {
iport++;
ipb_pin = 0;
if(iport >= pb_graph_node->num_clock_ports) {
iporttype = 0;
iport = 0;
if(pb_graph_node->num_input_ports == 0) {
iporttype = 1;
}
}
}
}
} else {
assert(class_inf[iclass].type == DRIVER);
inode = get_rr_node_index(i, j, OPIN, ipin, L_rr_node_indices);
/* Add in information so that I can identify which cluster pin this rr_node connects to later */
L_rr_node[inode].z = z;
L_rr_node[inode].num_edges = 0;
L_rr_node[inode].edges = NULL;
L_rr_node[inode].switches = NULL;
L_rr_node[inode].cost_index = OPIN_COST_INDEX;
L_rr_node[inode].type = OPIN;
L_rr_node[inode].pb_graph_pin = &pb_graph_node->output_pins[iport][ipb_pin];
ipb_pin++; /* Xifan TANG: Original VPR does not have this increment!!! */
if(ipb_pin >= pb_graph_node->num_output_pins[iport]) {
iport++;
ipb_pin = 0;
if(iport >= pb_graph_node->num_output_ports) {
iport = 0;
if(pb_graph_node->num_input_ports == 0) {
iporttype = 1;
} else {
iporttype = 0;
}
}
}
}
/* Common to both DRIVERs and RECEIVERs */
L_rr_node[inode].capacity = 1;
L_rr_node[inode].occ = 0;
L_rr_node[inode].xlow = i;
L_rr_node[inode].xhigh = i;
L_rr_node[inode].ylow = j;
L_rr_node[inode].yhigh = j + type->height - 1;
L_rr_node[inode].C = 0;
L_rr_node[inode].R = 0;
L_rr_node[inode].ptc_num = ipin;
L_rr_node[inode].direction = (enum e_direction)OPEN;
L_rr_node[inode].drivers = (enum e_drivers)OPEN;
}
}
static void build_rr_xchan(INP int i, INP int j,
INP struct s_ivec ****track_to_ipin_lookup,
INP struct s_ivec ***switch_block_conn, INP int cost_index_offset,
INP int nodes_per_chan, INP int *opin_mux_size,
INP short *****sblock_pattern, INP int Fs_per_side,
INP t_seg_details * seg_details, INP t_ivec *** L_rr_node_indices,
INOUTP boolean * L_rr_edge_done, INOUTP t_rr_node * L_rr_node,
INP int wire_to_ipin_switch, INP enum e_directionality directionality) {
/* Loads up all the routing resource nodes in the x-directed channel *
* segments starting at (i,j). */
int itrack, istart, iend, num_edges, inode, length;
struct s_linked_edge *edge_list, *next;
/* mrFPGA: Xifan TANG */
int jstart, jend;
/* END */
for (itrack = 0; itrack < nodes_per_chan; itrack++) {
/* First count number of edges and put the edges in a linked list. */
num_edges = 0;
edge_list = NULL;
/* mrFPGA : Xifan TANG*/
if ( is_stack ) {
jstart = get_seg_start (seg_details, itrack, i, j);
if ( jstart != j )
continue;
jend = get_seg_end (seg_details, itrack, jstart, i, ny);
istart = i;
iend = i;
num_edges += get_track_to_ipins(jstart, i, itrack, &edge_list, L_rr_node_indices,
track_to_ipin_lookup, seg_details, CHANX, nx, wire_to_ipin_switch,
directionality);
num_edges += get_track_to_tracks(i, jstart, itrack, CHANX, i-1, CHANY, ny,
nodes_per_chan, opin_mux_size, Fs_per_side, sblock_pattern,
&edge_list, seg_details, directionality, L_rr_node_indices,
L_rr_edge_done, switch_block_conn);
num_edges += get_track_to_tracks(i, jstart, itrack, CHANX, i, CHANY, ny,
nodes_per_chan, opin_mux_size, Fs_per_side, sblock_pattern,
&edge_list, seg_details, directionality, L_rr_node_indices,
L_rr_edge_done, switch_block_conn);
if( jstart > 0 ) {
num_edges += get_track_to_tracks(i, jstart, itrack, CHANX, jstart - 1, CHANX, ny,
nodes_per_chan, opin_mux_size, Fs_per_side,sblock_pattern, &edge_list,
seg_details, directionality, L_rr_node_indices, L_rr_edge_done, switch_block_conn);
}
if( jend < ny ) {
num_edges += get_track_to_tracks(i, jstart, itrack, CHANX, jend + 1, CHANX, ny,
nodes_per_chan, opin_mux_size, Fs_per_side, sblock_pattern, &edge_list,
seg_details, directionality, L_rr_node_indices, L_rr_edge_done, switch_block_conn);
}
} else { /* end */
/* Xifan TANG: the original VPR code path follows */
/* Original VPR part */
istart = get_seg_start(seg_details, itrack, j, i);
iend = get_seg_end(seg_details, itrack, istart, j, nx);
if (i > istart)
continue; /* Not the start of this segment. */
jstart = j;
jend = j;
/* end */
/* First count number of edges and put the edges in a linked list. */
num_edges = 0;
edge_list = NULL;
num_edges += get_track_to_ipins(istart, j, itrack, &edge_list,
L_rr_node_indices, track_to_ipin_lookup, seg_details, CHANX, nx,
wire_to_ipin_switch, directionality);
if (j > 0) {
num_edges += get_track_to_tracks(j, istart, itrack, CHANX, j, CHANY,
nx, nodes_per_chan, opin_mux_size, Fs_per_side,
sblock_pattern, &edge_list, seg_details, directionality,
L_rr_node_indices, L_rr_edge_done, switch_block_conn);
}
if (j < ny) {
num_edges += get_track_to_tracks(j, istart, itrack, CHANX, j + 1,
CHANY, nx, nodes_per_chan, opin_mux_size, Fs_per_side,
sblock_pattern, &edge_list, seg_details, directionality,
L_rr_node_indices, L_rr_edge_done, switch_block_conn);
}
if (istart > 1) {
num_edges += get_track_to_tracks(j, istart, itrack, CHANX,
istart - 1, CHANX, nx, nodes_per_chan, opin_mux_size,
Fs_per_side, sblock_pattern, &edge_list, seg_details,
directionality, L_rr_node_indices, L_rr_edge_done,
switch_block_conn);
}
if (iend < nx) {
num_edges += get_track_to_tracks(j, istart, itrack, CHANX, iend + 1,
CHANX, nx, nodes_per_chan, opin_mux_size, Fs_per_side,
sblock_pattern, &edge_list, seg_details, directionality,
L_rr_node_indices, L_rr_edge_done, switch_block_conn);
}
}
/* END */
inode = get_rr_node_index(i, j, CHANX, itrack, L_rr_node_indices);
alloc_and_load_edges_and_switches(L_rr_node, inode, num_edges,
L_rr_edge_done, edge_list);
while (edge_list != NULL) {
next = edge_list->next;
free(edge_list);
edge_list = next;
}
/* Edge arrays have now been built up. Do everything else. */
L_rr_node[inode].cost_index = cost_index_offset
+ seg_details[itrack].index;
L_rr_node[inode].occ = 0;
L_rr_node[inode].capacity = 1; /* GLOBAL routing handled elsewhere */
if (is_stack) {
/* mrFPGA: Xifan TANG */
L_rr_node[inode].xlow = istart;
L_rr_node[inode].xhigh = iend;
L_rr_node[inode].ylow = jstart;
L_rr_node[inode].yhigh = jend;
} else {
/* Original VPR */
L_rr_node[inode].xlow = istart;
L_rr_node[inode].xhigh = iend;
L_rr_node[inode].ylow = j;
L_rr_node[inode].yhigh = j;
}
/* mrFPGA: Xifan TANG */
length = is_stack ? (jend - jstart) : (iend - istart + 1);
//length = (iend - istart + 1);
/* END */
L_rr_node[inode].R = length * seg_details[itrack].Rmetal;
L_rr_node[inode].C = length * seg_details[itrack].Cmetal;
L_rr_node[inode].ptc_num = itrack;
L_rr_node[inode].type = CHANX;
L_rr_node[inode].direction = seg_details[itrack].direction;
L_rr_node[inode].drivers = seg_details[itrack].drivers;
/* Xifan TANG:(For SPICE Modeling) Fill the segment inf */
//LL_rr_node[inode].seg_index = seg_details[itrack].index;
}
}
static void build_rr_ychan(INP int i, INP int j,
INP struct s_ivec ****track_to_ipin_lookup,
INP struct s_ivec ***switch_block_conn, INP int cost_index_offset,
INP int nodes_per_chan, INP int *opin_mux_size,
INP short *****sblock_pattern, INP int Fs_per_side,
INP t_seg_details * seg_details, INP t_ivec *** L_rr_node_indices,
INP boolean * L_rr_edge_done, INOUTP t_rr_node * L_rr_node,
INP int wire_to_ipin_switch, INP enum e_directionality directionality) {
/* Loads up all the routing resource nodes in the y-directed channel *
* segments starting at (i,j). */
int itrack, istart, iend, num_edges, inode, length;
struct s_linked_edge *edge_list, *next;
/* mrFPGA: Xifan TANG*/
int jstart, jend;
/* END */
for (itrack = 0; itrack < nodes_per_chan; itrack++) {
/* First count number of edges and put the edges in a linked list. */
num_edges = 0;
edge_list = NULL;
/* mrFPGA */
if ( is_stack ) {
istart = get_seg_start (seg_details, itrack, j, i);
if ( istart != i )
continue;
iend = get_seg_end (seg_details, itrack, istart, j, nx);
/* mrFPGA: Xifan TANG */
jstart = j;
jend = j;
/* end */
num_edges += get_track_to_ipins(istart, j, itrack, &edge_list, L_rr_node_indices,
track_to_ipin_lookup, seg_details, CHANY, nx, wire_to_ipin_switch,
directionality);
num_edges += get_track_to_tracks(j, istart, itrack, CHANY, j-1, CHANX, nx,
nodes_per_chan, opin_mux_size, Fs_per_side, sblock_pattern,
&edge_list, seg_details, directionality, L_rr_node_indices,
L_rr_edge_done, switch_block_conn);
num_edges += get_track_to_tracks(j, istart, itrack, CHANY, j, CHANX, nx,
nodes_per_chan, opin_mux_size, Fs_per_side, sblock_pattern,
&edge_list, seg_details, directionality, L_rr_node_indices,
L_rr_edge_done, switch_block_conn);
if (istart > 0) {
num_edges += get_track_to_tracks(j, istart, itrack, CHANY, istart - 1, CHANY, nx,
nodes_per_chan, opin_mux_size, Fs_per_side, sblock_pattern, &edge_list,
seg_details, directionality, L_rr_node_indices, L_rr_edge_done,
switch_block_conn);
}
if (iend < nx) {
num_edges += get_track_to_tracks(j, istart, itrack, CHANY, iend + 1,
CHANY, nx, nodes_per_chan, opin_mux_size, Fs_per_side,
sblock_pattern, &edge_list, seg_details, directionality,
L_rr_node_indices, L_rr_edge_done, switch_block_conn);
}
} else { /* end */
istart = get_seg_start(seg_details, itrack, i, j);
iend = get_seg_end(seg_details, itrack, istart, i, ny);
if (j > istart)
continue; /* Not the start of this segment. */
/* mrFPGA: Xifan TANG*/
jstart = i;
jend = i;
/* END */
/* Original VPR */
/* First count number of edges and put the edges in a linked list. */
num_edges = 0;
edge_list = NULL;
num_edges += get_track_to_ipins(istart, i, itrack, &edge_list,
L_rr_node_indices, track_to_ipin_lookup, seg_details, CHANY, ny,
wire_to_ipin_switch, directionality);
if (i > 0) {
num_edges += get_track_to_tracks(i, istart, itrack, CHANY, i, CHANX,
ny, nodes_per_chan, opin_mux_size, Fs_per_side,
sblock_pattern, &edge_list, seg_details, directionality,
L_rr_node_indices, L_rr_edge_done, switch_block_conn);
}
if (i < nx) {
num_edges += get_track_to_tracks(i, istart, itrack, CHANY, i + 1,
CHANX, ny, nodes_per_chan, opin_mux_size, Fs_per_side,
sblock_pattern, &edge_list, seg_details, directionality,
L_rr_node_indices, L_rr_edge_done, switch_block_conn);
}
if (istart > 1) {
num_edges += get_track_to_tracks(i, istart, itrack, CHANY,
istart - 1, CHANY, ny, nodes_per_chan, opin_mux_size,
Fs_per_side, sblock_pattern, &edge_list, seg_details,
directionality, L_rr_node_indices, L_rr_edge_done,
switch_block_conn);
}
if (iend < ny) {
num_edges += get_track_to_tracks(i, istart, itrack, CHANY, iend + 1,
CHANY, ny, nodes_per_chan, opin_mux_size, Fs_per_side,
sblock_pattern, &edge_list, seg_details, directionality,
L_rr_node_indices, L_rr_edge_done, switch_block_conn);
}
}
/* END */
inode = get_rr_node_index(i, j, CHANY, itrack, L_rr_node_indices);
alloc_and_load_edges_and_switches(L_rr_node, inode, num_edges,
L_rr_edge_done, edge_list);
while (edge_list != NULL) {
next = edge_list->next;
free(edge_list);
edge_list = next;
}
/* Edge arrays have now been built up. Do everything else. */
L_rr_node[inode].cost_index = cost_index_offset
+ seg_details[itrack].index;
L_rr_node[inode].occ = 0;
L_rr_node[inode].capacity = 1; /* GLOBAL routing handled elsewhere */
if (is_stack) {
/* mrFPGA: Xifan TANG */
L_rr_node[inode].xlow = istart;
L_rr_node[inode].xhigh = iend;
L_rr_node[inode].ylow = jstart;
L_rr_node[inode].yhigh = jend;
} else {
/* Original VPR */
L_rr_node[inode].xlow = i;
L_rr_node[inode].xhigh = i;
L_rr_node[inode].ylow = istart;
L_rr_node[inode].yhigh = iend;
}
/* mrFPGA : Xifan TANG*/
length = is_stack ? (iend - istart) : (iend - istart + 1);
//length = (iend - istart + 1);
/* END */
L_rr_node[inode].R = length * seg_details[itrack].Rmetal;
L_rr_node[inode].C = length * seg_details[itrack].Cmetal;
L_rr_node[inode].ptc_num = itrack;
L_rr_node[inode].type = CHANY;
L_rr_node[inode].direction = seg_details[itrack].direction;
L_rr_node[inode].drivers = seg_details[itrack].drivers;
/* Xifan TANG:(For SPICE Modeling) Fill the segment inf */
//LL_rr_node[inode].seg_index = seg_details[itrack].index;
}
}
void watch_edges(int inode, t_linked_edge * edge_list_head) {
t_linked_edge *list_ptr;
int i, to_node;
list_ptr = edge_list_head;
i = 0;
vpr_printf(TIO_MESSAGE_TRACE, "!!! Watching Node %d !!!!\n", inode);
print_rr_node(stdout, rr_node, inode);
vpr_printf(TIO_MESSAGE_TRACE, "Currently connects to:\n");
while (list_ptr != NULL) {
to_node = list_ptr->edge;
print_rr_node(stdout, rr_node, to_node);
list_ptr = list_ptr->next;
i++;
}
}
void alloc_and_load_edges_and_switches(INP t_rr_node * L_rr_node, INP int inode,
INP int num_edges, INOUTP boolean * L_rr_edge_done,
INP t_linked_edge * edge_list_head) {
/* Sets up all the edge related information for rr_node inode (num_edges, *
* the edges array and the switches array). The edge_list_head points to *
* a list of the num_edges edges and switches to put in the arrays. This *
* linked list is freed by this routine. This routine also resets the *
* rr_edge_done array for the next rr_node (i.e. set it so that no edges *
* are marked as having been seen before). */
t_linked_edge *list_ptr;
int i;
/* Check we aren't overwriting edges */
assert(L_rr_node[inode].num_edges < 1);
assert(NULL == L_rr_node[inode].edges);
assert(NULL == L_rr_node[inode].switches);
L_rr_node[inode].num_edges = num_edges;
L_rr_node[inode].edges = (int *) my_malloc(num_edges * sizeof(int));
L_rr_node[inode].switches = (short *) my_malloc(num_edges * sizeof(short));
i = 0;
list_ptr = edge_list_head;
while (list_ptr && (i < num_edges)) {
L_rr_node[inode].edges[i] = list_ptr->edge;
L_rr_node[inode].switches[i] = list_ptr->iswitch;
++L_rr_node[list_ptr->edge].fan_in;
/* Unmark the edge since we are done considering fanout from node. */
L_rr_edge_done[list_ptr->edge] = FALSE;
list_ptr = list_ptr->next;
++i;
}
assert(list_ptr == NULL);
assert(i == num_edges);
}
int ****
alloc_and_load_pin_to_track_map(INP enum e_pin_type pin_type,
INP int nodes_per_chan, INP int *Fc, INP t_type_ptr Type,
INP boolean perturb_switch_pattern,
INP enum e_directionality directionality) {
int **num_dir; /* [0..height][0..3] Number of *physical* pins on each side. */
int ***dir_list; /* [0..height][0..3][0..num_pins-1] list of pins of correct type *
                  * on each side. Max possible space allocated for simplicity. */
int i, j, k, iside, ipin, iclass, num_phys_pins, pindex, ioff;
int *pin_num_ordering, *side_ordering, *offset_ordering;
int **num_done_per_dir; /* [0..height][0..3] */
int ****tracks_connected_to_pin; /* [0..num_pins-1][0..height][0..3][0..Fc-1] */
/* NB: This wastes some space. Could set tracks_..._pin[ipin][ioff][iside] =
* NULL if there is no pin on that side, or that pin is of the wrong type.
* Probably not enough memory to worry about, esp. as it's temporary.
* If pin ipin on side iside does not exist or is of the wrong type,
* tracks_connected_to_pin[ipin][iside][0] = OPEN. */
if (Type->num_pins < 1) {
return NULL;
}
/* Currently, only two possible Fc values exist: 0 or default.
* Finding the max. value of Fc in block will result in the
* default value, which works for now. In the future, when
* the Fc values of all pins can vary, the max value will continue
* to work for matrix (de)allocation purposes. However, all looping
* will have to be modified to account for pin-based Fc values. */
int max_Fc = 0;
for (i = 0; i < Type->num_pins; ++i) {
iclass = Type->pin_class[i];
if (Fc[i] > max_Fc && Type->class_inf[iclass].type == pin_type) {
max_Fc = Fc[i];
}
}
tracks_connected_to_pin = (int ****) alloc_matrix4(0, Type->num_pins - 1, 0,
Type->height - 1, 0, 3, 0, max_Fc, sizeof(int));
for (ipin = 0; ipin < Type->num_pins; ipin++) {
for (ioff = 0; ioff < Type->height; ioff++) {
for (iside = 0; iside < 4; iside++) {
for (i = 0; i < max_Fc; ++i) {
tracks_connected_to_pin[ipin][ioff][iside][i] = OPEN; /* Unconnected. */
}
}
}
}
num_dir = (int **) alloc_matrix(0, Type->height - 1, 0, 3, sizeof(int));
dir_list = (int ***) alloc_matrix3(0, Type->height - 1, 0, 3, 0,
Type->num_pins - 1, sizeof(int));
/* Defensive coding. Try to crash hard if I use an unset entry. */
for (i = 0; i < Type->height; i++)
for (j = 0; j < 4; j++)
for (k = 0; k < Type->num_pins; k++)
dir_list[i][j][k] = (-1);
for (i = 0; i < Type->height; i++)
for (j = 0; j < 4; j++)
num_dir[i][j] = 0;
for (ipin = 0; ipin < Type->num_pins; ipin++) {
iclass = Type->pin_class[ipin];
if (Type->class_inf[iclass].type != pin_type) /* Doing either ipins OR opins */
continue;
/* Pins connecting only to global resources get no switches -> keeps the *
* area model accurate. */
if (Type->is_global_pin[ipin])
continue;
for (ioff = 0; ioff < Type->height; ioff++) {
for (iside = 0; iside < 4; iside++) {
if (Type->pinloc[ioff][iside][ipin] == 1) {
dir_list[ioff][iside][num_dir[ioff][iside]] = ipin;
num_dir[ioff][iside]++;
}
}
}
}
num_phys_pins = 0;
for (ioff = 0; ioff < Type->height; ioff++) {
for (iside = 0; iside < 4; iside++)
num_phys_pins += num_dir[ioff][iside]; /* Num. physical pins per type */
}
num_done_per_dir = (int **) alloc_matrix(0, Type->height - 1, 0, 3,
sizeof(int));
for (ioff = 0; ioff < Type->height; ioff++) {
for (iside = 0; iside < 4; iside++) {
num_done_per_dir[ioff][iside] = 0;
}
}
pin_num_ordering = (int *) my_malloc(num_phys_pins * sizeof(int));
side_ordering = (int *) my_malloc(num_phys_pins * sizeof(int));
offset_ordering = (int *) my_malloc(num_phys_pins * sizeof(int));
/* Connection block I use distributes pins evenly across the tracks *
* of ALL sides of the clb at once. Ensures that each pin connects *
* to spaced out tracks in its connection block, and that the other *
* pins (potentially in other C blocks) connect to the remaining tracks *
* first. Doesn't matter for large Fc, but should make a fairly *
* good low Fc block that leverages the fact that usually lots of pins *
* are logically equivalent. */
iside = LEFT;
ioff = Type->height - 1;
ipin = 0;
pindex = -1;
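/* Cycle TOP -> RIGHT -> BOTTOM -> LEFT around the block, stepping through
 * the height offsets on the RIGHT and LEFT sides, and advance pindex once
 * per revolution; this interleaves pins from all sides so that consecutive
 * entries in pin_num_ordering land on different sides. */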
while (ipin < num_phys_pins) {
if (iside == TOP) {
iside = RIGHT;
} else if (iside == RIGHT) {
if (ioff <= 0) {
iside = BOTTOM;
} else {
ioff--;
}
} else if (iside == BOTTOM) {
iside = LEFT;
} else {
assert(iside == LEFT);
if (ioff >= Type->height - 1) {
pindex++;
iside = TOP;
} else {
ioff++;
}
}
assert(pindex < num_phys_pins);
/* Number of physical pins bounds number of logical pins */
if (num_done_per_dir[ioff][iside] >= num_dir[ioff][iside])
continue;
pin_num_ordering[ipin] = dir_list[ioff][iside][pindex];
side_ordering[ipin] = iside;
offset_ordering[ipin] = ioff;
assert(Type->pinloc[ioff][iside][dir_list[ioff][iside][pindex]]);
num_done_per_dir[ioff][iside]++;
ipin++;
}
if (perturb_switch_pattern) {
load_perturbed_switch_pattern(Type, tracks_connected_to_pin,
num_phys_pins, pin_num_ordering, side_ordering, offset_ordering,
nodes_per_chan, max_Fc, directionality);
} else {
load_uniform_switch_pattern(Type, tracks_connected_to_pin,
num_phys_pins, pin_num_ordering, side_ordering, offset_ordering,
nodes_per_chan, max_Fc, directionality);
}
check_all_tracks_reach_pins(Type, tracks_connected_to_pin, nodes_per_chan,
max_Fc, pin_type);
/* Free all temporary storage. */
free_matrix(num_dir, 0, Type->height - 1, 0, sizeof(int));
free_matrix3(dir_list, 0, Type->height - 1, 0, 3, 0, sizeof(int));
free_matrix(num_done_per_dir, 0, Type->height - 1, 0, sizeof(int));
free(pin_num_ordering);
free(side_ordering);
free(offset_ordering);
return tracks_connected_to_pin;
}
static void load_uniform_switch_pattern(INP t_type_ptr type,
INOUTP int ****tracks_connected_to_pin, INP int num_phys_pins,
INP int *pin_num_ordering, INP int *side_ordering,
INP int *offset_ordering, INP int nodes_per_chan, INP int Fc,
enum e_directionality directionality) {
/* Loads the tracks_connected_to_pin array with an even distribution of *
* switches across the tracks for each pin. For example, each pin connects *
* to every 4.3rd track in a channel, with exactly which tracks a pin *
* connects to staggered from pin to pin. */
int i, j, ipin, iside, ioff, itrack, k;
float f_track, fc_step;
int group_size;
float step_size;
/* Uni-directional drive is implemented to ensure no directional bias and this means
* two important comments noted below */
/* 1. Spacing should be (W/2)/(Fc/2), and step_size should be spacing/(num_phys_pins),
* and lay down 2 switches on an adjacent pair of tracks at a time to ensure
* no directional bias. Basically, treat W (even) as W/2 pairs of tracks, and
* assign switches to a pair at a time. Can do this because W is guaranteed to
* be even-numbered; however same approach cannot be applied to Fc_out pattern
* when L > 1 and W <> 2L multiple.
*
* 2. This generic pattern should be considered the tileable physical layout,
* meaning all track # here are physical #'s,
* so later must use vpr_to_phy conversion to find actual logical #'s to connect.
* This also means I will not use get_output_block_companion_track to ensure
* no bias, since that describes a logical # -> that would confuse people. */
step_size = (float) nodes_per_chan / (float) (Fc * num_phys_pins);
if (directionality == BI_DIRECTIONAL) {
group_size = 1;
} else {
assert(directionality == UNI_DIRECTIONAL);
group_size = 2;
}
assert((nodes_per_chan % group_size == 0) && (Fc % group_size == 0));
fc_step = (float) nodes_per_chan / (float) Fc;
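/* Worked example (illustrative numbers): nodes_per_chan = 12, Fc = 6 and
 * 4 physical pins give fc_step = 2 and step_size = 0.5. Unidirectional
 * mode lays switches down in track pairs (group_size = 2), so pin 0
 * connects to tracks {0,1}, {4,5}, {8,9}. */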
for (i = 0; i < num_phys_pins; i++) {
ipin = pin_num_ordering[i];
iside = side_ordering[i];
ioff = offset_ordering[i];
/* Bi-directional treats each track separately, uni-directional works with pairs of tracks */
for (j = 0; j < (Fc / group_size); j++) {
f_track = (i * step_size) + (j * fc_step);
itrack = ((int) f_track) * group_size;
/* Catch possible floating point round error */
itrack = std::min(itrack, nodes_per_chan - group_size);
/* Assign the group of tracks for the Fc pattern */
for (k = 0; k < group_size; ++k) {
tracks_connected_to_pin[ipin][ioff][iside][group_size * j + k] =
itrack + k;
}
}
}
}
static void load_perturbed_switch_pattern(INP t_type_ptr type,
INOUTP int ****tracks_connected_to_pin, INP int num_phys_pins,
INP int *pin_num_ordering, INP int *side_ordering,
INP int *offset_ordering, INP int nodes_per_chan, INP int Fc,
enum e_directionality directionality) {
/* Loads the tracks_connected_to_pin array with an unevenly distributed *
* set of switches across the channel. This is done for inputs when *
* Fc_input = Fc_output to avoid creating "pin domains" -- certain output *
* pins being able to talk only to certain input pins because their switch *
* patterns exactly line up. Distribute Fc/2 + 1 switches over half the *
* channel and Fc/2 - 1 switches over the other half to make the switch *
* pattern different from the uniform one of the outputs. Also, have half *
* the pins put the "dense" part of their connections in the first half of *
* the channel and the other half put the "dense" part in the second half, *
* to make sure each track can connect to about the same number of ipins. */
int i, j, ipin, iside, itrack, ihalf, iconn, ioff;
int Fc_dense, Fc_sparse, Fc_half[2];
float f_track, spacing_dense, spacing_sparse, spacing[2];
float step_size;
assert(directionality == BI_DIRECTIONAL);
step_size = (float) nodes_per_chan / (float) (Fc * num_phys_pins);
Fc_dense = (Fc / 2) + 1;
Fc_sparse = Fc - Fc_dense; /* Works for even or odd Fc */
spacing_dense = (float) nodes_per_chan / (float) (2 * Fc_dense);
spacing_sparse = (float) nodes_per_chan / (float) (2 * Fc_sparse);
for (i = 0; i < num_phys_pins; i++) {
ipin = pin_num_ordering[i];
iside = side_ordering[i];
ioff = offset_ordering[i];
/* Flip every pin to balance switch density */
spacing[i % 2] = spacing_dense;
Fc_half[i % 2] = Fc_dense;
spacing[(i + 1) % 2] = spacing_sparse;
Fc_half[(i + 1) % 2] = Fc_sparse;
f_track = i * step_size; /* Start point. Staggered from pin to pin */
iconn = 0;
for (ihalf = 0; ihalf < 2; ihalf++) { /* For both dense and sparse halves. */
for (j = 0; j < Fc_half[ihalf]; ++j) {
itrack = (int) f_track;
/* Can occasionally get wraparound due to floating point rounding.
This is okay because the starting position is > 0 when this occurs,
so the connection is still valid. */
itrack = itrack % nodes_per_chan;
tracks_connected_to_pin[ipin][ioff][iside][iconn] = itrack;
f_track += spacing[ihalf];
iconn++;
}
}
} /* End for all physical pins. */
}
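/* Illustrative sketch (not compiled into the build): the perturbed pattern
 * above for a hypothetical W = 8, Fc = 6 and a single pin. Fc_dense = 4 and
 * Fc_sparse = 2, so spacing_dense = 8/(2*4) = 1.0 and spacing_sparse =
 * 8/(2*2) = 2.0; the pin lands on tracks {0,1,2,3} in the dense half and
 * {4,6} in the sparse half. */
#if 0
static void sketch_perturbed_pattern(void) {
int nodes_per_chan = 8, Fc = 6;
int Fc_dense = (Fc / 2) + 1; /* 4 */
int Fc_sparse = Fc - Fc_dense; /* 2 */
float spacing[2];
int Fc_half[2];
float f_track = 0.0; /* start point of the first pin */
int ihalf, j;
spacing[0] = (float) nodes_per_chan / (float) (2 * Fc_dense); /* 1.0 */
spacing[1] = (float) nodes_per_chan / (float) (2 * Fc_sparse); /* 2.0 */
Fc_half[0] = Fc_dense;
Fc_half[1] = Fc_sparse;
for (ihalf = 0; ihalf < 2; ihalf++) {
for (j = 0; j < Fc_half[ihalf]; ++j) {
printf("track %d\n", ((int) f_track) % nodes_per_chan);
f_track += spacing[ihalf];
}
}
}
#endif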
static void check_all_tracks_reach_pins(t_type_ptr type,
int ****tracks_connected_to_pin, int nodes_per_chan, int Fc,
enum e_pin_type ipin_or_opin) {
/* Checks that all tracks can be reached by some pin. */
int iconn, iside, itrack, ipin, ioff;
int *num_conns_to_track; /* [0..nodes_per_chan-1] */
assert(nodes_per_chan > 0);
num_conns_to_track = (int *) my_calloc(nodes_per_chan, sizeof(int));
for (ipin = 0; ipin < type->num_pins; ipin++) {
for (ioff = 0; ioff < type->height; ioff++) {
for (iside = 0; iside < 4; iside++) {
if (tracks_connected_to_pin[ipin][ioff][iside][0] != OPEN) { /* Pin exists */
for (iconn = 0; iconn < Fc; iconn++) {
itrack =
tracks_connected_to_pin[ipin][ioff][iside][iconn];
num_conns_to_track[itrack]++;
}
}
}
}
}
for (itrack = 0; itrack < nodes_per_chan; itrack++) {
if (num_conns_to_track[itrack] <= 0) {
vpr_printf(TIO_MESSAGE_ERROR, "check_all_tracks_reach_pins: Track %d does not connect to any CLB %ss.\n",
itrack, (ipin_or_opin == DRIVER ? "OPIN" : "IPIN"));
}
}
free(num_conns_to_track);
}
/* Allocates and loads the track to ipin lookup for each physical grid type. This
* is the same information as the ipin_to_track map but accessed in a different way. */
struct s_ivec ***
alloc_and_load_track_to_pin_lookup(INP int ****pin_to_track_map, INP int *Fc,
INP int height, INP int num_pins, INP int nodes_per_chan) {
int ipin, iside, itrack, iconn, ioff, pin_counter;
struct s_ivec ***track_to_pin_lookup;
/* [0..nodes_per_chan-1][0..height-1][0..3]. For each track number it stores a vector
* for each of the four sides. x-directed channels will use the TOP and
* BOTTOM vectors to figure out what clb input pins they connect to above
* and below them, respectively, while y-directed channels use the LEFT
* and RIGHT vectors. Each vector contains an nelem field saying how many
* ipins it connects to. The list[0..nelem-1] array then gives the pin
* numbers. */
/* Note that a clb pin that connects to a channel on its RIGHT means that *
* the channel connects to a clb pin on its LEFT. The convention used *
* here is always from the perspective of the CLB. */
if (num_pins < 1) {
return NULL;
}
/* Alloc and zero the lookup table */
track_to_pin_lookup = (struct s_ivec ***) alloc_matrix3(0,
nodes_per_chan - 1, 0, height - 1, 0, 3, sizeof(struct s_ivec));
for (itrack = 0; itrack < nodes_per_chan; itrack++) {
for (ioff = 0; ioff < height; ioff++) {
for (iside = 0; iside < 4; iside++) {
track_to_pin_lookup[itrack][ioff][iside].nelem = 0;
track_to_pin_lookup[itrack][ioff][iside].list = NULL;
}
}
}
/* Counting pass. */
for (ipin = 0; ipin < num_pins; ipin++) {
for (ioff = 0; ioff < height; ioff++) {
for (iside = 0; iside < 4; iside++) {
if (pin_to_track_map[ipin][ioff][iside][0] == OPEN)
continue;
for (iconn = 0; iconn < Fc[ipin]; iconn++) {
itrack = pin_to_track_map[ipin][ioff][iside][iconn];
track_to_pin_lookup[itrack][ioff][iside].nelem++;
}
}
}
}
/* Allocate space. */
for (itrack = 0; itrack < nodes_per_chan; itrack++) {
for (ioff = 0; ioff < height; ioff++) {
for (iside = 0; iside < 4; iside++) {
track_to_pin_lookup[itrack][ioff][iside].list = NULL; /* Defensive code */
if (track_to_pin_lookup[itrack][ioff][iside].nelem != 0) {
track_to_pin_lookup[itrack][ioff][iside].list =
(int *) my_malloc(
track_to_pin_lookup[itrack][ioff][iside].nelem
* sizeof(int));
track_to_pin_lookup[itrack][ioff][iside].nelem = 0;
}
}
}
}
/* Loading pass. */
for (ipin = 0; ipin < num_pins; ipin++) {
for (ioff = 0; ioff < height; ioff++) {
for (iside = 0; iside < 4; iside++) {
if (pin_to_track_map[ipin][ioff][iside][0] == OPEN)
continue;
for (iconn = 0; iconn < Fc[ipin]; iconn++) {
itrack = pin_to_track_map[ipin][ioff][iside][iconn];
pin_counter =
track_to_pin_lookup[itrack][ioff][iside].nelem;
track_to_pin_lookup[itrack][ioff][iside].list[pin_counter] =
ipin;
track_to_pin_lookup[itrack][ioff][iside].nelem++;
}
}
}
}
return track_to_pin_lookup;
}
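/* Illustrative sketch (not compiled into the build): the count/allocate/fill
 * idiom used by alloc_and_load_track_to_pin_lookup() above, reduced to
 * inverting a single pin -> track map into a track -> pin map. Names are
 * hypothetical. Note the same trick of zeroing nelem after allocation so
 * the fill pass can reuse it as a write cursor. */
#if 0
static struct s_ivec *sketch_invert_map(int **pin_to_track, int num_pins,
int Fc, int nodes_per_chan) {
struct s_ivec *track_to_pin;
int ipin, iconn, itrack;
track_to_pin = (struct s_ivec *) my_calloc(nodes_per_chan,
sizeof(struct s_ivec));
/* Pass 1: count the fanout of each track. */
for (ipin = 0; ipin < num_pins; ipin++)
for (iconn = 0; iconn < Fc; iconn++)
track_to_pin[pin_to_track[ipin][iconn]].nelem++;
/* Pass 2: allocate exactly-sized lists and reset the counters. */
for (itrack = 0; itrack < nodes_per_chan; itrack++) {
if (track_to_pin[itrack].nelem > 0) {
track_to_pin[itrack].list = (int *) my_malloc(
track_to_pin[itrack].nelem * sizeof(int));
track_to_pin[itrack].nelem = 0;
}
}
/* Pass 3: fill, using nelem as the next free slot. */
for (ipin = 0; ipin < num_pins; ipin++)
for (iconn = 0; iconn < Fc; iconn++) {
itrack = pin_to_track[ipin][iconn];
track_to_pin[itrack].list[track_to_pin[itrack].nelem++] = ipin;
}
return track_to_pin;
}
#endif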
/* A utility routine to dump the contents of the routing resource graph *
* (everything -- connectivity, occupancy, cost, etc.) into a file. Used *
* only for debugging. */
void dump_rr_graph(INP const char *file_name) {
int inode;
FILE *fp;
fp = my_fopen(file_name, "w", 0);
for (inode = 0; inode < num_rr_nodes; inode++) {
print_rr_node(fp, rr_node, inode);
fprintf(fp, "\n");
}
#if 0
fprintf(fp, "\n\n%d rr_indexed_data entries.\n\n", num_rr_indexed_data);
for (index = 0; index < num_rr_indexed_data; index++)
{
print_rr_indexed_data(fp, index);
fprintf(fp, "\n");
}
#endif
fclose(fp);
}
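/* Typical (hypothetical) debugging usage, e.g. right after the graph is
 * built:
 *
 *     dump_rr_graph("rr_graph_debug.echo");
 *
 * The file name is arbitrary; the output is one print_rr_node() record per
 * rr_node. */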
/* Prints all the data about node inode to file fp. */
void print_rr_node(FILE * fp, t_rr_node * L_rr_node, int inode) {
static const char *name_type[] = { "SOURCE", "SINK", "IPIN", "OPIN",
"CHANX", "CHANY", "INTRA_CLUSTER_EDGE" };
static const char *direction_name[] = { "OPEN", "INC_DIRECTION",
"DEC_DIRECTION", "BI_DIRECTION" };
static const char *drivers_name[] = { "OPEN", "MULTI_BUFFER", "SINGLE" };
t_rr_type rr_type;
int iconn;
rr_type = L_rr_node[inode].type;
/* Make sure we don't overrun const arrays */
assert((int)rr_type < (int)(sizeof(name_type) / sizeof(char *)));
assert(
(L_rr_node[inode].direction + 1) < (int)(sizeof(direction_name) / sizeof(char *)));
assert(
(L_rr_node[inode].drivers + 1) < (int)(sizeof(drivers_name) / sizeof(char *)));
fprintf(fp, "Node: %d %s ", inode, name_type[rr_type]);
if ((L_rr_node[inode].xlow == L_rr_node[inode].xhigh)
&& (L_rr_node[inode].ylow == L_rr_node[inode].yhigh)) {
fprintf(fp, "(%d, %d) ", L_rr_node[inode].xlow, L_rr_node[inode].ylow);
} else {
fprintf(fp, "(%d, %d) to (%d, %d) ", L_rr_node[inode].xlow,
L_rr_node[inode].ylow, L_rr_node[inode].xhigh,
L_rr_node[inode].yhigh);
}
fprintf(fp, "Ptc_num: %d ", L_rr_node[inode].ptc_num);
fprintf(fp, "Direction: %s ",
direction_name[L_rr_node[inode].direction + 1]);
fprintf(fp, "Drivers: %s ", drivers_name[L_rr_node[inode].drivers + 1]);
fprintf(fp, "\n");
fprintf(fp, "%d edge(s):", L_rr_node[inode].num_edges);
for (iconn = 0; iconn < L_rr_node[inode].num_edges; iconn++)
fprintf(fp, " %d", L_rr_node[inode].edges[iconn]);
fprintf(fp, "\n");
fprintf(fp, "Switch types:");
for (iconn = 0; iconn < L_rr_node[inode].num_edges; iconn++)
fprintf(fp, " %d", L_rr_node[inode].switches[iconn]);
fprintf(fp, "\n");
fprintf(fp, "Occ: %d Capacity: %d\n", L_rr_node[inode].occ,
L_rr_node[inode].capacity);
if (rr_type != INTRA_CLUSTER_EDGE) {
fprintf(fp, "R: %g C: %g\n", L_rr_node[inode].R, L_rr_node[inode].C);
}
fprintf(fp, "Cost_index: %d\n", L_rr_node[inode].cost_index);
}
/* Prints all the rr_indexed_data of index to file fp. */
void print_rr_indexed_data(FILE * fp, int index) {
fprintf(fp, "Index: %d\n", index);
fprintf(fp, "ortho_cost_index: %d ",
rr_indexed_data[index].ortho_cost_index);
fprintf(fp, "base_cost: %g ", rr_indexed_data[index].saved_base_cost);
fprintf(fp, "saved_base_cost: %g\n",
rr_indexed_data[index].saved_base_cost);
fprintf(fp, "Seg_index: %d ", rr_indexed_data[index].seg_index);
fprintf(fp, "inv_length: %g\n", rr_indexed_data[index].inv_length);
fprintf(fp, "T_linear: %g ", rr_indexed_data[index].T_linear);
fprintf(fp, "T_quadratic: %g ", rr_indexed_data[index].T_quadratic);
fprintf(fp, "C_load: %g\n", rr_indexed_data[index].C_load);
}
static void build_unidir_rr_opins(INP int i, INP int j,
INP struct s_grid_tile **L_grid, INP int **Fc_out,
INP int nodes_per_chan, INP t_seg_details * seg_details,
INOUTP int **Fc_xofs, INOUTP int **Fc_yofs,
INOUTP t_rr_node * L_rr_node, INOUTP boolean * L_rr_edge_done,
OUTP boolean * Fc_clipped, INP t_ivec *** L_rr_node_indices, INP int delayless_switch,
INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs) {
/* Builds all the edges from the OPINs of the block at (i, j) to the
* uni-directional channel segments they can drive, plus any direct
* clb-to-clb connections, on every side/offset of the block. */
t_type_ptr type;
int ipin, iclass, ofs, chan, seg, max_len, inode, max_Fc = -1;
enum e_side side;
t_rr_type chan_type;
t_linked_edge *edge_list = NULL, *next;
boolean clipped, vert, pos_dir;
int num_edges;
int **Fc_ofs;
*Fc_clipped = FALSE;
/* Only the base block of a set should use this function */
if (L_grid[i][j].offset > 0) {
return;
}
type = L_grid[i][j].type;
/* Currently, only two possible Fc values exist: 0 or default.
* Finding the max. value of Fc in block will result in the
* default value, which works for now. In the future, when
* the Fc values of all pins can vary, the max value will continue
* to work for matrix allocation purposes. However, all looping
* will have to be modified to account for pin-based Fc values. */
if (type->index > 0) {
max_Fc = 0;
for (ipin = 0; ipin < type->num_pins; ++ipin) {
iclass = type->pin_class[ipin];
if (Fc_out[type->index][ipin] > max_Fc && type->class_inf[iclass].type == DRIVER) {
max_Fc = Fc_out[type->index][ipin];
}
}
}
/* Go through each pin and find its fanout. */
for (ipin = 0; ipin < type->num_pins; ++ipin) {
/* Skip global pins and ipins */
iclass = type->pin_class[ipin];
if (type->class_inf[iclass].type != DRIVER) {
continue;
}
if (type->is_global_pin[ipin]) {
continue;
}
num_edges = 0;
edge_list = NULL;
if(Fc_out[type->index][ipin] != 0) {
for (ofs = 0; ofs < type->height; ++ofs) {
for (side = (enum e_side)0; side < 4; side = (enum e_side)(side + 1)) {
/* Can't do anything if pin isn't at this location */
if (0 == type->pinloc[ofs][side][ipin]) {
continue;
}
/* Figure out the chan seg at that side.
* side is the side of the logic or io block. */
vert = (boolean) ((side == TOP) || (side == BOTTOM));
/* mrFPGA */
if (is_stack) {
vert = (boolean)(!vert);
}
/* END */
pos_dir = (boolean) ((side == TOP) || (side == RIGHT));
/* mrFPGA */
//chan_type = (vert ? CHANX : CHANY);
chan_type = ((is_stack ? !vert : vert) ? CHANX : CHANY);
/* END */
chan = (vert ? (j + ofs) : i);
seg = (vert ? i : (j + ofs));
max_len = (vert ? nx : ny);
Fc_ofs = (vert ? Fc_xofs : Fc_yofs);
if (FALSE == pos_dir) {
/* mrFPGA */
if (is_stack) {
--seg;
/* END */
} else {
--chan;
}
}
/* mrFPGA */
/* Skip the location if there is no channel. */
if (chan < (is_stack ? 1:0)) {
continue;
}
if (seg < (is_stack ? 0:1)) {
continue;
}
/* END */
if (seg > (vert ? nx : ny)) {
continue;
}
if (chan > (vert ? ny : nx)) {
continue;
}
/* Get the list of opin to mux connections for that chan seg. */
num_edges += get_unidir_opin_connections(chan, seg,
max_Fc, chan_type, seg_details, &edge_list,
Fc_ofs, L_rr_edge_done, max_len, nodes_per_chan,
L_rr_node_indices, &clipped);
if (clipped) {
*Fc_clipped = TRUE;
}
}
}
}
/* Add in direct connections */
num_edges += get_opin_direct_connecions(i, j, ipin, &edge_list, L_rr_node_indices, delayless_switch, directs, num_directs, clb_to_clb_directs);
/* Add the edges */
inode = get_rr_node_index(i, j, OPIN, ipin, L_rr_node_indices);
alloc_and_load_edges_and_switches(rr_node, inode, num_edges,
L_rr_edge_done, edge_list);
while (edge_list != NULL) {
next = edge_list->next;
free(edge_list);
edge_list = next;
}
}
}
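/* Illustrative sketch (not compiled into the build): the side -> channel
 * mapping used in build_unidir_rr_opins() above, for the plain (is_stack ==
 * FALSE) case. A pin on the TOP or BOTTOM of a block at (i, j + ofs) faces
 * an x-directed channel (CHANX); a pin on the LEFT or RIGHT faces CHANY.
 * Pins on the negative sides (BOTTOM, LEFT) face the channel whose
 * coordinate is one lower. */
#if 0
static void sketch_opin_chan_coords(int i, int j, int ofs, enum e_side side,
t_rr_type * chan_type, int *chan, int *seg) {
boolean vert = (boolean) ((side == TOP) || (side == BOTTOM));
boolean pos_dir = (boolean) ((side == TOP) || (side == RIGHT));
*chan_type = (vert ? CHANX : CHANY);
*chan = (vert ? (j + ofs) : i); /* which channel */
*seg = (vert ? i : (j + ofs)); /* position along that channel */
if (FALSE == pos_dir) {
--(*chan); /* BOTTOM/LEFT pins face the lower-numbered channel */
}
}
#endif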
#if 0
static void
load_uniform_opin_switch_pattern_paired(INP int *Fc_out,
INP int num_pins,
INP int *pins_in_chan_seg,
INP int num_wire_inc_muxes,
INP int num_wire_dec_muxes,
INP int *wire_inc_muxes,
INP int *wire_dec_muxes,
INOUTP t_rr_node * L_rr_node,
INOUTP boolean * L_rr_edge_done,
INP t_seg_details * seg_details,
OUTP boolean * Fc_clipped)
{
/* Directionality is assumed to be uni-directional */
/* Make turn-based assignment to avoid overlap when Fc_output is low. This is a bipartite
* matching problem. Out of "num_wire_muxes" muxes, "Fc_output" of them are assigned
* to each outpin (total "num_pins" of them); assignment is uniform (evenly spread out)
* and staggered to avoid overlap when Fc_output is low. */
/* The natural order in which wire muxes are stored in wire_muxes already alternates in
* directionality (by my implementation), so no need to do anything extra to avoid directional bias */
/* TODO: Due to spacing, it's possible to have directional bias: all Fc_out wires connected
* to one opin go in either INC or DEC -> whereas I want a mix of both.
* SOLUTION: Use quantization of 2 to ensure that if an opin connects to one wire, it
* must also connect to its companion wire, which runs in the opposite direction. This
* means instead of having num_wire_muxes as the matching set, pick out the INC wires
* in num_wires_muxes as the matching set (the DEC wires are their companions) April 17, 2007
* NEWS: That solution does not work, as treating wires in groups will lead to serious
* abnormal patterns (conns crossing multiple blocks) for W not quantized to multiples of 2L.
* So, I'm changing that approach to a new one that avoids directional bias: I will separate
* the INC muxes and DEC muxes into two sets. Each set is uniformly assigned to opins with
* Fc_output/2; this should be identical to before for normal cases and contains all conns
* in the same chan segment for the nonquantized cases. */
/* Finally, I separated the two approaches: 1. Take all wire muxes and assign them to opins
* one at a time (load_uniform_opin_switch_pattern) 2. Take pairs (by companion)
* of wire muxes and assign them to opins a pair at a time (load_uniform_opin_switch_pattern_paired).
* The first is used for fringe channel segments (ends of channels, where
* there are lots of muxes due to partial wire segments) and the second is used in the core */
/* float spacing, step_size, f_mux; */
int ipin, iconn, num_edges, init_mux;
int from_node, to_node, to_track;
int xlow, ylow;
t_linked_edge *edge_list;
int *wire_muxes;
int k, num_wire_muxes, Fc_output_per_side, CurFc;
int count_inc, count_dec;
t_type_ptr type;
*Fc_clipped = FALSE;
count_inc = count_dec = 0;
for (ipin = 0; ipin < num_pins; ipin++)
{
from_node = pins_in_chan_seg[ipin];
xlow = L_rr_node[from_node].xlow;
ylow = L_rr_node[from_node].ylow;
type = grid[xlow][ylow].type;
edge_list = NULL;
num_edges = 0;
/* Assigning the INC muxes first, then DEC muxes */
for (k = 0; k < 2; ++k)
{
if (k == 0)
{
num_wire_muxes = num_wire_inc_muxes;
wire_muxes = wire_inc_muxes;
}
else
{
num_wire_muxes = num_wire_dec_muxes;
wire_muxes = wire_dec_muxes;
}
/* Half the Fc will be assigned for each direction. */
assert(Fc_out[type->index] % 2 == 0);
Fc_output_per_side = Fc_out[type->index] / 2;
/* Clip the demand. Make sure to use a new variable so
* on the second pass it is not clipped. */
CurFc = Fc_output_per_side;
if (Fc_output_per_side > num_wire_muxes)
{
*Fc_clipped = TRUE;
CurFc = num_wire_muxes;
}
if (k == 0)
{
init_mux = (count_inc) % num_wire_muxes;
count_inc += CurFc;
}
else
{
init_mux = (count_dec) % num_wire_muxes;
count_dec += CurFc;
}
for (iconn = 0; iconn < CurFc; iconn++)
{
/* FINALLY, make the outpin to mux connection */
/* Latest update: I'm not using Uniform Pattern, but a similarly staggered pattern */
to_node = wire_muxes[(init_mux + iconn) % num_wire_muxes];
L_rr_node[to_node].num_opin_drivers++; /* keep track of mux size */
to_track = L_rr_node[to_node].ptc_num;
if (FALSE == L_rr_edge_done[to_node])
{
/* Use of alloc_and_load_edges_and_switches
* must be accompanied by rr_edge_done check. */
L_rr_edge_done[to_node] = TRUE;
edge_list = insert_in_edge_list(edge_list, to_node,
seg_details[to_track].wire_switch);
num_edges++;
}
}
}
if (num_edges < 1)
{
vpr_printf(TIO_MESSAGE_ERROR, "opin %d at (%d,%d) does not connect to any tracks.\n",
L_rr_node[from_node].ptc_num, L_rr_node[from_node].xlow, L_rr_node[from_node].ylow);
exit(1);
}
alloc_and_load_edges_and_switches(L_rr_node, from_node, num_edges,
L_rr_edge_done, edge_list);
}
}
#endif
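/* Illustrative sketch (not compiled into the build): the turn-based,
 * staggered assignment described in the (disabled) routine above. Each
 * opin takes Fc consecutive muxes starting where the previous opin left
 * off, wrapping modulo the number of muxes, so small Fc values spread
 * across all muxes before any mux is reused. Names are hypothetical. */
#if 0
static void sketch_turn_based_assignment(int num_pins, int Fc,
int num_wire_muxes) {
int ipin, iconn, count = 0;
for (ipin = 0; ipin < num_pins; ipin++) {
int init_mux = count % num_wire_muxes;
for (iconn = 0; iconn < Fc; iconn++) {
printf("opin %d -> mux %d\n", ipin, (init_mux + iconn) % num_wire_muxes);
}
count += Fc;
}
}
#endif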
#ifdef MUX_SIZE_DIST_DISPLAY
/* This routine prints and dumps statistics on the mux sizes on an
 * sblock-by-sblock basis, over the entire chip. Mux sizes should be balanced (off by
 * at most 1) for all muxes in the same sblock in the core, and corner sblocks.
 * Fringe sblocks will have imbalance due to missing one side and constraints on
 * where wires must connect. Comparing two core sblocks, muxes need not
 * be balanced if W is not quantized to 2L multiples, for the reason that
 * there could be sblocks with different numbers of muxes but the same number of incoming
 * wires that need to make connections to these muxes (we don't want to under-connect
 * user-specified Fc and Fs). */
static void
view_mux_size_distribution(t_ivec *** L_rr_node_indices,
int nodes_per_chan,
t_seg_details * seg_details_x,
t_seg_details * seg_details_y)
{
int i, j, itrack, seg_num, chan_num, max_len;
int start, end, inode, max_value, min_value;
int array_count, k, num_muxes;
short direction, side;
float *percent_range_array;
float percent_range, percent_range_sum, avg_percent_range;
float std_dev_percent_range, deviation_f;
int range, *range_array, global_max_range;
float avg_range, range_sum, std_dev_range;
t_seg_details *seg_details;
t_mux *new_mux, *sblock_mux_list_head, *current, *next;
#ifdef ENABLE_DUMP
FILE *dump_file_per_sblock, *dump_file;
#endif /* ENABLE_DUMP */
t_mux_size_distribution *distr_list, *distr_current, *new_distribution,
*distr_next;
#ifdef ENABLE_DUMP
dump_file = my_fopen("mux_size_dump.txt", "w", 0);
dump_file_per_sblock = my_fopen("mux_size_per_sblock_dump.txt", "w", 0);
#endif /* ENABLE_DUMP */
sblock_mux_list_head = NULL;
percent_range_array =
(float *)my_malloc((nx - 1) * (ny - 1) * sizeof(float));
range_array = (int *)my_malloc((nx - 1) * (ny - 1) * sizeof(int));
array_count = 0;
percent_range_sum = 0.0;
range_sum = 0.0;
global_max_range = 0;
min_value = 0;
max_value = 0;
seg_num = 0;
chan_num = 0;
direction = 0;
seg_details = 0;
max_len = 0;
distr_list = NULL;
/* With the specified range, I'm only looking at core sblocks */
for (j = (ny - 1); j > 0; j--)
{
for (i = 1; i < nx; i++)
{
num_muxes = 0;
for (side = 0; side < 4; side++)
{
switch (side)
{
case LEFT:
seg_num = i;
chan_num = j;
direction = DEC_DIRECTION; /* only DEC have muxes in that sblock */
seg_details = seg_details_x;
max_len = nx;
break;
case RIGHT:
seg_num = i + 1;
chan_num = j;
direction = INC_DIRECTION;
seg_details = seg_details_x;
max_len = nx;
break;
case TOP:
seg_num = j + 1;
chan_num = i;
direction = INC_DIRECTION;
seg_details = seg_details_y;
max_len = ny;
break;
case BOTTOM:
seg_num = j;
chan_num = i;
direction = DEC_DIRECTION;
seg_details = seg_details_y;
max_len = ny;
break;
default:
assert(FALSE);
}
assert(nodes_per_chan > 0);
for (itrack = 0; itrack < nodes_per_chan; itrack++)
{
start = get_seg_start(seg_details, itrack, seg_num, chan_num);
end = get_seg_end(seg_details, itrack, start, chan_num, max_len);
if ((seg_details[itrack].direction == direction)
&& (((start == seg_num) && (direction == INC_DIRECTION))
|| ((end == seg_num) && (direction == DEC_DIRECTION))))
{ /* mux found */
num_muxes++;
if (side == LEFT || side == RIGHT)
{ /* CHANX */
inode = get_rr_node_index(seg_num, chan_num,
CHANX, itrack, L_rr_node_indices);
}
else
{ /* CHANY */
assert((side == TOP) || (side == BOTTOM));
inode = get_rr_node_index(chan_num, seg_num,
CHANY, itrack, L_rr_node_indices);
}
new_mux = (t_mux *) my_malloc(sizeof(t_mux));
new_mux->size = rr_node[inode].num_wire_drivers
+ rr_node[inode].num_opin_drivers;
new_mux->next = NULL;
/* insert in linked list, descending */
if (sblock_mux_list_head == NULL)
{
/* first entry */
sblock_mux_list_head = new_mux;
}
else if (sblock_mux_list_head->size < new_mux->size)
{
/* insert before head */
new_mux->next = sblock_mux_list_head;
sblock_mux_list_head = new_mux;
}
else
{
/* insert after head */
current = sblock_mux_list_head;
next = current->next;
while ((next != NULL) && (next->size > new_mux->size))
{
current = next;
next = current->next;
}
if (next == NULL)
{
current->next = new_mux;
}
else
{
new_mux->next = current->next;
current->next = new_mux;
}
}
/* end of insert in linked list */
}
}
} /* end of mux searching over all four sides of sblock */
/* now sblock_mux_list_head holds a linked list of all muxes in this sblock */
current = sblock_mux_list_head;
#ifdef ENABLE_DUMP
fprintf(dump_file_per_sblock,
"sblock at (%d, %d) has mux sizes: {", i, j);
#endif /* ENABLE_DUMP */
if (current != NULL)
{
max_value = min_value = current->size;
}
while (current != NULL)
{
if (max_value < current->size)
max_value = current->size;
if (min_value > current->size)
min_value = current->size;
#ifdef ENABLE_DUMP
fprintf(dump_file_per_sblock, "%d ",
current->size);
fprintf(dump_file, "%d\n", current->size);
#endif /* ENABLE_DUMP */
current = current->next;
}
#ifdef ENABLE_DUMP
fprintf(dump_file_per_sblock, "}\n\tmax: %d\tmin:%d",
max_value, min_value);
#endif /* ENABLE_DUMP */
range = max_value - min_value;
percent_range = ((float)range) / ((float)min_value);
if (global_max_range < range)
global_max_range = range;
#ifdef ENABLE_DUMP
fprintf(dump_file_per_sblock,
"\t\trange: %d\t\tpercent range:%.2f\n",
range, percent_range);
#endif /* ENABLE_DUMP */
percent_range_array[array_count] = percent_range;
range_array[array_count] = range;
percent_range_sum += percent_range;
range_sum += range;
array_count++;
/* I will use a distribution for each (core) sblock type.
* There is more than one type of sblock
* when quantization of W to 2L multiples is not observed. */
distr_current = distr_list;
while (distr_current != NULL
&& distr_current->mux_count != num_muxes)
{
distr_current = distr_current->next;
}
if (distr_current == NULL)
{
/* Create a distribution for the new sblock type,
* and put it as head of linked list by convention */
new_distribution = (t_mux_size_distribution *)
my_malloc(sizeof(t_mux_size_distribution));
new_distribution->mux_count = num_muxes;
new_distribution->max_index = max_value;
new_distribution->distr = (int *) my_calloc(max_value + 1, sizeof(int));
/* filling in the distribution */
current = sblock_mux_list_head;
while (current != NULL)
{
assert(current->size <=
new_distribution->max_index);
new_distribution->distr[current->size]++;
current = current->next;
}
/* add it to head */
new_distribution->next = distr_list;
distr_list = new_distribution;
}
else
{
/* distr_current->mux_count == num_muxes so add this sblock's mux sizes in this distribution */
current = sblock_mux_list_head;
while (current != NULL)
{
if (current->size > distr_current->max_index)
{
/* realloc to expand the distribution array to hold the new large-valued data */
distr_current->distr = my_realloc(distr_current->distr,
(current->size + 1) * sizeof(int));
/* initialize the newly allocated elements */
for (k = (distr_current->max_index + 1); k <= current->size; k++)
distr_current->distr[k] = 0;
distr_current->max_index = current->size;
distr_current->distr[current->size]++;
}
else
{
distr_current->distr[current->size]++;
}
current = current->next;
}
}
/* done - now free memory */
current = sblock_mux_list_head;
while (current != NULL)
{
next = current->next;
free(current);
current = next;
}
sblock_mux_list_head = NULL;
}
}
avg_percent_range = (float)percent_range_sum / array_count;
avg_range = (float)range_sum / array_count;
percent_range_sum = 0.0;
range_sum = 0.0;
for (k = 0; k < array_count; k++)
{
deviation_f = (percent_range_array[k] - avg_percent_range);
percent_range_sum += deviation_f * deviation_f;
deviation_f = ((float)range_array[k] - avg_range);
range_sum += deviation_f * deviation_f;
}
std_dev_percent_range =
sqrt(percent_range_sum / ((float)array_count - 1.0));
std_dev_range = sqrt(range_sum / ((float)array_count - 1.0));
vpr_printf(TIO_MESSAGE_INFO, "==== MUX size statistics ====\n");
vpr_printf(TIO_MESSAGE_INFO, "Max range of mux size within a sblock: %d\n", global_max_range);
vpr_printf(TIO_MESSAGE_INFO, "Average range of mux size within a sblock: %.2f\n", avg_range);
vpr_printf(TIO_MESSAGE_INFO, "Std dev of range of mux size within a sblock: %.2f\n", std_dev_range);
vpr_printf(TIO_MESSAGE_INFO, "Average percent range of mux size within a sblock: %.2f%%\n", avg_percent_range * 100.0);
vpr_printf(TIO_MESSAGE_INFO, "Std dev of percent range of mux size within a sblock: %.2f%%\n", std_dev_percent_range * 100.0);
vpr_printf(TIO_MESSAGE_INFO, " -- Detailed MUX size distribution by sblock type -- \n");
distr_current = distr_list;
while (distr_current != NULL)
{
print_distribution(stdout, distr_current);
/* free */
distr_next = distr_current->next;
free(distr_current->distr);
free(distr_current);
distr_current = distr_next;
}
free(percent_range_array);
free(range_array);
#ifdef ENABLE_DUMP
fclose(dump_file_per_sblock);
fclose(dump_file);
#endif /* ENABLE_DUMP */
}
static void
print_distribution(FILE * fptr,
t_mux_size_distribution * distr_struct)
{
int *distr;
int k;
float sum;
boolean zeros;
distr = distr_struct->distr;
fprintf(fptr,
"For Sblocks containing %d MUXes, the MUX size distribution is:\n",
distr_struct->mux_count);
fprintf(fptr, "\t\t\tSize\t\t\tFrequency (percent)\n");
sum = 0.0;
for (k = 0; k <= distr_struct->max_index; k++)
sum += distr[k];
zeros = TRUE;
for (k = 0; k <= distr_struct->max_index; k++)
{
if (zeros && (distr[k] == 0))
{
/* do nothing for leading string of zeros */
}
else
{
zeros = FALSE; /* leading string of zeros ended */
fprintf(fptr, "\t\t\t%d\t\t\t%d (%.2f%%)\n", k, distr[k],
(float)distr[k] / sum * 100.0);
}
}
fprintf(fptr, "\nEnd of this Sblock MUX size distribution.\n");
}
#endif
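/* Illustrative sketch (not compiled into the build): the statistics printed
 * by view_mux_size_distribution() above are the arithmetic mean and the
 * sample standard deviation, s = sqrt(sum((x_i - mean)^2) / (n - 1)),
 * computed over one range value per core sblock. */
#if 0
static float sketch_sample_std_dev(const float *x, int n, float *mean_out) {
float sum = 0.0, dev, sq_sum = 0.0;
int k;
for (k = 0; k < n; k++) {
sum += x[k];
}
*mean_out = sum / (float) n;
for (k = 0; k < n; k++) {
dev = x[k] - *mean_out;
sq_sum += dev * dev;
}
return sqrt(sq_sum / ((float) n - 1.0)); /* requires n >= 2 */
}
#endif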
/**
* Parse out which CLB pins should connect directly to which other CLB pins, then store that in a clb_to_clb_directs data structure
* This data structure supplements the info in the "directs" data structure
* TODO: The function that does this parsing in placement is poorly done because it lacks generality on heterogeneity; should replace it with this one
static t_clb_to_clb_directs * alloc_and_load_clb_to_clb_directs(INP t_direct_inf *directs, INP int num_directs) {
int i, j;
t_clb_to_clb_directs *clb_to_clb_directs;
char *pb_type_name, *port_name;
int start_pin_index, end_pin_index;
t_pb_type *pb_type;
clb_to_clb_directs = (t_clb_to_clb_directs*)my_calloc(num_directs, sizeof(t_clb_to_clb_directs));
pb_type_name = NULL;
port_name = NULL;
for(i = 0; i < num_directs; i++) {
pb_type_name = (char*)my_malloc((strlen(directs[i].from_pin) + strlen(directs[i].to_pin)) * sizeof(char));
port_name = (char*)my_malloc((strlen(directs[i].from_pin) + strlen(directs[i].to_pin)) * sizeof(char));
// Load from pins
// Parse out the pb_type name, port name, and pin range
parse_direct_pin_name(directs[i].from_pin, directs[i].line, &start_pin_index, &end_pin_index, pb_type_name, port_name);
// Figure out which type, port, and pin is used
for(j = 0; j < num_types; j++) {
if(strcmp(type_descriptors[j].name, pb_type_name) == 0) {
break;
}
}
assert(j < num_types);
clb_to_clb_directs[i].from_clb_type = &type_descriptors[j];
pb_type = clb_to_clb_directs[i].from_clb_type->pb_type;
for(j = 0; j < pb_type->num_ports; j++) {
if(strcmp(pb_type->ports[j].name, port_name) == 0) {
break;
}
}
assert(j < pb_type->num_ports);
if(start_pin_index == OPEN) {
assert(start_pin_index == end_pin_index);
start_pin_index = 0;
end_pin_index = pb_type->ports[j].num_pins - 1;
}
get_blk_pin_from_port_pin(clb_to_clb_directs[i].from_clb_type->index, j, start_pin_index, &clb_to_clb_directs[i].from_clb_pin_start_index);
get_blk_pin_from_port_pin(clb_to_clb_directs[i].from_clb_type->index, j, end_pin_index, &clb_to_clb_directs[i].from_clb_pin_end_index);
// Load to pins
// Parse out the pb_type name, port name, and pin range
parse_direct_pin_name(directs[i].to_pin, directs[i].line, &start_pin_index, &end_pin_index, pb_type_name, port_name);
// Figure out which type, port, and pin is used
for(j = 0; j < num_types; j++) {
if(strcmp(type_descriptors[j].name, pb_type_name) == 0) {
break;
}
}
assert(j < num_types);
clb_to_clb_directs[i].to_clb_type = &type_descriptors[j];
pb_type = clb_to_clb_directs[i].to_clb_type->pb_type;
for(j = 0; j < pb_type->num_ports; j++) {
if(strcmp(pb_type->ports[j].name, port_name) == 0) {
break;
}
}
assert(j < pb_type->num_ports);
if(start_pin_index == OPEN) {
assert(start_pin_index == end_pin_index);
start_pin_index = 0;
end_pin_index = pb_type->ports[j].num_pins - 1;
}
get_blk_pin_from_port_pin(clb_to_clb_directs[i].to_clb_type->index, j, start_pin_index, &clb_to_clb_directs[i].to_clb_pin_start_index);
get_blk_pin_from_port_pin(clb_to_clb_directs[i].to_clb_type->index, j, end_pin_index, &clb_to_clb_directs[i].to_clb_pin_end_index);
if(abs(clb_to_clb_directs[i].from_clb_pin_start_index - clb_to_clb_directs[i].from_clb_pin_end_index) != abs(clb_to_clb_directs[i].to_clb_pin_start_index - clb_to_clb_directs[i].to_clb_pin_end_index)) {
vpr_printf(TIO_MESSAGE_ERROR, "[LINE %d] Range mismatch from %s to %s.\n", directs[i].line, directs[i].from_pin, directs[i].to_pin);
exit(1);
}
free(pb_type_name);
free(port_name);
}
return clb_to_clb_directs;
}
*/
/* Add all direct clb-pin-to-clb-pin edges to given opin */
static int get_opin_direct_connecions(int x, int y, int opin, INOUTP t_linked_edge ** edge_list_ptr, INP t_ivec *** L_rr_node_indices,
INP int delayless_switch, INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs) {
t_type_ptr type;
int grid_ofs;
int i, ipin, inode;
t_linked_edge *edge_list_head;
int max_index, min_index, offset, swap;
int new_edges;
type = grid[x][y].type;
edge_list_head = *edge_list_ptr;
new_edges = 0;
/* Iterate through all direct connections */
for(i = 0; i < num_directs; i++) {
/* Find matching direct clb-to-clb connections with the same type as current grid location */
if(clb_to_clb_directs[i].from_clb_type == type) {
/* Compute index of opin with regards to given pins */
if(clb_to_clb_directs[i].from_clb_pin_start_index > clb_to_clb_directs[i].from_clb_pin_end_index) {
swap = TRUE;
max_index = clb_to_clb_directs[i].from_clb_pin_start_index;
min_index = clb_to_clb_directs[i].from_clb_pin_end_index;
} else {
swap = FALSE;
min_index = clb_to_clb_directs[i].from_clb_pin_start_index;
max_index = clb_to_clb_directs[i].from_clb_pin_end_index;
}
if(max_index >= opin && min_index <= opin) {
offset = opin - min_index;
/* This opin is specified to connect directly to an ipin, now compute which ipin to connect to */
if(x + directs[i].x_offset < nx + 1 &&
x + directs[i].x_offset > 0 &&
y + directs[i].y_offset < ny + 1 &&
y + directs[i].y_offset > 0) {
ipin = OPEN;
if(clb_to_clb_directs[i].to_clb_pin_start_index > clb_to_clb_directs[i].to_clb_pin_end_index) {
if(swap == TRUE) {
ipin = clb_to_clb_directs[i].to_clb_pin_end_index + offset;
} else {
ipin = clb_to_clb_directs[i].to_clb_pin_start_index - offset;
}
} else {
if(swap == TRUE) {
ipin = clb_to_clb_directs[i].to_clb_pin_end_index - offset;
} else {
ipin = clb_to_clb_directs[i].to_clb_pin_start_index + offset;
}
}
/* Add new ipin edge to list of edges */
grid_ofs = grid[x + directs[i].x_offset][y + directs[i].y_offset].offset;
inode = get_rr_node_index(x + directs[i].x_offset, y + directs[i].y_offset - grid_ofs, IPIN, ipin, L_rr_node_indices);
edge_list_head = insert_in_edge_list(edge_list_head, inode, delayless_switch);
new_edges++;
}
}
}
}
*edge_list_ptr = edge_list_head;
return new_edges;
}
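/* Illustrative sketch (not compiled into the build): the range-mapping
 * arithmetic above. A direct such as from_pin[8:5] -> to_pin[0:3] pairs
 * pins elementwise (8->0, 7->1, 6->2, 5->3). The from range is descending,
 * so swap == TRUE and offset is measured from the low end; the to range is
 * ascending, so the matching ipin counts down from the end index. A
 * hypothetical stand-alone version: */
#if 0
static int sketch_map_direct_pin(int opin) {
int from_start = 8, from_end = 5; /* descending from range */
int to_start = 0, to_end = 3; /* ascending to range */
int swap = (from_start > from_end);
int min_index = swap ? from_end : from_start;
int offset = opin - min_index; /* opin == 6 -> offset == 1 */
if (to_start > to_end) { /* to range descending */
return swap ? (to_end + offset) : (to_start - offset);
} else { /* to range ascending */
return swap ? (to_end - offset) : (to_start + offset);
}
/* opin 6 -> ipin 2 */
}
#endif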