OpenFPGA/vpr7_x2p/vpr/SRC/place/place_macro.c

/****************************************************************************************
  Y.G.THIEN
  29 AUG 2012

    This file contains functions related to placement macros. The term "placement macros"
  refers to a structure that contains information on blocks that need special treatment
  during placement and possibly routing. 
  
    An example of placement macros is a carry chain. Blocks in a carry chain have to be 
  placed in a specific orientation or relative placement so that the carry_in's and the 
  carry_out's are properly aligned. With that, the carry chains would be able to use the 
  direct connections specified in the arch file. Direct connections with the pin's 
  fc_value 0 would be treated specially in routing where the whole carry chain would be
  treated as a unit and regular routing would not be used to connect the carry_in's and 
  carry_out's. Floorplanning constraints may also be an example of placement macros.

    The function alloc_and_load_placement_macros allocates and loads the placement 
  macros in the following steps:
  (1) First, go through all the block types and mark down the pins that could possibly 
      be part of a placement macro. 
  (2) Then, go through the netlist of all the pins marked in (1) to find out all the 
      heads of the placement macros using criteria depending on the type of placement 
	  macros. For carry chains, the heads of the placement macros are blocks with 
	  carry_in's not connected to any nets (OPEN) while the carry_out's connected to the 
	  netlist with only 1 SINK.
  (3) Traverse from the heads to the tails of the placement macros and load the 
      information in the t_pl_macro data structure. Similar to (2), tails are identified 
	  with criteria depending on the type of placement macros. For carry chains, the 
	  tails are blocks with carry_out's not connected to any nets (OPEN) while the 
	  carry_in's are connected to the netlist which has only 1 SINK.

    The only placement macros supported at the moment are the carry chains with limited
  functionality. 
    
	Current support for placement macros are:
  (1) The arch parser for direct connections is working. The specifications of the direct
      connections are specified in sample_adder_arch.xml and also in the 
	  VPR_User_Manual.doc
  (2) The placement macros allocator and loader is working.
  (3) The initial placement of placement macros that respects the restrictions of the 
      placement macros is working.
  (4) The post-placement legality check for placement macros is working.
    
	Current limitations on placement macros are:
  (1) A block can only be part of one carry chain. In the future, if a block is part
      of multiple placement macros, we should load 1 huge placement macro instead of 
	  multiple placement macros that contain the same block.
  (2) Bus direct connections (direct connections with multiple bits) are supported. 
      However, a 2-bit carry chain when loaded would become 2 1-bit carry chains.
	  And because of (1), only 1 1-bit carry chain would be loaded. In the future, 
	  placement macros with multiple-bit connections or multiple 1-bit connections 
	  should be allowed.
  (3) Placement macros that span longer or wider than the chip would cause an error. 
      In the future, we *might* expand the size of the chip to accommodate such 
	  placement macros that are crucial.

    In order for the carry chain support to work, two changes are required in the 
  arch file. 
  (1) For carry chain support, added in a new child in <layout> called <directlist>. 
      <directlist> specifies a list of available direct connections on the FPGA chip 
	  that are necessary for direct carry chain connections. These direct connections 
	  would be treated specially in routing if the fc_value for the pins is specified 
	  as 0. Note that only direct connections that have fc_value 0 could be used as a 
	  carry chain.
    
      A <directlist> may have 0 or more children called <direct>. For each <direct>, 
	  there are the following fields:
        1) name:  This specifies the name given to this particular direct connection.
        2) from_pin:  This specifies the SOURCEs for this direct connection. The format 
		              could be as following:
                       a) type_name.port_name, for all the pins in this port.
                       b) type_name.port_name [end_pin_index:start_pin_index], for a 
					      single pin, the end_pin_index and start_pin_index could be 
						  the same.
        3) to_pin:  This specifies the SINKs for this direct connection. The format is 
		            the same as from_pin. 
                    Note that the width of the from_pin and to_pin has to match.
        4) x_offset: This specifies the x direction that this connection is going from 
		             SOURCEs to SINKs.
        5) y_offset: This specifies the y direction that this connection is going from 
		             SOURCEs to SINKs. 
                     Note that the x_offset and y_offset could not both be 0.
        6) z_offset: This specifies the z sublocations that all the blocks in this 
		             direct connection to be at.
    
      The example of a direct connection specification below shows a possible carry chain 
	  connection going north on the FPGA chip:
       _______________________________________________________________________________
      | <directlist>                                                                  |
      |   <direct name="adder_carry" from_pin="adder.cout" to_pin="adder.cin"         |
	  |           x_offset="0" y_offset="1" z_offset="0"/>                            |
      | </directlist>                                                                 |
      |_______________________________________________________________________________|
	  A corresponding arch file that has this direct connection is sample_adder_arch.xml
      A corresponding blif file that uses this direct connection is adder.blif

  (2) As mentioned in (1), carry chain connections using the directs would only be 
      recognized if the pin's fc_value is 0. In order to achieve this, pin-based fc_value
	  is required. Hence, the new <fc> tag replaces both <fc_in> and <fc_out> tags.
	  
	  A <fc> tag may have 0 or more children called <pin>. For each <fc>, there are the 
	  following fields:
	    1) default_in_type: This specifies the default fc_type for input pins. They could
		                    be "frac", "abs" or "full".
		2) default_in_val: This specifies the default fc_value for input pins.
		3) default_out_type: This specifies the default fc_type for output pins. They could
		                     be "frac", "abs" or "full".
		4) default_out_val: This specifies the default fc_value for output pins.

	  As for the <pin> children, there are the following fields:
	    1) name: This specifies the name of the port/pin that the fc_type and fc_value 
		         apply to. The name has to be in the format "port_name" or 
				 "port_name [end_pin_index:start_pin_index]" where port_name is the name
				 of the port it applies to while end_pin_index and start_pin_index could
				 be specified to apply the fc_type and fc_value that follows to part of
				 a bus (multi-pin) port.
	    2) fc_type: This specifies the fc_type that would be applied to the specified pins.
		3) fc_val: This specifies the fc_value that would be applied to the specified pins.

	  The example of a pin-based fc_value specification below shows that the fc_values for
	  the cout and the cin ports are 0:
	   _______________________________________________________________________________
      | <fc default_in_type="frac" default_in_val="0.15" default_out_type="frac"      |
	  |     default_out_val="0.15">                                                   |
      |    <pin name="cin" fc_type="frac" fc_val="0"/>                                |
      |    <pin name="cout" fc_type="frac" fc_val="0"/>                               |
      | </fc>                                                                         |
	  |_______________________________________________________________________________|
      A corresponding arch file that has this pin-based fc specification is sample_adder_arch.xml
      A corresponding blif file that uses this direct connection is adder.blif

****************************************************************************************/


#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <assert.h>
#include "util.h"
#include "vpr_types.h"
#include "physical_types.h"
#include "globals.h"
#include "place.h"
#include "read_xml_arch_file.h"
#include "ReadOptions.h"
#include "place_macro.h"
#include "vpr_utils.h"


/******************** File-scope variables declarations **********************/

/* f_idirect_from_blk_pin array allows us to quickly find pins that could be in a   *
 * direct connection. The value stored is the index of the possible direct          *
 * connection as specified in the arch file; OPEN (-1) is stored for pins that      *
 * could not be part of a direct chain connection.                                  *
 * [0...num_types-1][0...num_pins-1]                                                */
static int ** f_idirect_from_blk_pin = NULL;

/* f_direct_type_from_blk_pin array stores the value SOURCE if the pin is the       *
 * from_pin, SINK if the pin is the to_pin in the direct connection as specified in *
 * the arch file; OPEN (-1) is stored for pins that could not be part of a direct   *
 * chain connection.                                                                *
 * [0...num_types-1][0...num_pins-1]                                                */
static int ** f_direct_type_from_blk_pin = NULL;

/* f_imacro_from_iblk maps a blk_num to the corresponding macro index.              *
 * If the block is not part of a macro, the value OPEN (-1) is stored.              *
 * The array is built lazily on the first call to get_imacro_from_iblk().           *
 * [0...num_blocks-1]                                                               */
static int * f_imacro_from_iblk = NULL;


/******************** Subroutine declarations ********************************/

/* Finds every placement macro in the netlist and records it in the caller-supplied *
 * temporary arrays; the number of macros found is written to *num_of_macro.        */
static void find_all_the_macro (int * num_of_macro, int * pl_macro_member_blk_num_of_this_blk, 
		int * pl_macro_idirect, int * pl_macro_num_members, int ** pl_macro_member_blk_num);

/* Frees the f_imacro_from_iblk lookup array and resets the pointer to NULL. */
static void free_imacro_from_iblk(void);

/* Allocates f_imacro_from_iblk and loads it from the given array of macros. */
static void alloc_and_load_imacro_from_iblk(t_pl_macro * macros, int num_macros);

/******************** Subroutine definitions *********************************/

static void find_all_the_macro (int * num_of_macro, int * pl_macro_member_blk_num_of_this_blk, 
		int * pl_macro_idirect, int * pl_macro_num_members, int ** pl_macro_member_blk_num) {

	/* Finds all the placement macros in the netlist.                        *
	 *                                                                       *
	 * Go through all the pins with possible direct connections in           *
	 * f_idirect_from_blk_pin. Count the number of heads (which is the same  *
	 * as the number of macros) and also the length of each macro.           *
	 * Head - blocks with to_pin OPEN and from_pin connected                 *
	 * Tail - blocks with to_pin connected and from_pin OPEN                 *
	 *                                                                       *
	 * Outputs (all indexed by macro number unless stated otherwise):        *
	 *   *num_of_macro       - number of macros found.                       *
	 *   pl_macro_idirect    - index of the direct connection of the macro.  *
	 *   pl_macro_num_members- number of member blocks; the entries are      *
	 *                         incremented here, so they must be 0 on entry. *
	 *   pl_macro_member_blk_num - per-macro array of member block indices,  *
	 *                         allocated here with my_calloc; the caller     *
	 *                         owns and frees each row.                      *
	 *   pl_macro_member_blk_num_of_this_blk - scratch buffer (indexed by    *
	 *                         member position) that must hold at least      *
	 *                         num_blocks entries.                           *
	 *                                                                       *
	 * The program exits with status 1 if a chain revisits a block           *
	 * (combinational loop).                                                 */

	int iblk, itype, from_iblk_pin, to_iblk_pin, from_inet, to_inet, from_idirect, to_idirect, 
			from_src_or_sink, to_src_or_sink;
	int next_iblk, next_inet, curr_inet;
	int num_blk_pins, num_macro; 
	int imember;

	num_macro = 0;
	for (iblk = 0; iblk < num_blocks; iblk++) {

		itype = block[iblk].type->index;
		num_blk_pins = block[iblk].type->num_pins;
		for (to_iblk_pin = 0; to_iblk_pin < num_blk_pins; to_iblk_pin++) {
			
			to_inet = block[iblk].nets[to_iblk_pin];
			to_idirect = f_idirect_from_blk_pin[itype][to_iblk_pin];
			to_src_or_sink = f_direct_type_from_blk_pin[itype][to_iblk_pin];
			
			// Find to_pins (SINKs) with possible direct connection but are not 
			// connected to any net (Possible head of macro)
			if ( to_src_or_sink == SINK && to_idirect != OPEN && to_inet == OPEN ) {

				for (from_iblk_pin = 0; from_iblk_pin < num_blk_pins; from_iblk_pin++) {
					from_inet = block[iblk].nets[from_iblk_pin];
					from_idirect = f_idirect_from_blk_pin[itype][from_iblk_pin];
					from_src_or_sink = f_direct_type_from_blk_pin[itype][from_iblk_pin];

					// Find from_pins with the same possible direct connection that are connected.
					// Confirmed head of macro
					if ( from_src_or_sink == SOURCE && to_idirect == from_idirect && from_inet != OPEN) {
						
						// Mark down that this is the first block in the macro
						pl_macro_member_blk_num_of_this_blk[0] = iblk;
						pl_macro_idirect[num_macro] = to_idirect;
						
						// Increment the num_member count.
						pl_macro_num_members[num_macro]++;
						
						// Also find out how many members are in the macros, 
						// there are at least 2 members - 1 head and 1 tail.
						
						// Initialize the variables
						next_inet = from_inet;
						next_iblk = iblk;

						// Start finding the other members
						while (next_inet != OPEN) {

							curr_inet = next_inet;
							
							// Assume that carry chains only has 1 sink - direct connection
							assert(clb_net[curr_inet].num_sinks == 1);
							next_iblk = clb_net[curr_inet].node_block[1];
							
							// Assume that the from_iblk_pin index is the same for the next block
							assert (f_idirect_from_blk_pin[block[next_iblk].type->index][from_iblk_pin] == from_idirect
									&& f_direct_type_from_blk_pin[block[next_iblk].type->index][from_iblk_pin] == SOURCE);
							next_inet = block[next_iblk].nets[from_iblk_pin];

							imember = pl_macro_num_members[num_macro];

							/* Xifan TANG: Should detect if there is a combinational loop inside.
							 * The check only looks at the imember entries already recorded, so it
							 * is done BEFORE storing the new block: when a loop closes after every
							 * block has been recorded, imember equals num_blocks and storing first
							 * would write one element past the end of the scratch buffer. */
							if (1 == spot_int_in_array(imember, pl_macro_member_blk_num_of_this_blk, next_iblk )) {
								vpr_printf(TIO_MESSAGE_INFO, "Find a combinational loop in macro placement! More info:\n");
								vpr_printf(TIO_MESSAGE_ERROR,"next_inet: %d, num_macro: %d, imember: %d, next_iblk: %d.\n", 
										next_inet, num_macro, imember, next_iblk);
								exit(1);
							}

							// Mark down this block as a member of the macro
							pl_macro_member_blk_num_of_this_blk[imember] = next_iblk;

							// Increment the num_member count.
							pl_macro_num_members[num_macro]++;

						} // Found all the members of this macro at this point

						// Allocate the second dimension of the blk_num array since I now know the size
						pl_macro_member_blk_num[num_macro] = 
								(int *) my_calloc (pl_macro_num_members[num_macro] , sizeof(int));
						// Copy the data from the temporary array to the newly allocated array.
						for (imember = 0; imember < pl_macro_num_members[num_macro]; imember ++)
							pl_macro_member_blk_num[num_macro][imember] = pl_macro_member_blk_num_of_this_blk[imember];

						// Increment the macro count
						num_macro ++;

					} // Do nothing if the from_pins does not have same possible direct connection.
				} // Finish going through all the pins for from_pins.
			} // Do nothing if the to_pins does not have same possible direct connection.
		} // Finish going through all the pins for to_pins.
	} // Finish going through all blocks.
	
	// Now, all the data is readily stored in the temporary data structures.
	*num_of_macro = num_macro;
}


int alloc_and_load_placement_macros(t_direct_inf* directs, int num_directs, t_pl_macro ** macros){
	
	/* Allocates and loads the placement macros and returns how many   *
	 * of them were found. The work is done in 2 steps:                *
	 *   1) Allocate temporary arrays sized for the worst case         *
	 *      (num_blocks entries) and let find_all_the_macro() fill     *
	 *      them while walking the netlist. This also yields the exact *
	 *      sizes needed for the final data.                           *
	 *   2) Allocate the t_pl_macro array with exactly that size, copy *
	 *      the data over and free the temporaries.                    *
	 *                                                                 *
	 * Only the first dimension of the member table is allocated up    *
	 * front; each row is allocated by find_all_the_macro() once the   *
	 * macro length is known. Allocating it fully would need           *
	 * num_blocks^2 entries (there are big VPR benchmarks that have    *
	 * num_blocks in the 100k's range).                                *
	 *                                                                 *
	 * The resulting array is handed back through *macros and is freed *
	 * by the caller(s).                                               */

	int macro_count = 0;
	int imacro, imember;
	int *member_counts, *head_idirect, **member_blks, *scratch_blks;
	t_pl_macro * loaded = NULL;

	/* Build the per-type pin lookup tables used while searching. */
	alloc_and_load_idirect_from_blk_pin(directs, num_directs, 
			&f_idirect_from_blk_pin, &f_direct_type_from_blk_pin);

	/* Worst-case sized, zero-initialized temporaries. */
	member_counts = (int *) my_calloc (num_blocks , sizeof(int));
	head_idirect = (int *) my_calloc (num_blocks , sizeof(int));
	member_blks = (int **) my_calloc (num_blocks , sizeof(int*));
	scratch_blks = (int *) my_calloc (num_blocks , sizeof(int));

	/* Step 1: discover the macros and their members. */
	find_all_the_macro (&macro_count, scratch_blks, 
			head_idirect, member_counts, member_blks);

	/* Step 2: build the exact-size result. */
	loaded = (t_pl_macro *) my_malloc (macro_count * sizeof(t_pl_macro));

	for (imacro = 0; imacro < macro_count; imacro++) {
		/* Every member of a macro uses the same direct connection. */
		const t_direct_inf * direct = &directs[head_idirect[imacro]];
		t_pl_macro * cur = &loaded[imacro];

		cur->num_blocks = member_counts[imacro];
		cur->members = (t_pl_macro_member *) my_malloc 
								(cur->num_blocks * sizeof(t_pl_macro_member));

		/* x/y offsets grow linearly with the position in the chain, *
		 * the z offset is the same for every member.                */
		for (imember = 0; imember < cur->num_blocks; imember++) {
			t_pl_macro_member * memb = &cur->members[imember];

			memb->x_offset = imember * direct->x_offset;
			memb->y_offset = imember * direct->y_offset;
			memb->z_offset = direct->z_offset;
			memb->blk_index = member_blks[imacro][imember];
		}
	}

	/* Release the temporaries; only rows 0..macro_count-1 were allocated. */
	free(member_counts);
	free(head_idirect);
	for (imacro = 0; imacro < macro_count; imacro++) {
		free(member_blks[imacro]);
	}
	free(member_blks);
	free(scratch_blks);
	
	/* Hand the macro array back by reference. */
	*macros = loaded;
	return (macro_count);

}

void get_imacro_from_iblk(int * imacro, int iblk, t_pl_macro * macros, int num_macros) {

	/* Looks up which placement macro (if any) the block iblk belongs to   *
	 * and stores the macro index in *imacro.                              *
	 *                                                                     *
	 * The answer comes from the file-scope table f_imacro_from_iblk       *
	 * [0...num_blocks-1], which exists purely to make this lookup fast.   *
	 * The table is built from macros/num_macros on the first call only;   *
	 * while it exists, those two arguments are not consulted again.       */

	if (NULL == f_imacro_from_iblk) {
		/* First use (or first use after the table was freed): build it. */
		alloc_and_load_imacro_from_iblk(macros, num_macros);
	}

	*imacro = f_imacro_from_iblk[iblk];

}

static void free_imacro_from_iblk(void) {

	/* Releases the f_imacro_from_iblk lookup table and marks it as   *
	 * unallocated, so that a later lookup rebuilds it.               *
	 *                                                                *
	 * Safe to call when the table was never built: free() on a NULL  *
	 * pointer does nothing.                                          */

	free(f_imacro_from_iblk);
	f_imacro_from_iblk = NULL;

}

static void alloc_and_load_imacro_from_iblk(t_pl_macro * macros, int num_macros) {

	/* Builds the block-index -> macro-index table and publishes it in  *
	 * the file-scope pointer f_imacro_from_iblk.                       *
	 *                                                                  *
	 * Blocks that belong to no macro map to OPEN (-1).                 *
	 * The table is released by free_imacro_from_iblk().                */

	int imacro, imember, iblk;
	int * lookup = (int *)my_malloc(num_blocks * sizeof(int));

	/* Start with "not in any macro" for every block. */
	for (iblk = 0; iblk < num_blocks; iblk++) {
		lookup[iblk] = OPEN;
	}

	/* Stamp each member block with the index of its macro. */
	for (imacro = 0; imacro < num_macros; imacro++) {
		const t_pl_macro * cur = &macros[imacro];

		for (imember = 0; imember < cur->num_blocks; imember++) {
			lookup[cur->members[imember].blk_index] = imacro;
		}
	}

	/* Publish only once the table is completely filled in. */
	f_imacro_from_iblk = lookup;
}

/* Frees one [type][pin] lookup table and resets the caller's pointer to NULL. *
 * Does nothing when the table is not allocated.                               *
 * Rows are freed starting from type index 1; row 0 is left untouched.         *
 * NOTE(review): presumably row 0 is never allocated by                        *
 * alloc_and_load_idirect_from_blk_pin() - confirm against that function.      */
static void free_blk_pin_lookup_table(int *** table_ptr) {
	int itype;
	int ** table = *table_ptr;

	if (table == NULL) {
		return;
	}

	for (itype = 1; itype < num_types; itype++) {
		free(table[itype]);
	}
	free(table);
	*table_ptr = NULL;
}

void free_placement_macros_structs(void) {

	/* Releases every file-scope data structure owned by this file:   *
	 * the two per-type pin lookup tables and the block -> macro map. *
	 * All three pointers are NULL afterwards.                        */

	free_blk_pin_lookup_table(&f_idirect_from_blk_pin);
	free_blk_pin_lookup_table(&f_direct_type_from_blk_pin);

	// This frees up the imacro from iblk mapping array.
	free_imacro_from_iblk();
	
}

/* Xifan TANG: Find the position of a blk in a macro.
 * Returns the index of the first member whose blk_index equals blk_idx,
 * or -1 when the block is not a member of the macro.
 */
int spot_blk_position_in_a_macro(t_pl_macro pl_macros,
                                 int blk_idx) {
  int pos = 0;

  while (pos < pl_macros.num_blocks) {
    if (pl_macros.members[pos].blk_index == blk_idx) {
      return pos;
    }
    pos++;
  }

  /* Walked the whole macro without a match */
  return -1;
}

/* Xifan TANG: Get the bounding box of a macro from the current x/y of its
 * member blocks. The largest coordinates are written to upper_x/upper_y and
 * the smallest to lower_x/lower_y.
 */
void get_start_end_points_one_macro(t_pl_macro pl_macro,
                                    int* upper_x, int* lower_x, 
                                    int* upper_y, int* lower_y) {
  int imemb;

  /* Start from -1 so the outputs are defined before any member is seen */
  (*upper_x) = -1;
  (*lower_x) = -1;
  (*upper_y) = -1;
  (*lower_y) = -1;

  for (imemb = 0; imemb < pl_macro.num_blocks; imemb++) {
    const int iblk = pl_macro.members[imemb].blk_index;
    const int blk_x = block[iblk].x;
    const int blk_y = block[iblk].y;

    /* The first member seeds all four bounds */
    if (0 == imemb) {
      (*upper_x) = blk_x;
      (*upper_y) = blk_y;
      (*lower_x) = blk_x;
      (*lower_y) = blk_y;
      continue;
    }

    /* Later members can only widen the box */
    if (blk_x > (*upper_x)) {
      (*upper_x) = blk_x;
    }
    if (blk_y > (*upper_y)) {
      (*upper_y) = blk_y;
    }
    if (blk_x < (*lower_x)) {
      (*lower_x) = blk_x;
    }
    if (blk_y < (*lower_y)) {
      (*lower_y) = blk_y;
    }
  }

  /* check: this is currently true, as carry chain is vertical
   * (single column, one block per row, no gaps);
   * maybe changed if a new carry chain style is applied */
  assert (((*upper_x) == (*lower_x))&&(((*upper_y) - (*lower_y) + 1) == (pl_macro.num_blocks)));
} 

/* Xifan TANG: Check if 1st macro contains the 2nd macro.
 * Returns 1 when the y range of pl_macro_b lies within the y range of
 * pl_macro_a (bounds included), 0 otherwise.
 * NOTE(review): only the y ranges are compared; the x bounds are computed
 * but not used. Presumably callers only compare macros in the same column -
 * confirm against the call sites.
 */
int check_macros_contained(t_pl_macro pl_macro_a,
                           t_pl_macro pl_macro_b) {
  int a_max_x, a_min_x, a_max_y, a_min_y;
  int b_max_x, b_min_x, b_max_y, b_min_y;

  get_start_end_points_one_macro(pl_macro_a, &a_max_x, &a_min_x, 
                                 &a_max_y, &a_min_y);

  get_start_end_points_one_macro(pl_macro_b, &b_max_x, &b_min_x, 
                                 &b_max_y, &b_min_y);

  /* b is contained when it sticks out neither above nor below a */
  return ((a_max_y >= b_max_y) && (a_min_y <= b_min_y)) ? 1 : 0;
}

/* Xifan TANG: get the maximum length of macros.
 * Returns the largest num_blocks found among the num_pl_macros entries of
 * pl_macros, or 0 when there are no macros. pl_macros is only required to
 * be non-NULL when num_pl_macros is not 0.
 */
int max_len_pl_macros(int num_pl_macros, 
                      t_pl_macro* pl_macros) {
  int longest = 0;
  int idx;

  if (0 != num_pl_macros) {
    assert(NULL != pl_macros);

    for (idx = 0; idx < num_pl_macros; idx++) {
      const int len = pl_macros[idx].num_blocks;

      if (len > longest) {
        longest = len;
      }
    }
  }

  return longest;
}