OpenFPGA/vpr/src/place/place_macro.cpp

#include <cstdio>
#include <ctime>
#include <cmath>
#include <sstream>
#include <map>

#include "vtr_assert.h"
#include "vtr_memory.h"
#include "vtr_util.h"

#include "vpr_types.h"
#include "vpr_error.h"
#include "physical_types.h"
#include "globals.h"
#include "place.h"
#include "read_xml_arch_file.h"
#include "place_macro.h"
#include "vpr_utils.h"
#include "echo_files.h"

/******************** File-scope variables declarations **********************/

/* f_idirect_from_blk_pin lets us quickly find pins that could be part of a direct  *
 * connection. The value stored is the index of the possible direct connection as  *
 * specified in the arch file; OPEN (-1) is stored for pins that cannot be part of *
 * a direct chain connection.                                                       *
 * In this file the table is indexed by physical tile type index, then by          *
 * physical pin number:                                                             *
 * [0...device_ctx.physical_tile_types.size()-1][0...num_pins-1]                    */
static int** f_idirect_from_blk_pin = nullptr;

/* f_direct_type_from_blk_pin stores SOURCE if the pin is the from_pin and SINK if  *
 * the pin is the to_pin of the direct connection specified in the arch file;       *
 * OPEN (-1) is stored for pins that cannot be part of a direct chain connection.   *
 * Same indexing as f_idirect_from_blk_pin:                                         *
 * [0...device_ctx.physical_tile_types.size()-1][0...num_pins-1]                    */
static int** f_direct_type_from_blk_pin = nullptr;

/* f_imacro_from_iblk maps a cluster block id to the index of the macro that        *
 * contains it. If the block is not part of a macro, OPEN (-1) is stored.           *
 * It is filled lazily by get_imacro_from_iblk() the first time it is queried.      *
 * [0...cluster_ctx.clb_nlist.blocks().size()-1]                                    */
static vtr::vector_map<ClusterBlockId, int> f_imacro_from_iblk;

/******************** Subroutine declarations ********************************/

/* Walks all cluster blocks, records every chain of direct connections as a macro in the scratch vectors. */
static void find_all_the_macro(int* num_of_macro, std::vector<ClusterBlockId>& pl_macro_member_blk_num_of_this_blk, std::vector<int>& pl_macro_idirect, std::vector<int>& pl_macro_num_members, std::vector<std::vector<ClusterBlockId>>& pl_macro_member_blk_num);

/* Fills f_imacro_from_iblk (block id -> macro index, OPEN when the block is in no macro). */
static void alloc_and_load_imacro_from_iblk(const std::vector<t_pl_macro>& macros);

/* Writes the macros and the per-pin direct-connection tables to the given echo file. */
static void write_place_macros(std::string filename, const std::vector<t_pl_macro>& macros);

/* True if the atom net behind the given cluster net is a constant net. */
static bool is_constant_clb_net(ClusterNetId clb_net);

/* True if the driver pin of the given cluster net could be part of a direct connection. */
static bool net_is_driven_by_direct(ClusterNetId clb_net);

/* Raises a fatal error if any block appears in more than one macro. */
static void validate_macros(const std::vector<t_pl_macro>& macros);

/* Tries to merge macro 'latest_macro' into macro 'matching_macro'; returns true on success. */
static bool try_combine_macros(std::vector<std::vector<ClusterBlockId>>& pl_macro_member_blk_num, int matching_macro, int latest_macro);
/******************** Subroutine definitions *********************************/

static void find_all_the_macro(int* num_of_macro, std::vector<ClusterBlockId>& pl_macro_member_blk_num_of_this_blk, std::vector<int>& pl_macro_idirect, std::vector<int>& pl_macro_num_members, std::vector<std::vector<ClusterBlockId>>& pl_macro_member_blk_num) {
    /* Finds every placement macro, i.e. every chain of cluster blocks linked *
     * through direct connections, and stores it in the caller's buffers.     *
     *                                                                        *
     * Go through all the pins with possible direct connections in            *
     * f_idirect_from_blk_pin. Count the number of heads (which is the same   *
     * as the number of macros) and also the length of each macro.            *
     * Head - blocks with to_pin OPEN and from_pin connected                  *
     * Tail - blocks with to_pin connected and from_pin OPEN                  *
     *                                                                        *
     * Arguments:                                                             *
     *  num_of_macro  : [out] number of macros stored.                        *
     *  pl_macro_member_blk_num_of_this_blk :                                 *
     *                  scratch buffer receiving the members of the chain     *
     *                  currently being traced; it is indexed by member       *
     *                  position, so it must be at least as long as the       *
     *                  longest chain.                                        *
     *  pl_macro_idirect     : [imacro] index of the direct used by the macro.*
     *  pl_macro_num_members : [imacro] number of members. It is incremented  *
     *                  in place, so entries must be zero on entry.           *
     *  pl_macro_member_blk_num : [imacro][imember] blocks of each macro.     */

    int from_iblk_pin, to_iblk_pin, from_idirect, to_idirect,
        from_src_or_sink, to_src_or_sink;
    ClusterNetId to_net_id, from_net_id, next_net_id, curr_net_id;
    ClusterBlockId next_blk_id;
    int num_blk_pins, num_macro;
    int imember;
    auto& cluster_ctx = g_vpr_ctx.clustering();

    // Hash table holding the unique cluster ids and the macro id it belongs to
    std::unordered_map<ClusterBlockId, int> clusters_macro;

    num_macro = 0;
    for (auto blk_id : cluster_ctx.clb_nlist.blocks()) {
        auto logical_block = cluster_ctx.clb_nlist.block_type(blk_id);
        auto physical_tile = pick_best_physical_type(logical_block);

        num_blk_pins = logical_block->pb_type->num_pins;
        for (to_iblk_pin = 0; to_iblk_pin < num_blk_pins; to_iblk_pin++) {
            // The direct tables are indexed by physical tile and physical pin
            int to_physical_pin = get_physical_pin(physical_tile, logical_block, to_iblk_pin);

            to_net_id = cluster_ctx.clb_nlist.block_net(blk_id, to_iblk_pin);
            to_idirect = f_idirect_from_blk_pin[physical_tile->index][to_physical_pin];
            to_src_or_sink = f_direct_type_from_blk_pin[physical_tile->index][to_physical_pin];

            // Identify potential macro head blocks (i.e. start of a macro)
            //
            // The input SINK (to_pin) of a potential HEAD macro will have either:
            //  * no connection to any net (OPEN), or
            //  * a connection to a constant net (e.g. gnd/vcc) which is not driven by a direct
            //
            // Note that the restriction that constant nets are not driven from another direct ensures that
            // blocks in the middle of a chain with internal constant signals are not detected as potential
            // head blocks.
            if (to_src_or_sink == SINK && to_idirect != OPEN
                && (to_net_id == ClusterNetId::INVALID()
                    || (is_constant_clb_net(to_net_id)
                        && !net_is_driven_by_direct(to_net_id)))) {
                for (from_iblk_pin = 0; from_iblk_pin < num_blk_pins; from_iblk_pin++) {
                    int from_physical_pin = get_physical_pin(physical_tile, logical_block, from_iblk_pin);

                    from_net_id = cluster_ctx.clb_nlist.block_net(blk_id, from_iblk_pin);
                    from_idirect = f_idirect_from_blk_pin[physical_tile->index][from_physical_pin];
                    from_src_or_sink = f_direct_type_from_blk_pin[physical_tile->index][from_physical_pin];

                    // Confirm whether this is a head macro
                    //
                    // The output SOURCE (from_pin) of a true head macro will:
                    //  * drive another block with the same direct connection
                    if (from_src_or_sink == SOURCE && to_idirect == from_idirect && from_net_id != ClusterNetId::INVALID()) {
                        // Mark down that this is the first block in the macro
                        pl_macro_member_blk_num_of_this_blk[0] = blk_id;
                        pl_macro_idirect[num_macro] = to_idirect;

                        // Increment the num_member count.
                        pl_macro_num_members[num_macro]++;

                        // Also find out how many members are in the macros,
                        // there are at least 2 members - 1 head and 1 tail.

                        // Initialize the variables
                        next_net_id = from_net_id;
                        next_blk_id = blk_id;

                        // Start finding the other members: follow the net on the
                        // from_pin of each block until a block leaves it unconnected.
                        while (next_net_id != ClusterNetId::INVALID()) {
                            curr_net_id = next_net_id;

                            // Assume that carry chains only has 1 sink - direct connection
                            VTR_ASSERT(cluster_ctx.clb_nlist.net_sinks(curr_net_id).size() == 1);
                            next_blk_id = cluster_ctx.clb_nlist.net_pin_block(curr_net_id, 1);

                            // Assume that the from_iblk_pin index is the same for the next block
                            // NOTE(review): this assertion looks up the head block's tile and pin,
                            // which do not change inside this loop, so it does not inspect next_blk_id.
                            VTR_ASSERT(f_idirect_from_blk_pin[physical_tile->index][from_physical_pin] == from_idirect
                                       && f_direct_type_from_blk_pin[physical_tile->index][from_physical_pin] == SOURCE);
                            next_net_id = cluster_ctx.clb_nlist.block_net(next_blk_id, from_iblk_pin);

                            // Mark down this block as a member of the macro
                            imember = pl_macro_num_members[num_macro];
                            pl_macro_member_blk_num_of_this_blk[imember] = next_blk_id;

                            // Increment the num_member count.
                            pl_macro_num_members[num_macro]++;

                        } // Found all the members of this macro at this point

                        // Allocate the second dimension of the blk_num array since I now know the size
                        pl_macro_member_blk_num[num_macro].resize(pl_macro_num_members[num_macro]);
                        int matching_macro = -1;
                        // Copy the data from the temporary array to the newly allocated array.
                        for (imember = 0; imember < pl_macro_num_members[num_macro]; imember++) {
                            auto cluster_id = pl_macro_member_blk_num_of_this_blk[imember];
                            pl_macro_member_blk_num[num_macro][imember] = cluster_id;
                            // check if this cluster block was in a previous macro; insert() leaves the
                            // existing entry untouched and hands it back when the block is already known
                            auto insert_result = clusters_macro.insert(std::pair<ClusterBlockId, int>(cluster_id, num_macro));
                            if (!insert_result.second) {
                                matching_macro = insert_result.first->second;
                            }
                        }

                        // one cluster from this macro is found in a previous macro try to combine both
                        // macros, since otherwise the program will fail when validating the macros.
                        if (matching_macro != -1) {
                            // try to combine the newly created macro with the found match
                            if (try_combine_macros(pl_macro_member_blk_num, matching_macro, num_macro)) {
                                // the newly created macro is combined with a previous macro
                                // reset the number of members of the newly created macro since it's now removed
                                pl_macro_num_members[num_macro] = 0;
                                // update the number of blocks of the matching macro after combining it with the new macro
                                pl_macro_num_members[matching_macro] = pl_macro_member_blk_num[matching_macro].size();
                                // Blocks contributed by the removed macro were registered above under
                                // num_macro, an index that is about to be reused by the next macro.
                                // Re-point every member of the merged macro at the surviving macro so
                                // that later lookups do not resolve to an unrelated (or half-built) macro.
                                for (auto merged_blk_id : pl_macro_member_blk_num[matching_macro]) {
                                    clusters_macro[merged_blk_id] = matching_macro;
                                }
                                // decrement the number of found macros since the latest one is removed
                                num_macro--;
                            }
                        }

                        // Increment the macro count
                        num_macro++;

                    } // Do nothing if the from_pins does not have same possible direct connection.
                }     // Finish going through all the pins for from_pins.
            }         // Do nothing if the to_pins does not have same possible direct connection.
        }             // Finish going through all the pins for to_pins.
    }                 // Finish going through all blocks.

    // Now, all the data is readily stored in the temporary data structures.
    *num_of_macro = num_macro;
}

static bool try_combine_macros(std::vector<std::vector<ClusterBlockId>>& pl_macro_member_blk_num, int matching_macro, int latest_macro) {
    /* This function takes two placement macro ids which have a common cluster block
     * or more in between. The function then tries to find if the two macros could
     * be combined together to form a larger macro. If it's impossible to combine
     * the two macros together then this design will never place and route.
     * Arguments:
     *  pl_macro_member_blk_num : [0..num_macros-1][0..num_cluster_blocks-1] 2D array
     *                            of macros created so far.
     *  matching_macro          : first macro id, which is a previous macro that is found to have the same block
     *  latest_macro            : second macro id, which is the macro being created at this iteration */

    auto& old_macro_blocks = pl_macro_member_blk_num[matching_macro];
    auto& new_macro_blocks = pl_macro_member_blk_num[latest_macro];

    // Algorithm:
    // 1) Combining two macros is valid when the first block of one of the two macros
    //    matches one of the blocks in the other macro. Examples for valid cases:
    //
    // Case 1: Macro 2 is a subset of Macro 1
    //
    //        Macro 1 (and Combined Macro)
    //          ---
    //          |0|<--- First      Macro 2
    //          ---                 ---
    //          |1|<---- Match ---->|1|<--- First
    //          ---                 ---
    //          |2|                 |2|<---- ClusterBlockId
    //          ---                 ---
    //          |3|
    //          ---
    //
    // Case 2: Macro 2 is an extension of Macro 1
    //
    //        Macro 1             Macro 2            Combined Macro
    //          ---                 ---                  ---
    //First --->|0|      ---------->|2|<--- First        |0|
    //          ---      |          ---                  ---
    //          |1|    Match        |3|                  |1|
    //          ---      |          ---   ========>      ---
    //          |2|<------          |4|                  |2|
    //          ---                 ---                  ---
    //          |3|                 |5|                  |3|
    //          ---                 ---                  ---
    //                                                   |4|
    //                                                   ---
    //                                                   |5|
    //                                                   ---
    //
    // 2) Starting from this match and going forward in both macros all the blocks
    //    should match till we reach the end of one of the macros or both of them.
    // 3) If combining the macros is valid, create a new macro that is the union
    //    of both macros.
    // 4) Replace the old macro with this new combined macro.

    // Step 1) find the staring point of the matching
    auto new_macro_it = new_macro_blocks.begin();
    auto old_macro_it = std::find(old_macro_blocks.begin(), old_macro_blocks.end(), *new_macro_it);
    if (old_macro_it == old_macro_blocks.end()) {
        old_macro_it = old_macro_blocks.begin();
        new_macro_it = std::find(new_macro_blocks.begin(), new_macro_blocks.end(), *old_macro_it);
        // if matching is from the middle of the two macros, then combining macros is not possible
        if (new_macro_it == new_macro_blocks.end()) {
            return false;
        }
    }

    // Store the first part of the combined macro. Similar to blocks 0 -> 1 in case 2
    std::vector<ClusterBlockId> combined_macro;
    // old_macro is similar to Macro 1 in case 2
    if (old_macro_it != old_macro_blocks.begin()) {
        combined_macro.insert(combined_macro.begin(), old_macro_blocks.begin(), old_macro_it);
        // new_macro is similar to Macro 1 in case 2
    } else {
        combined_macro.insert(combined_macro.begin(), new_macro_blocks.begin(), new_macro_it);
    }

    // Step 2) The matching block between the two macros is found, move forward
    // from the matching block to find if combining both macros is valid or not
    while (old_macro_it != old_macro_blocks.end() && new_macro_it != new_macro_blocks.end()) {
        // block ids should match till the end of one
        // of the macros or both of them is reached
        if (*old_macro_it != *new_macro_it) {
            return false;
        }
        // add the block id to the combined macro
        combined_macro.push_back(*old_macro_it);
        // go to the next block in both macros
        old_macro_it++;
        new_macro_it++;
    }

    // Store the last part of the combined macro. Similar to blocks 4 -> 5 in case 2.
    if (old_macro_it != old_macro_blocks.end()) {
        // old_macro is similar to Macro 2 in case 2
        combined_macro.insert(combined_macro.end(), old_macro_it, old_macro_blocks.end());
    } else if (new_macro_it != new_macro_blocks.end()) {
        // new_macro is similar to Macro 2 in case 2
        combined_macro.insert(combined_macro.end(), new_macro_it, new_macro_blocks.end());
    }

    // updated the old macro in the 2D array of macros with the new combined macro
    pl_macro_member_blk_num[matching_macro] = combined_macro;
    // remove the newly created macro which is now included in another macro
    pl_macro_member_blk_num[latest_macro].clear();

    return true;
}

std::vector<t_pl_macro> alloc_and_load_placement_macros(t_direct_inf* directs, int num_directs) {
    /* Builds and returns the placement macros in two steps:             *
     *   1) Allocate temporary buffers of the maximum possible size and  *
     *      walk all the blocks, storing the data relevant to the carry  *
     *      chains and counting the members of each macro.               *
     *   2) Create the macros with their exact size and load them from   *
     *      the temporary buffers.                                       *
     *                                                                   *
     * Only the first dimension of the per-macro member table is sized   *
     * up front; each row is sized by find_all_the_macro() once the      *
     * length of the macro is known. Sizing both dimensions up front     *
     * would need cluster_ctx.clb_nlist.blocks().size()^2 entries, and   *
     * there are big benchmarks with block counts in the 100k's range.   *
     *                                                                   *
     * Side effects: (re)loads f_idirect_from_blk_pin and                *
     * f_direct_type_from_blk_pin, and writes the macro echo file when   *
     * that echo file is enabled.                                        */

    auto& cluster_ctx = g_vpr_ctx.clustering();
    const auto num_clusters = cluster_ctx.clb_nlist.blocks().size();

    /* Temporary buffers, each sized for the worst case of one entry per block. */
    std::vector<int> idirect_of_macro(num_clusters);
    std::vector<int> member_count_of_macro(num_clusters);
    std::vector<std::vector<ClusterBlockId>> members_of_macro(num_clusters);
    std::vector<ClusterBlockId> chain_scratch(num_clusters);

    /* Load the per-pin direct lookup tables used while searching for macros. */
    alloc_and_load_idirect_from_blk_pin(directs, num_directs,
                                        &f_idirect_from_blk_pin, &f_direct_type_from_blk_pin);

    /* Step 1: find the heads (one per macro) and follow each chain. */
    int macro_count = 0;
    find_all_the_macro(&macro_count, chain_scratch,
                       idirect_of_macro, member_count_of_macro, members_of_macro);

    /* Step 2: create the macros and load them from the temporary buffers. */
    std::vector<t_pl_macro> macros(macro_count);
    for (int imacro = 0; imacro < macro_count; ++imacro) {
        auto& members = macros[imacro].members;
        members = std::vector<t_pl_macro_member>(member_count_of_macro[imacro]);

        for (size_t imember = 0; imember < members.size(); ++imember) {
            const t_direct_inf& direct = directs[idirect_of_macro[imacro]];
            auto& member = members[imember];

            /* x and y offsets grow with the position in the chain, z is the same for every member */
            member.offset.x = imember * direct.x_offset;
            member.offset.y = imember * direct.y_offset;
            member.offset.z = direct.z_offset;
            member.blk_index = members_of_macro[imacro][imember];
        }
    }

    if (isEchoFileEnabled(E_ECHO_PLACE_MACROS)) {
        write_place_macros(getEchoFileName(E_ECHO_PLACE_MACROS), macros);
    }

    validate_macros(macros);

    return macros;
}

void get_imacro_from_iblk(int* imacro, ClusterBlockId iblk, const std::vector<t_pl_macro>& macros) {
    /* Fast lookup of the placement macro (if any) that block iblk belongs   *
     * to. The answer is written to *imacro: the macro index, or OPEN when   *
     * the block is in no macro or when iblk evaluates to false.             *
     *                                                                       *
     * The lookup is served from f_imacro_from_iblk,                         *
     * [0...cluster_ctx.clb_nlist.blocks().size()-1], which is loaded from   *
     * 'macros' the first time it is found empty.                            *
     * NOTE(review): once loaded, the table is not refreshed from 'macros'   *
     * on later calls - confirm callers never pass a different macro list.   */

    /* Lazily build the lookup table. */
    if (f_imacro_from_iblk.size() == 0) {
        alloc_and_load_imacro_from_iblk(macros);
    }

    /* No valid block, so no valid macro. */
    if (!iblk) {
        *imacro = OPEN;
        return;
    }

    *imacro = f_imacro_from_iblk[iblk];
}

/* Sizes f_imacro_from_iblk to the number of cluster blocks and fills it with *
 * the index of the macro each block belongs to (OPEN for all other blocks).  */
static void alloc_and_load_imacro_from_iblk(const std::vector<t_pl_macro>& macros) {
    const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist;

    f_imacro_from_iblk.resize(clb_nlist.blocks().size());

    /* Start with every block marked as not being part of any macro. */
    for (auto blk_id : clb_nlist.blocks()) {
        f_imacro_from_iblk.insert(blk_id, OPEN);
    }

    /* Then record the owning macro of every macro member. */
    for (size_t imacro = 0; imacro < macros.size(); ++imacro) {
        for (const auto& member : macros[imacro].members) {
            f_imacro_from_iblk.insert(member.blk_index, imacro);
        }
    }
}

void free_placement_macros_structs() {
    /* This function frees up all the static data structures used. */

    // This frees up the two arrays and set the pointers to NULL
    auto& device_ctx = g_vpr_ctx.device();
    unsigned int itype;
    if (f_idirect_from_blk_pin != nullptr) {
        for (itype = 1; itype < device_ctx.physical_tile_types.size(); itype++) {
            free(f_idirect_from_blk_pin[itype]);
        }
        free(f_idirect_from_blk_pin);
        f_idirect_from_blk_pin = nullptr;
    }

    if (f_direct_type_from_blk_pin != nullptr) {
        for (itype = 1; itype < device_ctx.physical_tile_types.size(); itype++) {
            free(f_direct_type_from_blk_pin[itype]);
        }
        free(f_direct_type_from_blk_pin);
        f_direct_type_from_blk_pin = nullptr;
    }
}

static void write_place_macros(std::string filename, const std::vector<t_pl_macro>& macros) {
    FILE* f = vtr::fopen(filename.c_str(), "w");

    auto& cluster_ctx = g_vpr_ctx.clustering();

    fprintf(f, "#Identified Placement macros\n");
    fprintf(f, "Num_Macros: %zu\n", macros.size());
    for (size_t imacro = 0; imacro < macros.size(); ++imacro) {
        const t_pl_macro* macro = &macros[imacro];
        fprintf(f, "Macro_Id: %zu, Num_Blocks: %zu\n", imacro, macro->members.size());
        fprintf(f, "------------------------------------------------------\n");
        for (size_t imember = 0; imember < macro->members.size(); ++imember) {
            const t_pl_macro_member* macro_memb = &macro->members[imember];
            fprintf(f, "Block_Id: %zu (%s), x_offset: %d, y_offset: %d, z_offset: %d\n",
                    size_t(macro_memb->blk_index),
                    cluster_ctx.clb_nlist.block_name(macro_memb->blk_index).c_str(),
                    macro_memb->offset.x,
                    macro_memb->offset.y,
                    macro_memb->offset.z);
        }
        fprintf(f, "\n");
    }

    fprintf(f, "\n");

    fprintf(f, "#Macro-related direct connections\n");
    fprintf(f, "type      type_pin  is_direct direct_type\n");
    fprintf(f, "------------------------------------------\n");
    auto& device_ctx = g_vpr_ctx.device();
    for (const auto& type : device_ctx.physical_tile_types) {
        if (is_empty_type(&type)) {
            continue;
        }

        int itype = type.index;
        for (int ipin = 0; ipin < type.num_pins; ++ipin) {
            if (f_idirect_from_blk_pin[itype][ipin] != OPEN) {
                if (f_direct_type_from_blk_pin[itype][ipin] == SOURCE) {
                    fprintf(f, "%-9s %-9d true      SOURCE    \n", type.name, ipin);
                } else {
                    VTR_ASSERT(f_direct_type_from_blk_pin[itype][ipin] == SINK);
                    fprintf(f, "%-9s %-9d true      SINK      \n", type.name, ipin);
                }
            } else {
                VTR_ASSERT(f_direct_type_from_blk_pin[itype][ipin] == OPEN);
            }
        }
    }

    fclose(f);
}

static bool is_constant_clb_net(ClusterNetId clb_net) {
    /* Looks up the atom net that corresponds to the given cluster net and *
     * reports whether the atom netlist marks that net as constant.        */
    const auto& atoms = g_vpr_ctx.atom();

    return atoms.nlist.net_is_constant(atoms.lookup.atom_net(clb_net));
}

static bool net_is_driven_by_direct(ClusterNetId clb_net) {
    /* Returns true when the pin driving clb_net could be part of a direct  *
     * connection, i.e. when f_idirect_from_blk_pin holds something other   *
     * than OPEN for that pin.                                              *
     *                                                                      *
     * f_idirect_from_blk_pin is indexed by physical tile index and         *
     * physical pin, so the driver's logical block type and logical pin     *
     * index are translated first, the same way find_all_the_macro() does.  */
    auto& cluster_ctx = g_vpr_ctx.clustering();

    // Driver of the net: its block and the logical index of the driving pin (net pin 0)
    ClusterBlockId block_id = cluster_ctx.clb_nlist.net_driver_block(clb_net);
    int pin_index = cluster_ctx.clb_nlist.net_pin_logical_index(clb_net, 0);

    // Translate logical block type / logical pin into physical tile / physical pin
    auto logical_block = cluster_ctx.clb_nlist.block_type(block_id);
    auto physical_tile = pick_best_physical_type(logical_block);
    int physical_pin = get_physical_pin(physical_tile, logical_block, pin_index);

    auto direct = f_idirect_from_blk_pin[physical_tile->index][physical_pin];

    return direct != OPEN;
}

static void validate_macros(const std::vector<t_pl_macro>& macros) {
    //Perform sanity checks on macros
    auto& cluster_ctx = g_vpr_ctx.clustering();

    //Verify that blocks only appear in a single macro
    std::multimap<ClusterBlockId, int> block_to_macro;
    for (size_t imacro = 0; imacro < macros.size(); ++imacro) {
        for (size_t imember = 0; imember < macros[imacro].members.size(); ++imember) {
            ClusterBlockId iblk = macros[imacro].members[imember].blk_index;

            block_to_macro.emplace(iblk, imacro);
        }
    }

    for (auto blk_id : cluster_ctx.clb_nlist.blocks()) {
        auto range = block_to_macro.equal_range(blk_id);

        int blk_macro_cnt = std::distance(range.first, range.second);
        if (blk_macro_cnt > 1) {
            std::stringstream msg;
            msg << "Block #" << size_t(blk_id) << " '" << cluster_ctx.clb_nlist.block_name(blk_id) << "'"
                << " appears in " << blk_macro_cnt << " placement macros (should appear in at most one). Related Macros:\n";

            for (auto iter = range.first; iter != range.second; ++iter) {
                int imacro = iter->second;
                msg << "  Macro #: " << imacro << "\n";
            }

            VPR_FATAL_ERROR(VPR_ERROR_PLACE, msg.str().c_str());
        }
    }
}