From 5759f5f35b1d09c7011257854b77a611747a839f Mon Sep 17 00:00:00 2001 From: tangxifan Date: Fri, 3 Sep 2021 17:55:23 -0700 Subject: [PATCH 01/16] [Engine] Start developing QL memory bank: upgrade infrastructures of fabric builder --- .../libarchopenfpga/src/circuit_types.h | 3 +- openfpga/src/fabric/build_top_module.cpp | 2 +- .../src/fabric/build_top_module_memory.cpp | 190 +++++++++++++----- openfpga/src/fabric/build_top_module_memory.h | 16 +- openfpga/src/fabric/module_manager.cpp | 34 +++- openfpga/src/fabric/module_manager.h | 17 +- 6 files changed, 202 insertions(+), 60 deletions(-) diff --git a/libopenfpga/libarchopenfpga/src/circuit_types.h b/libopenfpga/libarchopenfpga/src/circuit_types.h index f13a8a5ee..3a2604a42 100644 --- a/libopenfpga/libarchopenfpga/src/circuit_types.h +++ b/libopenfpga/libarchopenfpga/src/circuit_types.h @@ -125,10 +125,11 @@ enum e_config_protocol_type { CONFIG_MEM_STANDALONE, CONFIG_MEM_SCAN_CHAIN, CONFIG_MEM_MEMORY_BANK, + CONFIG_MEM_QL_MEMORY_BANK, CONFIG_MEM_FRAME_BASED, NUM_CONFIG_PROTOCOL_TYPES }; -constexpr std::array CONFIG_PROTOCOL_TYPE_STRING = {{"standalone", "scan_chain", "memory_bank", "frame_based"}}; +constexpr std::array CONFIG_PROTOCOL_TYPE_STRING = {{"standalone", "scan_chain", "memory_bank", "ql_memory_bank", "frame_based"}}; #endif diff --git a/openfpga/src/fabric/build_top_module.cpp b/openfpga/src/fabric/build_top_module.cpp index 56b7d4b2b..1066b57bb 100644 --- a/openfpga/src/fabric/build_top_module.cpp +++ b/openfpga/src/fabric/build_top_module.cpp @@ -407,7 +407,7 @@ int build_top_module(ModuleManager& module_manager, * This is a much easier job after adding sub modules (instances), * we just need to find all the I/O ports from the child modules and build a list of it */ - vtr::vector top_module_num_config_bits = find_top_module_regional_num_config_bit(module_manager, top_module, circuit_lib, sram_model, config_protocol.type()); + vtr::vector top_module_num_config_bits = 
find_top_module_regional_num_config_bit(module_manager, top_module, circuit_lib, sram_model, config_protocol.type()); if (!top_module_num_config_bits.empty()) { add_top_module_sram_ports(module_manager, top_module, diff --git a/openfpga/src/fabric/build_top_module_memory.cpp b/openfpga/src/fabric/build_top_module_memory.cpp index 422a73ff2..44545e608 100644 --- a/openfpga/src/fabric/build_top_module_memory.cpp +++ b/openfpga/src/fabric/build_top_module_memory.cpp @@ -74,8 +74,9 @@ void organize_top_module_tile_cb_modules(ModuleManager& module_manager, if (0 < find_module_num_config_bits(module_manager, cb_module, circuit_lib, sram_model, sram_orgz_type)) { + vtr::Point config_coord(rr_gsb.get_cb_x(cb_type) * 2, rr_gsb.get_cb_y(cb_type) * 2); /* Note that use the original CB coodinate for instance id searching ! */ - module_manager.add_configurable_child(top_module, cb_module, cb_instance_ids[rr_gsb.get_cb_x(cb_type)][rr_gsb.get_cb_y(cb_type)]); + module_manager.add_configurable_child(top_module, cb_module, cb_instance_ids[rr_gsb.get_cb_x(cb_type)][rr_gsb.get_cb_y(cb_type)], config_coord); } } @@ -84,6 +85,37 @@ void organize_top_module_tile_cb_modules(ModuleManager& module_manager, * to the memory modules and memory instances * This function is designed for organizing memory modules in top-level * module + * This function also adds coordindates for each configurable child under the top-level module + * of a FPGA fabric. A configurable child could be a programmable block (grid), + * a Connection Block (CBx/y) or a Switch block (SB). 
+ * This function, we consider a coordinate system as follows + * - Each row may consist of either (1) grid and CBy or (2) CBx and SB + * - Each column may consist of either (1) grid and CBx or (2) CBy and SB + * + * Column 0 Column 1 + * + * +---------------+----------+ + * | | | + * | | | + * | Grid | CBY | Row 3 + * | | | + * | | | + * +---------------+----------+ + * | | | + * | CBX | SB | Row 2 + * | | | + * +---------------+----------+ + * | | | + * | | | + * | Grid | CBY | Row 1 + * | | | + * | | | + * +---------------+----------+ + * | | | + * | CBX | SB | Row 0 + * | | | + * +---------------+----------+ + *******************************************************************/ static void organize_top_module_tile_memory_modules(ModuleManager& module_manager, @@ -130,7 +162,8 @@ void organize_top_module_tile_memory_modules(ModuleManager& module_manager, if (0 < find_module_num_config_bits(module_manager, sb_module, circuit_lib, sram_model, sram_orgz_type)) { - module_manager.add_configurable_child(top_module, sb_module, sb_instance_ids[rr_gsb.get_sb_x()][rr_gsb.get_sb_y()]); + vtr::Point config_coord(rr_gsb.get_sb_x() * 2 + 1, rr_gsb.get_sb_y() * 2 + 1); + module_manager.add_configurable_child(top_module, sb_module, sb_instance_ids[rr_gsb.get_sb_x()][rr_gsb.get_sb_y()], config_coord); } } @@ -172,11 +205,11 @@ void organize_top_module_tile_memory_modules(ModuleManager& module_manager, if (0 < find_module_num_config_bits(module_manager, grid_module, circuit_lib, sram_model, sram_orgz_type)) { - module_manager.add_configurable_child(top_module, grid_module, grid_instance_ids[tile_coord.x()][tile_coord.y()]); + vtr::Point config_coord(tile_coord.x() * 2, tile_coord.y() * 2); + module_manager.add_configurable_child(top_module, grid_module, grid_instance_ids[tile_coord.x()][tile_coord.y()], config_coord); } } - /******************************************************************** * Split memory modules into different configurable regions * This function will create 
regions based on the definition @@ -496,6 +529,7 @@ void shuffle_top_module_configurable_children(ModuleManager& module_manager, /* Cache the configurable children and their instances */ std::vector orig_configurable_children = module_manager.configurable_children(top_module); std::vector orig_configurable_child_instances = module_manager.configurable_child_instances(top_module); + std::vector> orig_configurable_child_coordinates = module_manager.configurable_child_coordinates(top_module); /* Reorganize the configurable children */ module_manager.clear_configurable_children(top_module); @@ -503,7 +537,8 @@ void shuffle_top_module_configurable_children(ModuleManager& module_manager, for (size_t ikey = 0; ikey < num_keys; ++ikey) { module_manager.add_configurable_child(top_module, orig_configurable_children[shuffled_keys[ikey]], - orig_configurable_child_instances[shuffled_keys[ikey]]); + orig_configurable_child_instances[shuffled_keys[ikey]], + orig_configurable_child_coordinates[shuffled_keys[ikey]]); } /* Reset configurable regions */ @@ -614,13 +649,13 @@ int load_top_module_memory_modules_from_fabric_key(ModuleManager& module_manager * - This function should be called after the configurable children * is loaded to the top-level module! 
********************************************************************/ -vtr::vector find_top_module_regional_num_config_bit(const ModuleManager& module_manager, - const ModuleId& top_module, - const CircuitLibrary& circuit_lib, - const CircuitModelId& sram_model, - const e_config_protocol_type& config_protocol_type) { +vtr::vector find_top_module_regional_num_config_bit(const ModuleManager& module_manager, + const ModuleId& top_module, + const CircuitLibrary& circuit_lib, + const CircuitModelId& sram_model, + const e_config_protocol_type& config_protocol_type) { /* Initialize the number of configuration bits for each region */ - vtr::vector num_config_bits(module_manager.regions(top_module).size(), 0); + vtr::vector num_config_bits(module_manager.regions(top_module).size(), TopModuleNumConfigBits(0, 0)); switch (config_protocol_type) { case CONFIG_MEM_STANDALONE: @@ -632,7 +667,36 @@ vtr::vector find_top_module_regional_num_config_bit(cons */ for (const ConfigRegionId& config_region : module_manager.regions(top_module)) { for (const ModuleId& child_module : module_manager.region_configurable_children(top_module, config_region)) { - num_config_bits[config_region] += find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type); + num_config_bits[config_region].first += find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type); + } + } + break; + } + case CONFIG_MEM_QL_MEMORY_BANK: { + /* For QL memory bank: we will use the row and column information for each configuration child + * in order to identify the number of unique BLs and WLs + * In this configuration protocol, + * - all the configurable child in the same row will share the same WLs + * - the number of WLs per row is limited by the configurable child which requires most WLs + * - each row has independent WLs + * - all the configurable child in the same column will share the same BLs + * - the number of BLs per column is 
limited by the configurable child which requires most BLs + * - each column has independent BLs + */ + for (const ConfigRegionId& config_region : module_manager.regions(top_module)) { + std::map num_bls; + std::map num_wls; + for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { + ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id]; + vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; + num_bls[coord.x()] = std::max(num_bls[coord.x()], find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type)); + num_wls[coord.y()] = std::max(num_wls[coord.y()], find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type)); + for (const auto& kv : num_bls) { + num_config_bits[config_region].first += kv.first; + } + for (const auto& kv : num_wls) { + num_config_bits[config_region].second += kv.second; + } } } break; @@ -646,14 +710,14 @@ vtr::vector find_top_module_regional_num_config_bit(cons for (const ConfigRegionId& config_region : module_manager.regions(top_module)) { for (const ModuleId& child_module : module_manager.region_configurable_children(top_module, config_region)) { size_t temp_num_config_bits = find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type); - num_config_bits[config_region] = std::max((int)temp_num_config_bits, (int)num_config_bits[config_region]); + num_config_bits[config_region].first = std::max(temp_num_config_bits, num_config_bits[config_region].first); } /* If there are more than 2 configurable children, we need a decoder * Otherwise, we can just short wire the address port to the children */ if (1 < module_manager.region_configurable_children(top_module, config_region).size()) { - num_config_bits[config_region] += 
find_mux_local_decoder_addr_size(module_manager.region_configurable_children(top_module, config_region).size()); + num_config_bits[config_region].first += find_mux_local_decoder_addr_size(module_manager.region_configurable_children(top_module, config_region).size()); } } @@ -706,34 +770,40 @@ size_t generate_top_module_sram_port_size(const ConfigProtocol& config_protocol, * top-level module * The type and names of added ports strongly depend on the * organization of SRAMs. - * 1. Standalone SRAMs: - * two ports will be added, which are BL and WL - * 2. Scan-chain Flip-flops: - * two ports will be added, which are the head of scan-chain - * and the tail of scan-chain - * IMPORTANT: the port size will be forced to 1 in this case - * because the head and tail are both 1-bit ports!!! - * 3. Memory decoders: - * - An enable signal - * - A BL address port - * - A WL address port - * - A data-in port for the BL decoder - * 4. Frame-based memory: - * - An Enable signal - * - An address port, whose size depends on the number of config bits - * and the maximum size of address ports of configurable children - * - An data_in port (single-bit) + * - Standalone SRAMs: + * two ports will be added, which are BL and WL + * - Scan-chain Flip-flops: + * two ports will be added, which are the head of scan-chain + * and the tail of scan-chain + * IMPORTANT: the port size will be forced to 1 in this case + * because the head and tail are both 1-bit ports!!! 
+ * - Memory decoders: + * - An enable signal + * - A BL address port + * - A WL address port + * - A data-in port for the BL decoder + * - QL memory decoder: + * - An enable signal + * - An BL address port + * - A WL address port + * - A data-in port for the BL decoder + * @note In this memory decoders, the address size will be computed in a different way than the regular one + * - Frame-based memory: + * - An Enable signal + * - An address port, whose size depends on the number of config bits + * and the maximum size of address ports of configurable children + * - An data_in port (single-bit) ********************************************************************/ void add_top_module_sram_ports(ModuleManager& module_manager, const ModuleId& module_id, const CircuitLibrary& circuit_lib, const CircuitModelId& sram_model, const ConfigProtocol& config_protocol, - const vtr::vector& num_config_bits) { + const vtr::vector& num_config_bits) { std::vector sram_port_names = generate_sram_port_names(circuit_lib, sram_model, config_protocol.type()); size_t total_num_config_bits = 0; - for (const size_t& curr_num_config_bits : num_config_bits) { - total_num_config_bits += curr_num_config_bits; + for (const auto& curr_num_config_bits : num_config_bits) { + total_num_config_bits += curr_num_config_bits.first; } size_t sram_port_size = generate_top_module_sram_port_size(config_protocol, total_num_config_bits); @@ -754,7 +824,7 @@ void add_top_module_sram_ports(ModuleManager& module_manager, /* BL address size is the largest among all the regions */ size_t bl_addr_size = 0; for (const ConfigRegionId& config_region : module_manager.regions(module_id)) { - bl_addr_size = std::max(bl_addr_size, find_memory_decoder_addr_size(num_config_bits[config_region])); + bl_addr_size = std::max(bl_addr_size, find_memory_decoder_addr_size(num_config_bits[config_region].first)); } BasicPort bl_addr_port(std::string(DECODER_BL_ADDRESS_PORT_NAME), bl_addr_size); module_manager.add_port(module_id, 
bl_addr_port, ModuleManager::MODULE_INPUT_PORT); @@ -762,7 +832,33 @@ void add_top_module_sram_ports(ModuleManager& module_manager, /* WL address size is the largest among all the regions */ size_t wl_addr_size = 0; for (const ConfigRegionId& config_region : module_manager.regions(module_id)) { - wl_addr_size = std::max(wl_addr_size, find_memory_decoder_addr_size(num_config_bits[config_region])); + wl_addr_size = std::max(wl_addr_size, find_memory_decoder_addr_size(num_config_bits[config_region].first)); + } + BasicPort wl_addr_port(std::string(DECODER_WL_ADDRESS_PORT_NAME), wl_addr_size); + module_manager.add_port(module_id, wl_addr_port, ModuleManager::MODULE_INPUT_PORT); + + /* Data input should be dependent on the number of configuration regions*/ + BasicPort din_port(std::string(DECODER_DATA_IN_PORT_NAME), config_protocol.num_regions()); + module_manager.add_port(module_id, din_port, ModuleManager::MODULE_INPUT_PORT); + + break; + } + case CONFIG_MEM_QL_MEMORY_BANK: { + BasicPort en_port(std::string(DECODER_ENABLE_PORT_NAME), 1); + module_manager.add_port(module_id, en_port, ModuleManager::MODULE_INPUT_PORT); + + /* BL address size is the largest among all the regions */ + size_t bl_addr_size = 0; + for (const ConfigRegionId& config_region : module_manager.regions(module_id)) { + bl_addr_size = std::max(bl_addr_size, find_memory_decoder_addr_size(num_config_bits[config_region].first)); + } + BasicPort bl_addr_port(std::string(DECODER_BL_ADDRESS_PORT_NAME), bl_addr_size); + module_manager.add_port(module_id, bl_addr_port, ModuleManager::MODULE_INPUT_PORT); + + /* WL address size is the largest among all the regions */ + size_t wl_addr_size = 0; + for (const ConfigRegionId& config_region : module_manager.regions(module_id)) { + wl_addr_size = std::max(wl_addr_size, find_memory_decoder_addr_size(num_config_bits[config_region].second)); } BasicPort wl_addr_port(std::string(DECODER_WL_ADDRESS_PORT_NAME), wl_addr_size); module_manager.add_port(module_id, 
wl_addr_port, ModuleManager::MODULE_INPUT_PORT); @@ -798,8 +894,8 @@ void add_top_module_sram_ports(ModuleManager& module_manager, module_manager.add_port(module_id, en_port, ModuleManager::MODULE_INPUT_PORT); size_t max_num_config_bits = 0; - for (const size_t& curr_num_config_bits : num_config_bits) { - max_num_config_bits = std::max(max_num_config_bits, curr_num_config_bits); + for (const auto& curr_num_config_bits : num_config_bits) { + max_num_config_bits = std::max(max_num_config_bits, curr_num_config_bits.first); } BasicPort addr_port(std::string(DECODER_ADDRESS_PORT_NAME), max_num_config_bits); @@ -910,7 +1006,7 @@ static void add_top_module_nets_cmos_memory_bank_config_bus(ModuleManager& module_manager, DecoderLibrary& decoder_lib, const ModuleId& top_module, - const vtr::vector& num_config_bits) { + const vtr::vector& num_config_bits) { /* Find Enable port from the top-level module */ ModulePortId en_port = module_manager.find_module_port(top_module, std::string(DECODER_ENABLE_PORT_NAME)); BasicPort en_port_info = module_manager.module_port(top_module, en_port); @@ -935,13 +1031,13 @@ void add_top_module_nets_cmos_memory_bank_config_bus(ModuleManager& module_manag /* Each memory bank has a unified number of BL/WLs */ size_t num_bls = 0; - for (const size_t& curr_config_bits : num_config_bits) { - num_bls = std::max(num_bls, find_memory_decoder_data_size(curr_config_bits)); + for (const auto& curr_config_bits : num_config_bits) { + num_bls = std::max(num_bls, find_memory_decoder_data_size(curr_config_bits.first)); } size_t num_wls = 0; - for (const size_t& curr_config_bits : num_config_bits) { - num_wls = std::max(num_wls, find_memory_decoder_data_size(curr_config_bits)); + for (const auto& curr_config_bits : num_config_bits) { + num_wls = std::max(num_wls, find_memory_decoder_data_size(curr_config_bits.first)); } /* Create separated memory bank circuitry, i.e., BL/WL decoders for each region */ @@ -1582,7 +1678,7 @@ static void 
add_top_module_nets_cmos_memory_frame_config_bus(ModuleManager& module_manager, DecoderLibrary& decoder_lib, const ModuleId& top_module, - const vtr::vector& num_config_bits) { + const vtr::vector& num_config_bits) { /* Find the number of address bits for the top-level module */ ModulePortId top_addr_port = module_manager.find_module_port(top_module, std::string(DECODER_ADDRESS_PORT_NAME)); BasicPort top_addr_port_info = module_manager.module_port(top_module, top_addr_port); @@ -1598,7 +1694,7 @@ void add_top_module_nets_cmos_memory_frame_config_bus(ModuleManager& module_mana * - The number of address bits of the configurable child is the same as top-level */ if ( (1 == module_manager.region_configurable_children(top_module, config_region).size()) - && (num_config_bits[config_region] == top_addr_size)) { + && (num_config_bits[config_region].first == top_addr_size)) { add_top_module_nets_cmos_memory_frame_short_config_bus(module_manager, top_module, config_region); } else { add_top_module_nets_cmos_memory_frame_decoder_config_bus(module_manager, decoder_lib, top_module, config_region); @@ -1655,7 +1751,7 @@ void add_top_module_nets_cmos_memory_config_bus(ModuleManager& module_manager, DecoderLibrary& decoder_lib, const ModuleId& parent_module, const ConfigProtocol& config_protocol, - const vtr::vector& num_config_bits) { + const vtr::vector& num_config_bits) { switch (config_protocol.type()) { case CONFIG_MEM_STANDALONE: add_module_nets_cmos_flatten_memory_config_bus(module_manager, parent_module, @@ -1717,7 +1813,7 @@ void add_top_module_nets_memory_config_bus(ModuleManager& module_manager, const ModuleId& parent_module, const ConfigProtocol& config_protocol, const e_circuit_model_design_tech& mem_tech, - const vtr::vector& num_config_bits) { + const vtr::vector& num_config_bits) { vtr::ScopedStartFinishTimer timer("Add module nets for configuration buses"); diff --git a/openfpga/src/fabric/build_top_module_memory.h b/openfpga/src/fabric/build_top_module_memory.h 
index e1d495fd4..d44522ff0 100644 --- a/openfpga/src/fabric/build_top_module_memory.h +++ b/openfpga/src/fabric/build_top_module_memory.h @@ -26,6 +26,8 @@ /* begin namespace openfpga */ namespace openfpga { +typedef std::pair TopModuleNumConfigBits; + void organize_top_module_memory_modules(ModuleManager& module_manager, const ModuleId& top_module, const CircuitLibrary& circuit_lib, @@ -48,25 +50,25 @@ int load_top_module_memory_modules_from_fabric_key(ModuleManager& module_manager const ConfigProtocol& config_protocol, const FabricKey& fabric_key); -vtr::vector find_top_module_regional_num_config_bit(const ModuleManager& module_manager, - const ModuleId& top_module, - const CircuitLibrary& circuit_lib, - const CircuitModelId& sram_model, - const e_config_protocol_type& config_protocol_type); +vtr::vector find_top_module_regional_num_config_bit(const ModuleManager& module_manager, + const ModuleId& top_module, + const CircuitLibrary& circuit_lib, + const CircuitModelId& sram_model, + const e_config_protocol_type& config_protocol_type); void add_top_module_sram_ports(ModuleManager& module_manager, const ModuleId& module_id, const CircuitLibrary& circuit_lib, const CircuitModelId& sram_model, const ConfigProtocol& config_protocol, - const vtr::vector& num_config_bits); + const vtr::vector& num_config_bits); void add_top_module_nets_memory_config_bus(ModuleManager& module_manager, DecoderLibrary& decoder_lib, const ModuleId& parent_module, const ConfigProtocol& config_protocol, const e_circuit_model_design_tech& mem_tech, - const vtr::vector& num_config_bits); + const vtr::vector& num_config_bits); } /* end namespace openfpga */ diff --git a/openfpga/src/fabric/module_manager.cpp b/openfpga/src/fabric/module_manager.cpp index 87fa0ca5c..de5ed0fa6 100644 --- a/openfpga/src/fabric/module_manager.cpp +++ b/openfpga/src/fabric/module_manager.cpp @@ -84,6 +84,13 @@ std::vector ModuleManager::configurable_child_instances(const ModuleId& return 
configurable_child_instances_[parent_module]; } +std::vector> ModuleManager::configurable_child_coordinates(const ModuleId& parent_module) const { + /* Validate the module_id */ + VTR_ASSERT(valid_module_id(parent_module)); + + return configurable_child_coordinates_[parent_module]; +} + /* Find the source ids of modules */ ModuleManager::module_net_src_range ModuleManager::module_net_sources(const ModuleId& module, const ModuleNetId& net) const { /* Validate the module_id */ @@ -135,6 +142,22 @@ std::vector ModuleManager::region_configurable_child_instances(const Mod return region_config_child_instances; } +std::vector> ModuleManager::region_configurable_child_coordinates(const ModuleId& parent_module, + const ConfigRegionId& region) const { + /* Validate the module_id */ + VTR_ASSERT(valid_module_id(parent_module)); + VTR_ASSERT(valid_region_id(parent_module, region)); + + std::vector> region_config_child_coordinates; + region_config_child_coordinates.reserve(config_region_children_[parent_module][region].size()); + + for (const size_t& child_id : config_region_children_[parent_module][region]) { + region_config_child_coordinates.push_back(configurable_child_coordinates_[parent_module][child_id]); + } + + return region_config_child_coordinates; +} + /****************************************************************************** * Public Accessors ******************************************************************************/ @@ -534,6 +557,7 @@ ModuleId ModuleManager::add_module(const std::string& name) { configurable_children_.emplace_back(); configurable_child_instances_.emplace_back(); configurable_child_regions_.emplace_back(); + configurable_child_coordinates_.emplace_back(); config_region_ids_.emplace_back(); config_region_children_.emplace_back(); @@ -716,7 +740,8 @@ void ModuleManager::set_child_instance_name(const ModuleId& parent_module, */ void ModuleManager::add_configurable_child(const ModuleId& parent_module, const ModuleId& child_module, - const 
size_t& child_instance) { + const size_t& child_instance, + const vtr::Point coord) { /* Validate the id of both parent and child modules */ VTR_ASSERT ( valid_module_id(parent_module) ); VTR_ASSERT ( valid_module_id(child_module) ); @@ -726,6 +751,7 @@ void ModuleManager::add_configurable_child(const ModuleId& parent_module, configurable_children_[parent_module].push_back(child_module); configurable_child_instances_[parent_module].push_back(child_instance); configurable_child_regions_[parent_module].push_back(ConfigRegionId::INVALID()); + configurable_child_coordinates_[parent_module].push_back(coord); } void ModuleManager::reserve_configurable_child(const ModuleId& parent_module, @@ -738,9 +764,12 @@ void ModuleManager::reserve_configurable_child(const ModuleId& parent_module, if (num_children > configurable_child_instances_[parent_module].size()) { configurable_child_instances_[parent_module].reserve(num_children); } - if (num_children > configurable_child_instances_[parent_module].size()) { + if (num_children > configurable_child_regions_[parent_module].size()) { configurable_child_regions_[parent_module].reserve(num_children); } + if (num_children > configurable_child_coordinates_[parent_module].size()) { + configurable_child_coordinates_[parent_module].reserve(num_children); + } } ConfigRegionId ModuleManager::add_config_region(const ModuleId& module) { @@ -981,6 +1010,7 @@ void ModuleManager::clear_configurable_children(const ModuleId& parent_module) { configurable_children_[parent_module].clear(); configurable_child_instances_[parent_module].clear(); configurable_child_regions_[parent_module].clear(); + configurable_child_coordinates_[parent_module].clear(); } void ModuleManager::clear_config_region(const ModuleId& parent_module) { diff --git a/openfpga/src/fabric/module_manager.h b/openfpga/src/fabric/module_manager.h index d58442c73..3d4405b7a 100644 --- a/openfpga/src/fabric/module_manager.h +++ b/openfpga/src/fabric/module_manager.h @@ -8,6 +8,7 @@ 
#include #include "vtr_vector.h" +#include "vtr_geometry.h" #include "module_manager_fwd.h" #include "openfpga_port.h" @@ -148,6 +149,8 @@ class ModuleManager { std::vector configurable_children(const ModuleId& parent_module) const; /* Find all the instances of configurable child modules under a parent module */ std::vector configurable_child_instances(const ModuleId& parent_module) const; + /* Find the coordindate of a configurable child module under a parent module */ + std::vector> configurable_child_coordinates(const ModuleId& parent_module) const; /* Find the source ids of modules */ module_net_src_range module_net_sources(const ModuleId& module, const ModuleNetId& net) const; /* Find the sink ids of modules */ @@ -161,6 +164,10 @@ class ModuleManager { /* Find all the instances of configurable child modules under a region of a parent module */ std::vector region_configurable_child_instances(const ModuleId& parent_module, const ConfigRegionId& region) const; + + /* Find all the coordinates of configurable child modules under a region of a parent module */ + std::vector> region_configurable_child_coordinates(const ModuleId& parent_module, + const ConfigRegionId& region) const; public: /* Public accessors */ size_t num_modules() const; @@ -252,8 +259,13 @@ class ModuleManager { void add_child_module(const ModuleId& parent_module, const ModuleId& child_module); /* Set the instance name of a child module */ void set_child_instance_name(const ModuleId& parent_module, const ModuleId& child_module, const size_t& instance_id, const std::string& instance_name); - /* Add a configurable child module to module */ - void add_configurable_child(const ModuleId& module, const ModuleId& child_module, const size_t& child_instance); + /* Add a configurable child module to module + * This function also set the coordinate of a configurable child + * The coordinate is a relative position in each region, which is used to + * idenify BL/WL sharing + * By default, it is an invalid 
coordinate + */ + void add_configurable_child(const ModuleId& module, const ModuleId& child_module, const size_t& child_instance, const vtr::Point coord = vtr::Point(-1, -1)); /* Reserved a number of configurable children * for memory efficiency */ @@ -350,6 +362,7 @@ class ModuleManager { vtr::vector> configurable_children_; /* Child modules with configurable memory bits that this module contain */ vtr::vector> configurable_child_instances_; /* Instances of child modules with configurable memory bits that this module contain */ vtr::vector> configurable_child_regions_; /* Instances of child modules with configurable memory bits that this module contain */ + vtr::vector>> configurable_child_coordinates_; /* Relative coorindates of child modules with configurable memory bits that this module contain */ /* Configurable regions to group the configurable children * Note: From f75456e3040718b66f8c44270a7b8047c617187e Mon Sep 17 00:00:00 2001 From: tangxifan Date: Sun, 5 Sep 2021 11:53:33 -0700 Subject: [PATCH 02/16] [Engine] Update BL/WL estimation function for QL memory bank protocol --- openfpga/src/fabric/build_top_module_memory.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/openfpga/src/fabric/build_top_module_memory.cpp b/openfpga/src/fabric/build_top_module_memory.cpp index 44545e608..a00d8a9c9 100644 --- a/openfpga/src/fabric/build_top_module_memory.cpp +++ b/openfpga/src/fabric/build_top_module_memory.cpp @@ -689,8 +689,8 @@ vtr::vector find_top_module_regional_num for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id]; vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; - num_bls[coord.x()] = std::max(num_bls[coord.x()], find_module_num_config_bits(module_manager, child_module, circuit_lib, 
sram_model, config_protocol_type)); - num_wls[coord.y()] = std::max(num_wls[coord.y()], find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type)); + num_bls[coord.x()] = std::max(num_bls[coord.x()], find_memory_decoder_addr_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type))); + num_wls[coord.y()] = std::max(num_wls[coord.y()], find_memory_decoder_addr_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type))); for (const auto& kv : num_bls) { num_config_bits[config_region].first += kv.first; } @@ -850,7 +850,7 @@ void add_top_module_sram_ports(ModuleManager& module_manager, /* BL address size is the largest among all the regions */ size_t bl_addr_size = 0; for (const ConfigRegionId& config_region : module_manager.regions(module_id)) { - bl_addr_size = std::max(bl_addr_size, find_memory_decoder_addr_size(num_config_bits[config_region].first)); + bl_addr_size = std::max(bl_addr_size, num_config_bits[config_region].first); } BasicPort bl_addr_port(std::string(DECODER_BL_ADDRESS_PORT_NAME), bl_addr_size); module_manager.add_port(module_id, bl_addr_port, ModuleManager::MODULE_INPUT_PORT); @@ -858,7 +858,7 @@ void add_top_module_sram_ports(ModuleManager& module_manager, /* WL address size is the largest among all the regions */ size_t wl_addr_size = 0; for (const ConfigRegionId& config_region : module_manager.regions(module_id)) { - wl_addr_size = std::max(wl_addr_size, find_memory_decoder_addr_size(num_config_bits[config_region].second)); + wl_addr_size = std::max(wl_addr_size, num_config_bits[config_region].second); } BasicPort wl_addr_port(std::string(DECODER_WL_ADDRESS_PORT_NAME), wl_addr_size); module_manager.add_port(module_id, wl_addr_port, ModuleManager::MODULE_INPUT_PORT); From cf2e479d18d2a1361b9063a6e549b137b0973a66 Mon Sep 17 00:00:00 2001 From: tangxifan Date: Sun, 5 Sep 2021 12:01:38 -0700 Subject: 
[PATCH 03/16] [Engine] Refactor the TopModuleNumConfigBits data structure --- openfpga/src/fabric/build_top_module.cpp | 2 +- .../src/fabric/build_top_module_memory.cpp | 22 +++++++++---------- openfpga/src/fabric/build_top_module_memory.h | 16 +++++++------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/openfpga/src/fabric/build_top_module.cpp b/openfpga/src/fabric/build_top_module.cpp index 1066b57bb..e345e00ce 100644 --- a/openfpga/src/fabric/build_top_module.cpp +++ b/openfpga/src/fabric/build_top_module.cpp @@ -407,7 +407,7 @@ int build_top_module(ModuleManager& module_manager, * This is a much easier job after adding sub modules (instances), * we just need to find all the I/O ports from the child modules and build a list of it */ - vtr::vector top_module_num_config_bits = find_top_module_regional_num_config_bit(module_manager, top_module, circuit_lib, sram_model, config_protocol.type()); + TopModuleNumConfigBits top_module_num_config_bits = find_top_module_regional_num_config_bit(module_manager, top_module, circuit_lib, sram_model, config_protocol.type()); if (!top_module_num_config_bits.empty()) { add_top_module_sram_ports(module_manager, top_module, diff --git a/openfpga/src/fabric/build_top_module_memory.cpp b/openfpga/src/fabric/build_top_module_memory.cpp index a00d8a9c9..5faf4fc02 100644 --- a/openfpga/src/fabric/build_top_module_memory.cpp +++ b/openfpga/src/fabric/build_top_module_memory.cpp @@ -649,13 +649,13 @@ int load_top_module_memory_modules_from_fabric_key(ModuleManager& module_manager * - This function should be called after the configurable children * is loaded to the top-level module! 
********************************************************************/ -vtr::vector find_top_module_regional_num_config_bit(const ModuleManager& module_manager, - const ModuleId& top_module, - const CircuitLibrary& circuit_lib, - const CircuitModelId& sram_model, - const e_config_protocol_type& config_protocol_type) { +TopModuleNumConfigBits find_top_module_regional_num_config_bit(const ModuleManager& module_manager, + const ModuleId& top_module, + const CircuitLibrary& circuit_lib, + const CircuitModelId& sram_model, + const e_config_protocol_type& config_protocol_type) { /* Initialize the number of configuration bits for each region */ - vtr::vector num_config_bits(module_manager.regions(top_module).size(), TopModuleNumConfigBits(0, 0)); + TopModuleNumConfigBits num_config_bits(module_manager.regions(top_module).size(), std::pair(0, 0)); switch (config_protocol_type) { case CONFIG_MEM_STANDALONE: @@ -799,7 +799,7 @@ void add_top_module_sram_ports(ModuleManager& module_manager, const CircuitLibrary& circuit_lib, const CircuitModelId& sram_model, const ConfigProtocol& config_protocol, - const vtr::vector& num_config_bits) { + const TopModuleNumConfigBits& num_config_bits) { std::vector sram_port_names = generate_sram_port_names(circuit_lib, sram_model, config_protocol.type()); size_t total_num_config_bits = 0; for (const auto& curr_num_config_bits : num_config_bits) { @@ -1006,7 +1006,7 @@ static void add_top_module_nets_cmos_memory_bank_config_bus(ModuleManager& module_manager, DecoderLibrary& decoder_lib, const ModuleId& top_module, - const vtr::vector& num_config_bits) { + const TopModuleNumConfigBits& num_config_bits) { /* Find Enable port from the top-level module */ ModulePortId en_port = module_manager.find_module_port(top_module, std::string(DECODER_ENABLE_PORT_NAME)); BasicPort en_port_info = module_manager.module_port(top_module, en_port); @@ -1678,7 +1678,7 @@ static void add_top_module_nets_cmos_memory_frame_config_bus(ModuleManager& module_manager, 
DecoderLibrary& decoder_lib, const ModuleId& top_module, - const vtr::vector& num_config_bits) { + const TopModuleNumConfigBits& num_config_bits) { /* Find the number of address bits for the top-level module */ ModulePortId top_addr_port = module_manager.find_module_port(top_module, std::string(DECODER_ADDRESS_PORT_NAME)); BasicPort top_addr_port_info = module_manager.module_port(top_module, top_addr_port); @@ -1751,7 +1751,7 @@ void add_top_module_nets_cmos_memory_config_bus(ModuleManager& module_manager, DecoderLibrary& decoder_lib, const ModuleId& parent_module, const ConfigProtocol& config_protocol, - const vtr::vector& num_config_bits) { + const TopModuleNumConfigBits& num_config_bits) { switch (config_protocol.type()) { case CONFIG_MEM_STANDALONE: add_module_nets_cmos_flatten_memory_config_bus(module_manager, parent_module, @@ -1813,7 +1813,7 @@ void add_top_module_nets_memory_config_bus(ModuleManager& module_manager, const ModuleId& parent_module, const ConfigProtocol& config_protocol, const e_circuit_model_design_tech& mem_tech, - const vtr::vector& num_config_bits) { + const TopModuleNumConfigBits& num_config_bits) { vtr::ScopedStartFinishTimer timer("Add module nets for configuration buses"); diff --git a/openfpga/src/fabric/build_top_module_memory.h b/openfpga/src/fabric/build_top_module_memory.h index d44522ff0..fa6f34cdd 100644 --- a/openfpga/src/fabric/build_top_module_memory.h +++ b/openfpga/src/fabric/build_top_module_memory.h @@ -26,7 +26,7 @@ /* begin namespace openfpga */ namespace openfpga { -typedef std::pair TopModuleNumConfigBits; +typedef vtr::vector> TopModuleNumConfigBits; void organize_top_module_memory_modules(ModuleManager& module_manager, const ModuleId& top_module, @@ -50,25 +50,25 @@ int load_top_module_memory_modules_from_fabric_key(ModuleManager& module_manager const ConfigProtocol& config_protocol, const FabricKey& fabric_key); -vtr::vector find_top_module_regional_num_config_bit(const ModuleManager& module_manager, - const 
ModuleId& top_module, - const CircuitLibrary& circuit_lib, - const CircuitModelId& sram_model, - const e_config_protocol_type& config_protocol_type); +TopModuleNumConfigBits find_top_module_regional_num_config_bit(const ModuleManager& module_manager, + const ModuleId& top_module, + const CircuitLibrary& circuit_lib, + const CircuitModelId& sram_model, + const e_config_protocol_type& config_protocol_type); void add_top_module_sram_ports(ModuleManager& module_manager, const ModuleId& module_id, const CircuitLibrary& circuit_lib, const CircuitModelId& sram_model, const ConfigProtocol& config_protocol, - const vtr::vector& num_config_bits); + const TopModuleNumConfigBits& num_config_bits); void add_top_module_nets_memory_config_bus(ModuleManager& module_manager, DecoderLibrary& decoder_lib, const ModuleId& parent_module, const ConfigProtocol& config_protocol, const e_circuit_model_design_tech& mem_tech, - const vtr::vector& num_config_bits); + const TopModuleNumConfigBits& num_config_bits); } /* end namespace openfpga */ From ed80d6b3f4e4ad23c52e4ae935fc230badfc2b22 Mon Sep 17 00:00:00 2001 From: tangxifan Date: Sun, 5 Sep 2021 13:23:38 -0700 Subject: [PATCH 04/16] [Engine] Place QL memory bank source codes in a separated source file so that integration to OpenFPGA open-source version is easier --- openfpga/src/fabric/build_top_module.cpp | 1 + .../src/fabric/build_top_module_memory.cpp | 20 +- openfpga/src/fabric/build_top_module_memory.h | 5 +- .../fabric/build_top_module_memory_bank.cpp | 383 ++++++++++++++++++ .../src/fabric/build_top_module_memory_bank.h | 33 ++ .../fabric/build_top_module_memory_utils.h | 28 ++ 6 files changed, 463 insertions(+), 7 deletions(-) create mode 100644 openfpga/src/fabric/build_top_module_memory_bank.cpp create mode 100644 openfpga/src/fabric/build_top_module_memory_bank.h create mode 100644 openfpga/src/fabric/build_top_module_memory_utils.h diff --git a/openfpga/src/fabric/build_top_module.cpp 
b/openfpga/src/fabric/build_top_module.cpp index e345e00ce..add2ea30b 100644 --- a/openfpga/src/fabric/build_top_module.cpp +++ b/openfpga/src/fabric/build_top_module.cpp @@ -422,6 +422,7 @@ int build_top_module(ModuleManager& module_manager, if (0 < module_manager.configurable_children(top_module).size()) { add_top_module_nets_memory_config_bus(module_manager, decoder_lib, top_module, + circuit_lib, sram_model, config_protocol, circuit_lib.design_tech_type(sram_model), top_module_num_config_bits); } diff --git a/openfpga/src/fabric/build_top_module_memory.cpp b/openfpga/src/fabric/build_top_module_memory.cpp index 5faf4fc02..297a5abf4 100644 --- a/openfpga/src/fabric/build_top_module_memory.cpp +++ b/openfpga/src/fabric/build_top_module_memory.cpp @@ -23,6 +23,7 @@ #include "decoder_library_utils.h" #include "module_manager_utils.h" #include "build_decoder_modules.h" +#include "build_top_module_memory_bank.h" #include "build_top_module_memory.h" /* begin namespace openfpga */ @@ -689,8 +690,8 @@ TopModuleNumConfigBits find_top_module_regional_num_config_bit(const ModuleManag for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id]; vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; - num_bls[coord.x()] = std::max(num_bls[coord.x()], find_memory_decoder_addr_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type))); - num_wls[coord.y()] = std::max(num_wls[coord.y()], find_memory_decoder_addr_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type))); + num_bls[coord.x()] = std::max(num_bls[coord.x()], find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, 
config_protocol_type))); + num_wls[coord.y()] = std::max(num_wls[coord.y()], find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type))); for (const auto& kv : num_bls) { num_config_bits[config_region].first += kv.first; } @@ -850,7 +851,7 @@ void add_top_module_sram_ports(ModuleManager& module_manager, /* BL address size is the largest among all the regions */ size_t bl_addr_size = 0; for (const ConfigRegionId& config_region : module_manager.regions(module_id)) { - bl_addr_size = std::max(bl_addr_size, num_config_bits[config_region].first); + bl_addr_size = std::max(bl_addr_size, find_memory_decoder_addr_size(num_config_bits[config_region].first)); } BasicPort bl_addr_port(std::string(DECODER_BL_ADDRESS_PORT_NAME), bl_addr_size); module_manager.add_port(module_id, bl_addr_port, ModuleManager::MODULE_INPUT_PORT); @@ -858,7 +859,7 @@ void add_top_module_sram_ports(ModuleManager& module_manager, /* WL address size is the largest among all the regions */ size_t wl_addr_size = 0; for (const ConfigRegionId& config_region : module_manager.regions(module_id)) { - wl_addr_size = std::max(wl_addr_size, num_config_bits[config_region].second); + wl_addr_size = std::max(wl_addr_size, find_memory_decoder_addr_size(num_config_bits[config_region].second)); } BasicPort wl_addr_port(std::string(DECODER_WL_ADDRESS_PORT_NAME), wl_addr_size); module_manager.add_port(module_id, wl_addr_port, ModuleManager::MODULE_INPUT_PORT); @@ -1750,6 +1751,8 @@ static void add_top_module_nets_cmos_memory_config_bus(ModuleManager& module_manager, DecoderLibrary& decoder_lib, const ModuleId& parent_module, + const CircuitLibrary& circuit_lib, + const CircuitModelId& sram_model, const ConfigProtocol& config_protocol, const TopModuleNumConfigBits& num_config_bits) { switch (config_protocol.type()) { @@ -1766,6 +1769,9 @@ void add_top_module_nets_cmos_memory_config_bus(ModuleManager& module_manager, case CONFIG_MEM_MEMORY_BANK: 
add_top_module_nets_cmos_memory_bank_config_bus(module_manager, decoder_lib, parent_module, num_config_bits); break; + case CONFIG_MEM_QL_MEMORY_BANK: + add_top_module_nets_cmos_ql_memory_bank_config_bus(module_manager, decoder_lib, parent_module, circuit_lib, sram_model, num_config_bits); + break; case CONFIG_MEM_FRAME_BASED: add_top_module_nets_cmos_memory_frame_config_bus(module_manager, decoder_lib, parent_module, num_config_bits); break; @@ -1811,6 +1817,8 @@ void add_top_module_nets_cmos_memory_config_bus(ModuleManager& module_manager, void add_top_module_nets_memory_config_bus(ModuleManager& module_manager, DecoderLibrary& decoder_lib, const ModuleId& parent_module, + const CircuitLibrary& circuit_lib, + const CircuitModelId& sram_model, const ConfigProtocol& config_protocol, const e_circuit_model_design_tech& mem_tech, const TopModuleNumConfigBits& num_config_bits) { @@ -1820,7 +1828,9 @@ void add_top_module_nets_memory_config_bus(ModuleManager& module_manager, switch (mem_tech) { case CIRCUIT_MODEL_DESIGN_CMOS: add_top_module_nets_cmos_memory_config_bus(module_manager, decoder_lib, - parent_module, + parent_module, + circuit_lib, + sram_model, config_protocol, num_config_bits); break; diff --git a/openfpga/src/fabric/build_top_module_memory.h b/openfpga/src/fabric/build_top_module_memory.h index fa6f34cdd..336125df0 100644 --- a/openfpga/src/fabric/build_top_module_memory.h +++ b/openfpga/src/fabric/build_top_module_memory.h @@ -18,6 +18,7 @@ #include "device_rr_gsb.h" #include "fabric_key.h" #include "config_protocol.h" +#include "build_top_module_memory_utils.h" /******************************************************************** * Function declaration @@ -26,8 +27,6 @@ /* begin namespace openfpga */ namespace openfpga { -typedef vtr::vector> TopModuleNumConfigBits; - void organize_top_module_memory_modules(ModuleManager& module_manager, const ModuleId& top_module, const CircuitLibrary& circuit_lib, @@ -66,6 +65,8 @@ void 
add_top_module_sram_ports(ModuleManager& module_manager, void add_top_module_nets_memory_config_bus(ModuleManager& module_manager, DecoderLibrary& decoder_lib, const ModuleId& parent_module, + const CircuitLibrary& circuit_lib, + const CircuitModelId& sram_model, const ConfigProtocol& config_protocol, const e_circuit_model_design_tech& mem_tech, const TopModuleNumConfigBits& num_config_bits); diff --git a/openfpga/src/fabric/build_top_module_memory_bank.cpp b/openfpga/src/fabric/build_top_module_memory_bank.cpp new file mode 100644 index 000000000..4854b711c --- /dev/null +++ b/openfpga/src/fabric/build_top_module_memory_bank.cpp @@ -0,0 +1,383 @@ +/******************************************************************** + * This file includes functions that are used to organize memories + * in the top module of FPGA fabric + *******************************************************************/ +#include + +/* Headers from vtrutil library */ +#include "vtr_assert.h" +#include "vtr_log.h" +#include "vtr_time.h" + +/* Headers from vpr library */ +#include "vpr_utils.h" + +/* Headers from openfpgashell library */ +#include "command_exit_codes.h" + +#include "rr_gsb_utils.h" +#include "openfpga_reserved_words.h" +#include "openfpga_naming.h" + +#include "memory_utils.h" +#include "decoder_library_utils.h" +#include "module_manager_utils.h" +#include "build_decoder_modules.h" +#include "build_top_module_memory_bank.h" + +/* begin namespace openfpga */ +namespace openfpga { + +/********************************************************************* + * Top-level function to add nets for quicklogic memory banks + * Each configuration region has independent memory bank circuitry + * - Find the number of BLs and WLs required for each region + * - Create BL and WL decoders, and add them to decoder library + * - Create nets to connect from top-level module inputs to inputs of decoders + * - Create nets to connect from outputs of decoders to BL/WL of configurable children + * + * 
Detailed schematic of how memory banks are connected in the top-level: + * Consider a random Region X, local BL address lines are aligned to the LSB of the + * top-level BL address lines + * + * top_bl_addr[N-1:0] + * ^ + * | local_bl_addr[N-1:0] + * | + * +-----+------------------+ + * | | | + * | +-------------------+ | + * | | Word Line Decoder | | + * | +-------------------+ | + * | | + * + * The BL/WL decoders should have the same circuit designs no matter what region + * they are placed even when the number of configuration bits are different + * from one region to another! + * This is designed to avoid any address collision between memory banks + * since they are programmed in the same clock cycle + * For example: + * - Memory Bank A has 36 memory cells. + * Its BL decoder has 3 address bit and 6 data output bit + * Its WL decoder has 3 address bit and 6 data output bit + * - Memory Bank B has 16 memory cells. + * Its BL decoder has 2 address bit and 4 data output bit + * Its WL decoder has 2 address bit and 4 data output bit + * - If we try to program the 36th memory cell in bank A + * the BL address will be 3'b110 + * the WL address will be 3'b110 + * the data input will be 1'b0 + * - If we try to program the 4th memory cell in bank A + * the BL address will be 3'b010 + * the WL address will be 3'b010 + * the data input will be 1'b1 + * However, in both cases, this will trigger a parasitic programming in bank B + * the BL address will be 2'b10 + * the WL address will be 2'b10 + * Assume the data input is expected to be 1'b1 for bank B + * but it will be overwritten to 1'b0 when programming the 36th cell in bank A! + * + * Detailed schematic of each memory bank: + * @note The numbers are just made to show a simplified example, practical cases are more complicated! 
+ * + * WL_enable WL address + * | | + * v v + * +-----------------------------------------------+ + * | Word Line Decoder | + * +-----------------------------------------------+ + * +---------+ | | | + * BL | | | | | + * enable ---->| |-----------+---------------+---- ... |------+--> BL[0:2] + * | | | | | | | | + * | | | v | v | v + * | Bit | | +-------+ | +-------+ | +------+ + * BL | Line | +-->| SRAM | +-->| SRAM | +->| SRAM | + * address ---->| Decoder | | | [0:8] | | | [0:5] | ... | | [0:7]| + * | | | +-------+ | +-------+ | +------+ + * | | | | | + * | |-----------+--------------+--------- | -----+--> BL[0:9] + * | | | | | | | | + * | | | v | v | v + * | | | +-------+ | +-------+ | +-------+ + * | | +-->| SRAM | | | SRAM | +->| SRAM | + * | | | | [0:80]| | | [0:63]| ... | | [0:31]| + * | | | +-------+ | +-------+ | +-------+ + * | | | | + * | | | ... ... ... | ... + * | | | | | + * | |-----------+---------------+---- --- | -----+--> BL[0:3] + * | | | | | | | | + * | | | v | v | v + * | | | +-------+ | +-------+ | +-------+ + * | | +-->| SRAM | +-->| SRAM | +->| SRAM | + * | | | |[0:5] | | | [0:8] | ... 
| | [0:2] | + * | | | +-------+ | +-------+ | +-------+ + * BL | | v v v + * data_in ---->| | WL[0:9] WL[0:7] WL[0:4] + * +---------+ + * + **********************************************************************/ +void add_top_module_nets_cmos_ql_memory_bank_config_bus(ModuleManager& module_manager, + DecoderLibrary& decoder_lib, + const ModuleId& top_module, + const CircuitLibrary& circuit_lib, + const CircuitModelId& sram_model, + const TopModuleNumConfigBits& num_config_bits) { + /* Find Enable port from the top-level module */ + ModulePortId en_port = module_manager.find_module_port(top_module, std::string(DECODER_ENABLE_PORT_NAME)); + BasicPort en_port_info = module_manager.module_port(top_module, en_port); + + /* Find data-in port from the top-level module */ + ModulePortId din_port = module_manager.find_module_port(top_module, std::string(DECODER_DATA_IN_PORT_NAME)); + BasicPort din_port_info = module_manager.module_port(top_module, din_port); + + /* Data in port should match the number of configuration regions */ + VTR_ASSERT(din_port_info.get_width() == module_manager.regions(top_module).size()); + + /* Find BL and WL address port from the top-level module */ + ModulePortId bl_addr_port = module_manager.find_module_port(top_module, std::string(DECODER_BL_ADDRESS_PORT_NAME)); + BasicPort bl_addr_port_info = module_manager.module_port(top_module, bl_addr_port); + + ModulePortId wl_addr_port = module_manager.find_module_port(top_module, std::string(DECODER_WL_ADDRESS_PORT_NAME)); + BasicPort wl_addr_port_info = module_manager.module_port(top_module, wl_addr_port); + + /* Find the top-level number of BLs and WLs required to access each memory bit */ + size_t bl_addr_size = bl_addr_port_info.get_width(); + size_t wl_addr_size = wl_addr_port_info.get_width(); + + /* Each memory bank has a unified number of BL/WLs */ + size_t num_bls = 0; + for (const auto& curr_config_bits : num_config_bits) { + num_bls = std::max(num_bls, 
find_memory_decoder_data_size(curr_config_bits.first)); + } + + size_t num_wls = 0; + for (const auto& curr_config_bits : num_config_bits) { + num_wls = std::max(num_wls, find_memory_decoder_data_size(curr_config_bits.second)); + } + + /* Create separated memory bank circuitry, i.e., BL/WL decoders for each region */ + for (const ConfigRegionId& config_region : module_manager.regions(top_module)) { + /************************************************************** + * Add the BL decoder module + * Search the decoder library + * If we find one, we use the module. + * Otherwise, we create one and add it to the decoder library + */ + DecoderId bl_decoder_id = decoder_lib.find_decoder(bl_addr_size, num_bls, + true, true, false); + if (DecoderId::INVALID() == bl_decoder_id) { + bl_decoder_id = decoder_lib.add_decoder(bl_addr_size, num_bls, true, true, false); + } + VTR_ASSERT(DecoderId::INVALID() != bl_decoder_id); + + /* Create a module if not existed yet */ + std::string bl_decoder_module_name = generate_memory_decoder_with_data_in_subckt_name(bl_addr_size, num_bls); + ModuleId bl_decoder_module = module_manager.find_module(bl_decoder_module_name); + if (ModuleId::INVALID() == bl_decoder_module) { + /* BL decoder has the same ports as the frame-based decoders + * We reuse it here + */ + bl_decoder_module = build_bl_memory_decoder_module(module_manager, + decoder_lib, + bl_decoder_id); + } + VTR_ASSERT(ModuleId::INVALID() != bl_decoder_module); + size_t curr_bl_decoder_instance_id = module_manager.num_instance(top_module, bl_decoder_module); + module_manager.add_child_module(top_module, bl_decoder_module); + + /************************************************************** + * Add the WL decoder module + * Search the decoder library + * If we find one, we use the module. 
+ * Otherwise, we create one and add it to the decoder library + */ + DecoderId wl_decoder_id = decoder_lib.find_decoder(wl_addr_size, num_wls, + true, false, false); + if (DecoderId::INVALID() == wl_decoder_id) { + wl_decoder_id = decoder_lib.add_decoder(wl_addr_size, num_wls, true, false, false); + } + VTR_ASSERT(DecoderId::INVALID() != wl_decoder_id); + + /* Create a module if not existed yet */ + std::string wl_decoder_module_name = generate_memory_decoder_subckt_name(wl_addr_size, num_wls); + ModuleId wl_decoder_module = module_manager.find_module(wl_decoder_module_name); + if (ModuleId::INVALID() == wl_decoder_module) { + /* BL decoder has the same ports as the frame-based decoders + * We reuse it here + */ + wl_decoder_module = build_wl_memory_decoder_module(module_manager, + decoder_lib, + wl_decoder_id); + } + VTR_ASSERT(ModuleId::INVALID() != wl_decoder_module); + size_t curr_wl_decoder_instance_id = module_manager.num_instance(top_module, wl_decoder_module); + module_manager.add_child_module(top_module, wl_decoder_module); + + /************************************************************** + * Add module nets from the top module to BL decoder's inputs + */ + ModulePortId bl_decoder_en_port = module_manager.find_module_port(bl_decoder_module, std::string(DECODER_ENABLE_PORT_NAME)); + BasicPort bl_decoder_en_port_info = module_manager.module_port(bl_decoder_module, bl_decoder_en_port); + + ModulePortId bl_decoder_addr_port = module_manager.find_module_port(bl_decoder_module, std::string(DECODER_ADDRESS_PORT_NAME)); + BasicPort bl_decoder_addr_port_info = module_manager.module_port(bl_decoder_module, bl_decoder_addr_port); + + ModulePortId bl_decoder_din_port = module_manager.find_module_port(bl_decoder_module, std::string(DECODER_DATA_IN_PORT_NAME)); + BasicPort bl_decoder_din_port_info = module_manager.module_port(bl_decoder_module, bl_decoder_din_port); + + /* Data in port of the local BL decoder should always be 1 */ + VTR_ASSERT(1 == 
bl_decoder_din_port_info.get_width()); + + /* Top module Enable port -> BL Decoder Enable port */ + add_module_bus_nets(module_manager, + top_module, + top_module, 0, en_port, + bl_decoder_module, curr_bl_decoder_instance_id, bl_decoder_en_port); + + /* Top module Address port -> BL Decoder Address port */ + add_module_bus_nets(module_manager, + top_module, + top_module, 0, bl_addr_port, + bl_decoder_module, curr_bl_decoder_instance_id, bl_decoder_addr_port); + + /* Top module data_in port -> BL Decoder data_in port: + * Note that each region has independent data_in connection from the top-level module + * The pin index is the configuration region index + */ + ModuleNetId din_net = create_module_source_pin_net(module_manager, top_module, + top_module, 0, + din_port, + din_port_info.pins()[size_t(config_region)]); + VTR_ASSERT(ModuleNetId::INVALID() != din_net); + + /* Configure the net sink */ + module_manager.add_module_net_sink(top_module, din_net, bl_decoder_module, curr_bl_decoder_instance_id, bl_decoder_din_port, bl_decoder_din_port_info.pins()[0]); + + /************************************************************** + * Add module nets from the top module to WL decoder's inputs + */ + ModulePortId wl_decoder_en_port = module_manager.find_module_port(wl_decoder_module, std::string(DECODER_ENABLE_PORT_NAME)); + BasicPort wl_decoder_en_port_info = module_manager.module_port(wl_decoder_module, wl_decoder_en_port); + + ModulePortId wl_decoder_addr_port = module_manager.find_module_port(wl_decoder_module, std::string(DECODER_ADDRESS_PORT_NAME)); + BasicPort wl_decoder_addr_port_info = module_manager.module_port(wl_decoder_module, bl_decoder_addr_port); + + /* Top module Enable port -> WL Decoder Enable port */ + add_module_bus_nets(module_manager, + top_module, + top_module, 0, en_port, + wl_decoder_module, curr_wl_decoder_instance_id, wl_decoder_en_port); + + /* Top module Address port -> WL Decoder Address port */ + add_module_bus_nets(module_manager, + 
top_module, + top_module, 0, wl_addr_port, + wl_decoder_module, curr_wl_decoder_instance_id, wl_decoder_addr_port); + + /************************************************************** + * Add nets from BL data out to each configurable child + */ + size_t cur_bl_index = 0; + + ModulePortId bl_decoder_dout_port = module_manager.find_module_port(bl_decoder_module, std::string(DECODER_DATA_OUT_PORT_NAME)); + BasicPort bl_decoder_dout_port_info = module_manager.module_port(bl_decoder_module, bl_decoder_dout_port); + + std::map num_bls_per_tile; + std::map num_wls_per_tile; + for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { + ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id]; + vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; + num_bls_per_tile[coord.x()] = std::max(num_bls_per_tile[coord.x()], find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, CONFIG_MEM_QL_MEMORY_BANK))); + num_wls_per_tile[coord.y()] = std::max(num_wls_per_tile[coord.y()], find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, CONFIG_MEM_QL_MEMORY_BANK))); + + size_t child_instance = module_manager.region_configurable_child_instances(top_module, config_region)[child_id]; + + /* Find the BL port */ + ModulePortId child_bl_port = module_manager.find_module_port(child_module, std::string(MEMORY_BL_PORT_NAME)); + BasicPort child_bl_port_info = module_manager.module_port(child_module, child_bl_port); + + for (const size_t& sink_bl_pin : child_bl_port_info.pins()) { + /* Find the BL decoder data index: + * It should be the residual when divided by the number of BLs + */ + size_t bl_pin_id = std::floor(cur_bl_index / num_bls); + if (!(bl_pin_id < bl_decoder_dout_port_info.pins().size())) + 
VTR_ASSERT(bl_pin_id < bl_decoder_dout_port_info.pins().size()); + + /* Create net */ + ModuleNetId net = create_module_source_pin_net(module_manager, top_module, + bl_decoder_module, curr_bl_decoder_instance_id, + bl_decoder_dout_port, + bl_decoder_dout_port_info.pins()[bl_pin_id]); + VTR_ASSERT(ModuleNetId::INVALID() != net); + + /* Add net sink */ + module_manager.add_module_net_sink(top_module, net, + child_module, child_instance, child_bl_port, sink_bl_pin); + + /* Increment the BL index */ + cur_bl_index++; + } + } + + /************************************************************** + * Add nets from WL data out to each configurable child + */ + size_t cur_wl_index = 0; + + ModulePortId wl_decoder_dout_port = module_manager.find_module_port(wl_decoder_module, std::string(DECODER_DATA_OUT_PORT_NAME)); + BasicPort wl_decoder_dout_port_info = module_manager.module_port(wl_decoder_module, wl_decoder_dout_port); + + for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { + ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id]; + size_t child_instance = module_manager.region_configurable_child_instances(top_module, config_region)[child_id]; + + /* Find the WL port */ + ModulePortId child_wl_port = module_manager.find_module_port(child_module, std::string(MEMORY_WL_PORT_NAME)); + BasicPort child_wl_port_info = module_manager.module_port(child_module, child_wl_port); + + for (const size_t& sink_wl_pin : child_wl_port_info.pins()) { + /* Find the BL decoder data index: + * It should be the residual when divided by the number of BLs + */ + size_t wl_pin_id = cur_wl_index % num_wls; + + /* Create net */ + ModuleNetId net = create_module_source_pin_net(module_manager, top_module, + wl_decoder_module, curr_wl_decoder_instance_id, + wl_decoder_dout_port, + wl_decoder_dout_port_info.pins()[wl_pin_id]); + VTR_ASSERT(ModuleNetId::INVALID() != net); + + 
/* Add net sink */ + module_manager.add_module_net_sink(top_module, net, + child_module, child_instance, child_wl_port, sink_wl_pin); + + /* Increment the WL index */ + cur_wl_index++; + } + } + + /************************************************************** + * Add the BL and WL decoders to the end of configurable children list + * Note: this MUST be done after adding all the module nets to other regular configurable children + */ + module_manager.add_configurable_child(top_module, bl_decoder_module, curr_bl_decoder_instance_id); + module_manager.add_configurable_child_to_region(top_module, + config_region, + bl_decoder_module, + curr_bl_decoder_instance_id, + module_manager.configurable_children(top_module).size() - 1); + + module_manager.add_configurable_child(top_module, wl_decoder_module, curr_wl_decoder_instance_id); + module_manager.add_configurable_child_to_region(top_module, + config_region, + wl_decoder_module, + curr_wl_decoder_instance_id, + module_manager.configurable_children(top_module).size() - 1); + } +} + +} /* end namespace openfpga */ diff --git a/openfpga/src/fabric/build_top_module_memory_bank.h b/openfpga/src/fabric/build_top_module_memory_bank.h new file mode 100644 index 000000000..69f4408ef --- /dev/null +++ b/openfpga/src/fabric/build_top_module_memory_bank.h @@ -0,0 +1,33 @@ +#ifndef BUILD_TOP_MODULE_MEMORY_BANK_H +#define BUILD_TOP_MODULE_MEMORY_BANK_H + +/******************************************************************** + * Include header files that are required by function declaration + *******************************************************************/ + +#include +#include +#include "vtr_vector.h" +#include "vtr_ndmatrix.h" +#include "module_manager.h" +#include "circuit_library.h" +#include "decoder_library.h" +#include "build_top_module_memory_utils.h" + +/******************************************************************** + * Function declaration + *******************************************************************/ + +/* 
begin namespace openfpga */ +namespace openfpga { + +void add_top_module_nets_cmos_ql_memory_bank_config_bus(ModuleManager& module_manager, + DecoderLibrary& decoder_lib, + const ModuleId& top_module, + const CircuitLibrary& circuit_lib, + const CircuitModelId& sram_model, + const TopModuleNumConfigBits& num_config_bits); + +} /* end namespace openfpga */ + +#endif diff --git a/openfpga/src/fabric/build_top_module_memory_utils.h b/openfpga/src/fabric/build_top_module_memory_utils.h new file mode 100644 index 000000000..235f54055 --- /dev/null +++ b/openfpga/src/fabric/build_top_module_memory_utils.h @@ -0,0 +1,28 @@ +#ifndef BUILD_TOP_MODULE_MEMORY_UTILS_H +#define BUILD_TOP_MODULE_MEMORY_UTILS_H + +/******************************************************************** + * Include header files that are required by function declaration + *******************************************************************/ + +#include +#include +#include "vtr_vector.h" + +/******************************************************************** + * Function declaration + *******************************************************************/ + +/* begin namespace openfpga */ +namespace openfpga { + +/* A data structure to store the number of configuration bits for each configurable region + * of the top-level module. 
+ * For different configuration protocol, the std::pair represents different data + * See details in each function about how the data is organized + */ +typedef vtr::vector<ConfigRegionId, std::pair<size_t, size_t>> TopModuleNumConfigBits; + +} /* end namespace openfpga */ + +#endif From 475ce2c6d9a9d664ffbd7afe31d676362b334151 Mon Sep 17 00:00:00 2001 From: tangxifan Date: Sun, 5 Sep 2021 17:49:01 -0700 Subject: [PATCH 05/16] [Engine] Upgrade fabric generator in support QL memory bank connections --- .../fabric/build_top_module_memory_bank.cpp | 89 ++++++++++++++++--- 1 file changed, 75 insertions(+), 14 deletions(-) diff --git a/openfpga/src/fabric/build_top_module_memory_bank.cpp b/openfpga/src/fabric/build_top_module_memory_bank.cpp index 4854b711c..97986f3ed 100644 --- a/openfpga/src/fabric/build_top_module_memory_bank.cpp +++ b/openfpga/src/fabric/build_top_module_memory_bank.cpp @@ -3,6 +3,7 @@ * in the top module of FPGA fabric *******************************************************************/ #include +#include <limits> /* Headers from vtrutil library */ #include "vtr_assert.h" @@ -277,20 +278,75 @@ void add_top_module_nets_cmos_ql_memory_bank_config_bus(ModuleManager& module_ma wl_decoder_module, curr_wl_decoder_instance_id, wl_decoder_addr_port); /************************************************************** - * Add nets from BL data out to each configurable child + * Precompute the BLs and WLs distribution across the FPGA fabric + * The distribution is a matrix which contains the starting index of BL/WL for each column or row */ - size_t cur_bl_index = 0; + std::pair child_x_range(std::numeric_limits::max(), std::numeric_limits::min()); // Deposit an invalid range first: LSB->max(); MSB->min() + std::pair child_y_range(std::numeric_limits::max(), std::numeric_limits::min()); // Deposit an invalid range first: LSB->max(); MSB->min() + for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { + vtr::Point coord =
module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; + child_x_range.first = std::min(coord.x(), child_x_range.first); + child_x_range.second = std::max(coord.x(), child_x_range.second); + child_y_range.first = std::min(coord.y(), child_y_range.first); + child_y_range.second = std::max(coord.y(), child_y_range.second); + } - ModulePortId bl_decoder_dout_port = module_manager.find_module_port(bl_decoder_module, std::string(DECODER_DATA_OUT_PORT_NAME)); - BasicPort bl_decoder_dout_port_info = module_manager.module_port(bl_decoder_module, bl_decoder_dout_port); - - std::map num_bls_per_tile; - std::map num_wls_per_tile; + std::map num_bls_per_tile; + std::map num_wls_per_tile; for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id]; vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; num_bls_per_tile[coord.x()] = std::max(num_bls_per_tile[coord.x()], find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, CONFIG_MEM_QL_MEMORY_BANK))); num_wls_per_tile[coord.y()] = std::max(num_wls_per_tile[coord.y()], find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, CONFIG_MEM_QL_MEMORY_BANK))); + } + + std::map bl_starting_index_per_tile; + for (int ibl = child_x_range.first; ibl <= child_x_range.second; ++ibl) { + if (ibl == child_x_range.first) { + bl_starting_index_per_tile[ibl] = 0; + } else { + bl_starting_index_per_tile[ibl] = num_bls_per_tile[ibl - 1] + bl_starting_index_per_tile[ibl - 1]; + } + } + + std::map wl_starting_index_per_tile; + for (int iwl = child_y_range.first; iwl <= child_y_range.second; ++iwl) { + if (iwl == child_y_range.first) { + wl_starting_index_per_tile[iwl] = 0; + 
} else { + wl_starting_index_per_tile[iwl] = num_wls_per_tile[iwl - 1] + wl_starting_index_per_tile[iwl - 1]; + } + } + + /************************************************************** + * Add nets from BL data out to each configurable child + * BL data output pins are connected to the BL input pins of each PB/CB/SB + * For all the PB/CB/SB in the same column, they share the same set of BLs + * A quick example + * + * BL[i .. i + sqrt(N)] + * | + * | CLB[1][H] + * | +---------+ + * | | SRAM | + * +-->| [0..N] | + * | +---------+ + * | + * ... + * | CLB[1][1] + * | +---------+ + * | | SRAM | + * +-->| [0..N] | + * | +---------+ + * | + */ + ModulePortId bl_decoder_dout_port = module_manager.find_module_port(bl_decoder_module, std::string(DECODER_DATA_OUT_PORT_NAME)); + BasicPort bl_decoder_dout_port_info = module_manager.module_port(bl_decoder_module, bl_decoder_dout_port); + + for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { + ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id]; + vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; + int child_num_unique_blwls = find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, CONFIG_MEM_QL_MEMORY_BANK)); size_t child_instance = module_manager.region_configurable_child_instances(top_module, config_region)[child_id]; @@ -298,11 +354,13 @@ void add_top_module_nets_cmos_ql_memory_bank_config_bus(ModuleManager& module_ma ModulePortId child_bl_port = module_manager.find_module_port(child_module, std::string(MEMORY_BL_PORT_NAME)); BasicPort child_bl_port_info = module_manager.module_port(child_module, child_bl_port); + size_t cur_bl_index = 0; + for (const size_t& sink_bl_pin : child_bl_port_info.pins()) { /* Find the BL decoder data index: - * It should be the residual when divided by 
the number of BLs + * It should be the starting index plus an offset which is the residual when divided by the number of BLs in this tile */ - size_t bl_pin_id = std::floor(cur_bl_index / num_bls); + size_t bl_pin_id = bl_starting_index_per_tile[coord.x()] + std::floor(cur_bl_index / child_num_unique_blwls); if (!(bl_pin_id < bl_decoder_dout_port_info.pins().size())) VTR_ASSERT(bl_pin_id < bl_decoder_dout_port_info.pins().size()); @@ -325,24 +383,27 @@ void add_top_module_nets_cmos_ql_memory_bank_config_bus(ModuleManager& module_ma /************************************************************** * Add nets from WL data out to each configurable child */ - size_t cur_wl_index = 0; - ModulePortId wl_decoder_dout_port = module_manager.find_module_port(wl_decoder_module, std::string(DECODER_DATA_OUT_PORT_NAME)); BasicPort wl_decoder_dout_port_info = module_manager.module_port(wl_decoder_module, wl_decoder_dout_port); for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id]; + vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; + int child_num_unique_blwls = find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, CONFIG_MEM_QL_MEMORY_BANK)); + size_t child_instance = module_manager.region_configurable_child_instances(top_module, config_region)[child_id]; /* Find the WL port */ ModulePortId child_wl_port = module_manager.find_module_port(child_module, std::string(MEMORY_WL_PORT_NAME)); BasicPort child_wl_port_info = module_manager.module_port(child_module, child_wl_port); + size_t cur_wl_index = 0; + for (const size_t& sink_wl_pin : child_wl_port_info.pins()) { - /* Find the BL decoder data index: - * It should be the residual when divided by the number of BLs + /* Find the WL decoder 
data index: + * It should be the starting index plus an offset which is the residual when divided by the number of WLs in this tile */ - size_t wl_pin_id = cur_wl_index % num_wls; + size_t wl_pin_id = wl_starting_index_per_tile[coord.x()] + cur_wl_index % child_num_unique_blwls; /* Create net */ ModuleNetId net = create_module_source_pin_net(module_manager, top_module, From 1085e468e2ea55ea58d150efa6a35879ab7d7223 Mon Sep 17 00:00:00 2001 From: tangxifan Date: Sun, 5 Sep 2021 20:45:56 -0700 Subject: [PATCH 06/16] [Engine] Move most utilized functions for memory bank configuration protocol to a separated source file --- .../fabric/build_top_module_memory_bank.cpp | 49 +--- .../fpga_bitstream/build_fabric_bitstream.cpp | 8 + .../build_fabric_bitstream_memory_bank.cpp | 237 ++++++++++++++++++ .../build_fabric_bitstream_memory_bank.h | 29 +++ openfpga/src/utils/memory_bank_utils.cpp | 105 ++++++++ openfpga/src/utils/memory_bank_utils.h | 74 ++++++ 6 files changed, 466 insertions(+), 36 deletions(-) create mode 100644 openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp create mode 100644 openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.h create mode 100644 openfpga/src/utils/memory_bank_utils.cpp create mode 100644 openfpga/src/utils/memory_bank_utils.h diff --git a/openfpga/src/fabric/build_top_module_memory_bank.cpp b/openfpga/src/fabric/build_top_module_memory_bank.cpp index 97986f3ed..2df41c5e2 100644 --- a/openfpga/src/fabric/build_top_module_memory_bank.cpp +++ b/openfpga/src/fabric/build_top_module_memory_bank.cpp @@ -23,6 +23,7 @@ #include "memory_utils.h" #include "decoder_library_utils.h" #include "module_manager_utils.h" +#include "memory_bank_utils.h" #include "build_decoder_modules.h" #include "build_top_module_memory_bank.h" @@ -281,42 +282,18 @@ void add_top_module_nets_cmos_ql_memory_bank_config_bus(ModuleManager& module_ma * Precompute the BLs and WLs distribution across the FPGA fabric * The distribution is a matrix which 
contains the starting index of BL/WL for each column or row */ - std::pair child_x_range(std::numeric_limits::max(), std::numeric_limits::min()); // Deposit an invalid range first: LSB->max(); MSB->min() - std::pair child_y_range(std::numeric_limits::max(), std::numeric_limits::min()); // Deposit an invalid range first: LSB->max(); MSB->min() - for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { - vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; - child_x_range.first = std::min(coord.x(), child_x_range.first); - child_x_range.second = std::max(coord.x(), child_x_range.second); - child_y_range.first = std::min(coord.y(), child_y_range.first); - child_y_range.second = std::max(coord.y(), child_y_range.second); - } + std::pair child_x_range = compute_memory_bank_regional_configurable_child_x_range(module_manager, top_module, config_region); + std::pair child_y_range = compute_memory_bank_regional_configurable_child_y_range(module_manager, top_module, config_region); - std::map num_bls_per_tile; - std::map num_wls_per_tile; - for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { - ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id]; - vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; - num_bls_per_tile[coord.x()] = std::max(num_bls_per_tile[coord.x()], find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, CONFIG_MEM_QL_MEMORY_BANK))); - num_wls_per_tile[coord.y()] = std::max(num_wls_per_tile[coord.y()], find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, CONFIG_MEM_QL_MEMORY_BANK))); - } + std::map num_bls_per_tile = 
compute_memory_bank_regional_bitline_numbers_per_tile(module_manager, top_module, + config_region, + circuit_lib, sram_model); + std::map num_wls_per_tile = compute_memory_bank_regional_wordline_numbers_per_tile(module_manager, top_module, + config_region, + circuit_lib, sram_model); - std::map bl_starting_index_per_tile; - for (int ibl = child_x_range.first; ibl <= child_x_range.second; ++ibl) { - if (ibl == child_x_range.first) { - bl_starting_index_per_tile[ibl] = 0; - } else { - bl_starting_index_per_tile[ibl] = num_bls_per_tile[ibl - 1] + bl_starting_index_per_tile[ibl - 1]; - } - } - - std::map wl_starting_index_per_tile; - for (int iwl = child_y_range.first; iwl <= child_y_range.second; ++iwl) { - if (iwl == child_y_range.first) { - wl_starting_index_per_tile[iwl] = 0; - } else { - wl_starting_index_per_tile[iwl] = num_wls_per_tile[iwl - 1] + wl_starting_index_per_tile[iwl - 1]; - } - } + std::map bl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_x_range, num_bls_per_tile); + std::map wl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_y_range, num_wls_per_tile); /************************************************************** * Add nets from BL data out to each configurable child @@ -360,7 +337,7 @@ void add_top_module_nets_cmos_ql_memory_bank_config_bus(ModuleManager& module_ma /* Find the BL decoder data index: * It should be the starting index plus an offset which is the residual when divided by the number of BLs in this tile */ - size_t bl_pin_id = bl_starting_index_per_tile[coord.x()] + std::floor(cur_bl_index / child_num_unique_blwls); + size_t bl_pin_id = bl_start_index_per_tile[coord.x()] + std::floor(cur_bl_index / child_num_unique_blwls); if (!(bl_pin_id < bl_decoder_dout_port_info.pins().size())) VTR_ASSERT(bl_pin_id < bl_decoder_dout_port_info.pins().size()); @@ -403,7 +380,7 @@ void add_top_module_nets_cmos_ql_memory_bank_config_bus(ModuleManager& module_ma /* Find the WL 
decoder data index: * It should be the starting index plus an offset which is the residual when divided by the number of WLs in this tile */ - size_t wl_pin_id = wl_starting_index_per_tile[coord.x()] + cur_wl_index % child_num_unique_blwls; + size_t wl_pin_id = wl_start_index_per_tile[coord.x()] + cur_wl_index % child_num_unique_blwls; /* Create net */ ModuleNetId net = create_module_source_pin_net(module_manager, top_module, diff --git a/openfpga/src/fpga_bitstream/build_fabric_bitstream.cpp b/openfpga/src/fpga_bitstream/build_fabric_bitstream.cpp index cf028b084..9065a6ae5 100644 --- a/openfpga/src/fpga_bitstream/build_fabric_bitstream.cpp +++ b/openfpga/src/fpga_bitstream/build_fabric_bitstream.cpp @@ -19,6 +19,7 @@ #include "decoder_library_utils.h" #include "bitstream_manager_utils.h" #include "build_fabric_bitstream.h" +#include "build_fabric_bitstream_memory_bank.h" /* begin namespace openfpga */ namespace openfpga { @@ -575,6 +576,13 @@ void build_module_fabric_dependent_bitstream(const ConfigProtocol& config_protoc } break; } + case CONFIG_MEM_QL_MEMORY_BANK: { + build_module_fabric_dependent_bitstream_ql_memory_bank(config_protocol, + bitstream_manager, top_block, + module_manager, top_module, + fabric_bitstream); + break; + } case CONFIG_MEM_FRAME_BASED: { /* Find address port size */ diff --git a/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp b/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp new file mode 100644 index 000000000..1fefa79b2 --- /dev/null +++ b/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp @@ -0,0 +1,237 @@ +/******************************************************************** + * This file includes functions to build fabric dependent bitstream + * for memory bank configuration protocol + *******************************************************************/ +#include +#include +#include + +/* Headers from vtrutil library */ +#include "vtr_assert.h" +#include "vtr_log.h" +#include 
"vtr_time.h" + +/* Headers from openfpgautil library */ +#include "openfpga_decode.h" + +#include "openfpga_reserved_words.h" +#include "openfpga_naming.h" + +#include "decoder_library_utils.h" +#include "bitstream_manager_utils.h" +#include "build_fabric_bitstream_memory_bank.h" + +/* begin namespace openfpga */ +namespace openfpga { + +/******************************************************************** + * This function aims to build a bitstream for memory-bank protocol + * It will walk through all the configurable children under a module + * in a recursive way, following a Depth-First Search (DFS) strategy + * For each configuration child, we use its instance name as a key to spot the + * configuration bits in bitstream manager. + * Note that it is guarentee that the instance name in module manager is + * consistent with the block names in bitstream manager + * We use this link to reorganize the bitstream in the sequence of memories as we stored + * in the configurable_children() and configurable_child_instances() of each module of module manager + * + * In such configuration organization, each memory cell has an unique index. + * Using this index, we can infer the address codes for both BL and WL decoders. + * Note that, we must get the number of BLs and WLs before using this function! 
+ *******************************************************************/
+static
+void rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(const BitstreamManager& bitstream_manager,
+                                                                         const ConfigBlockId& parent_block,
+                                                                         const ModuleManager& module_manager,
+                                                                         const ModuleId& top_module,
+                                                                         const ModuleId& parent_module,
+                                                                         const ConfigRegionId& config_region,
+                                                                         const size_t& bl_addr_size,
+                                                                         const size_t& wl_addr_size,
+                                                                         const size_t& num_bls,
+                                                                         const size_t& num_wls,
+                                                                         size_t& cur_mem_index,
+                                                                         FabricBitstream& fabric_bitstream,
+                                                                         const FabricBitRegionId& fabric_bitstream_region) {
+
+  /* Depth-first search: if we have any children in the parent_block,
+   * we dive to the next level first!
+   */
+  if (0 < bitstream_manager.block_children(parent_block).size()) {
+    /* For top module:
+     * - Use regional configurable children
+     * - we will skip the two decoders at the end of the configurable children list
+     */
+    if (parent_module == top_module) {
+      std::vector<ModuleId> configurable_children = module_manager.region_configurable_children(parent_module, config_region);
+
+      VTR_ASSERT(2 <= configurable_children.size());
+      size_t num_configurable_children = configurable_children.size() - 2;
+
+      /* Early exit if there are no configurable children besides the two decoders */
+      if (0 == num_configurable_children) {
+        /* Ensure that there should be no configuration bits in the parent block */
+        VTR_ASSERT(0 == bitstream_manager.block_bits(parent_block).size());
+        return;
+      }
+
+      for (size_t child_id = 0; child_id < num_configurable_children; ++child_id) {
+        ModuleId child_module = configurable_children[child_id];
+        size_t child_instance = module_manager.region_configurable_child_instances(parent_module, config_region)[child_id];
+
+        /* Get the instance name: it is the key to find the child block in the bitstream manager */
+        std::string instance_name = module_manager.instance_name(parent_module, child_module, child_instance);
+
+        /* Find the child block that matches the instance name! */
+        ConfigBlockId child_block = bitstream_manager.find_child_block(parent_block, instance_name);
+        /* We must have one valid block id! */
+        VTR_ASSERT(true == bitstream_manager.valid_block_id(child_block));
+
+        /* Go recursively */
+        rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(bitstream_manager, child_block,
+                                                                            module_manager, top_module, child_module,
+                                                                            config_region,
+                                                                            bl_addr_size, wl_addr_size,
+                                                                            num_bls, num_wls,
+                                                                            cur_mem_index,
+                                                                            fabric_bitstream,
+                                                                            fabric_bitstream_region);
+      }
+    } else {
+      VTR_ASSERT(parent_module != top_module);
+      /* For other modules:
+       * - Use configurable children directly
+       * - no need to exclude decoders as they are not there
+       */
+      std::vector<ModuleId> configurable_children = module_manager.configurable_children(parent_module);
+
+      size_t num_configurable_children = configurable_children.size();
+
+      /* Early exit if there are no configurable children */
+      if (0 == num_configurable_children) {
+        /* Ensure that there should be no configuration bits in the parent block */
+        VTR_ASSERT(0 == bitstream_manager.block_bits(parent_block).size());
+        return;
+      }
+
+      for (size_t child_id = 0; child_id < num_configurable_children; ++child_id) {
+        ModuleId child_module = configurable_children[child_id];
+        size_t child_instance = module_manager.configurable_child_instances(parent_module)[child_id];
+
+        /* Get the instance name: it is the key to find the child block in the bitstream manager */
+        std::string instance_name = module_manager.instance_name(parent_module, child_module, child_instance);
+
+        /* Find the child block that matches the instance name! */
+        ConfigBlockId child_block = bitstream_manager.find_child_block(parent_block, instance_name);
+        /* We must have one valid block id! */
+        VTR_ASSERT(true == bitstream_manager.valid_block_id(child_block));
+
+        /* Go recursively */
+        rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(bitstream_manager, child_block,
+                                                                            module_manager, top_module, child_module,
+                                                                            config_region,
+                                                                            bl_addr_size, wl_addr_size,
+                                                                            num_bls, num_wls,
+                                                                            cur_mem_index,
+                                                                            fabric_bitstream,
+                                                                            fabric_bitstream_region);
+      }
+    }
+    /* Ensure that there should be no configuration bits in the parent block */
+    VTR_ASSERT(0 == bitstream_manager.block_bits(parent_block).size());
+
+    return;
+  }
+
+  /* Reaching here means that this block is a leaf node.
+   * We add the configuration bits to the fabric_bitstream,
+   * And then, we can return
+   */
+  for (const ConfigBitId& config_bit : bitstream_manager.block_bits(parent_block)) {
+    FabricBitId fabric_bit = fabric_bitstream.add_bit(config_bit);
+
+    /* Find BL address: quotient of the memory index (integer division already truncates) */
+    size_t cur_bl_index = cur_mem_index / num_bls;
+    std::vector<char> bl_addr_bits_vec = itobin_charvec(cur_bl_index, bl_addr_size);
+
+    /* Find WL address: remainder of the memory index */
+    size_t cur_wl_index = cur_mem_index % num_wls;
+    std::vector<char> wl_addr_bits_vec = itobin_charvec(cur_wl_index, wl_addr_size);
+
+    /* Set BL address */
+    fabric_bitstream.set_bit_bl_address(fabric_bit, bl_addr_bits_vec);
+
+    /* Set WL address */
+    fabric_bitstream.set_bit_wl_address(fabric_bit, wl_addr_bits_vec);
+
+    /* Set data input */
+    fabric_bitstream.set_bit_din(fabric_bit, bitstream_manager.bit_value(config_bit));
+
+    /* Add the bit to the region */
+    fabric_bitstream.add_bit_to_region(fabric_bitstream_region, fabric_bit);
+
+    /* Increase the memory index */
+    cur_mem_index++;
+  }
+}
+
+/********************************************************************
+ * Main function to build a fabric-dependent bitstream for the QL memory bank protocol:
+ * each configurable region of the top module gets its own bitstream region and memory index
+ *******************************************************************/
+void build_module_fabric_dependent_bitstream_ql_memory_bank(const ConfigProtocol& config_protocol,
+                                                            const BitstreamManager& bitstream_manager,
+                                                            const ConfigBlockId& top_block,
+                                                            const ModuleManager& module_manager,
+                                                            const ModuleId& top_module,
+                                                            FabricBitstream& fabric_bitstream) {
+  /* Ensure we are in the correct type of configuration protocol */
+  VTR_ASSERT(config_protocol.type() == CONFIG_MEM_QL_MEMORY_BANK);
+
+  /* Find global BL address port size */
+  ModulePortId bl_addr_port = module_manager.find_module_port(top_module, std::string(DECODER_BL_ADDRESS_PORT_NAME));
+  BasicPort bl_addr_port_info = module_manager.module_port(top_module, bl_addr_port);
+
+  /* Find global WL address port size */
+  ModulePortId wl_addr_port = module_manager.find_module_port(top_module, std::string(DECODER_WL_ADDRESS_PORT_NAME));
+  BasicPort wl_addr_port_info = module_manager.module_port(top_module, wl_addr_port);
+
+  /* Reserve bits before build-up */
+  fabric_bitstream.set_use_address(true);
+  fabric_bitstream.set_use_wl_address(true);
+  fabric_bitstream.set_bl_address_length(bl_addr_port_info.get_width());
+  fabric_bitstream.set_wl_address_length(wl_addr_port_info.get_width());
+  fabric_bitstream.reserve_bits(bitstream_manager.num_bits());
+
+  /* Build bitstreams by region */
+  for (const ConfigRegionId& config_region : module_manager.regions(top_module)) {
+    size_t cur_mem_index = 0;
+
+    /* Find port information for local BL and WL decoder in this region: they are the last two configurable children */
+    std::vector<ModuleId> configurable_children = module_manager.region_configurable_children(top_module, config_region);
+    VTR_ASSERT(2 <= configurable_children.size());
+    ModuleId bl_decoder_module = configurable_children[configurable_children.size() - 2];
+    ModuleId wl_decoder_module = configurable_children[configurable_children.size() - 1];
+
+    ModulePortId bl_port = module_manager.find_module_port(bl_decoder_module, std::string(DECODER_DATA_OUT_PORT_NAME));
+    BasicPort bl_port_info = module_manager.module_port(bl_decoder_module, bl_port);
+
+    ModulePortId wl_port = module_manager.find_module_port(wl_decoder_module, std::string(DECODER_DATA_OUT_PORT_NAME));
+    BasicPort wl_port_info = module_manager.module_port(wl_decoder_module, wl_port);
+
+    /* Build the bitstream for all the blocks in this region */
+    FabricBitRegionId fabric_bitstream_region = fabric_bitstream.add_region();
+    rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(bitstream_manager, top_block,
+                                                                        module_manager, top_module, top_module,
+                                                                        config_region,
+                                                                        bl_addr_port_info.get_width(),
+                                                                        wl_addr_port_info.get_width(),
+                                                                        bl_port_info.get_width(),
+                                                                        wl_port_info.get_width(),
+                                                                        cur_mem_index,
+                                                                        fabric_bitstream,
+                                                                        fabric_bitstream_region);
+  }
+}
+
+} /* end namespace openfpga */
diff --git a/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.h b/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.h
new file mode 100644
index 000000000..41174a2fe
--- /dev/null
+++ b/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.h
@@ -0,0 +1,29 @@
+#ifndef BUILD_FABRIC_BITSTREAM_MEMORY_BANK_H
+#define BUILD_FABRIC_BITSTREAM_MEMORY_BANK_H
+
+/********************************************************************
+ * Include header files that are required by function declaration
+ * NOTE(review): the system header name below was missing in this patch text; <vector> is assumed - confirm
+ *******************************************************************/
+#include <vector>
+#include "config_protocol.h"
+#include "bitstream_manager.h"
+#include "fabric_bitstream.h"
+#include "module_manager.h"
+
+/********************************************************************
+ * Function declaration
+ *******************************************************************/
+/* begin namespace openfpga */
+namespace openfpga {
+
+void build_module_fabric_dependent_bitstream_ql_memory_bank(const ConfigProtocol& config_protocol,
+                                                            const BitstreamManager& bitstream_manager,
+                                                            const ConfigBlockId& top_block,
+                                                            const ModuleManager& module_manager,
+                                                            const ModuleId& top_module,
+                                                            FabricBitstream& fabric_bitstream);
+
+} /* end namespace openfpga */
+
+#endif
diff --git a/openfpga/src/utils/memory_bank_utils.cpp
b/openfpga/src/utils/memory_bank_utils.cpp new file mode 100644 index 000000000..d43909e4d --- /dev/null +++ b/openfpga/src/utils/memory_bank_utils.cpp @@ -0,0 +1,105 @@ +/******************************************************************** + * This file includes functions that are used to organize memories + * in the top module of FPGA fabric + *******************************************************************/ +#include +#include + +/* Headers from vtrutil library */ +#include "vtr_assert.h" +#include "vtr_log.h" +#include "vtr_time.h" + +/* Headers from vpr library */ +#include "vpr_utils.h" + +/* Headers from openfpgashell library */ +#include "command_exit_codes.h" + +#include "rr_gsb_utils.h" +#include "openfpga_reserved_words.h" +#include "openfpga_naming.h" + +#include "memory_utils.h" +#include "decoder_library_utils.h" +#include "module_manager_utils.h" +#include "memory_bank_utils.h" + +/* begin namespace openfpga */ +namespace openfpga { + +std::pair compute_memory_bank_regional_configurable_child_x_range(const ModuleManager& module_manager, + const ModuleId& top_module, + const ConfigRegionId& config_region) { + std::pair child_x_range(std::numeric_limits::max(), std::numeric_limits::min()); // Deposit an invalid range first: LSB->max(); MSB->min() + for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { + vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; + child_x_range.first = std::min(coord.x(), child_x_range.first); + child_x_range.second = std::max(coord.x(), child_x_range.second); + } + + VTR_ASSERT(child_x_range.first <= child_x_range.second); + return child_x_range; +} + +std::pair compute_memory_bank_regional_configurable_child_y_range(const ModuleManager& module_manager, + const ModuleId& top_module, + const ConfigRegionId& config_region) { + std::pair child_y_range(std::numeric_limits::max(), 
std::numeric_limits::min()); // Deposit an invalid range first: LSB->max(); MSB->min() + for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { + vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; + child_y_range.first = std::min(coord.y(), child_y_range.first); + child_y_range.second = std::max(coord.y(), child_y_range.second); + } + + VTR_ASSERT(child_y_range.first <= child_y_range.second); + return child_y_range; +} + +std::map compute_memory_bank_regional_bitline_numbers_per_tile(const ModuleManager& module_manager, + const ModuleId& top_module, + const ConfigRegionId& config_region, + const CircuitLibrary& circuit_lib, + const CircuitModelId& sram_model) { + std::map num_bls_per_tile; + + for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { + ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id]; + vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; + num_bls_per_tile[coord.x()] = std::max(num_bls_per_tile[coord.x()], find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, CONFIG_MEM_QL_MEMORY_BANK))); + } + + return num_bls_per_tile; +} + +std::map compute_memory_bank_regional_wordline_numbers_per_tile(const ModuleManager& module_manager, + const ModuleId& top_module, + const ConfigRegionId& config_region, + const CircuitLibrary& circuit_lib, + const CircuitModelId& sram_model) { + std::map num_wls_per_tile; + + for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { + ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id]; + vtr::Point coord = 
module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; + num_wls_per_tile[coord.y()] = std::max(num_wls_per_tile[coord.y()], find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, CONFIG_MEM_QL_MEMORY_BANK))); + } + + return num_wls_per_tile; +} + +std::map compute_memory_bank_regional_blwl_start_index_per_tile(const std::pair& child_xy_range, + const std::map& num_blwls_per_tile) { + std::map blwl_start_index_per_tile; + for (int iblwl = child_xy_range.first; iblwl <= child_xy_range.second; ++iblwl) { + if (iblwl == child_xy_range.first) { + blwl_start_index_per_tile[iblwl] = 0; + } else { + blwl_start_index_per_tile[iblwl] = num_blwls_per_tile.at(iblwl - 1) + blwl_start_index_per_tile[iblwl - 1]; + } + } + return blwl_start_index_per_tile; +} + +} /* end namespace openfpga */ + diff --git a/openfpga/src/utils/memory_bank_utils.h b/openfpga/src/utils/memory_bank_utils.h new file mode 100644 index 000000000..fd08c5e3d --- /dev/null +++ b/openfpga/src/utils/memory_bank_utils.h @@ -0,0 +1,74 @@ +#ifndef MEMORY_BANK_UTILS_H +#define MEMORY_BANK_UTILS_H + +/******************************************************************** + * Include header files that are required by function declaration + *******************************************************************/ + +#include +#include +#include "vtr_vector.h" +#include "vtr_ndmatrix.h" +#include "module_manager.h" +#include "circuit_library.h" +#include "decoder_library.h" +#include "build_top_module_memory_utils.h" + +/******************************************************************** + * Function declaration + *******************************************************************/ + +/* begin namespace openfpga */ +namespace openfpga { + +/** + * @brief Precompute the range of x coordinates of all the configurable children under a specific configuration region + * The lower bound is stored in the first element of the return 
struct + * The upper bound is stored in the second element of the return struct + */ +std::pair compute_memory_bank_regional_configurable_child_x_range(const ModuleManager& module_manager, + const ModuleId& top_module, + const ConfigRegionId& config_region); + +/** + * @brief Precompute the range of y coordinates of all the configurable children under a specific configuration region + * The lower bound is stored in the first element of the return struct + * The upper bound is stored in the second element of the return struct + */ +std::pair compute_memory_bank_regional_configurable_child_y_range(const ModuleManager& module_manager, + const ModuleId& top_module, + const ConfigRegionId& config_region); + +/** + * @brief Precompute the number of bit lines required by each tile under a specific configuration region + * @note + * Not every index in the range computed by the compute_memory_bank_regional_configurable_child_x_range() function has a positive number of bit lines + * If no entry is found for an index (e.g., std::map::find(x) returns end()), it means that no bit lines are required in that tile + */ +std::map compute_memory_bank_regional_bitline_numbers_per_tile(const ModuleManager& module_manager, + const ModuleId& top_module, + const ConfigRegionId& config_region, + const CircuitLibrary& circuit_lib, + const CircuitModelId& sram_model); +/** + * @brief Precompute the number of word lines required by each tile under a specific configuration region + * @note + * Not every index in the range computed by the compute_memory_bank_regional_configurable_child_y_range() function has a positive number of word lines + * If no entry is found for an index (e.g., std::map::find(y) returns end()), it means that no word lines are required in that tile + */ +std::map compute_memory_bank_regional_wordline_numbers_per_tile(const ModuleManager& module_manager, + const ModuleId& top_module, + const ConfigRegionId& config_region, + const CircuitLibrary& circuit_lib, + const CircuitModelId& sram_model); 
+ +/** + * @brief Precompute the BLs and WLs distribution across the FPGA fabric + * The distribution is a matrix which contains the starting index of BL/WL for each column or row + */ +std::map compute_memory_bank_regional_blwl_start_index_per_tile(const std::pair& child_xy_range, + const std::map& num_blwls_per_tile); + +} /* end namespace openfpga */ + +#endif From 6f09f5f7adbaeb9a75bedf62c2690cf01654b709 Mon Sep 17 00:00:00 2001 From: tangxifan Date: Sun, 5 Sep 2021 21:25:58 -0700 Subject: [PATCH 07/16] [FPGA-Bitstream] Upgrade bitstream generator to support QL memory bank --- openfpga/src/base/openfpga_bitstream.cpp | 1 + .../fpga_bitstream/build_fabric_bitstream.cpp | 6 ++- .../fpga_bitstream/build_fabric_bitstream.h | 2 + .../build_fabric_bitstream_memory_bank.cpp | 54 ++++++++++++++++--- .../build_fabric_bitstream_memory_bank.h | 2 + 5 files changed, 55 insertions(+), 10 deletions(-) diff --git a/openfpga/src/base/openfpga_bitstream.cpp b/openfpga/src/base/openfpga_bitstream.cpp index b978b200d..315f647bc 100644 --- a/openfpga/src/base/openfpga_bitstream.cpp +++ b/openfpga/src/base/openfpga_bitstream.cpp @@ -76,6 +76,7 @@ int build_fabric_bitstream(OpenfpgaContext& openfpga_ctx, /* Build fabric bitstream here */ openfpga_ctx.mutable_fabric_bitstream() = build_fabric_dependent_bitstream(openfpga_ctx.bitstream_manager(), openfpga_ctx.module_graph(), + openfpga_ctx.arch().circuit_lib, openfpga_ctx.arch().config_protocol, cmd_context.option_enable(cmd, opt_verbose)); diff --git a/openfpga/src/fpga_bitstream/build_fabric_bitstream.cpp b/openfpga/src/fpga_bitstream/build_fabric_bitstream.cpp index 9065a6ae5..216f44354 100644 --- a/openfpga/src/fpga_bitstream/build_fabric_bitstream.cpp +++ b/openfpga/src/fpga_bitstream/build_fabric_bitstream.cpp @@ -490,6 +490,7 @@ void rec_build_module_fabric_dependent_frame_bitstream(const BitstreamManager& b *******************************************************************/ static void 
build_module_fabric_dependent_bitstream(const ConfigProtocol& config_protocol, + const CircuitLibrary& circuit_lib, const BitstreamManager& bitstream_manager, const ConfigBlockId& top_block, const ModuleManager& module_manager, @@ -577,7 +578,7 @@ void build_module_fabric_dependent_bitstream(const ConfigProtocol& config_protoc break; } case CONFIG_MEM_QL_MEMORY_BANK: { - build_module_fabric_dependent_bitstream_ql_memory_bank(config_protocol, + build_module_fabric_dependent_bitstream_ql_memory_bank(config_protocol, circuit_lib, bitstream_manager, top_block, module_manager, top_module, fabric_bitstream); @@ -702,6 +703,7 @@ void build_module_fabric_dependent_bitstream(const ConfigProtocol& config_protoc *******************************************************************/ FabricBitstream build_fabric_dependent_bitstream(const BitstreamManager& bitstream_manager, const ModuleManager& module_manager, + const CircuitLibrary& circuit_lib, const ConfigProtocol& config_protocol, const bool& verbose) { FabricBitstream fabric_bitstream; @@ -720,7 +722,7 @@ FabricBitstream build_fabric_dependent_bitstream(const BitstreamManager& bitstre VTR_ASSERT(0 == top_module_name.compare(bitstream_manager.block_name(top_block[0]))); /* Start build-up formally */ - build_module_fabric_dependent_bitstream(config_protocol, + build_module_fabric_dependent_bitstream(config_protocol, circuit_lib, bitstream_manager, top_block[0], module_manager, top_module, fabric_bitstream); diff --git a/openfpga/src/fpga_bitstream/build_fabric_bitstream.h b/openfpga/src/fpga_bitstream/build_fabric_bitstream.h index 3daadde75..8905b03a5 100644 --- a/openfpga/src/fpga_bitstream/build_fabric_bitstream.h +++ b/openfpga/src/fpga_bitstream/build_fabric_bitstream.h @@ -6,6 +6,7 @@ *******************************************************************/ #include #include "config_protocol.h" +#include "circuit_library.h" #include "bitstream_manager.h" #include "fabric_bitstream.h" #include "module_manager.h" @@ -19,6 +20,7 
@@ namespace openfpga { FabricBitstream build_fabric_dependent_bitstream(const BitstreamManager& bitstream_manager, const ModuleManager& module_manager, + const CircuitLibrary& circuit_lib, const ConfigProtocol& config_protocol, const bool& verbose); diff --git a/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp b/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp index 1fefa79b2..5399832b2 100644 --- a/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp +++ b/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp @@ -19,6 +19,7 @@ #include "decoder_library_utils.h" #include "bitstream_manager_utils.h" +#include "memory_bank_utils.h" #include "build_fabric_bitstream_memory_bank.h" /* begin namespace openfpga */ @@ -48,8 +49,11 @@ void rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(const B const ConfigRegionId& config_region, const size_t& bl_addr_size, const size_t& wl_addr_size, - const size_t& num_bls, - const size_t& num_wls, + const std::map& num_bls_per_tile, + const std::map& bl_start_index_per_tile, + const std::map& num_wls_per_tile, + const std::map& wl_start_index_per_tile, + vtr::Point& tile_coord, size_t& cur_mem_index, FabricBitstream& fabric_bitstream, const FabricBitRegionId& fabric_bitstream_region) { @@ -79,6 +83,10 @@ void rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(const B ModuleId child_module = configurable_children[child_id]; size_t child_instance = module_manager.region_configurable_child_instances(parent_module, config_region)[child_id]; + if (parent_module == top_module) { + tile_coord = module_manager.region_configurable_child_coordinates(parent_module, config_region)[child_id]; + } + /* Get the instance name and ensure it is not empty */ std::string instance_name = module_manager.instance_name(parent_module, child_module, child_instance); @@ -87,12 +95,19 @@ void rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(const 
B /* We must have one valid block id! */ VTR_ASSERT(true == bitstream_manager.valid_block_id(child_block)); + /* Reset the memory index for each children under the top-level module */ + if (parent_module == top_module) { + cur_mem_index = 0; + } + /* Go recursively */ rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(bitstream_manager, child_block, module_manager, top_module, child_module, config_region, bl_addr_size, wl_addr_size, - num_bls, num_wls, + num_bls_per_tile, bl_start_index_per_tile, + num_wls_per_tile, wl_start_index_per_tile, + tile_coord, cur_mem_index, fabric_bitstream, fabric_bitstream_region); @@ -131,7 +146,9 @@ void rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(const B module_manager, top_module, child_module, config_region, bl_addr_size, wl_addr_size, - num_bls, num_wls, + num_bls_per_tile, bl_start_index_per_tile, + num_wls_per_tile, wl_start_index_per_tile, + tile_coord, cur_mem_index, fabric_bitstream, fabric_bitstream_region); @@ -151,11 +168,11 @@ void rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(const B FabricBitId fabric_bit = fabric_bitstream.add_bit(config_bit); /* Find BL address */ - size_t cur_bl_index = std::floor(cur_mem_index / num_bls); + size_t cur_bl_index = bl_start_index_per_tile.at(tile_coord.x()) + std::floor(cur_mem_index / num_bls_per_tile.at(tile_coord.x())); std::vector bl_addr_bits_vec = itobin_charvec(cur_bl_index, bl_addr_size); /* Find WL address */ - size_t cur_wl_index = cur_mem_index % num_wls; + size_t cur_wl_index = wl_start_index_per_tile.at(tile_coord.y()) + cur_mem_index % num_wls_per_tile.at(tile_coord.y()); std::vector wl_addr_bits_vec = itobin_charvec(cur_wl_index, wl_addr_size); /* Set BL address */ @@ -180,6 +197,7 @@ void rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(const B * by considering the configuration protocol types *******************************************************************/ void 
build_module_fabric_dependent_bitstream_ql_memory_bank(const ConfigProtocol& config_protocol, + const CircuitLibrary& circuit_lib, const BitstreamManager& bitstream_manager, const ConfigBlockId& top_block, const ModuleManager& module_manager, @@ -221,13 +239,33 @@ void build_module_fabric_dependent_bitstream_ql_memory_bank(const ConfigProtocol /* Build the bitstream for all the blocks in this region */ FabricBitRegionId fabric_bitstream_region = fabric_bitstream.add_region(); + + /************************************************************** + * Precompute the BLs and WLs distribution across the FPGA fabric + * The distribution is a matrix which contains the starting index of BL/WL for each column or row + */ + std::pair child_x_range = compute_memory_bank_regional_configurable_child_x_range(module_manager, top_module, config_region); + std::pair child_y_range = compute_memory_bank_regional_configurable_child_y_range(module_manager, top_module, config_region); + + std::map num_bls_per_tile = compute_memory_bank_regional_bitline_numbers_per_tile(module_manager, top_module, + config_region, + circuit_lib, config_protocol.memory_model()); + std::map num_wls_per_tile = compute_memory_bank_regional_wordline_numbers_per_tile(module_manager, top_module, + config_region, + circuit_lib, config_protocol.memory_model()); + + std::map bl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_x_range, num_bls_per_tile); + std::map wl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_y_range, num_wls_per_tile); + + vtr::Point temp_coord; rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(bitstream_manager, top_block, module_manager, top_module, top_module, config_region, bl_addr_port_info.get_width(), wl_addr_port_info.get_width(), - bl_port_info.get_width(), - wl_port_info.get_width(), + num_bls_per_tile, bl_start_index_per_tile, + num_wls_per_tile, wl_start_index_per_tile, + temp_coord, 
cur_mem_index, fabric_bitstream, fabric_bitstream_region); diff --git a/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.h b/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.h index 41174a2fe..58d511394 100644 --- a/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.h +++ b/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.h @@ -6,6 +6,7 @@ *******************************************************************/ #include #include "config_protocol.h" +#include "circuit_library.h" #include "bitstream_manager.h" #include "fabric_bitstream.h" #include "module_manager.h" @@ -18,6 +19,7 @@ namespace openfpga { void build_module_fabric_dependent_bitstream_ql_memory_bank(const ConfigProtocol& config_protocol, + const CircuitLibrary& circuit_lib, const BitstreamManager& bitstream_manager, const ConfigBlockId& top_block, const ModuleManager& module_manager, From 1aac3197ebb1ecd2372a7ae55f61311cd27da037 Mon Sep 17 00:00:00 2001 From: tangxifan Date: Sun, 5 Sep 2021 21:38:00 -0700 Subject: [PATCH 08/16] [FPGA-Verilog] Upgrade testbench generator to support QL memory bank --- .../fpga_verilog/verilog_top_testbench.cpp | 45 +---- .../verilog_top_testbench_constants.h | 42 +++++ .../verilog_top_testbench_memory_bank.cpp | 175 ++++++++++++++++++ .../verilog_top_testbench_memory_bank.h | 43 +++++ 4 files changed, 270 insertions(+), 35 deletions(-) create mode 100644 openfpga/src/fpga_verilog/verilog_top_testbench_constants.h create mode 100644 openfpga/src/fpga_verilog/verilog_top_testbench_memory_bank.cpp create mode 100644 openfpga/src/fpga_verilog/verilog_top_testbench_memory_bank.h diff --git a/openfpga/src/fpga_verilog/verilog_top_testbench.cpp b/openfpga/src/fpga_verilog/verilog_top_testbench.cpp index e4778c292..4751823a3 100644 --- a/openfpga/src/fpga_verilog/verilog_top_testbench.cpp +++ b/openfpga/src/fpga_verilog/verilog_top_testbench.cpp @@ -29,46 +29,14 @@ #include "verilog_constants.h" #include "verilog_writer_utils.h" 
#include "verilog_testbench_utils.h" +#include "verilog_top_testbench_memory_bank.h" #include "verilog_top_testbench.h" +#include "verilog_top_testbench_constants.h" + /* begin namespace openfpga */ namespace openfpga { -/******************************************************************** - * Local variables used only in this file - *******************************************************************/ -constexpr char* TOP_TESTBENCH_REFERENCE_INSTANCE_NAME = "REF_DUT"; -constexpr char* TOP_TESTBENCH_FPGA_INSTANCE_NAME = "FPGA_DUT"; -constexpr char* TOP_TESTBENCH_REFERENCE_OUTPUT_POSTFIX = "_benchmark"; -constexpr char* TOP_TESTBENCH_FPGA_OUTPUT_POSTFIX = "_fpga"; - -constexpr char* TOP_TESTBENCH_CHECKFLAG_PORT_POSTFIX = "_flag"; - -constexpr char* TOP_TESTBENCH_PROG_TASK_NAME = "prog_cycle_task"; - -constexpr char* TOP_TESTBENCH_SIM_START_PORT_NAME = "sim_start"; - -constexpr char* TOP_TESTBENCH_ERROR_COUNTER = "nb_error"; - -constexpr char* TOP_TB_RESET_PORT_NAME = "greset"; -constexpr char* TOP_TB_SET_PORT_NAME = "gset"; -constexpr char* TOP_TB_PROG_RESET_PORT_NAME = "prog_reset"; -constexpr char* TOP_TB_PROG_SET_PORT_NAME = "prog_set"; -constexpr char* TOP_TB_CONFIG_DONE_PORT_NAME = "config_done"; -constexpr char* TOP_TB_OP_CLOCK_PORT_NAME = "op_clock"; -constexpr char* TOP_TB_OP_CLOCK_PORT_PREFIX = "operating_clk_"; -constexpr char* TOP_TB_PROG_CLOCK_PORT_NAME = "prog_clock"; -constexpr char* TOP_TB_INOUT_REG_POSTFIX = "_reg"; -constexpr char* TOP_TB_CLOCK_REG_POSTFIX = "_reg"; -constexpr char* TOP_TB_BITSTREAM_LENGTH_VARIABLE = "BITSTREAM_LENGTH"; -constexpr char* TOP_TB_BITSTREAM_WIDTH_VARIABLE = "BITSTREAM_WIDTH"; -constexpr char* TOP_TB_BITSTREAM_MEM_REG_NAME = "bit_mem"; -constexpr char* TOP_TB_BITSTREAM_INDEX_REG_NAME = "bit_index"; -constexpr char* TOP_TB_BITSTREAM_ITERATOR_REG_NAME = "ibit"; -constexpr char* TOP_TB_BITSTREAM_SKIP_FLAG_REG_NAME = "skip_bits"; - -constexpr char* AUTOCHECK_TOP_TESTBENCH_VERILOG_MODULE_POSTFIX = "_autocheck_top_tb"; - 
/******************************************************************** * Generate a simulation clock port name * This function is designed to produce a uniform clock naming for these ports @@ -1735,6 +1703,13 @@ void print_verilog_full_testbench_bitstream(std::fstream& fp, module_manager, top_module, fabric_bitstream); break; + case CONFIG_MEM_QL_MEMORY_BANK: + print_verilog_full_testbench_ql_memory_bank_bitstream(fp, bitstream_file, + fast_configuration, + bit_value_to_skip, + module_manager, top_module, + fabric_bitstream); + break; case CONFIG_MEM_FRAME_BASED: print_verilog_full_testbench_frame_decoder_bitstream(fp, bitstream_file, fast_configuration, diff --git a/openfpga/src/fpga_verilog/verilog_top_testbench_constants.h b/openfpga/src/fpga_verilog/verilog_top_testbench_constants.h new file mode 100644 index 000000000..31c251ee4 --- /dev/null +++ b/openfpga/src/fpga_verilog/verilog_top_testbench_constants.h @@ -0,0 +1,42 @@ +#ifndef VERILOG_TOP_TESTBENCH_CONSTANTS +#define VERILOG_TOP_TESTBENCH_CONSTANTS + +/* begin namespace openfpga */ +namespace openfpga { + +constexpr char* TOP_TESTBENCH_REFERENCE_INSTANCE_NAME = "REF_DUT"; +constexpr char* TOP_TESTBENCH_FPGA_INSTANCE_NAME = "FPGA_DUT"; +constexpr char* TOP_TESTBENCH_REFERENCE_OUTPUT_POSTFIX = "_benchmark"; +constexpr char* TOP_TESTBENCH_FPGA_OUTPUT_POSTFIX = "_fpga"; + +constexpr char* TOP_TESTBENCH_CHECKFLAG_PORT_POSTFIX = "_flag"; + +constexpr char* TOP_TESTBENCH_PROG_TASK_NAME = "prog_cycle_task"; + +constexpr char* TOP_TESTBENCH_SIM_START_PORT_NAME = "sim_start"; + +constexpr char* TOP_TESTBENCH_ERROR_COUNTER = "nb_error"; + +constexpr char* TOP_TB_RESET_PORT_NAME = "greset"; +constexpr char* TOP_TB_SET_PORT_NAME = "gset"; +constexpr char* TOP_TB_PROG_RESET_PORT_NAME = "prog_reset"; +constexpr char* TOP_TB_PROG_SET_PORT_NAME = "prog_set"; +constexpr char* TOP_TB_CONFIG_DONE_PORT_NAME = "config_done"; +constexpr char* TOP_TB_OP_CLOCK_PORT_NAME = "op_clock"; +constexpr char* TOP_TB_OP_CLOCK_PORT_PREFIX = 
"operating_clk_"; +constexpr char* TOP_TB_PROG_CLOCK_PORT_NAME = "prog_clock"; +constexpr char* TOP_TB_INOUT_REG_POSTFIX = "_reg"; +constexpr char* TOP_TB_CLOCK_REG_POSTFIX = "_reg"; +constexpr char* TOP_TB_BITSTREAM_LENGTH_VARIABLE = "BITSTREAM_LENGTH"; +constexpr char* TOP_TB_BITSTREAM_WIDTH_VARIABLE = "BITSTREAM_WIDTH"; +constexpr char* TOP_TB_BITSTREAM_MEM_REG_NAME = "bit_mem"; +constexpr char* TOP_TB_BITSTREAM_INDEX_REG_NAME = "bit_index"; +constexpr char* TOP_TB_BITSTREAM_ITERATOR_REG_NAME = "ibit"; +constexpr char* TOP_TB_BITSTREAM_SKIP_FLAG_REG_NAME = "skip_bits"; + +constexpr char* AUTOCHECK_TOP_TESTBENCH_VERILOG_MODULE_POSTFIX = "_autocheck_top_tb"; + + +} /* end namespace openfpga */ + +#endif diff --git a/openfpga/src/fpga_verilog/verilog_top_testbench_memory_bank.cpp b/openfpga/src/fpga_verilog/verilog_top_testbench_memory_bank.cpp new file mode 100644 index 000000000..d2a286984 --- /dev/null +++ b/openfpga/src/fpga_verilog/verilog_top_testbench_memory_bank.cpp @@ -0,0 +1,175 @@ +/******************************************************************** + * This file includes functions that are used to create + * an auto-check top-level testbench for a FPGA fabric + *******************************************************************/ +#include +#include +#include + +/* Headers from vtrutil library */ +#include "vtr_log.h" +#include "vtr_assert.h" +#include "vtr_time.h" + +/* Headers from openfpgautil library */ +#include "openfpga_port.h" +#include "openfpga_digest.h" + +#include "bitstream_manager_utils.h" + +#include "openfpga_reserved_words.h" +#include "openfpga_naming.h" +#include "simulation_utils.h" +#include "openfpga_atom_netlist_utils.h" + +#include "fast_configuration.h" +#include "fabric_bitstream_utils.h" +#include "fabric_global_port_info_utils.h" + +#include "verilog_constants.h" +#include "verilog_writer_utils.h" +#include "verilog_testbench_utils.h" +#include "verilog_top_testbench_memory_bank.h" + +#include 
"verilog_top_testbench_constants.h" + +/* begin namespace openfpga */ +namespace openfpga { + +void print_verilog_full_testbench_ql_memory_bank_bitstream(std::fstream& fp, + const std::string& bitstream_file, + const bool& fast_configuration, + const bool& bit_value_to_skip, + const ModuleManager& module_manager, + const ModuleId& top_module, + const FabricBitstream& fabric_bitstream) { + /* Validate the file stream */ + valid_file_stream(fp); + + /* Reorganize the fabric bitstream by the same address across regions */ + MemoryBankFabricBitstream fabric_bits_by_addr = build_memory_bank_fabric_bitstream_by_address(fabric_bitstream); + + /* For fast configuration, identify the final bitstream size to be used */ + size_t num_bits_to_skip = 0; + if (true == fast_configuration) { + num_bits_to_skip = fabric_bits_by_addr.size() - find_memory_bank_fast_configuration_fabric_bitstream_size(fabric_bitstream, bit_value_to_skip); + } + VTR_ASSERT(num_bits_to_skip < fabric_bits_by_addr.size()); + + /* Feed address and data input pair one by one + * Note: the first cycle is reserved for programming reset + * We should give dummy values + */ + ModulePortId bl_addr_port_id = module_manager.find_module_port(top_module, + std::string(DECODER_BL_ADDRESS_PORT_NAME)); + BasicPort bl_addr_port = module_manager.module_port(top_module, bl_addr_port_id); + std::vector initial_bl_addr_values(bl_addr_port.get_width(), 0); + + ModulePortId wl_addr_port_id = module_manager.find_module_port(top_module, + std::string(DECODER_WL_ADDRESS_PORT_NAME)); + BasicPort wl_addr_port = module_manager.module_port(top_module, wl_addr_port_id); + std::vector initial_wl_addr_values(wl_addr_port.get_width(), 0); + + ModulePortId din_port_id = module_manager.find_module_port(top_module, + std::string(DECODER_DATA_IN_PORT_NAME)); + BasicPort din_port = module_manager.module_port(top_module, din_port_id); + std::vector initial_din_values(din_port.get_width(), 0); + + /* Define a constant for the bitstream length 
*/ + print_verilog_define_flag(fp, std::string(TOP_TB_BITSTREAM_LENGTH_VARIABLE), fabric_bits_by_addr.size() - num_bits_to_skip); + print_verilog_define_flag(fp, std::string(TOP_TB_BITSTREAM_WIDTH_VARIABLE), bl_addr_port.get_width() + wl_addr_port.get_width() + din_port.get_width()); + + /* Declare local variables for bitstream loading in Verilog */ + print_verilog_comment(fp, "----- Virtual memory to store the bitstream from external file -----"); + fp << "reg [0:`" << TOP_TB_BITSTREAM_WIDTH_VARIABLE << " - 1] "; + fp << TOP_TB_BITSTREAM_MEM_REG_NAME << "[0:`" << TOP_TB_BITSTREAM_LENGTH_VARIABLE << " - 1];"; + fp << std::endl; + + fp << "reg [$clog2(`" << TOP_TB_BITSTREAM_LENGTH_VARIABLE << "):0] " << TOP_TB_BITSTREAM_INDEX_REG_NAME << ";" << std::endl; + + print_verilog_comment(fp, "----- Preload bitstream file to a virtual memory -----"); + fp << "initial begin" << std::endl; + fp << "\t"; + fp << "$readmemb(\"" << bitstream_file << "\", " << TOP_TB_BITSTREAM_MEM_REG_NAME << ");"; + fp << std::endl; + + print_verilog_comment(fp, "----- Bit-Line Address port default input -----"); + fp << "\t"; + fp << generate_verilog_port_constant_values(bl_addr_port, initial_bl_addr_values); + fp << ";"; + fp << std::endl; + + print_verilog_comment(fp, "----- Word-Line Address port default input -----"); + fp << "\t"; + fp << generate_verilog_port_constant_values(wl_addr_port, initial_wl_addr_values); + fp << ";"; + fp << std::endl; + + print_verilog_comment(fp, "----- Data-input port default input -----"); + fp << "\t"; + fp << generate_verilog_port_constant_values(din_port, initial_din_values); + fp << ";"; + fp << std::endl; + + fp << "\t"; + fp << TOP_TB_BITSTREAM_INDEX_REG_NAME << " <= 0"; + fp << ";"; + fp << std::endl; + + fp << "end"; + fp << std::endl; + + print_verilog_comment(fp, "----- Begin bitstream loading during configuration phase -----"); + BasicPort prog_clock_port(std::string(TOP_TB_PROG_CLOCK_PORT_NAME) + std::string(TOP_TB_CLOCK_REG_POSTFIX), 1); + fp << 
"always"; + fp << " @(negedge " << generate_verilog_port(VERILOG_PORT_CONKT, prog_clock_port) << ")"; + fp << " begin"; + fp << std::endl; + + fp << "\t"; + fp << "if ("; + fp << TOP_TB_BITSTREAM_INDEX_REG_NAME; + fp << " >= "; + fp << "`" << TOP_TB_BITSTREAM_LENGTH_VARIABLE; + fp << ") begin"; + fp << std::endl; + + BasicPort config_done_port(std::string(TOP_TB_CONFIG_DONE_PORT_NAME), 1); + fp << "\t\t"; + std::vector config_done_final_values(config_done_port.get_width(), 1); + fp << generate_verilog_port_constant_values(config_done_port, config_done_final_values, true); + fp << ";" << std::endl; + + fp << "\t"; + fp << "end else begin"; + fp << std::endl; + + fp << "\t\t"; + fp << "{"; + fp << generate_verilog_port(VERILOG_PORT_CONKT, bl_addr_port); + fp << ", "; + fp << generate_verilog_port(VERILOG_PORT_CONKT, wl_addr_port); + fp << ", "; + fp << generate_verilog_port(VERILOG_PORT_CONKT, din_port); + fp << "}"; + fp << " <= "; + fp << TOP_TB_BITSTREAM_MEM_REG_NAME << "[" << TOP_TB_BITSTREAM_INDEX_REG_NAME << "]"; + fp << ";" << std::endl; + + fp << "\t\t"; + fp << TOP_TB_BITSTREAM_INDEX_REG_NAME; + fp << " <= "; + fp << TOP_TB_BITSTREAM_INDEX_REG_NAME << " + 1"; + fp << ";" << std::endl; + + fp << "\t"; + fp << "end"; + fp << std::endl; + + fp << "end"; + fp << std::endl; + + print_verilog_comment(fp, "----- End bitstream loading during configuration phase -----"); +} + +} /* end namespace openfpga */ diff --git a/openfpga/src/fpga_verilog/verilog_top_testbench_memory_bank.h b/openfpga/src/fpga_verilog/verilog_top_testbench_memory_bank.h new file mode 100644 index 000000000..e82a956e2 --- /dev/null +++ b/openfpga/src/fpga_verilog/verilog_top_testbench_memory_bank.h @@ -0,0 +1,43 @@ +#ifndef VERILOG_TOP_TESTBENCH_MEMORY_BANK +#define VERILOG_TOP_TESTBENCH_MEMORY_BANK + +/******************************************************************** + * Include header files that are required by function declaration + 
*******************************************************************/ +#include +#include +#include "module_manager.h" +#include "bitstream_manager.h" +#include "fabric_bitstream.h" +#include "circuit_library.h" +#include "config_protocol.h" +#include "vpr_context.h" +#include "pin_constraints.h" +#include "io_location_map.h" +#include "fabric_global_port_info.h" +#include "vpr_netlist_annotation.h" +#include "simulation_setting.h" +#include "verilog_testbench_options.h" + +/******************************************************************** + * Function declaration + *******************************************************************/ + +/* begin namespace openfpga */ +namespace openfpga { + +/** + * @brief Print stimulus for a FPGA fabric with a memory bank configuration protocol + * where configuration bits are programming in serial (one by one) + */ +void print_verilog_full_testbench_ql_memory_bank_bitstream(std::fstream& fp, + const std::string& bitstream_file, + const bool& fast_configuration, + const bool& bit_value_to_skip, + const ModuleManager& module_manager, + const ModuleId& top_module, + const FabricBitstream& fabric_bitstream); + +} /* end namespace openfpga */ + +#endif From 6be3c64f1c8f51dadae9316272e2635ce184a86f Mon Sep 17 00:00:00 2001 From: tangxifan Date: Thu, 9 Sep 2021 09:22:27 -0700 Subject: [PATCH 09/16] [Arch] Add an example architecture using the physical design friendly memory bank organization --- .../k4_N4_40nm_qlbank_openfpga.xml | 198 ++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 openfpga_flow/openfpga_arch/k4_N4_40nm_qlbank_openfpga.xml diff --git a/openfpga_flow/openfpga_arch/k4_N4_40nm_qlbank_openfpga.xml b/openfpga_flow/openfpga_arch/k4_N4_40nm_qlbank_openfpga.xml new file mode 100644 index 000000000..ac5d59f75 --- /dev/null +++ b/openfpga_flow/openfpga_arch/k4_N4_40nm_qlbank_openfpga.xml @@ -0,0 +1,198 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10e-12 + + + 10e-12 + + + + + + + + + 
10e-12 + + + 10e-12 + + + + + + + + + 10e-12 + + + 10e-12 + + + + + + + + + + + + + 10e-12 5e-12 5e-12 + + + 10e-12 5e-12 5e-12 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From b82cfdf555b5865dd659764d0d3fb15180016f73 Mon Sep 17 00:00:00 2001 From: tangxifan Date: Thu, 9 Sep 2021 09:29:21 -0700 Subject: [PATCH 10/16] [Test] Add the QL memory bank test to regression test cases --- .../ql_memory_bank/config/task.conf | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 openfpga_flow/tasks/basic_tests/full_testbench/ql_memory_bank/config/task.conf diff --git a/openfpga_flow/tasks/basic_tests/full_testbench/ql_memory_bank/config/task.conf b/openfpga_flow/tasks/basic_tests/full_testbench/ql_memory_bank/config/task.conf new file mode 100644 index 000000000..b7c94008f --- /dev/null +++ b/openfpga_flow/tasks/basic_tests/full_testbench/ql_memory_bank/config/task.conf @@ -0,0 +1,44 @@ +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +# Configuration file for running experiments +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +# timeout_each_job : FPGA Task script splits fpga flow into multiple jobs +# Each job execute fpga_flow script on combination of architecture & benchmark +# timeout_each_job is timeout for each job +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + +[GENERAL] +run_engine=openfpga_shell +power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml +power_analysis = true +spice_output=false +verilog_output=true +timeout_each_job = 20*60 +fpga_flow=yosys_vpr + +[OpenFPGA_SHELL] +openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/write_full_testbench_example_script.openfpga 
+openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_N4_40nm_qlbank_openfpga.xml +openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml +openfpga_vpr_device_layout= +openfpga_fast_configuration= + +[ARCHITECTURES] +arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_N4_tileable_40nm.xml + +[BENCHMARKS] +bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.v +bench1=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/or2/or2.v +bench2=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2_latch/and2_latch.v + +[SYNTHESIS_PARAM] +bench0_top = and2 +bench0_chan_width = 300 + +bench1_top = or2 +bench1_chan_width = 300 + +bench2_top = and2_latch +bench2_chan_width = 300 + +[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH] +end_flow_with_test= From 81a2ad58df18510e91a7873b1b435253d8d390d8 Mon Sep 17 00:00:00 2001 From: tangxifan Date: Thu, 9 Sep 2021 13:48:30 -0700 Subject: [PATCH 11/16] [Test] Deploy the ql memory bank test case to basic regression tests (run on CI) --- openfpga_flow/regression_test_scripts/basic_reg_test.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openfpga_flow/regression_test_scripts/basic_reg_test.sh b/openfpga_flow/regression_test_scripts/basic_reg_test.sh index 0a11ed4e0..5bef6a05f 100755 --- a/openfpga_flow/regression_test_scripts/basic_reg_test.sh +++ b/openfpga_flow/regression_test_scripts/basic_reg_test.sh @@ -53,6 +53,9 @@ run-task basic_tests/full_testbench/smart_fast_memory_bank --debug --show_thread run-task basic_tests/full_testbench/smart_fast_multi_region_memory_bank --debug --show_thread_logs run-task basic_tests/preconfig_testbench/memory_bank --debug --show_thread_logs +echo -e "Testing physical design friendly memory bank configuration protocol of a K4N4 FPGA"; +run-task basic_tests/full_testbench/ql_memory_bank --debug --show_thread_logs + echo -e "Testing testbenches without self checking features"; 
run-task basic_tests/full_testbench/full_testbench_without_self_checking --debug --show_thread_logs run-task basic_tests/preconfig_testbench/preconfigured_testbench_without_self_checking --debug --show_thread_logs From b787c4e100ba6650a5bd9f1c3cb50c9fcabdf6fc Mon Sep 17 00:00:00 2001 From: tangxifan Date: Thu, 9 Sep 2021 15:06:51 -0700 Subject: [PATCH 12/16] [Engine] Register QL memory bank as a legal protocol --- openfpga/src/base/openfpga_naming.cpp | 1 + openfpga/src/fabric/build_grid_modules.cpp | 2 +- openfpga/src/fabric/build_memory_modules.cpp | 1 + openfpga/src/fabric/build_top_module_memory.cpp | 4 +++- openfpga/src/fpga_bitstream/build_device_bitstream.cpp | 3 ++- openfpga/src/fpga_bitstream/fast_configuration.cpp | 1 + .../src/fpga_bitstream/write_text_fabric_bitstream.cpp | 1 + openfpga/src/fpga_bitstream/write_xml_fabric_bitstream.cpp | 1 + openfpga/src/fpga_verilog/verilog_top_testbench.cpp | 3 +++ openfpga/src/fpga_verilog/verilog_writer_utils.cpp | 3 +++ openfpga/src/utils/circuit_library_utils.cpp | 3 +++ openfpga/src/utils/memory_utils.cpp | 7 +++++++ openfpga/src/utils/module_manager_utils.cpp | 3 +++ openfpga/src/utils/mux_utils.cpp | 5 ++++- 14 files changed, 34 insertions(+), 4 deletions(-) diff --git a/openfpga/src/base/openfpga_naming.cpp b/openfpga/src/base/openfpga_naming.cpp index 71d9dff58..c898066b3 100644 --- a/openfpga/src/base/openfpga_naming.cpp +++ b/openfpga/src/base/openfpga_naming.cpp @@ -703,6 +703,7 @@ std::string generate_sram_port_name(const e_config_protocol_type& sram_orgz_type } break; case CONFIG_MEM_STANDALONE: + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: /* Two types of ports are available: * (1) Bit Lines (BLs) of a SRAM cell, enabled by port type of BL diff --git a/openfpga/src/fabric/build_grid_modules.cpp b/openfpga/src/fabric/build_grid_modules.cpp index f1a56c4f1..53ebc9292 100644 --- a/openfpga/src/fabric/build_grid_modules.cpp +++ b/openfpga/src/fabric/build_grid_modules.cpp @@ -279,7 +279,7 
@@ void build_primitive_block_module(ModuleManager& module_manager, size_t num_shared_config_bits = find_circuit_num_shared_config_bits(circuit_lib, primitive_model, sram_orgz_type); if (0 < num_shared_config_bits) { /* Check: this SRAM organization type must be memory-bank ! */ - VTR_ASSERT( CONFIG_MEM_MEMORY_BANK == sram_orgz_type ); + VTR_ASSERT( CONFIG_MEM_MEMORY_BANK == sram_orgz_type || CONFIG_MEM_QL_MEMORY_BANK == sram_orgz_type ); /* Generate a list of ports */ add_reserved_sram_ports_to_module_manager(module_manager, primitive_module, num_shared_config_bits); diff --git a/openfpga/src/fabric/build_memory_modules.cpp b/openfpga/src/fabric/build_memory_modules.cpp index 970ef8a95..8a354da75 100644 --- a/openfpga/src/fabric/build_memory_modules.cpp +++ b/openfpga/src/fabric/build_memory_modules.cpp @@ -786,6 +786,7 @@ void build_memory_module(ModuleManager& module_manager, const size_t& num_mems) { switch (sram_orgz_type) { case CONFIG_MEM_STANDALONE: + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: build_memory_flatten_module(module_manager, circuit_lib, module_name, sram_model, num_mems); diff --git a/openfpga/src/fabric/build_top_module_memory.cpp b/openfpga/src/fabric/build_top_module_memory.cpp index 297a5abf4..582d84b0c 100644 --- a/openfpga/src/fabric/build_top_module_memory.cpp +++ b/openfpga/src/fabric/build_top_module_memory.cpp @@ -265,7 +265,8 @@ void build_top_module_configurable_regions(ModuleManager& module_manager, /* Exclude decoders from the list */ size_t num_configurable_children = module_manager.configurable_children(top_module).size(); - if (CONFIG_MEM_MEMORY_BANK == config_protocol.type()) { + if (CONFIG_MEM_MEMORY_BANK == config_protocol.type() + || CONFIG_MEM_QL_MEMORY_BANK == config_protocol.type()) { num_configurable_children -= 2; } else if (CONFIG_MEM_FRAME_BASED == config_protocol.type()) { num_configurable_children -= 1; @@ -752,6 +753,7 @@ size_t generate_top_module_sram_port_size(const ConfigProtocol& 
config_protocol, case CONFIG_MEM_STANDALONE: break; case CONFIG_MEM_SCAN_CHAIN: + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: case CONFIG_MEM_FRAME_BASED: /* CCFF head/tail, data input could be multi-bit ports */ diff --git a/openfpga/src/fpga_bitstream/build_device_bitstream.cpp b/openfpga/src/fpga_bitstream/build_device_bitstream.cpp index e90d73321..0af563914 100644 --- a/openfpga/src/fpga_bitstream/build_device_bitstream.cpp +++ b/openfpga/src/fpga_bitstream/build_device_bitstream.cpp @@ -96,7 +96,8 @@ size_t rec_estimate_device_bitstream_num_bits(const ModuleManager& module_manage /* Memory configuration protocol will have 2 decoders * at the top-level */ - if (CONFIG_MEM_MEMORY_BANK == config_protocol_type) { + if (CONFIG_MEM_MEMORY_BANK == config_protocol_type + || CONFIG_MEM_QL_MEMORY_BANK == config_protocol_type) { VTR_ASSERT(2 <= curr_region_num_config_child); curr_region_num_config_child -= 2; } diff --git a/openfpga/src/fpga_bitstream/fast_configuration.cpp b/openfpga/src/fpga_bitstream/fast_configuration.cpp index 5b3ec0301..112fac5c3 100644 --- a/openfpga/src/fpga_bitstream/fast_configuration.cpp +++ b/openfpga/src/fpga_bitstream/fast_configuration.cpp @@ -87,6 +87,7 @@ bool find_bit_value_to_skip_for_fast_configuration(const e_config_protocol_type& } break; } + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: case CONFIG_MEM_FRAME_BASED: { /* Count how many logic '1' and logic '0' bits we can skip */ diff --git a/openfpga/src/fpga_bitstream/write_text_fabric_bitstream.cpp b/openfpga/src/fpga_bitstream/write_text_fabric_bitstream.cpp index 5338d7b09..8b2daaf87 100644 --- a/openfpga/src/fpga_bitstream/write_text_fabric_bitstream.cpp +++ b/openfpga/src/fpga_bitstream/write_text_fabric_bitstream.cpp @@ -306,6 +306,7 @@ int write_fabric_bitstream_to_text_file(const BitstreamManager& bitstream_manage bitstream_manager, fabric_bitstream); break; + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: status = 
write_memory_bank_fabric_bitstream_to_text_file(fp, apply_fast_configuration, diff --git a/openfpga/src/fpga_bitstream/write_xml_fabric_bitstream.cpp b/openfpga/src/fpga_bitstream/write_xml_fabric_bitstream.cpp index adc12ce73..8ea50e015 100644 --- a/openfpga/src/fpga_bitstream/write_xml_fabric_bitstream.cpp +++ b/openfpga/src/fpga_bitstream/write_xml_fabric_bitstream.cpp @@ -103,6 +103,7 @@ int write_fabric_config_bit_to_xml_file(std::fstream& fp, case CONFIG_MEM_STANDALONE: case CONFIG_MEM_SCAN_CHAIN: break; + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: { /* Bit line address */ write_tab_to_file(fp, xml_hierarchy_depth + 1); diff --git a/openfpga/src/fpga_verilog/verilog_top_testbench.cpp b/openfpga/src/fpga_verilog/verilog_top_testbench.cpp index 4751823a3..289e4d667 100644 --- a/openfpga/src/fpga_verilog/verilog_top_testbench.cpp +++ b/openfpga/src/fpga_verilog/verilog_top_testbench.cpp @@ -217,6 +217,7 @@ void print_verilog_top_testbench_config_protocol_port(std::fstream& fp, case CONFIG_MEM_SCAN_CHAIN: print_verilog_top_testbench_config_chain_port(fp, module_manager, top_module); break; + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: print_verilog_top_testbench_memory_bank_port(fp, module_manager, top_module); break; @@ -835,6 +836,7 @@ size_t calculate_num_config_clock_cycles(const e_config_protocol_type& sram_orgz 100. 
* ((float)num_config_clock_cycles / (float)(1 + regional_bitstream_max_size) - 1.)); } break; + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: { /* For fast configuration, we will skip all the zero data points */ num_config_clock_cycles = 1 + build_memory_bank_fabric_bitstream_by_address(fabric_bitstream).size(); @@ -1093,6 +1095,7 @@ void print_verilog_top_testbench_configuration_protocol_stimulus(std::fstream& f break; case CONFIG_MEM_SCAN_CHAIN: break; + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: case CONFIG_MEM_FRAME_BASED: { ModulePortId en_port_id = module_manager.find_module_port(top_module, diff --git a/openfpga/src/fpga_verilog/verilog_writer_utils.cpp b/openfpga/src/fpga_verilog/verilog_writer_utils.cpp index 9a55968b0..7d5c3dd77 100644 --- a/openfpga/src/fpga_verilog/verilog_writer_utils.cpp +++ b/openfpga/src/fpga_verilog/verilog_writer_utils.cpp @@ -1010,6 +1010,7 @@ void print_verilog_local_sram_wires(std::fstream& fp, print_verilog_wire_connection(fp, ccff_tail_local_port, ccff_tail_port, false); break; } + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: { /* Generate the name of local wire for the SRAM output and inverted output */ std::vector sram_ports; @@ -1100,6 +1101,7 @@ void print_verilog_local_config_bus(std::fstream& fp, */ break; case CONFIG_MEM_SCAN_CHAIN: + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: { /* Two configuration buses should be outputted * One for the regular SRAM ports of a routing multiplexer @@ -1173,6 +1175,7 @@ void print_verilog_rram_mux_config_bus(std::fstream& fp, */ break; } + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: { /* This is currently most used in ReRAM FPGAs */ /* Print configuration bus to group reserved BL/WLs */ diff --git a/openfpga/src/utils/circuit_library_utils.cpp b/openfpga/src/utils/circuit_library_utils.cpp index 197c873bd..fabdc05da 100644 --- a/openfpga/src/utils/circuit_library_utils.cpp +++ 
b/openfpga/src/utils/circuit_library_utils.cpp @@ -94,6 +94,7 @@ size_t find_rram_circuit_num_shared_config_bits(const CircuitLibrary& circuit_li case CONFIG_MEM_STANDALONE: case CONFIG_MEM_SCAN_CHAIN: break; + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: { /* Find BL/WL ports */ std::vector blb_ports = circuit_lib.model_ports_by_type(rram_model, CIRCUIT_MODEL_PORT_BLB); @@ -175,6 +176,7 @@ size_t find_circuit_num_config_bits(const e_config_protocol_type& config_protoco switch (config_protocol_type) { case CONFIG_MEM_STANDALONE: case CONFIG_MEM_SCAN_CHAIN: + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: { break; } @@ -293,6 +295,7 @@ bool check_configurable_memory_circuit_model(const e_config_protocol_type& confi break; case CONFIG_MEM_STANDALONE: case CONFIG_MEM_MEMORY_BANK: + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_FRAME_BASED: num_err = check_sram_circuit_model_ports(circuit_lib, config_mem_circuit_model, diff --git a/openfpga/src/utils/memory_utils.cpp b/openfpga/src/utils/memory_utils.cpp index bb7cefee7..b02fe1e88 100644 --- a/openfpga/src/utils/memory_utils.cpp +++ b/openfpga/src/utils/memory_utils.cpp @@ -74,6 +74,7 @@ std::map generate_cmos_mem_module_port2port_map(const Ba port2port_name_map[generate_configurable_memory_inverted_data_out_name()] = mem_output_bus_ports[1]; break; } + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: /* TODO: */ break; @@ -131,6 +132,7 @@ std::map generate_rram_mem_module_port2port_map(const Ba port2port_name_map[generate_configurable_memory_inverted_data_out_name()] = mem_output_bus_ports[1]; break; } + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: /* TODO: link BL/WL/Reserved Ports to the inputs of a memory module */ break; @@ -189,6 +191,7 @@ void update_cmos_mem_module_config_bus(const e_config_protocol_type& sram_orgz_t */ VTR_ASSERT(true == config_bus.rotate(1)); break; + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: /* In this case, a 
memory module has a number of BL/WL and BLB/WLB (possibly). * LSB and MSB of configuration bus will be shifted by the number of BL/WL/BLB/WLB. @@ -219,6 +222,7 @@ void update_rram_mem_module_config_bus(const e_config_protocol_type& sram_orgz_t */ VTR_ASSERT(true == config_bus.rotate(1)); break; + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: /* In this case, a memory module contains unique BL/WL or BLB/WLB, * which are not shared with other modules @@ -275,6 +279,7 @@ bool check_mem_config_bus(const e_config_protocol_type& sram_orgz_type, */ return (local_expected_msb == config_bus.get_msb()); break; + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: /* TODO: comment on why */ @@ -319,6 +324,7 @@ std::vector generate_sram_port_names(const CircuitLibrary& circuit_ model_port_types.push_back(CIRCUIT_MODEL_PORT_OUTPUT); break; case CONFIG_MEM_STANDALONE: + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: { std::vector ports_to_search; ports_to_search.push_back(CIRCUIT_MODEL_PORT_BL); @@ -373,6 +379,7 @@ size_t generate_sram_port_size(const e_config_protocol_type sram_orgz_type, /* CCFF head/tail are single-bit ports */ sram_port_size = 1; break; + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: break; case CONFIG_MEM_FRAME_BASED: diff --git a/openfpga/src/utils/module_manager_utils.cpp b/openfpga/src/utils/module_manager_utils.cpp index 4a719b9a1..13cc579fa 100644 --- a/openfpga/src/utils/module_manager_utils.cpp +++ b/openfpga/src/utils/module_manager_utils.cpp @@ -313,6 +313,7 @@ void add_sram_ports_to_module_manager(ModuleManager& module_manager, /* Add ports to the module manager */ switch (sram_orgz_type) { case CONFIG_MEM_STANDALONE: + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: { for (const std::string& sram_port_name : sram_port_names) { /* Add generated ports to the ModuleManager */ @@ -1288,6 +1289,7 @@ void add_module_nets_cmos_memory_config_bus(ModuleManager& module_manager, break; } case 
CONFIG_MEM_STANDALONE: + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: add_module_nets_cmos_flatten_memory_config_bus(module_manager, parent_module, sram_orgz_type, CIRCUIT_MODEL_PORT_BL); @@ -1742,6 +1744,7 @@ size_t find_module_num_config_bits_from_child_modules(ModuleManager& module_mana switch (sram_orgz_type) { case CONFIG_MEM_STANDALONE: case CONFIG_MEM_SCAN_CHAIN: + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: { /* For scan-chain, standalone and memory bank configuration protocol * The number of configuration bits is the sum of configuration bits diff --git a/openfpga/src/utils/mux_utils.cpp b/openfpga/src/utils/mux_utils.cpp index 15c7a97d7..e4b679df6 100644 --- a/openfpga/src/utils/mux_utils.cpp +++ b/openfpga/src/utils/mux_utils.cpp @@ -257,6 +257,7 @@ size_t find_cmos_mux_num_config_bits(const CircuitLibrary& circuit_lib, size_t num_config_bits = 0; switch (sram_orgz_type) { + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: case CONFIG_MEM_SCAN_CHAIN: case CONFIG_MEM_STANDALONE: @@ -298,6 +299,7 @@ size_t find_rram_mux_num_config_bits(const CircuitLibrary& circuit_lib, const e_config_protocol_type& sram_orgz_type) { size_t num_config_bits = 0; switch (sram_orgz_type) { + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: /* In memory bank, by intensively share the Bit/Word Lines, * we only need 1 additional BL and WL for each MUX level. 
@@ -365,6 +367,7 @@ size_t find_cmos_mux_num_shared_config_bits(const e_config_protocol_type& sram_o size_t num_shared_config_bits = 0; switch (sram_orgz_type) { + case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: case CONFIG_MEM_SCAN_CHAIN: case CONFIG_MEM_STANDALONE: @@ -388,7 +391,7 @@ size_t find_rram_mux_num_shared_config_bits(const CircuitLibrary& circuit_lib, const e_config_protocol_type& sram_orgz_type) { size_t num_shared_config_bits = 0; switch (sram_orgz_type) { - case CONFIG_MEM_MEMORY_BANK: { + case CONFIG_MEM_QL_MEMORY_BANK: { /* In memory bank, the number of shared configuration bits is * the sum of largest branch size at each level */ From 35c7b0988879c6fce463158ab7883e82f9fe04e9 Mon Sep 17 00:00:00 2001 From: tangxifan Date: Thu, 9 Sep 2021 15:23:29 -0700 Subject: [PATCH 13/16] [Engine] Bug fix for mistakes in calculating number of BLs/WLs for QL memory bank --- openfpga/src/fabric/build_top_module_memory.cpp | 16 ++++++++-------- .../src/fabric/build_top_module_memory_bank.cpp | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/openfpga/src/fabric/build_top_module_memory.cpp b/openfpga/src/fabric/build_top_module_memory.cpp index 582d84b0c..243df74a9 100644 --- a/openfpga/src/fabric/build_top_module_memory.cpp +++ b/openfpga/src/fabric/build_top_module_memory.cpp @@ -693,12 +693,12 @@ TopModuleNumConfigBits find_top_module_regional_num_config_bit(const ModuleManag vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; num_bls[coord.x()] = std::max(num_bls[coord.x()], find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type))); num_wls[coord.y()] = std::max(num_wls[coord.y()], find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type))); - for (const auto& kv : num_bls) { - num_config_bits[config_region].first += 
kv.first; - } - for (const auto& kv : num_wls) { - num_config_bits[config_region].second += kv.second; - } + } + for (const auto& kv : num_bls) { + num_config_bits[config_region].first += kv.second; + } + for (const auto& kv : num_wls) { + num_config_bits[config_region].second += kv.second; } } break; @@ -853,7 +853,7 @@ void add_top_module_sram_ports(ModuleManager& module_manager, /* BL address size is the largest among all the regions */ size_t bl_addr_size = 0; for (const ConfigRegionId& config_region : module_manager.regions(module_id)) { - bl_addr_size = std::max(bl_addr_size, find_memory_decoder_addr_size(num_config_bits[config_region].first)); + bl_addr_size = std::max(bl_addr_size, find_mux_local_decoder_addr_size(num_config_bits[config_region].first)); } BasicPort bl_addr_port(std::string(DECODER_BL_ADDRESS_PORT_NAME), bl_addr_size); module_manager.add_port(module_id, bl_addr_port, ModuleManager::MODULE_INPUT_PORT); @@ -861,7 +861,7 @@ void add_top_module_sram_ports(ModuleManager& module_manager, /* WL address size is the largest among all the regions */ size_t wl_addr_size = 0; for (const ConfigRegionId& config_region : module_manager.regions(module_id)) { - wl_addr_size = std::max(wl_addr_size, find_memory_decoder_addr_size(num_config_bits[config_region].second)); + wl_addr_size = std::max(wl_addr_size, find_mux_local_decoder_addr_size(num_config_bits[config_region].second)); } BasicPort wl_addr_port(std::string(DECODER_WL_ADDRESS_PORT_NAME), wl_addr_size); module_manager.add_port(module_id, wl_addr_port, ModuleManager::MODULE_INPUT_PORT); diff --git a/openfpga/src/fabric/build_top_module_memory_bank.cpp b/openfpga/src/fabric/build_top_module_memory_bank.cpp index 2df41c5e2..626f46f95 100644 --- a/openfpga/src/fabric/build_top_module_memory_bank.cpp +++ b/openfpga/src/fabric/build_top_module_memory_bank.cpp @@ -151,12 +151,12 @@ void add_top_module_nets_cmos_ql_memory_bank_config_bus(ModuleManager& module_ma /* Each memory bank has a unified number of 
BL/WLs */ size_t num_bls = 0; for (const auto& curr_config_bits : num_config_bits) { - num_bls = std::max(num_bls, find_memory_decoder_data_size(curr_config_bits.first)); + num_bls = std::max(num_bls, curr_config_bits.first); } size_t num_wls = 0; for (const auto& curr_config_bits : num_config_bits) { - num_wls = std::max(num_wls, find_memory_decoder_data_size(curr_config_bits.second)); + num_wls = std::max(num_wls, curr_config_bits.second); } /* Create separated memory bank circuitry, i.e., BL/WL decoders for each region */ From ba1e277dc9e940ef31bd71f3473c1bae80e66fbc Mon Sep 17 00:00:00 2001 From: tangxifan Date: Fri, 10 Sep 2021 15:05:46 -0700 Subject: [PATCH 14/16] [Engine] Fix a few bugs in the BL/WL arrangement and now bitstream generator is working fine --- .../src/fabric/build_top_module_memory.cpp | 9 ++++++-- .../fabric/build_top_module_memory_bank.cpp | 19 +++++++---------- .../build_fabric_bitstream_memory_bank.cpp | 21 +++++++------------ openfpga/src/utils/decoder_library_utils.cpp | 18 ++++++++++++++++ openfpga/src/utils/decoder_library_utils.h | 2 ++ openfpga/src/utils/memory_bank_utils.cpp | 2 +- 6 files changed, 42 insertions(+), 29 deletions(-) diff --git a/openfpga/src/fabric/build_top_module_memory.cpp b/openfpga/src/fabric/build_top_module_memory.cpp index 243df74a9..9c4ca73f3 100644 --- a/openfpga/src/fabric/build_top_module_memory.cpp +++ b/openfpga/src/fabric/build_top_module_memory.cpp @@ -75,7 +75,12 @@ void organize_top_module_tile_cb_modules(ModuleManager& module_manager, if (0 < find_module_num_config_bits(module_manager, cb_module, circuit_lib, sram_model, sram_orgz_type)) { - vtr::Point config_coord(rr_gsb.get_cb_x(cb_type) * 2, rr_gsb.get_cb_y(cb_type) * 2); + /* CBX coordinate conversion calculation: (1,0) -> (2,1) */ + vtr::Point config_coord(rr_gsb.get_cb_x(cb_type) * 2, rr_gsb.get_cb_y(cb_type) * 2 + 1); + if (cb_type == CHANY) { + /* CBY has a different coordinate conversion calculation: (0,1) -> (1,2) */ + 
config_coord.set(rr_gsb.get_cb_x(cb_type) * 2 + 1, rr_gsb.get_cb_y(cb_type) * 2); + } /* Note that use the original CB coodinate for instance id searching ! */ module_manager.add_configurable_child(top_module, cb_module, cb_instance_ids[rr_gsb.get_cb_x(cb_type)][rr_gsb.get_cb_y(cb_type)], config_coord); } @@ -692,7 +697,7 @@ TopModuleNumConfigBits find_top_module_regional_num_config_bit(const ModuleManag ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id]; vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; num_bls[coord.x()] = std::max(num_bls[coord.x()], find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type))); - num_wls[coord.y()] = std::max(num_wls[coord.y()], find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type))); + num_wls[coord.y()] = std::max(num_wls[coord.y()], find_memory_wl_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type))); } for (const auto& kv : num_bls) { num_config_bits[config_region].first += kv.second; diff --git a/openfpga/src/fabric/build_top_module_memory_bank.cpp b/openfpga/src/fabric/build_top_module_memory_bank.cpp index 626f46f95..f1c8b949b 100644 --- a/openfpga/src/fabric/build_top_module_memory_bank.cpp +++ b/openfpga/src/fabric/build_top_module_memory_bank.cpp @@ -323,7 +323,7 @@ void add_top_module_nets_cmos_ql_memory_bank_config_bus(ModuleManager& module_ma for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id]; vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, 
config_region)[child_id]; - int child_num_unique_blwls = find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, CONFIG_MEM_QL_MEMORY_BANK)); + int child_num_unique_blwls = num_bls_per_tile.at(coord.x()); size_t child_instance = module_manager.region_configurable_child_instances(top_module, config_region)[child_id]; @@ -334,12 +334,11 @@ void add_top_module_nets_cmos_ql_memory_bank_config_bus(ModuleManager& module_ma size_t cur_bl_index = 0; for (const size_t& sink_bl_pin : child_bl_port_info.pins()) { + size_t bl_pin_id = bl_start_index_per_tile[coord.x()] + cur_bl_index % child_num_unique_blwls; /* Find the BL decoder data index: * It should be the starting index plus an offset which is the residual when divided by the number of BLs in this tile */ - size_t bl_pin_id = bl_start_index_per_tile[coord.x()] + std::floor(cur_bl_index / child_num_unique_blwls); - if (!(bl_pin_id < bl_decoder_dout_port_info.pins().size())) - VTR_ASSERT(bl_pin_id < bl_decoder_dout_port_info.pins().size()); + VTR_ASSERT(bl_pin_id < bl_decoder_dout_port_info.pins().size()); /* Create net */ ModuleNetId net = create_module_source_pin_net(module_manager, top_module, @@ -352,7 +351,6 @@ void add_top_module_nets_cmos_ql_memory_bank_config_bus(ModuleManager& module_ma module_manager.add_module_net_sink(top_module, net, child_module, child_instance, child_bl_port, sink_bl_pin); - /* Increment the BL index */ cur_bl_index++; } } @@ -366,7 +364,7 @@ void add_top_module_nets_cmos_ql_memory_bank_config_bus(ModuleManager& module_ma for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id]; vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; - int child_num_unique_blwls = 
find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, CONFIG_MEM_QL_MEMORY_BANK)); + int child_num_unique_blwls = num_bls_per_tile.at(coord.x()); size_t child_instance = module_manager.region_configurable_child_instances(top_module, config_region)[child_id]; @@ -377,10 +375,8 @@ void add_top_module_nets_cmos_ql_memory_bank_config_bus(ModuleManager& module_ma size_t cur_wl_index = 0; for (const size_t& sink_wl_pin : child_wl_port_info.pins()) { - /* Find the WL decoder data index: - * It should be the starting index plus an offset which is the residual when divided by the number of WLs in this tile - */ - size_t wl_pin_id = wl_start_index_per_tile[coord.x()] + cur_wl_index % child_num_unique_blwls; + size_t wl_pin_id = wl_start_index_per_tile[coord.y()] + std::floor(cur_wl_index / child_num_unique_blwls); + VTR_ASSERT(wl_pin_id < wl_decoder_dout_port_info.pins().size()); /* Create net */ ModuleNetId net = create_module_source_pin_net(module_manager, top_module, @@ -392,8 +388,7 @@ void add_top_module_nets_cmos_ql_memory_bank_config_bus(ModuleManager& module_ma /* Add net sink */ module_manager.add_module_net_sink(top_module, net, child_module, child_instance, child_wl_port, sink_wl_pin); - - /* Increment the WL index */ + cur_wl_index++; } } diff --git a/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp b/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp index 5399832b2..82d9a7f0d 100644 --- a/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp +++ b/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp @@ -54,7 +54,7 @@ void rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(const B const std::map& num_wls_per_tile, const std::map& wl_start_index_per_tile, vtr::Point& tile_coord, - size_t& cur_mem_index, + std::map, size_t>& cur_mem_index, FabricBitstream& fabric_bitstream, const FabricBitRegionId& fabric_bitstream_region) { @@ 
-83,9 +83,7 @@ void rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(const B ModuleId child_module = configurable_children[child_id]; size_t child_instance = module_manager.region_configurable_child_instances(parent_module, config_region)[child_id]; - if (parent_module == top_module) { - tile_coord = module_manager.region_configurable_child_coordinates(parent_module, config_region)[child_id]; - } + tile_coord = module_manager.region_configurable_child_coordinates(parent_module, config_region)[child_id]; /* Get the instance name and ensure it is not empty */ std::string instance_name = module_manager.instance_name(parent_module, child_module, child_instance); @@ -95,11 +93,6 @@ void rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(const B /* We must have one valid block id! */ VTR_ASSERT(true == bitstream_manager.valid_block_id(child_block)); - /* Reset the memory index for each children under the top-level module */ - if (parent_module == top_module) { - cur_mem_index = 0; - } - /* Go recursively */ rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(bitstream_manager, child_block, module_manager, top_module, child_module, @@ -168,11 +161,11 @@ void rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(const B FabricBitId fabric_bit = fabric_bitstream.add_bit(config_bit); /* Find BL address */ - size_t cur_bl_index = bl_start_index_per_tile.at(tile_coord.x()) + std::floor(cur_mem_index / num_bls_per_tile.at(tile_coord.x())); + size_t cur_bl_index = bl_start_index_per_tile.at(tile_coord.x()) + cur_mem_index[tile_coord] % num_bls_per_tile.at(tile_coord.x()); std::vector bl_addr_bits_vec = itobin_charvec(cur_bl_index, bl_addr_size); /* Find WL address */ - size_t cur_wl_index = wl_start_index_per_tile.at(tile_coord.y()) + cur_mem_index % num_wls_per_tile.at(tile_coord.y()); + size_t cur_wl_index = wl_start_index_per_tile.at(tile_coord.y()) + std::floor(cur_mem_index[tile_coord] / 
num_bls_per_tile.at(tile_coord.x())); std::vector wl_addr_bits_vec = itobin_charvec(cur_wl_index, wl_addr_size); /* Set BL address */ @@ -188,7 +181,7 @@ void rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(const B fabric_bitstream.add_bit_to_region(fabric_bitstream_region, fabric_bit); /* Increase the memory index */ - cur_mem_index++; + cur_mem_index[tile_coord]++; } } @@ -223,8 +216,6 @@ void build_module_fabric_dependent_bitstream_ql_memory_bank(const ConfigProtocol /* Build bitstreams by region */ for (const ConfigRegionId& config_region : module_manager.regions(top_module)) { - size_t cur_mem_index = 0; - /* Find port information for local BL and WL decoder in this region */ std::vector configurable_children = module_manager.region_configurable_children(top_module, config_region); VTR_ASSERT(2 <= configurable_children.size()); @@ -258,6 +249,8 @@ void build_module_fabric_dependent_bitstream_ql_memory_bank(const ConfigProtocol std::map wl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_y_range, num_wls_per_tile); vtr::Point temp_coord; + std::map, size_t> cur_mem_index; + rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(bitstream_manager, top_block, module_manager, top_module, top_module, config_region, diff --git a/openfpga/src/utils/decoder_library_utils.cpp b/openfpga/src/utils/decoder_library_utils.cpp index f4db14ba7..1b86463be 100644 --- a/openfpga/src/utils/decoder_library_utils.cpp +++ b/openfpga/src/utils/decoder_library_utils.cpp @@ -86,6 +86,24 @@ size_t find_memory_decoder_data_size(const size_t& num_mems) { return (size_t)std::ceil(std::sqrt((float)num_mems)); } +/*************************************************************************************** + * Find the size of WL data lines for a memory decoder to access a memory array + * This function is applicable to a memory bank organization where BL data lines + * is the dominant factor. 
It means that the number of BL data lines is strictly an integer close + * to the square root of the number of memory cells. + * For example, 203 memory cells lead to 15 BLs to control + * The WL data lines may not be exactly the same as the number of BLs. + * Considering the example of 203 memory cells again, when 15 BLs are used, we just need + * 203 / 15 = 13.5555 -> 14 WLs + ***************************************************************************************/ +size_t find_memory_wl_decoder_data_size(const size_t& num_mems) { + /* Handle exception: zero memory should have zero WLs */ + if (0 == num_mems) { + return 0; + } + return std::ceil(num_mems / (size_t)std::ceil(std::sqrt((float)num_mems))); +} + /*************************************************************************************** * Try to find if the decoder already exists in the library, * If there is no such decoder, add it to the library diff --git a/openfpga/src/utils/decoder_library_utils.h b/openfpga/src/utils/decoder_library_utils.h index aa25d0df2..33f91cb18 100644 --- a/openfpga/src/utils/decoder_library_utils.h +++ b/openfpga/src/utils/decoder_library_utils.h @@ -17,6 +17,8 @@ size_t find_memory_decoder_addr_size(const size_t& num_mems); size_t find_memory_decoder_data_size(const size_t& num_mems); +size_t find_memory_wl_decoder_data_size(const size_t& num_mems); + DecoderId add_mux_local_decoder_to_library(DecoderLibrary& decoder_lib, const size_t data_size); diff --git a/openfpga/src/utils/memory_bank_utils.cpp b/openfpga/src/utils/memory_bank_utils.cpp index d43909e4d..12a97c23f 100644 --- a/openfpga/src/utils/memory_bank_utils.cpp +++ b/openfpga/src/utils/memory_bank_utils.cpp @@ -82,7 +82,7 @@ std::map compute_memory_bank_regional_wordline_numbers_per_tile(con for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { ModuleId child_module = module_manager.region_configurable_children(top_module, 
config_region)[child_id]; vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; - num_wls_per_tile[coord.y()] = std::max(num_wls_per_tile[coord.y()], find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, CONFIG_MEM_QL_MEMORY_BANK))); + num_wls_per_tile[coord.y()] = std::max(num_wls_per_tile[coord.y()], find_memory_wl_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, CONFIG_MEM_QL_MEMORY_BANK))); } return num_wls_per_tile; From 73d21c973062d5e50749b4326028d0871d8d6c72 Mon Sep 17 00:00:00 2001 From: tangxifan Date: Fri, 10 Sep 2021 15:30:37 -0700 Subject: [PATCH 15/16] [Doc] Update doc about how to use the QuickLogic memory bank --- .../manual/arch_lang/config_protocol.rst | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/docs/source/manual/arch_lang/config_protocol.rst b/docs/source/manual/arch_lang/config_protocol.rst index 1fb03f9f3..087914c80 100644 --- a/docs/source/manual/arch_lang/config_protocol.rst +++ b/docs/source/manual/arch_lang/config_protocol.rst @@ -16,7 +16,7 @@ Template -.. option:: type="scan_chain|memory_bank|standalone" +.. option:: type="scan_chain|memory_bank|standalone|frame_based|ql_memory_bank" Specify the type of configuration circuits. @@ -24,6 +24,7 @@ Template - ``scan_chain``: configurable memories are connected in a chain. Bitstream is loaded serially to program a FPGA - ``frame_based``: configurable memories are organized by frames. Each module of a FPGA fabric, e.g., Configurable Logic Block (CLB), Switch Block (SB) and Connection Block (CB), is considered as a frame of configurable memories. Inside each frame, all the memory banks are accessed through an address decoder. Users can write each memory cell with a specific address. Note that the frame-based memory organization is applid hierarchically. 
Each frame may consists of a number of sub frames, each of which follows the similar organization. - ``memory_bank``: configurable memories are organized in an array, where each element can be accessed by an unique address to the BL/WL decoders + - ``ql_memory_bank``: configurable memories are organized in an array, where each element can be accessed by a unique address to the BL/WL decoders. This is a physical-design-friendly memory bank organization, where BL/WLs are efficiently shared by programmable blocks per column and row - ``standalone``: configurable memories are directly accessed through ports of FPGA fabrics. In other words, there are no protocol to control the memories. This allows full customization on the configuration protocol for hardware engineers. .. note:: Avoid to use ``standalone`` when designing an FPGA chip. It will causes a huge number of I/Os required, far beyond any package size. It is well applicable to eFPGAs, where designers do need customized protocols between FPGA and processors. @@ -147,6 +148,26 @@ Users can customized the number of memory banks to be used across the fabrics. B .. warning:: Please do NOT add inverted Bit-Line and Word-Line inputs. It is not supported yet! + +QuickLogic Memory bank Example +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The following XML code describes a physical-design-friendly memory-bank circuitry to configure the core logic of an FPGA, as illustrated in :numref:`fig_memory_bank`. +It will use the circuit model defined in :numref:`fig_sram_blwl`. + +.. code-block:: xml + + + + + +.. note:: Memory-bank decoders do require a memory cell to have + + - two outputs (one regular and another inverted) + - a Bit-Line input to load the data + - a Word-Line input to enable data write + +.. warning:: Please do NOT add inverted Bit-Line and Word-Line inputs. It is not supported yet! 
+ Standalone SRAM Example ~~~~~~~~~~~~~~~~~~~~~~~ In the standalone configuration protocol, every memory cell of the core logic of a FPGA fabric can be directly accessed at the top-level module, as illustrated in :numref:`fig_vanilla_config_protocol`. From 4af6413c97610f8b631a47ffa3433fe3e1dce195 Mon Sep 17 00:00:00 2001 From: tangxifan Date: Fri, 10 Sep 2021 17:03:44 -0700 Subject: [PATCH 16/16] [Engine] Fixed a critical bug on WL arrangement; Previously we always considered the square root of a local tile. Now we apply global optimization where the number of WLs is determined by the max. number of BLs per column --- .../src/fabric/build_top_module_memory.cpp | 20 +++++++++---------- .../fabric/build_top_module_memory_bank.cpp | 3 ++- .../build_fabric_bitstream_memory_bank.cpp | 3 ++- openfpga/src/utils/decoder_library_utils.cpp | 8 ++++---- openfpga/src/utils/decoder_library_utils.h | 2 +- openfpga/src/utils/memory_bank_utils.cpp | 5 +++-- openfpga/src/utils/memory_bank_utils.h | 11 ++++++++-- 7 files changed, 31 insertions(+), 21 deletions(-) diff --git a/openfpga/src/fabric/build_top_module_memory.cpp b/openfpga/src/fabric/build_top_module_memory.cpp index 9c4ca73f3..3021b2563 100644 --- a/openfpga/src/fabric/build_top_module_memory.cpp +++ b/openfpga/src/fabric/build_top_module_memory.cpp @@ -20,6 +20,7 @@ #include "openfpga_naming.h" #include "memory_utils.h" +#include "memory_bank_utils.h" #include "decoder_library_utils.h" #include "module_manager_utils.h" #include "build_decoder_modules.h" @@ -691,18 +692,17 @@ TopModuleNumConfigBits find_top_module_regional_num_config_bit(const ModuleManag * - each column has independent BLs */ for (const ConfigRegionId& config_region : module_manager.regions(top_module)) { - std::map num_bls; - std::map num_wls; - for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { - ModuleId child_module = module_manager.region_configurable_children(top_module, 
config_region)[child_id]; - vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; - num_bls[coord.x()] = std::max(num_bls[coord.x()], find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type))); - num_wls[coord.y()] = std::max(num_wls[coord.y()], find_memory_wl_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type))); - } - for (const auto& kv : num_bls) { + std::map num_bls_per_tile = compute_memory_bank_regional_bitline_numbers_per_tile(module_manager, top_module, + config_region, + circuit_lib, sram_model); + std::map num_wls_per_tile = compute_memory_bank_regional_wordline_numbers_per_tile(module_manager, top_module, + config_region, + circuit_lib, sram_model, + num_bls_per_tile); + for (const auto& kv : num_bls_per_tile) { num_config_bits[config_region].first += kv.second; } - for (const auto& kv : num_wls) { + for (const auto& kv : num_wls_per_tile) { num_config_bits[config_region].second += kv.second; } } diff --git a/openfpga/src/fabric/build_top_module_memory_bank.cpp b/openfpga/src/fabric/build_top_module_memory_bank.cpp index f1c8b949b..72c54258f 100644 --- a/openfpga/src/fabric/build_top_module_memory_bank.cpp +++ b/openfpga/src/fabric/build_top_module_memory_bank.cpp @@ -290,7 +290,8 @@ void add_top_module_nets_cmos_ql_memory_bank_config_bus(ModuleManager& module_ma circuit_lib, sram_model); std::map num_wls_per_tile = compute_memory_bank_regional_wordline_numbers_per_tile(module_manager, top_module, config_region, - circuit_lib, sram_model); + circuit_lib, sram_model, + num_bls_per_tile); std::map bl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_x_range, num_bls_per_tile); std::map wl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_y_range, num_wls_per_tile); diff --git 
a/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp b/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp index 82d9a7f0d..f95cc076a 100644 --- a/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp +++ b/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp @@ -243,7 +243,8 @@ void build_module_fabric_dependent_bitstream_ql_memory_bank(const ConfigProtocol circuit_lib, config_protocol.memory_model()); std::map num_wls_per_tile = compute_memory_bank_regional_wordline_numbers_per_tile(module_manager, top_module, config_region, - circuit_lib, config_protocol.memory_model()); + circuit_lib, config_protocol.memory_model(), + num_bls_per_tile); std::map bl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_x_range, num_bls_per_tile); std::map wl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_y_range, num_wls_per_tile); diff --git a/openfpga/src/utils/decoder_library_utils.cpp b/openfpga/src/utils/decoder_library_utils.cpp index 1b86463be..502620661 100644 --- a/openfpga/src/utils/decoder_library_utils.cpp +++ b/openfpga/src/utils/decoder_library_utils.cpp @@ -96,12 +96,12 @@ size_t find_memory_decoder_data_size(const size_t& num_mems) { * Considering the example of 203 memory cells again, when 15 BLs are used, we just need * 203 / 15 = 13.5555 -> 14 WLs ***************************************************************************************/ -size_t find_memory_wl_decoder_data_size(const size_t& num_mems) { - /* Handle exception: zero memory should have zero WLs */ - if (0 == num_mems) { +size_t find_memory_wl_decoder_data_size(const size_t& num_mems, const size_t& num_bls) { + /* Handle exception: zero BLs should have zero WLs */ + if (0 == num_bls) { return 0; } - return std::ceil(num_mems / (size_t)std::ceil(std::sqrt((float)num_mems))); + return std::ceil((float)num_mems / (float)num_bls); } 
/*************************************************************************************** diff --git a/openfpga/src/utils/decoder_library_utils.h b/openfpga/src/utils/decoder_library_utils.h index 33f91cb18..34b8662ef 100644 --- a/openfpga/src/utils/decoder_library_utils.h +++ b/openfpga/src/utils/decoder_library_utils.h @@ -17,7 +17,7 @@ size_t find_memory_decoder_addr_size(const size_t& num_mems); size_t find_memory_decoder_data_size(const size_t& num_mems); -size_t find_memory_wl_decoder_data_size(const size_t& num_mems); +size_t find_memory_wl_decoder_data_size(const size_t& num_mems, const size_t& num_bls); DecoderId add_mux_local_decoder_to_library(DecoderLibrary& decoder_lib, const size_t data_size); diff --git a/openfpga/src/utils/memory_bank_utils.cpp b/openfpga/src/utils/memory_bank_utils.cpp index 12a97c23f..5afd7e0e4 100644 --- a/openfpga/src/utils/memory_bank_utils.cpp +++ b/openfpga/src/utils/memory_bank_utils.cpp @@ -76,13 +76,14 @@ std::map compute_memory_bank_regional_wordline_numbers_per_tile(con const ModuleId& top_module, const ConfigRegionId& config_region, const CircuitLibrary& circuit_lib, - const CircuitModelId& sram_model) { + const CircuitModelId& sram_model, + const std::map& num_bls_per_tile) { std::map num_wls_per_tile; for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id]; vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; - num_wls_per_tile[coord.y()] = std::max(num_wls_per_tile[coord.y()], find_memory_wl_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, CONFIG_MEM_QL_MEMORY_BANK))); + num_wls_per_tile[coord.y()] = std::max(num_wls_per_tile[coord.y()], find_memory_wl_decoder_data_size(find_module_num_config_bits(module_manager, child_module, 
circuit_lib, sram_model, CONFIG_MEM_QL_MEMORY_BANK), num_bls_per_tile.at(coord.x()))); } return num_wls_per_tile; diff --git a/openfpga/src/utils/memory_bank_utils.h b/openfpga/src/utils/memory_bank_utils.h index fd08c5e3d..8347e558f 100644 --- a/openfpga/src/utils/memory_bank_utils.h +++ b/openfpga/src/utils/memory_bank_utils.h @@ -53,14 +53,21 @@ std::map compute_memory_bank_regional_bitline_numbers_per_tile(cons /** * @brief Precompute the number of word lines required by each tile under a specific configuration region * @note - * Not every index in the range computed by the compute_memory_bank_regional_configurable_child_x_range() function has a postive number of word lines + * Not every index in the range computed by the compute_memory_bank_regional_configurable_child_y_range() function has a positive number of word lines * If an empty entry is found (e.g., std::map::find(y) is empty), it means there are not word lines required in that tile + * @note + * This function requires an input argument which describes the number of bit lines per tile. Based on this information, the number of word lines is inferred + * as the total number of memories / number of bit lines at a given tile location + * This strategy is chosen because in each column, the number of bit lines is bounded by the tile which consumes the most configuration bits. It may reduce + * the use of word lines. For example, a tile[0][0] has only 8 bits, from which we may infer 3 BLs and 3 WLs. However, when tile[0][1] contains 100 bits, + * it forces the number of BLs to be 10. In such a case, tile[0][0] only requires 1 WL */ std::map compute_memory_bank_regional_wordline_numbers_per_tile(const ModuleManager& module_manager, const ModuleId& top_module, const ConfigRegionId& config_region, const CircuitLibrary& circuit_lib, - const CircuitModelId& sram_model); + const CircuitModelId& sram_model, + const std::map& num_bls_per_tile); /** * @brief Precompute the BLs and WLs distribution across the FPGA fabric