diff --git a/openfpga/src/fabric/build_top_module_memory.cpp b/openfpga/src/fabric/build_top_module_memory.cpp index 9c4ca73f3..3021b2563 100644 --- a/openfpga/src/fabric/build_top_module_memory.cpp +++ b/openfpga/src/fabric/build_top_module_memory.cpp @@ -20,6 +20,7 @@ #include "openfpga_naming.h" #include "memory_utils.h" +#include "memory_bank_utils.h" #include "decoder_library_utils.h" #include "module_manager_utils.h" #include "build_decoder_modules.h" @@ -691,18 +692,17 @@ TopModuleNumConfigBits find_top_module_regional_num_config_bit(const ModuleManag * - each column has independent BLs */ for (const ConfigRegionId& config_region : module_manager.regions(top_module)) { - std::map num_bls; - std::map num_wls; - for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { - ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id]; - vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; - num_bls[coord.x()] = std::max(num_bls[coord.x()], find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type))); - num_wls[coord.y()] = std::max(num_wls[coord.y()], find_memory_wl_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type))); - } - for (const auto& kv : num_bls) { + std::map num_bls_per_tile = compute_memory_bank_regional_bitline_numbers_per_tile(module_manager, top_module, + config_region, + circuit_lib, sram_model); + std::map num_wls_per_tile = compute_memory_bank_regional_wordline_numbers_per_tile(module_manager, top_module, + config_region, + circuit_lib, sram_model, + num_bls_per_tile); + for (const auto& kv : num_bls_per_tile) { num_config_bits[config_region].first += kv.second; } - for (const auto& kv : num_wls) { + for (const auto& kv : 
num_wls_per_tile) { num_config_bits[config_region].second += kv.second; } } diff --git a/openfpga/src/fabric/build_top_module_memory_bank.cpp b/openfpga/src/fabric/build_top_module_memory_bank.cpp index f1c8b949b..72c54258f 100644 --- a/openfpga/src/fabric/build_top_module_memory_bank.cpp +++ b/openfpga/src/fabric/build_top_module_memory_bank.cpp @@ -290,7 +290,8 @@ void add_top_module_nets_cmos_ql_memory_bank_config_bus(ModuleManager& module_ma circuit_lib, sram_model); std::map num_wls_per_tile = compute_memory_bank_regional_wordline_numbers_per_tile(module_manager, top_module, config_region, - circuit_lib, sram_model); + circuit_lib, sram_model, + num_bls_per_tile); std::map bl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_x_range, num_bls_per_tile); std::map wl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_y_range, num_wls_per_tile); diff --git a/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp b/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp index 82d9a7f0d..f95cc076a 100644 --- a/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp +++ b/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp @@ -243,7 +243,8 @@ void build_module_fabric_dependent_bitstream_ql_memory_bank(const ConfigProtocol circuit_lib, config_protocol.memory_model()); std::map num_wls_per_tile = compute_memory_bank_regional_wordline_numbers_per_tile(module_manager, top_module, config_region, - circuit_lib, config_protocol.memory_model()); + circuit_lib, config_protocol.memory_model(), + num_bls_per_tile); std::map bl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_x_range, num_bls_per_tile); std::map wl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_y_range, num_wls_per_tile); diff --git a/openfpga/src/utils/decoder_library_utils.cpp b/openfpga/src/utils/decoder_library_utils.cpp 
/***************************************************************************************
 * Find the number of word lines (WLs) required to address a group of memory
 * cells when the number of bit lines (BLs) is already fixed.
 *
 * The result is the ceiling of num_mems / num_bls.
 * For example, 203 memory cells with 15 BLs require
 *   203 / 15 = 13.5555 -> 14 WLs
 *
 * @param num_mems total number of memory cells to be addressed
 * @param num_bls  number of bit lines available to the memory cells
 * @return the number of word lines; zero when there is no bit line
 *
 * @note The division is carried out on integers on purpose: a single-precision
 *       float cannot represent every count above 2^24 exactly, which would
 *       silently produce a wrong number of WLs for very large memory groups
 ***************************************************************************************/
size_t find_memory_wl_decoder_data_size(const size_t& num_mems, const size_t& num_bls) {
  /* Handle exception: zero BLs should have zero WLs (also avoids a division by zero) */
  if (0 == num_bls) {
    return 0;
  }

  /* Exact ceiling division; written as quotient + remainder check so that
   * the intermediate value can never overflow
   */
  const size_t num_full_wls = num_mems / num_bls;
  const size_t num_leftover_mems = num_mems % num_bls;
  return (0 == num_leftover_mems) ? num_full_wls : num_full_wls + 1;
}
CircuitLibrary& circuit_lib, - const CircuitModelId& sram_model) { + const CircuitModelId& sram_model, + const std::map& num_bls_per_tile) { std::map num_wls_per_tile; for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id]; vtr::Point coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id]; - num_wls_per_tile[coord.y()] = std::max(num_wls_per_tile[coord.y()], find_memory_wl_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, CONFIG_MEM_QL_MEMORY_BANK))); + num_wls_per_tile[coord.y()] = std::max(num_wls_per_tile[coord.y()], find_memory_wl_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, CONFIG_MEM_QL_MEMORY_BANK), num_bls_per_tile.at(coord.x()))); } return num_wls_per_tile; diff --git a/openfpga/src/utils/memory_bank_utils.h b/openfpga/src/utils/memory_bank_utils.h index fd08c5e3d..8347e558f 100644 --- a/openfpga/src/utils/memory_bank_utils.h +++ b/openfpga/src/utils/memory_bank_utils.h @@ -53,14 +53,21 @@ std::map compute_memory_bank_regional_bitline_numbers_per_tile(cons /** * @brief Precompute the number of word lines required by each tile under a specific configuration region * @note - * Not every index in the range computed by the compute_memory_bank_regional_configurable_child_x_range() function has a postive number of word lines + * Not every index in the range computed by the compute_memory_bank_regional_configurable_child_y_range() function has a postive number of word lines * If an empty entry is found (e.g., std::map::find(y) is empty), it means there are not word lines required in that tile + * @note + * This function requires an input argument which describes number of bitlines per tile. 
Based on this information, the number of word lines is inferred + * as the total number of memories / number of bit lines at a given tile location + * This strategy is chosen because in each column, the number of bit lines is bounded by the tile which consumes the most configuration bits. It may reduce + * the use of word lines. For example, a tile[0][0] has only 8 bits, from which we may infer 3 BLs and 3 WLs. However, when tile[0][1] contains 100 bits, + * it will force the number of BLs to be 10. In such a case, tile[0][0] only requires 1 WL */ std::map compute_memory_bank_regional_wordline_numbers_per_tile(const ModuleManager& module_manager, const ModuleId& top_module, const ConfigRegionId& config_region, const CircuitLibrary& circuit_lib, - const CircuitModelId& sram_model); + const CircuitModelId& sram_model, + const std::map& num_bls_per_tile); /** * @brief Precompute the BLs and WLs distribution across the FPGA fabric