[Engine] Fixed a critical bug in WL arrangement. Previously we always considered the square root of the local tile's memory count. Now we apply a global optimization where the number of WLs is determined by the max. number of BLs per column
This commit is contained in:
parent
73d21c9730
commit
4af6413c97
|
@ -20,6 +20,7 @@
|
||||||
#include "openfpga_naming.h"
|
#include "openfpga_naming.h"
|
||||||
|
|
||||||
#include "memory_utils.h"
|
#include "memory_utils.h"
|
||||||
|
#include "memory_bank_utils.h"
|
||||||
#include "decoder_library_utils.h"
|
#include "decoder_library_utils.h"
|
||||||
#include "module_manager_utils.h"
|
#include "module_manager_utils.h"
|
||||||
#include "build_decoder_modules.h"
|
#include "build_decoder_modules.h"
|
||||||
|
@ -691,18 +692,17 @@ TopModuleNumConfigBits find_top_module_regional_num_config_bit(const ModuleManag
|
||||||
* - each column has independent BLs
|
* - each column has independent BLs
|
||||||
*/
|
*/
|
||||||
for (const ConfigRegionId& config_region : module_manager.regions(top_module)) {
|
for (const ConfigRegionId& config_region : module_manager.regions(top_module)) {
|
||||||
std::map<int, size_t> num_bls;
|
std::map<int, size_t> num_bls_per_tile = compute_memory_bank_regional_bitline_numbers_per_tile(module_manager, top_module,
|
||||||
std::map<int, size_t> num_wls;
|
config_region,
|
||||||
for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) {
|
circuit_lib, sram_model);
|
||||||
ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id];
|
std::map<int, size_t> num_wls_per_tile = compute_memory_bank_regional_wordline_numbers_per_tile(module_manager, top_module,
|
||||||
vtr::Point<int> coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id];
|
config_region,
|
||||||
num_bls[coord.x()] = std::max(num_bls[coord.x()], find_memory_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type)));
|
circuit_lib, sram_model,
|
||||||
num_wls[coord.y()] = std::max(num_wls[coord.y()], find_memory_wl_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type)));
|
num_bls_per_tile);
|
||||||
}
|
for (const auto& kv : num_bls_per_tile) {
|
||||||
for (const auto& kv : num_bls) {
|
|
||||||
num_config_bits[config_region].first += kv.second;
|
num_config_bits[config_region].first += kv.second;
|
||||||
}
|
}
|
||||||
for (const auto& kv : num_wls) {
|
for (const auto& kv : num_wls_per_tile) {
|
||||||
num_config_bits[config_region].second += kv.second;
|
num_config_bits[config_region].second += kv.second;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -290,7 +290,8 @@ void add_top_module_nets_cmos_ql_memory_bank_config_bus(ModuleManager& module_ma
|
||||||
circuit_lib, sram_model);
|
circuit_lib, sram_model);
|
||||||
std::map<int, size_t> num_wls_per_tile = compute_memory_bank_regional_wordline_numbers_per_tile(module_manager, top_module,
|
std::map<int, size_t> num_wls_per_tile = compute_memory_bank_regional_wordline_numbers_per_tile(module_manager, top_module,
|
||||||
config_region,
|
config_region,
|
||||||
circuit_lib, sram_model);
|
circuit_lib, sram_model,
|
||||||
|
num_bls_per_tile);
|
||||||
|
|
||||||
std::map<int, size_t> bl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_x_range, num_bls_per_tile);
|
std::map<int, size_t> bl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_x_range, num_bls_per_tile);
|
||||||
std::map<int, size_t> wl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_y_range, num_wls_per_tile);
|
std::map<int, size_t> wl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_y_range, num_wls_per_tile);
|
||||||
|
|
|
@ -243,7 +243,8 @@ void build_module_fabric_dependent_bitstream_ql_memory_bank(const ConfigProtocol
|
||||||
circuit_lib, config_protocol.memory_model());
|
circuit_lib, config_protocol.memory_model());
|
||||||
std::map<int, size_t> num_wls_per_tile = compute_memory_bank_regional_wordline_numbers_per_tile(module_manager, top_module,
|
std::map<int, size_t> num_wls_per_tile = compute_memory_bank_regional_wordline_numbers_per_tile(module_manager, top_module,
|
||||||
config_region,
|
config_region,
|
||||||
circuit_lib, config_protocol.memory_model());
|
circuit_lib, config_protocol.memory_model(),
|
||||||
|
num_bls_per_tile);
|
||||||
|
|
||||||
std::map<int, size_t> bl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_x_range, num_bls_per_tile);
|
std::map<int, size_t> bl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_x_range, num_bls_per_tile);
|
||||||
std::map<int, size_t> wl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_y_range, num_wls_per_tile);
|
std::map<int, size_t> wl_start_index_per_tile = compute_memory_bank_regional_blwl_start_index_per_tile(child_y_range, num_wls_per_tile);
|
||||||
|
|
|
@ -96,12 +96,12 @@ size_t find_memory_decoder_data_size(const size_t& num_mems) {
|
||||||
* Considering the example of 203 memory cells again, when 15 BLs are used, we just need
|
* Considering the example of 203 memory cells again, when 15 BLs are used, we just need
|
||||||
* 203 / 15 = 13.5333 -> 14 WLs
|
* 203 / 15 = 13.5333 -> 14 WLs
|
||||||
***************************************************************************************/
|
***************************************************************************************/
|
||||||
size_t find_memory_wl_decoder_data_size(const size_t& num_mems) {
|
size_t find_memory_wl_decoder_data_size(const size_t& num_mems, const size_t& num_bls) {
|
||||||
/* Handle exception: zero memory should have zero WLs */
|
/* Handle exception: zero BLs should have zero WLs */
|
||||||
if (0 == num_mems) {
|
if (0 == num_bls) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return std::ceil(num_mems / (size_t)std::ceil(std::sqrt((float)num_mems)));
|
return std::ceil((float)num_mems / (float)num_bls);
|
||||||
}
|
}
|
||||||
|
|
||||||
/***************************************************************************************
|
/***************************************************************************************
|
||||||
|
|
|
@ -17,7 +17,7 @@ size_t find_memory_decoder_addr_size(const size_t& num_mems);
|
||||||
|
|
||||||
size_t find_memory_decoder_data_size(const size_t& num_mems);
|
size_t find_memory_decoder_data_size(const size_t& num_mems);
|
||||||
|
|
||||||
size_t find_memory_wl_decoder_data_size(const size_t& num_mems);
|
size_t find_memory_wl_decoder_data_size(const size_t& num_mems, const size_t& num_bls);
|
||||||
|
|
||||||
DecoderId add_mux_local_decoder_to_library(DecoderLibrary& decoder_lib,
|
DecoderId add_mux_local_decoder_to_library(DecoderLibrary& decoder_lib,
|
||||||
const size_t data_size);
|
const size_t data_size);
|
||||||
|
|
|
@ -76,13 +76,14 @@ std::map<int, size_t> compute_memory_bank_regional_wordline_numbers_per_tile(con
|
||||||
const ModuleId& top_module,
|
const ModuleId& top_module,
|
||||||
const ConfigRegionId& config_region,
|
const ConfigRegionId& config_region,
|
||||||
const CircuitLibrary& circuit_lib,
|
const CircuitLibrary& circuit_lib,
|
||||||
const CircuitModelId& sram_model) {
|
const CircuitModelId& sram_model,
|
||||||
|
const std::map<int, size_t>& num_bls_per_tile) {
|
||||||
std::map<int, size_t> num_wls_per_tile;
|
std::map<int, size_t> num_wls_per_tile;
|
||||||
|
|
||||||
for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) {
|
for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) {
|
||||||
ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id];
|
ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id];
|
||||||
vtr::Point<int> coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id];
|
vtr::Point<int> coord = module_manager.region_configurable_child_coordinates(top_module, config_region)[child_id];
|
||||||
num_wls_per_tile[coord.y()] = std::max(num_wls_per_tile[coord.y()], find_memory_wl_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, CONFIG_MEM_QL_MEMORY_BANK)));
|
num_wls_per_tile[coord.y()] = std::max(num_wls_per_tile[coord.y()], find_memory_wl_decoder_data_size(find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, CONFIG_MEM_QL_MEMORY_BANK), num_bls_per_tile.at(coord.x())));
|
||||||
}
|
}
|
||||||
|
|
||||||
return num_wls_per_tile;
|
return num_wls_per_tile;
|
||||||
|
|
|
@ -53,14 +53,21 @@ std::map<int, size_t> compute_memory_bank_regional_bitline_numbers_per_tile(cons
|
||||||
/**
|
/**
|
||||||
* @brief Precompute the number of word lines required by each tile under a specific configuration region
|
* @brief Precompute the number of word lines required by each tile under a specific configuration region
|
||||||
* @note
|
* @note
|
||||||
* Not every index in the range computed by the compute_memory_bank_regional_configurable_child_x_range() function has a positive number of word lines
|
* Not every index in the range computed by the compute_memory_bank_regional_configurable_child_y_range() function has a positive number of word lines
|
||||||
* If no entry is found (e.g., std::map::find(y) returns end()), it means that no word lines are required in that tile
|
* If no entry is found (e.g., std::map::find(y) returns end()), it means that no word lines are required in that tile
|
||||||
|
* @note
|
||||||
|
* This function requires an input argument which describes the number of bit lines per tile. Based on this information, the number of word lines is inferred
|
||||||
|
* as the total number of memories / the number of bit lines at a given tile location
|
||||||
|
* This strategy is chosen because, in each column, the number of bit lines is bounded by the tile which consumes the most configuration bits. It may reduce
|
||||||
|
* the use of word lines. For example, if tile[0][0] has only 8 bits, we may infer 3 BLs and 3 WLs from it alone. However, tile[0][1] contains 100 bits,
|
||||||
|
* which forces the number of BLs to be 10. In such a case, tile[0][0] only requires 1 WL
|
||||||
*/
|
*/
|
||||||
std::map<int, size_t> compute_memory_bank_regional_wordline_numbers_per_tile(const ModuleManager& module_manager,
|
std::map<int, size_t> compute_memory_bank_regional_wordline_numbers_per_tile(const ModuleManager& module_manager,
|
||||||
const ModuleId& top_module,
|
const ModuleId& top_module,
|
||||||
const ConfigRegionId& config_region,
|
const ConfigRegionId& config_region,
|
||||||
const CircuitLibrary& circuit_lib,
|
const CircuitLibrary& circuit_lib,
|
||||||
const CircuitModelId& sram_model);
|
const CircuitModelId& sram_model,
|
||||||
|
const std::map<int, size_t>& num_bls_per_tile);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Precompute the BLs and WLs distribution across the FPGA fabric
|
* @brief Precompute the BLs and WLs distribution across the FPGA fabric
|
||||||
|
|
Loading…
Reference in New Issue