diff --git a/.travis/basic_reg_test.sh b/.travis/basic_reg_test.sh index 474667b84..e6e04412e 100755 --- a/.travis/basic_reg_test.sh +++ b/.travis/basic_reg_test.sh @@ -45,9 +45,11 @@ python3 openfpga_flow/scripts/run_fpga_task.py basic_tests/full_testbench/memory python3 openfpga_flow/scripts/run_fpga_task.py basic_tests/full_testbench/memory_bank_use_set --debug --show_thread_logs python3 openfpga_flow/scripts/run_fpga_task.py basic_tests/full_testbench/memory_bank_use_setb --debug --show_thread_logs python3 openfpga_flow/scripts/run_fpga_task.py basic_tests/full_testbench/memory_bank_use_set_reset --debug --show_thread_logs +python3 openfpga_flow/scripts/run_fpga_task.py basic_tests/full_testbench/multi_region_memory_bank --debug --show_thread_logs python3 openfpga_flow/scripts/run_fpga_task.py basic_tests/full_testbench/fast_memory_bank --debug --show_thread_logs python3 openfpga_flow/scripts/run_fpga_task.py basic_tests/full_testbench/fast_memory_bank_use_set --debug --show_thread_logs python3 openfpga_flow/scripts/run_fpga_task.py basic_tests/full_testbench/smart_fast_memory_bank --debug --show_thread_logs +python3 openfpga_flow/scripts/run_fpga_task.py basic_tests/full_testbench/smart_fast_multi_region_memory_bank --debug --show_thread_logs python3 openfpga_flow/scripts/run_fpga_task.py basic_tests/preconfig_testbench/memory_bank --debug --show_thread_logs echo -e "Testing standalone (flatten memory) configuration protocol of a K4N4 FPGA"; @@ -95,3 +97,4 @@ echo -e "Testing K4N5 with pattern based local routing"; python3 openfpga_flow/scripts/run_fpga_task.py basic_tests/k4_series/k4n5_pattern_local_routing --debug --show_thread_logs end_section "OpenFPGA.TaskTun" +python3 openfpga_flow/scripts/run_fpga_task.py basic_tests/full_testbench/multi_region_memory_bank --debug --show_thread_logs diff --git a/docs/source/manual/arch_lang/config_protocol.rst b/docs/source/manual/arch_lang/config_protocol.rst index a460ad1de..d52399cdf 100644 --- 
a/docs/source/manual/arch_lang/config_protocol.rst +++ b/docs/source/manual/arch_lang/config_protocol.rst @@ -115,8 +115,9 @@ When the decoder of sub block, e.g., the LUT, is enabled, each memory cells can Memory bank Example ~~~~~~~~~~~~~~~~~~~ -The following XML code describes a memory-bank circuitry to configure the core logic of FPGA, as illustrated in :numref:`fig_sram`. +The following XML code describes a memory-bank circuitry to configure the core logic of FPGA, as illustrated in :numref:`fig_memory_bank`. It will use the circuit model defined in :numref:`fig_sram_blwl`. +Users can customize the number of memory banks to be used across the fabrics. By default, there is only 1 memory bank. :numref:`fig_memory_bank` shows an example where 4 memory banks are defined. The more memory banks are used, the faster the configuration runtime will be, but at the cost of more I/Os in the FPGA fabrics. The organization of each configurable region can be customized through the fabric key (see details in :ref:`fabric_key`). .. code-block:: xml @@ -124,13 +125,13 @@ It will use the circuit model defined in :numref:`fig_sram_blwl`. -.. _fig_sram: +.. _fig_memory_bank: -.. figure:: figures/sram.png - :scale: 60% +.. figure:: figures/memory_bank.png + :scale: 30% :alt: map to buried treasure - Example of a memory organization using memory decoders + Example of (a) a memory organization using memory decoders; (b) a single memory bank across the fabric; and (c) multiple memory banks across the fabric. .. 
note:: Memory-bank decoders does require a memory cell to have diff --git a/docs/source/manual/arch_lang/figures/memory_bank.png b/docs/source/manual/arch_lang/figures/memory_bank.png new file mode 100644 index 000000000..7092e9d5e Binary files /dev/null and b/docs/source/manual/arch_lang/figures/memory_bank.png differ diff --git a/docs/source/manual/arch_lang/figures/sram.png b/docs/source/manual/arch_lang/figures/sram.png deleted file mode 100644 index 937f4cf92..000000000 Binary files a/docs/source/manual/arch_lang/figures/sram.png and /dev/null differ diff --git a/openfpga/src/fabric/build_top_module.cpp b/openfpga/src/fabric/build_top_module.cpp index 95d1c7dce..a833929f2 100644 --- a/openfpga/src/fabric/build_top_module.cpp +++ b/openfpga/src/fabric/build_top_module.cpp @@ -402,6 +402,16 @@ int build_top_module(ModuleManager& module_manager, compact_routing_hierarchy); } else { VTR_ASSERT_SAFE(false == fabric_key.empty()); + /* Give a warning message that the fabric key may overwrite existing region organization. 
+ * Only applicable when number of regions defined in configuration protocol is different + * than the number of regions defined in the fabric key + */ + if (size_t(config_protocol.num_regions()) != fabric_key.regions().size()) { + VTR_LOG_WARN("Fabric key will overwrite the region organization (='%ld') than architecture definition (=%d)!\n", + fabric_key.regions().size(), + config_protocol.num_regions()); + } + status = load_top_module_memory_modules_from_fabric_key(module_manager, top_module, fabric_key); if (CMD_EXEC_FATAL_ERROR == status) { @@ -427,11 +437,13 @@ int build_top_module(ModuleManager& module_manager, * This is a much easier job after adding sub modules (instances), * we just need to find all the I/O ports from the child modules and build a list of it */ - size_t module_num_config_bits = find_module_num_config_bits_from_child_modules(module_manager, top_module, circuit_lib, sram_model, config_protocol.type()); - if (0 < module_num_config_bits) { + vtr::vector top_module_num_config_bits = find_top_module_regional_num_config_bit(module_manager, top_module, circuit_lib, sram_model, config_protocol.type()); + + if (!top_module_num_config_bits.empty()) { add_top_module_sram_ports(module_manager, top_module, circuit_lib, sram_model, - config_protocol, module_num_config_bits); + config_protocol, + top_module_num_config_bits); } /* Add module nets to connect memory cells inside @@ -441,7 +453,7 @@ int build_top_module(ModuleManager& module_manager, add_top_module_nets_memory_config_bus(module_manager, decoder_lib, top_module, config_protocol, circuit_lib.design_tech_type(sram_model), - module_num_config_bits); + top_module_num_config_bits); } return status; diff --git a/openfpga/src/fabric/build_top_module_memory.cpp b/openfpga/src/fabric/build_top_module_memory.cpp index f8b1894ab..50a1a7f15 100644 --- a/openfpga/src/fabric/build_top_module_memory.cpp +++ b/openfpga/src/fabric/build_top_module_memory.cpp @@ -590,6 +590,68 @@ int 
load_top_module_memory_modules_from_fabric_key(ModuleManager& module_manager
   return CMD_EXEC_SUCCESS;
 }
 
+/********************************************************************
+ * Find the number of configuration bits in each region of
+ * the top-level module. The result holds one entry per region.
+ *
+ * Note:
+ * - This function should be called after the configurable children
+ *   are loaded to the top-level module!
+ ********************************************************************/
+vtr::vector<ConfigRegionId, size_t> find_top_module_regional_num_config_bit(const ModuleManager& module_manager,
+                                                                            const ModuleId& top_module,
+                                                                            const CircuitLibrary& circuit_lib,
+                                                                            const CircuitModelId& sram_model,
+                                                                            const e_config_protocol_type& config_protocol_type) {
+  /* Initialize the number of configuration bits for each region */
+  vtr::vector<ConfigRegionId, size_t> num_config_bits(module_manager.regions(top_module).size(), 0);
+
+  switch (config_protocol_type) {
+  case CONFIG_MEM_STANDALONE:
+  case CONFIG_MEM_SCAN_CHAIN:
+  case CONFIG_MEM_MEMORY_BANK: {
+    /* For flatten, chain and memory bank configuration protocol
+     * The number of configuration bits is the sum of configuration bits
+     * per configurable children in each region
+     */
+    for (const ConfigRegionId& config_region : module_manager.regions(top_module)) {
+      for (const ModuleId& child_module : module_manager.region_configurable_children(top_module, config_region)) {
+        num_config_bits[config_region] += find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type);
+      }
+    }
+    break;
+  }
+  case CONFIG_MEM_FRAME_BASED: {
+    /* For frame-based configuration protocol
+     * The number of configuration bits in a region is the sum of
+     * - the maximum of configuration bits among configurable children
+     * - and the address size of the local decoder selecting a child
+     */
+    for (const ConfigRegionId& config_region : module_manager.regions(top_module)) {
+      for (const ModuleId& child_module : module_manager.region_configurable_children(top_module, config_region)) {
+        size_t temp_num_config_bits = find_module_num_config_bits(module_manager, child_module, circuit_lib, sram_model, config_protocol_type);
+        num_config_bits[config_region] = std::max(temp_num_config_bits, num_config_bits[config_region]);
+      }
+
+      /* If there are 2 or more configurable children, we need a decoder
+       * Otherwise, we can just short wire the address port to the children
+       */
+      if (1 < module_manager.region_configurable_children(top_module, config_region).size()) {
+        num_config_bits[config_region] += find_mux_local_decoder_addr_size(module_manager.region_configurable_children(top_module, config_region).size());
+      }
+    }
+
+    break;
+  }
+
+  default:
+    VTR_LOG_ERROR("Invalid type of SRAM organization !\n");
+    exit(1);
+  }
+
+  return num_config_bits;
+}
+
 /********************************************************************
  * Generate a list of ports that are used for SRAM configuration
  * to the top-level module
@@ -609,12 +671,10 @@ size_t generate_top_module_sram_port_size(const ConfigProtocol& config_protocol,
   case CONFIG_MEM_STANDALONE:
     break;
   case CONFIG_MEM_SCAN_CHAIN:
-    /* CCFF head/tail are single-bit ports */
-    sram_port_size = config_protocol.num_regions();
-    break;
   case CONFIG_MEM_MEMORY_BANK:
-    break;
   case CONFIG_MEM_FRAME_BASED:
+    /* CCFF head/tail, data input could be multi-bit ports */
+    sram_port_size = config_protocol.num_regions();
     break;
   default:
     VTR_LOGF_ERROR(__FILE__, __LINE__,
@@ -653,9 +713,13 @@ void add_top_module_sram_ports(ModuleManager& module_manager,
                                const CircuitLibrary& circuit_lib,
                                const CircuitModelId& sram_model,
                                const ConfigProtocol& config_protocol,
-                               const size_t& num_config_bits) {
+                               const vtr::vector& num_config_bits) {
   std::vector sram_port_names = generate_sram_port_names(circuit_lib, sram_model, config_protocol.type());
-  size_t sram_port_size = generate_top_module_sram_port_size(config_protocol, num_config_bits);
+  size_t total_num_config_bits = 0;
+  for (const size_t& curr_num_config_bits : num_config_bits) {
+    total_num_config_bits += 
curr_num_config_bits; + } + size_t sram_port_size = generate_top_module_sram_port_size(config_protocol, total_num_config_bits); /* Add ports to the module manager */ switch (config_protocol.type()) { @@ -671,15 +735,24 @@ void add_top_module_sram_ports(ModuleManager& module_manager, BasicPort en_port(std::string(DECODER_ENABLE_PORT_NAME), 1); module_manager.add_port(module_id, en_port, ModuleManager::MODULE_INPUT_PORT); - size_t bl_addr_size = find_memory_decoder_addr_size(num_config_bits); + /* BL address size is the largest among all the regions */ + size_t bl_addr_size = 0; + for (const ConfigRegionId& config_region : module_manager.regions(module_id)) { + bl_addr_size = std::max(bl_addr_size, find_memory_decoder_addr_size(num_config_bits[config_region])); + } BasicPort bl_addr_port(std::string(DECODER_BL_ADDRESS_PORT_NAME), bl_addr_size); module_manager.add_port(module_id, bl_addr_port, ModuleManager::MODULE_INPUT_PORT); - size_t wl_addr_size = find_memory_decoder_addr_size(num_config_bits); + /* WL address size is the largest among all the regions */ + size_t wl_addr_size = 0; + for (const ConfigRegionId& config_region : module_manager.regions(module_id)) { + wl_addr_size = std::max(wl_addr_size, find_memory_decoder_addr_size(num_config_bits[config_region])); + } BasicPort wl_addr_port(std::string(DECODER_WL_ADDRESS_PORT_NAME), wl_addr_size); module_manager.add_port(module_id, wl_addr_port, ModuleManager::MODULE_INPUT_PORT); - BasicPort din_port(std::string(DECODER_DATA_IN_PORT_NAME), 1); + /* Data input should be dependent on the number of configuration regions*/ + BasicPort din_port(std::string(DECODER_DATA_IN_PORT_NAME), config_protocol.num_regions()); module_manager.add_port(module_id, din_port, ModuleManager::MODULE_INPUT_PORT); break; @@ -708,10 +781,10 @@ void add_top_module_sram_ports(ModuleManager& module_manager, BasicPort en_port(std::string(DECODER_ENABLE_PORT_NAME), 1); module_manager.add_port(module_id, en_port, ModuleManager::MODULE_INPUT_PORT); 
- BasicPort addr_port(std::string(DECODER_ADDRESS_PORT_NAME), num_config_bits); + BasicPort addr_port(std::string(DECODER_ADDRESS_PORT_NAME), total_num_config_bits); module_manager.add_port(module_id, addr_port, ModuleManager::MODULE_INPUT_PORT); - BasicPort din_port(std::string(DECODER_DATA_IN_PORT_NAME), 1); + BasicPort din_port(std::string(DECODER_DATA_IN_PORT_NAME), sram_port_size); module_manager.add_port(module_id, din_port, ModuleManager::MODULE_INPUT_PORT); break; @@ -725,11 +798,55 @@ void add_top_module_sram_ports(ModuleManager& module_manager, /********************************************************************* * Top-level function to add nets for memory banks - * - Find the number of BLs and WLs required + * Each configuration region has independent memory bank circuitry + * - Find the number of BLs and WLs required for each region * - Create BL and WL decoders, and add them to decoder library * - Create nets to connect from top-level module inputs to inputs of decoders * - Create nets to connect from outputs of decoders to BL/WL of configurable children * + * Detailed schematic of how memory banks are connected in the top-level: + * Consider a random Region X, local BL address lines are aligned to the LSB of the + * top-level BL address lines + * + * top_bl_addr[N-1:0] + * ^ + * | local_bl_addr[N-1:0] + * | + * +-----+------------------+ + * | | | + * | +-------------------+ | + * | | Word Line Decoder | | + * | +-------------------+ | + * | | + * + * The BL/WL decoders should have the same circuit designs no matter what region + * they are placed even when the number of configuration bits are different + * from one region to another! + * This is designed to avoid any address collision between memory banks + * since they are programmed in the same clock cycle + * For example: + * - Memory Bank A has 36 memory cells. 
+ * Its BL decoder has 3 address bit and 6 data output bit + * Its WL decoder has 3 address bit and 6 data output bit + * - Memory Bank B has 16 memory cells. + * Its BL decoder has 2 address bit and 4 data output bit + * Its WL decoder has 2 address bit and 4 data output bit + * - If we try to program the 36th memory cell in bank A + * the BL address will be 3'b110 + * the WL address will be 3'b110 + * the data input will be 1'b0 + * - If we try to program the 4th memory cell in bank A + * the BL address will be 3'b010 + * the WL address will be 3'b010 + * the data input will be 1'b1 + * However, in both cases, this will trigger a parasitic programming in bank B + * the BL address will be 2'b10 + * the WL address will be 2'b10 + * Assume the data input is expected to be 1'b1 for bank B + * but it will be overwritten to 1'b0 when programming the 36th cell in bank A! + * + * Detailed schematic of each memory bank: + * * WL_enable WL address * | | * v v @@ -772,7 +889,7 @@ static void add_top_module_nets_cmos_memory_bank_config_bus(ModuleManager& module_manager, DecoderLibrary& decoder_lib, const ModuleId& top_module, - const size_t& num_config_bits) { + const vtr::vector& num_config_bits) { /* Find Enable port from the top-level module */ ModulePortId en_port = module_manager.find_module_port(top_module, std::string(DECODER_ENABLE_PORT_NAME)); BasicPort en_port_info = module_manager.module_port(top_module, en_port); @@ -781,6 +898,9 @@ void add_top_module_nets_cmos_memory_bank_config_bus(ModuleManager& module_manag ModulePortId din_port = module_manager.find_module_port(top_module, std::string(DECODER_DATA_IN_PORT_NAME)); BasicPort din_port_info = module_manager.module_port(top_module, din_port); + /* Data in port should match the number of configuration regions */ + VTR_ASSERT(din_port_info.get_width() == module_manager.regions(top_module).size()); + /* Find BL and WL address port from the top-level module */ ModulePortId bl_addr_port = 
module_manager.find_module_port(top_module, std::string(DECODER_BL_ADDRESS_PORT_NAME)); BasicPort bl_addr_port_info = module_manager.module_port(top_module, bl_addr_port); @@ -788,190 +908,236 @@ void add_top_module_nets_cmos_memory_bank_config_bus(ModuleManager& module_manag ModulePortId wl_addr_port = module_manager.find_module_port(top_module, std::string(DECODER_WL_ADDRESS_PORT_NAME)); BasicPort wl_addr_port_info = module_manager.module_port(top_module, wl_addr_port); - /* Find the number of BLs and WLs required to access each memory bit */ + /* Find the top-level number of BLs and WLs required to access each memory bit */ size_t bl_addr_size = bl_addr_port_info.get_width(); size_t wl_addr_size = wl_addr_port_info.get_width(); - size_t num_bls = find_memory_decoder_data_size(num_config_bits); - size_t num_wls = find_memory_decoder_data_size(num_config_bits); - - /* Add the BL decoder module - * Search the decoder library - * If we find one, we use the module. - * Otherwise, we create one and add it to the decoder library - */ - DecoderId bl_decoder_id = decoder_lib.find_decoder(bl_addr_size, num_bls, - true, true, false); - if (DecoderId::INVALID() == bl_decoder_id) { - bl_decoder_id = decoder_lib.add_decoder(bl_addr_size, num_bls, true, true, false); - } - VTR_ASSERT(DecoderId::INVALID() != bl_decoder_id); - /* Create a module if not existed yet */ - std::string bl_decoder_module_name = generate_memory_decoder_with_data_in_subckt_name(bl_addr_size, num_bls); - ModuleId bl_decoder_module = module_manager.find_module(bl_decoder_module_name); - if (ModuleId::INVALID() == bl_decoder_module) { - /* BL decoder has the same ports as the frame-based decoders - * We reuse it here + /* Each memory bank has a unified number of BL/WLs */ + size_t num_bls = 0; + for (const size_t& curr_config_bits : num_config_bits) { + num_bls = std::max(num_bls, find_memory_decoder_data_size(curr_config_bits)); + } + + size_t num_wls = 0; + for (const size_t& curr_config_bits : 
num_config_bits) { + num_wls = std::max(num_wls, find_memory_decoder_data_size(curr_config_bits)); + } + + /* Create separated memory bank circuitry, i.e., BL/WL decoders for each region */ + for (const ConfigRegionId& config_region : module_manager.regions(top_module)) { + /************************************************************** + * Add the BL decoder module + * Search the decoder library + * If we find one, we use the module. + * Otherwise, we create one and add it to the decoder library */ - bl_decoder_module = build_bl_memory_decoder_module(module_manager, - decoder_lib, - bl_decoder_id); - } - VTR_ASSERT(ModuleId::INVALID() != bl_decoder_module); - VTR_ASSERT(0 == module_manager.num_instance(top_module, bl_decoder_module)); - module_manager.add_child_module(top_module, bl_decoder_module); + DecoderId bl_decoder_id = decoder_lib.find_decoder(bl_addr_size, num_bls, + true, true, false); + if (DecoderId::INVALID() == bl_decoder_id) { + bl_decoder_id = decoder_lib.add_decoder(bl_addr_size, num_bls, true, true, false); + } + VTR_ASSERT(DecoderId::INVALID() != bl_decoder_id); - /* Add the WL decoder module - * Search the decoder library - * If we find one, we use the module. 
- * Otherwise, we create one and add it to the decoder library - */ - DecoderId wl_decoder_id = decoder_lib.find_decoder(wl_addr_size, num_wls, - true, false, false); - if (DecoderId::INVALID() == wl_decoder_id) { - wl_decoder_id = decoder_lib.add_decoder(wl_addr_size, num_wls, true, false, false); - } - VTR_ASSERT(DecoderId::INVALID() != wl_decoder_id); + /* Create a module if not existed yet */ + std::string bl_decoder_module_name = generate_memory_decoder_with_data_in_subckt_name(bl_addr_size, num_bls); + ModuleId bl_decoder_module = module_manager.find_module(bl_decoder_module_name); + if (ModuleId::INVALID() == bl_decoder_module) { + /* BL decoder has the same ports as the frame-based decoders + * We reuse it here + */ + bl_decoder_module = build_bl_memory_decoder_module(module_manager, + decoder_lib, + bl_decoder_id); + } + VTR_ASSERT(ModuleId::INVALID() != bl_decoder_module); + size_t curr_bl_decoder_instance_id = module_manager.num_instance(top_module, bl_decoder_module); + module_manager.add_child_module(top_module, bl_decoder_module); - /* Create a module if not existed yet */ - std::string wl_decoder_module_name = generate_memory_decoder_subckt_name(wl_addr_size, num_wls); - ModuleId wl_decoder_module = module_manager.find_module(wl_decoder_module_name); - if (ModuleId::INVALID() == wl_decoder_module) { - /* BL decoder has the same ports as the frame-based decoders - * We reuse it here + /************************************************************** + * Add the WL decoder module + * Search the decoder library + * If we find one, we use the module. 
+ * Otherwise, we create one and add it to the decoder library */ - wl_decoder_module = build_wl_memory_decoder_module(module_manager, - decoder_lib, - wl_decoder_id); - } - VTR_ASSERT(ModuleId::INVALID() != wl_decoder_module); - VTR_ASSERT(0 == module_manager.num_instance(top_module, wl_decoder_module)); - module_manager.add_child_module(top_module, wl_decoder_module); - - /* Add module nets from the top module to BL decoder's inputs */ - ModulePortId bl_decoder_en_port = module_manager.find_module_port(bl_decoder_module, std::string(DECODER_ENABLE_PORT_NAME)); - BasicPort bl_decoder_en_port_info = module_manager.module_port(bl_decoder_module, bl_decoder_en_port); - - ModulePortId bl_decoder_addr_port = module_manager.find_module_port(bl_decoder_module, std::string(DECODER_ADDRESS_PORT_NAME)); - BasicPort bl_decoder_addr_port_info = module_manager.module_port(bl_decoder_module, bl_decoder_addr_port); - - ModulePortId bl_decoder_din_port = module_manager.find_module_port(bl_decoder_module, std::string(DECODER_DATA_IN_PORT_NAME)); - BasicPort bl_decoder_din_port_info = module_manager.module_port(bl_decoder_module, bl_decoder_din_port); - - /* Top module Enable port -> BL Decoder Enable port */ - add_module_bus_nets(module_manager, - top_module, - top_module, 0, en_port, - bl_decoder_module, 0, bl_decoder_en_port); - - /* Top module Address port -> BL Decoder Address port */ - add_module_bus_nets(module_manager, - top_module, - top_module, 0, bl_addr_port, - bl_decoder_module, 0, bl_decoder_addr_port); - - /* Top module data_in port -> BL Decoder data_in port */ - add_module_bus_nets(module_manager, - top_module, - top_module, 0, din_port, - bl_decoder_module, 0, bl_decoder_din_port); - - /* Add module nets from the top module to WL decoder's inputs */ - ModulePortId wl_decoder_en_port = module_manager.find_module_port(wl_decoder_module, std::string(DECODER_ENABLE_PORT_NAME)); - BasicPort wl_decoder_en_port_info = module_manager.module_port(wl_decoder_module, 
wl_decoder_en_port); - - ModulePortId wl_decoder_addr_port = module_manager.find_module_port(wl_decoder_module, std::string(DECODER_ADDRESS_PORT_NAME)); - BasicPort wl_decoder_addr_port_info = module_manager.module_port(wl_decoder_module, bl_decoder_addr_port); - - /* Top module Enable port -> WL Decoder Enable port */ - add_module_bus_nets(module_manager, - top_module, - top_module, 0, en_port, - wl_decoder_module, 0, wl_decoder_en_port); - - /* Top module Address port -> WL Decoder Address port */ - add_module_bus_nets(module_manager, - top_module, - top_module, 0, wl_addr_port, - wl_decoder_module, 0, wl_decoder_addr_port); - - /* Add nets from BL data out to each configurable child */ - size_t cur_bl_index = 0; - - ModulePortId bl_decoder_dout_port = module_manager.find_module_port(bl_decoder_module, std::string(DECODER_DATA_OUT_PORT_NAME)); - BasicPort bl_decoder_dout_port_info = module_manager.module_port(bl_decoder_module, bl_decoder_dout_port); - - for (size_t child_id = 0; child_id < module_manager.configurable_children(top_module).size(); ++child_id) { - ModuleId child_module = module_manager.configurable_children(top_module)[child_id]; - size_t child_instance = module_manager.configurable_child_instances(top_module)[child_id]; - - /* Find the BL port */ - ModulePortId child_bl_port = module_manager.find_module_port(child_module, std::string(MEMORY_BL_PORT_NAME)); - BasicPort child_bl_port_info = module_manager.module_port(child_module, child_bl_port); - - for (const size_t& sink_bl_pin : child_bl_port_info.pins()) { - /* Find the BL decoder data index: - * It should be the residual when divided by the number of BLs - */ - size_t bl_pin_id = std::floor(cur_bl_index / num_bls); - - /* Create net */ - ModuleNetId net = create_module_source_pin_net(module_manager, top_module, - bl_decoder_module, 0, - bl_decoder_dout_port, - bl_decoder_dout_port_info.pins()[bl_pin_id]); - VTR_ASSERT(ModuleNetId::INVALID() != net); - - /* Add net sink */ - 
module_manager.add_module_net_sink(top_module, net, - child_module, child_instance, child_bl_port, sink_bl_pin); - - /* Increment the BL index */ - cur_bl_index++; + DecoderId wl_decoder_id = decoder_lib.find_decoder(wl_addr_size, num_wls, + true, false, false); + if (DecoderId::INVALID() == wl_decoder_id) { + wl_decoder_id = decoder_lib.add_decoder(wl_addr_size, num_wls, true, false, false); } - } + VTR_ASSERT(DecoderId::INVALID() != wl_decoder_id); - /* Add nets from WL data out to each configurable child */ - size_t cur_wl_index = 0; - - ModulePortId wl_decoder_dout_port = module_manager.find_module_port(wl_decoder_module, std::string(DECODER_DATA_OUT_PORT_NAME)); - BasicPort wl_decoder_dout_port_info = module_manager.module_port(wl_decoder_module, wl_decoder_dout_port); - - for (size_t child_id = 0; child_id < module_manager.configurable_children(top_module).size(); ++child_id) { - ModuleId child_module = module_manager.configurable_children(top_module)[child_id]; - size_t child_instance = module_manager.configurable_child_instances(top_module)[child_id]; - - /* Find the WL port */ - ModulePortId child_wl_port = module_manager.find_module_port(child_module, std::string(MEMORY_WL_PORT_NAME)); - BasicPort child_wl_port_info = module_manager.module_port(child_module, child_wl_port); - - for (const size_t& sink_wl_pin : child_wl_port_info.pins()) { - /* Find the BL decoder data index: - * It should be the residual when divided by the number of BLs + /* Create a module if not existed yet */ + std::string wl_decoder_module_name = generate_memory_decoder_subckt_name(wl_addr_size, num_wls); + ModuleId wl_decoder_module = module_manager.find_module(wl_decoder_module_name); + if (ModuleId::INVALID() == wl_decoder_module) { + /* BL decoder has the same ports as the frame-based decoders + * We reuse it here */ - size_t wl_pin_id = cur_wl_index % num_wls; - - /* Create net */ - ModuleNetId net = create_module_source_pin_net(module_manager, top_module, - wl_decoder_module, 
0, - wl_decoder_dout_port, - wl_decoder_dout_port_info.pins()[wl_pin_id]); - VTR_ASSERT(ModuleNetId::INVALID() != net); - - /* Add net sink */ - module_manager.add_module_net_sink(top_module, net, - child_module, child_instance, child_wl_port, sink_wl_pin); - - /* Increment the WL index */ - cur_wl_index++; + wl_decoder_module = build_wl_memory_decoder_module(module_manager, + decoder_lib, + wl_decoder_id); } - } + VTR_ASSERT(ModuleId::INVALID() != wl_decoder_module); + size_t curr_wl_decoder_instance_id = module_manager.num_instance(top_module, wl_decoder_module); + module_manager.add_child_module(top_module, wl_decoder_module); - /* Add the BL and WL decoders to the end of configurable children list - * Note: this MUST be done after adding all the module nets to other regular configurable children - */ - module_manager.add_configurable_child(top_module, bl_decoder_module, 0); - module_manager.add_configurable_child(top_module, wl_decoder_module, 0); + /************************************************************** + * Add module nets from the top module to BL decoder's inputs + */ + ModulePortId bl_decoder_en_port = module_manager.find_module_port(bl_decoder_module, std::string(DECODER_ENABLE_PORT_NAME)); + BasicPort bl_decoder_en_port_info = module_manager.module_port(bl_decoder_module, bl_decoder_en_port); + + ModulePortId bl_decoder_addr_port = module_manager.find_module_port(bl_decoder_module, std::string(DECODER_ADDRESS_PORT_NAME)); + BasicPort bl_decoder_addr_port_info = module_manager.module_port(bl_decoder_module, bl_decoder_addr_port); + + ModulePortId bl_decoder_din_port = module_manager.find_module_port(bl_decoder_module, std::string(DECODER_DATA_IN_PORT_NAME)); + BasicPort bl_decoder_din_port_info = module_manager.module_port(bl_decoder_module, bl_decoder_din_port); + + /* Data in port of the local BL decoder should always be 1 */ + VTR_ASSERT(1 == bl_decoder_din_port_info.get_width()); + + /* Top module Enable port -> BL Decoder Enable port */ + 
add_module_bus_nets(module_manager, + top_module, + top_module, 0, en_port, + bl_decoder_module, curr_bl_decoder_instance_id, bl_decoder_en_port); + + /* Top module Address port -> BL Decoder Address port */ + add_module_bus_nets(module_manager, + top_module, + top_module, 0, bl_addr_port, + bl_decoder_module, curr_bl_decoder_instance_id, bl_decoder_addr_port); + + /* Top module data_in port -> BL Decoder data_in port: + * Note that each region has independent data_in connection from the top-level module + * The pin index is the configuration region index + */ + ModuleNetId din_net = create_module_source_pin_net(module_manager, top_module, + top_module, 0, + din_port, + din_port_info.pins()[size_t(config_region)]); + VTR_ASSERT(ModuleNetId::INVALID() != din_net); + + /* Configure the net sink */ + module_manager.add_module_net_sink(top_module, din_net, bl_decoder_module, curr_bl_decoder_instance_id, bl_decoder_din_port, bl_decoder_din_port_info.pins()[0]); + + /************************************************************** + * Add module nets from the top module to WL decoder's inputs + */ + ModulePortId wl_decoder_en_port = module_manager.find_module_port(wl_decoder_module, std::string(DECODER_ENABLE_PORT_NAME)); + BasicPort wl_decoder_en_port_info = module_manager.module_port(wl_decoder_module, wl_decoder_en_port); + + ModulePortId wl_decoder_addr_port = module_manager.find_module_port(wl_decoder_module, std::string(DECODER_ADDRESS_PORT_NAME)); + BasicPort wl_decoder_addr_port_info = module_manager.module_port(wl_decoder_module, bl_decoder_addr_port); + + /* Top module Enable port -> WL Decoder Enable port */ + add_module_bus_nets(module_manager, + top_module, + top_module, 0, en_port, + wl_decoder_module, curr_wl_decoder_instance_id, wl_decoder_en_port); + + /* Top module Address port -> WL Decoder Address port */ + add_module_bus_nets(module_manager, + top_module, + top_module, 0, wl_addr_port, + wl_decoder_module, curr_wl_decoder_instance_id, 
wl_decoder_addr_port); + + /************************************************************** + * Add nets from BL data out to each configurable child + */ + size_t cur_bl_index = 0; + + ModulePortId bl_decoder_dout_port = module_manager.find_module_port(bl_decoder_module, std::string(DECODER_DATA_OUT_PORT_NAME)); + BasicPort bl_decoder_dout_port_info = module_manager.module_port(bl_decoder_module, bl_decoder_dout_port); + + for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { + ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id]; + size_t child_instance = module_manager.region_configurable_child_instances(top_module, config_region)[child_id]; + + /* Find the BL port */ + ModulePortId child_bl_port = module_manager.find_module_port(child_module, std::string(MEMORY_BL_PORT_NAME)); + BasicPort child_bl_port_info = module_manager.module_port(child_module, child_bl_port); + + for (const size_t& sink_bl_pin : child_bl_port_info.pins()) { + /* Find the BL decoder data index: + * It should be the residual when divided by the number of BLs + */ + size_t bl_pin_id = std::floor(cur_bl_index / num_bls); + if (!(bl_pin_id < bl_decoder_dout_port_info.pins().size())) + VTR_ASSERT(bl_pin_id < bl_decoder_dout_port_info.pins().size()); + + /* Create net */ + ModuleNetId net = create_module_source_pin_net(module_manager, top_module, + bl_decoder_module, curr_bl_decoder_instance_id, + bl_decoder_dout_port, + bl_decoder_dout_port_info.pins()[bl_pin_id]); + VTR_ASSERT(ModuleNetId::INVALID() != net); + + /* Add net sink */ + module_manager.add_module_net_sink(top_module, net, + child_module, child_instance, child_bl_port, sink_bl_pin); + + /* Increment the BL index */ + cur_bl_index++; + } + } + + /************************************************************** + * Add nets from WL data out to each configurable child + */ + size_t cur_wl_index = 0; + + 
ModulePortId wl_decoder_dout_port = module_manager.find_module_port(wl_decoder_module, std::string(DECODER_DATA_OUT_PORT_NAME)); + BasicPort wl_decoder_dout_port_info = module_manager.module_port(wl_decoder_module, wl_decoder_dout_port); + + for (size_t child_id = 0; child_id < module_manager.region_configurable_children(top_module, config_region).size(); ++child_id) { + ModuleId child_module = module_manager.region_configurable_children(top_module, config_region)[child_id]; + size_t child_instance = module_manager.region_configurable_child_instances(top_module, config_region)[child_id]; + + /* Find the WL port */ + ModulePortId child_wl_port = module_manager.find_module_port(child_module, std::string(MEMORY_WL_PORT_NAME)); + BasicPort child_wl_port_info = module_manager.module_port(child_module, child_wl_port); + + for (const size_t& sink_wl_pin : child_wl_port_info.pins()) { + /* Find the WL decoder data index: + * It should be the residual when divided by the number of WLs + */ + size_t wl_pin_id = cur_wl_index % num_wls; + + /* Create net */ + ModuleNetId net = create_module_source_pin_net(module_manager, top_module, + wl_decoder_module, curr_wl_decoder_instance_id, + wl_decoder_dout_port, + wl_decoder_dout_port_info.pins()[wl_pin_id]); + VTR_ASSERT(ModuleNetId::INVALID() != net); + + /* Add net sink */ + module_manager.add_module_net_sink(top_module, net, + child_module, child_instance, child_wl_port, sink_wl_pin); + + /* Increment the WL index */ + cur_wl_index++; + } + } + + /************************************************************** + * Add the BL and WL decoders to the end of configurable children list + * Note: this MUST be done after adding all the module nets to other regular configurable children + */ + module_manager.add_configurable_child(top_module, bl_decoder_module, curr_bl_decoder_instance_id); + module_manager.add_configurable_child_to_region(top_module, + config_region, + bl_decoder_module, + curr_bl_decoder_instance_id, + 
module_manager.configurable_children(top_module).size() - 1); + + module_manager.add_configurable_child(top_module, wl_decoder_module, curr_wl_decoder_instance_id); + module_manager.add_configurable_child_to_region(top_module, + config_region, + wl_decoder_module, + curr_wl_decoder_instance_id, + module_manager.configurable_children(top_module).size() - 1); + } } /******************************************************************** @@ -1141,7 +1307,7 @@ void add_top_module_nets_cmos_memory_config_bus(ModuleManager& module_manager, DecoderLibrary& decoder_lib, const ModuleId& parent_module, const ConfigProtocol& config_protocol, - const size_t& num_config_bits) { + const vtr::vector<ConfigRegionId, size_t>& num_config_bits) { switch (config_protocol.type()) { case CONFIG_MEM_STANDALONE: add_module_nets_cmos_flatten_memory_config_bus(module_manager, parent_module, @@ -1203,7 +1369,7 @@ void add_top_module_nets_memory_config_bus(ModuleManager& module_manager, const ModuleId& parent_module, const ConfigProtocol& config_protocol, const e_circuit_model_design_tech& mem_tech, - const size_t& num_config_bits) { + const vtr::vector<ConfigRegionId, size_t>& num_config_bits) { vtr::ScopedStartFinishTimer timer("Add module nets for configuration buses"); diff --git a/openfpga/src/fabric/build_top_module_memory.h b/openfpga/src/fabric/build_top_module_memory.h index 87ca278cd..7e93ca85a 100644 --- a/openfpga/src/fabric/build_top_module_memory.h +++ b/openfpga/src/fabric/build_top_module_memory.h @@ -7,6 +7,7 @@ #include #include +#include "vtr_vector.h" #include "vtr_ndmatrix.h" #include "module_manager.h" #include "circuit_types.h" @@ -45,19 +46,25 @@ int load_top_module_memory_modules_from_fabric_key(ModuleManager& module_manager const ModuleId& top_module, const FabricKey& fabric_key); +vtr::vector<ConfigRegionId, size_t> find_top_module_regional_num_config_bit(const ModuleManager& module_manager, + const ModuleId& top_module, + const CircuitLibrary& circuit_lib, + const CircuitModelId& sram_model, + const e_config_protocol_type& 
config_protocol_type); + + void add_top_module_sram_ports(ModuleManager& module_manager, const ModuleId& module_id, const CircuitLibrary& circuit_lib, const CircuitModelId& sram_model, const ConfigProtocol& config_protocol, - const size_t& num_config_bits); + const vtr::vector<ConfigRegionId, size_t>& num_config_bits); void add_top_module_nets_memory_config_bus(ModuleManager& module_manager, DecoderLibrary& decoder_lib, const ModuleId& parent_module, const ConfigProtocol& config_protocol, const e_circuit_model_design_tech& mem_tech, - const size_t& num_config_bits); + const vtr::vector<ConfigRegionId, size_t>& num_config_bits); } /* end namespace openfpga */ diff --git a/openfpga/src/fpga_bitstream/build_device_bitstream.cpp b/openfpga/src/fpga_bitstream/build_device_bitstream.cpp index a2aa97f57..e4ded1821 100644 --- a/openfpga/src/fpga_bitstream/build_device_bitstream.cpp +++ b/openfpga/src/fpga_bitstream/build_device_bitstream.cpp @@ -62,37 +62,69 @@ size_t rec_estimate_device_bitstream_num_blocks(const ModuleManager& module_mana static size_t rec_estimate_device_bitstream_num_bits(const ModuleManager& module_manager, const ModuleId& top_module, + const ModuleId& parent_module, const e_config_protocol_type& config_protocol_type) { size_t num_bits = 0; /* If a child module has no configurable children, this is a leaf node * We can count it in. Otherwise, we should go recursively. */ - if (0 == module_manager.configurable_children(top_module).size()) { + if (0 == module_manager.configurable_children(parent_module).size()) { return 1; } - size_t num_configurable_children = module_manager.configurable_children(top_module).size(); - /* Frame-based configuration protocol will have 1 decoder - * if there are more than 1 configurable children + /* Two cases to walk through configurable children: + * - For top-level module: + * Iterate over the multiple regions and visit each configuration child under any region + * In each region, frame-based configuration protocol or memory bank protocol will contain + * decoders. 
We should bypass them when counting the bitstream size + * - For other modules: + * Iterate over the configurable children regardless of regions */ - if ( (CONFIG_MEM_FRAME_BASED == config_protocol_type) - && (2 <= num_configurable_children)) { - num_configurable_children--; - } + if (parent_module == top_module) { + for (const ConfigRegionId& config_region : module_manager.regions(parent_module)) { + size_t curr_region_num_config_child = module_manager.region_configurable_children(parent_module, config_region).size(); - /* Memory configuration protocol will have 2 decoders - * at the top-level - */ - if (CONFIG_MEM_MEMORY_BANK == config_protocol_type) { - std::string top_block_name = generate_fpga_top_module_name(); - if (top_module == module_manager.find_module(top_block_name)) { - num_configurable_children -= 2; + /* FIXME: This will be uncommented when multi-region support is extended for frame-based + * Frame-based configuration protocol will have 1 decoder + * if there are more than 1 configurable children + if ( (CONFIG_MEM_FRAME_BASED == config_protocol_type) + && (2 <= curr_region_num_config_child)) { + curr_region_num_config_child--; + } + */ + + /* Memory configuration protocol will have 2 decoders + * at the top-level + */ + if (CONFIG_MEM_MEMORY_BANK == config_protocol_type) { + VTR_ASSERT(2 <= curr_region_num_config_child); + curr_region_num_config_child -= 2; + } + + /* Visit all the children recursively */ + for (size_t ichild = 0; ichild < curr_region_num_config_child; ++ichild) { + ModuleId child_module = module_manager.region_configurable_children(parent_module, config_region)[ichild]; + num_bits += rec_estimate_device_bitstream_num_bits(module_manager, top_module, child_module, config_protocol_type); + } + } + } else { + VTR_ASSERT_SAFE(parent_module != top_module); + + size_t num_configurable_children = module_manager.configurable_children(parent_module).size(); + + /* Frame-based configuration protocol will have 1 decoder + * if there 
are more than 1 configurable children + */ + if ( (CONFIG_MEM_FRAME_BASED == config_protocol_type) + && (2 <= num_configurable_children)) { + num_configurable_children--; + } + + for (size_t ichild = 0; ichild < num_configurable_children; ++ichild) { + ModuleId child_module = module_manager.configurable_children(parent_module)[ichild]; + num_bits += rec_estimate_device_bitstream_num_bits(module_manager, top_module, child_module, config_protocol_type); } - } - for (size_t ichild = 0; ichild < num_configurable_children; ++ichild) { - ModuleId child_module = module_manager.configurable_children(top_module)[ichild]; - num_bits += rec_estimate_device_bitstream_num_bits(module_manager, child_module, config_protocol_type); } return num_bits; @@ -141,6 +173,7 @@ BitstreamManager build_device_bitstream(const VprContext& vpr_ctx, /* Estimate the number of bits to be added to the database */ size_t num_bits_to_reserve = rec_estimate_device_bitstream_num_bits(openfpga_ctx.module_graph(), + top_module, top_module, openfpga_ctx.arch().config_protocol.type()); bitstream_manager.reserve_bits(num_bits_to_reserve); diff --git a/openfpga/src/fpga_bitstream/build_fabric_bitstream.cpp b/openfpga/src/fpga_bitstream/build_fabric_bitstream.cpp index 4c05a5389..04d7fe714 100644 --- a/openfpga/src/fpga_bitstream/build_fabric_bitstream.cpp +++ b/openfpga/src/fpga_bitstream/build_fabric_bitstream.cpp @@ -58,7 +58,6 @@ void rec_build_module_fabric_dependent_chain_bitstream(const BitstreamManager& b /* Find the child block that matches the instance name! */ ConfigBlockId child_block = bitstream_manager.find_child_block(parent_block, instance_name); /* We must have one valid block id! */ - if (true != bitstream_manager.valid_block_id(child_block)) VTR_ASSERT(true == bitstream_manager.valid_block_id(child_block)); /* Go recursively */ @@ -79,7 +78,6 @@ void rec_build_module_fabric_dependent_chain_bitstream(const BitstreamManager& b /* Find the child block that matches the instance name! 
*/ ConfigBlockId child_block = bitstream_manager.find_child_block(parent_block, instance_name); /* We must have one valid block id! */ - if (true != bitstream_manager.valid_block_id(child_block)) VTR_ASSERT(true == bitstream_manager.valid_block_id(child_block)); /* Go recursively */ @@ -126,6 +124,7 @@ void rec_build_module_fabric_dependent_memory_bank_bitstream(const BitstreamMana const ModuleManager& module_manager, const ModuleId& top_module, const ModuleId& parent_module, + const ConfigRegionId& config_region, const size_t& bl_addr_size, const size_t& wl_addr_size, const size_t& num_bls, @@ -138,42 +137,84 @@ void rec_build_module_fabric_dependent_memory_bank_bitstream(const BitstreamMana * we dive to the next level first! */ if (0 < bitstream_manager.block_children(parent_block).size()) { - /* For top module, we will skip the two decoders at the end of the configurable children list */ - std::vector configurable_children = module_manager.configurable_children(parent_module); - - size_t num_configurable_children = configurable_children.size(); + /* For top module: + * - Use regional configurable children + * - we will skip the two decoders at the end of the configurable children list + */ if (parent_module == top_module) { + std::vector configurable_children = module_manager.region_configurable_children(parent_module, config_region); + VTR_ASSERT(2 <= configurable_children.size()); - num_configurable_children -= 2; - } + size_t num_configurable_children = configurable_children.size() - 2; - /* Early exit if there is no configurable children */ - if (0 == num_configurable_children) { - /* Ensure that there should be no configuration bits in the parent block */ - VTR_ASSERT(0 == bitstream_manager.block_bits(parent_block).size()); - return; - } + /* Early exit if there is no configurable children */ + if (0 == num_configurable_children) { + /* Ensure that there should be no configuration bits in the parent block */ + VTR_ASSERT(0 == 
bitstream_manager.block_bits(parent_block).size()); + return; + } - for (size_t child_id = 0; child_id < num_configurable_children; ++child_id) { - ModuleId child_module = configurable_children[child_id]; - size_t child_instance = module_manager.configurable_child_instances(parent_module)[child_id]; - /* Get the instance name and ensure it is not empty */ - std::string instance_name = module_manager.instance_name(parent_module, child_module, child_instance); - - /* Find the child block that matches the instance name! */ - ConfigBlockId child_block = bitstream_manager.find_child_block(parent_block, instance_name); - /* We must have one valid block id! */ - if (true != bitstream_manager.valid_block_id(child_block)) - VTR_ASSERT(true == bitstream_manager.valid_block_id(child_block)); + for (size_t child_id = 0; child_id < num_configurable_children; ++child_id) { + ModuleId child_module = configurable_children[child_id]; + size_t child_instance = module_manager.region_configurable_child_instances(parent_module, config_region)[child_id]; - /* Go recursively */ - rec_build_module_fabric_dependent_memory_bank_bitstream(bitstream_manager, child_block, - module_manager, top_module, child_module, - bl_addr_size, wl_addr_size, - num_bls, num_wls, - cur_mem_index, - fabric_bitstream, - fabric_bitstream_region); + /* Get the instance name and ensure it is not empty */ + std::string instance_name = module_manager.instance_name(parent_module, child_module, child_instance); + + /* Find the child block that matches the instance name! */ + ConfigBlockId child_block = bitstream_manager.find_child_block(parent_block, instance_name); + /* We must have one valid block id! 
*/ + VTR_ASSERT(true == bitstream_manager.valid_block_id(child_block)); + + /* Go recursively */ + rec_build_module_fabric_dependent_memory_bank_bitstream(bitstream_manager, child_block, + module_manager, top_module, child_module, + config_region, + bl_addr_size, wl_addr_size, + num_bls, num_wls, + cur_mem_index, + fabric_bitstream, + fabric_bitstream_region); + } + } else { + VTR_ASSERT(parent_module != top_module); + /* For other modules: + * - Use configurable children directly + * - no need to exclude decoders as they are not there + */ + std::vector configurable_children = module_manager.configurable_children(parent_module); + + size_t num_configurable_children = configurable_children.size(); + + /* Early exit if there is no configurable children */ + if (0 == num_configurable_children) { + /* Ensure that there should be no configuration bits in the parent block */ + VTR_ASSERT(0 == bitstream_manager.block_bits(parent_block).size()); + return; + } + + for (size_t child_id = 0; child_id < num_configurable_children; ++child_id) { + ModuleId child_module = configurable_children[child_id]; + size_t child_instance = module_manager.configurable_child_instances(parent_module)[child_id]; + + /* Get the instance name and ensure it is not empty */ + std::string instance_name = module_manager.instance_name(parent_module, child_module, child_instance); + + /* Find the child block that matches the instance name! */ + ConfigBlockId child_block = bitstream_manager.find_child_block(parent_block, instance_name); + /* We must have one valid block id! 
*/ + VTR_ASSERT(true == bitstream_manager.valid_block_id(child_block)); + + /* Go recursively */ + rec_build_module_fabric_dependent_memory_bank_bitstream(bitstream_manager, child_block, + module_manager, top_module, child_module, + config_region, + bl_addr_size, wl_addr_size, + num_bls, num_wls, + cur_mem_index, + fabric_bitstream, + fabric_bitstream_region); + } } /* Ensure that there should be no configuration bits in the parent block */ VTR_ASSERT(0 == bitstream_manager.block_bits(parent_block).size()); @@ -451,30 +492,14 @@ void build_module_fabric_dependent_bitstream(const ConfigProtocol& config_protoc break; } case CONFIG_MEM_MEMORY_BANK: { - - size_t cur_mem_index = 0; - /* Find BL address port size */ + /* Find global BL address port size */ ModulePortId bl_addr_port = module_manager.find_module_port(top_module, std::string(DECODER_BL_ADDRESS_PORT_NAME)); BasicPort bl_addr_port_info = module_manager.module_port(top_module, bl_addr_port); - /* Find WL address port size */ + /* Find global WL address port size */ ModulePortId wl_addr_port = module_manager.find_module_port(top_module, std::string(DECODER_WL_ADDRESS_PORT_NAME)); BasicPort wl_addr_port_info = module_manager.module_port(top_module, wl_addr_port); - /* Find BL and WL decoders which are the last two configurable children*/ - std::vector configurable_children = module_manager.configurable_children(top_module); - VTR_ASSERT(2 <= configurable_children.size()); - ModuleId bl_decoder_module = configurable_children[configurable_children.size() - 2]; - VTR_ASSERT(0 == module_manager.configurable_child_instances(top_module)[configurable_children.size() - 2]); - ModuleId wl_decoder_module = configurable_children[configurable_children.size() - 1]; - VTR_ASSERT(0 == module_manager.configurable_child_instances(top_module)[configurable_children.size() - 1]); - - ModulePortId bl_port = module_manager.find_module_port(bl_decoder_module, std::string(DECODER_DATA_OUT_PORT_NAME)); - BasicPort bl_port_info = 
module_manager.module_port(bl_decoder_module, bl_port); - - ModulePortId wl_port = module_manager.find_module_port(wl_decoder_module, std::string(DECODER_DATA_OUT_PORT_NAME)); - BasicPort wl_port_info = module_manager.module_port(wl_decoder_module, wl_port); - /* Reserve bits before build-up */ fabric_bitstream.set_use_address(true); fabric_bitstream.set_use_wl_address(true); @@ -482,13 +507,27 @@ void build_module_fabric_dependent_bitstream(const ConfigProtocol& config_protoc fabric_bitstream.set_wl_address_length(wl_addr_port_info.get_width()); fabric_bitstream.reserve_bits(bitstream_manager.num_bits()); - /* TODO: Currently only support 1 region. Will expand later! */ - VTR_ASSERT(1 == module_manager.regions(top_module).size()); - + /* Build bitstreams by region */ for (const ConfigRegionId& config_region : module_manager.regions(top_module)) { + size_t cur_mem_index = 0; + + /* Find port information for local BL and WL decoder in this region */ + std::vector configurable_children = module_manager.region_configurable_children(top_module, config_region); + VTR_ASSERT(2 <= configurable_children.size()); + ModuleId bl_decoder_module = configurable_children[configurable_children.size() - 2]; + ModuleId wl_decoder_module = configurable_children[configurable_children.size() - 1]; + + ModulePortId bl_port = module_manager.find_module_port(bl_decoder_module, std::string(DECODER_DATA_OUT_PORT_NAME)); + BasicPort bl_port_info = module_manager.module_port(bl_decoder_module, bl_port); + + ModulePortId wl_port = module_manager.find_module_port(wl_decoder_module, std::string(DECODER_DATA_OUT_PORT_NAME)); + BasicPort wl_port_info = module_manager.module_port(wl_decoder_module, wl_port); + + /* Build the bitstream for all the blocks in this region */ FabricBitRegionId fabric_bitstream_region = fabric_bitstream.add_region(); rec_build_module_fabric_dependent_memory_bank_bitstream(bitstream_manager, top_block, module_manager, top_module, top_module, + config_region, 
bl_addr_port_info.get_width(), wl_addr_port_info.get_width(), bl_port_info.get_width(), diff --git a/openfpga/src/fpga_verilog/verilog_module_writer.cpp b/openfpga/src/fpga_verilog/verilog_module_writer.cpp index 7d6883236..08e1b3adb 100644 --- a/openfpga/src/fpga_verilog/verilog_module_writer.cpp +++ b/openfpga/src/fpga_verilog/verilog_module_writer.cpp @@ -40,7 +40,7 @@ std::string generate_verilog_undriven_local_wire_name(const ModuleManager& modul if (!module_manager.instance_name(parent, child, instance_id).empty()) { wire_name = module_manager.instance_name(parent, child, instance_id); } else { - wire_name = module_manager.module_name(parent) + std::string("_") + std::to_string(instance_id); + wire_name = module_manager.module_name(child) + std::string("_") + std::to_string(instance_id); wire_name += std::string("_"); } diff --git a/openfpga/src/fpga_verilog/verilog_top_testbench.cpp b/openfpga/src/fpga_verilog/verilog_top_testbench.cpp index a5a6e1f82..b6da8f813 100644 --- a/openfpga/src/fpga_verilog/verilog_top_testbench.cpp +++ b/openfpga/src/fpga_verilog/verilog_top_testbench.cpp @@ -1517,6 +1517,7 @@ void print_verilog_top_testbench_memory_bank_bitstream(std::fstream& fp, fp << ";"; fp << std::endl; + fp << "\t\t"; fp << generate_verilog_port_constant_values(wl_addr_port, initial_wl_addr_values); fp << ";"; fp << std::endl; @@ -1528,37 +1529,87 @@ void print_verilog_top_testbench_memory_bank_bitstream(std::fstream& fp, fp << std::endl; - /* Attention: the configuration chain protcol requires the last configuration bit is fed first - * We will visit the fabric bitstream in a reverse way + /* Reorganize the fabric bitstream by the same address across regions: + * This is needed because the length of the fabric bitstream can differ between regions. + * Template: + * <bl_address> <wl_address> <din_of_all_regions> + * An example: + * 000000 00000 1011 + * + * Note: the std::map may cause large memory footprint for large bitstream databases! 
*/ - for (const FabricBitId& bit_id : fabric_bitstream.bits()) { - /* When fast configuration is enabled, we skip zero data_in values */ - if ((true == fast_configuration) - && (bit_value_to_skip == fabric_bitstream.bit_din(bit_id))) { - continue; + std::map<std::pair<std::string, std::string>, std::vector<bool>> fabric_bits_by_addr; + for (const FabricBitRegionId& region : fabric_bitstream.regions()) { + for (const FabricBitId& bit_id : fabric_bitstream.region_bits(region)) { + /* Create string for BL address */ + VTR_ASSERT(bl_addr_port.get_width() == fabric_bitstream.bit_bl_address(bit_id).size()); + std::string bl_addr_str; + for (const char& addr_bit : fabric_bitstream.bit_bl_address(bit_id)) { + bl_addr_str.push_back(addr_bit); + } + + /* Create string for WL address */ + VTR_ASSERT(wl_addr_port.get_width() == fabric_bitstream.bit_wl_address(bit_id).size()); + std::string wl_addr_str; + for (const char& addr_bit : fabric_bitstream.bit_wl_address(bit_id)) { + wl_addr_str.push_back(addr_bit); + } + + /* Place the config bit */ + auto result = fabric_bits_by_addr.find(std::make_pair(bl_addr_str, wl_addr_str)); + if (result == fabric_bits_by_addr.end()) { + /* This is a new bit, resize the vector to the number of regions + * and deposit '0' to all the bits + */ + fabric_bits_by_addr[std::make_pair(bl_addr_str, wl_addr_str)] = std::vector<bool>(fabric_bitstream.regions().size(), false); + fabric_bits_by_addr[std::make_pair(bl_addr_str, wl_addr_str)][size_t(region)] = fabric_bitstream.bit_din(bit_id); + } else { + VTR_ASSERT_SAFE(result != fabric_bits_by_addr.end()); + result->second[size_t(region)] = fabric_bitstream.bit_din(bit_id); + } + } + } + + for (const auto& addr_din_pair : fabric_bits_by_addr) { + /* When fast configuration is enabled, + * the rule to skip any configuration bit should consider the whole data input values. + * Only when all the bits in the din port match the value to be skipped, + * can the programming cycle be skipped! 
+ */ + if (true == fast_configuration) { + bool skip_curr_bits = true; + for (const bool& bit : addr_din_pair.second) { + if (bit_value_to_skip != bit) { + skip_curr_bits = false; + break; + } + } + + if (true == skip_curr_bits) { + continue; + } } fp << "\t\t" << std::string(TOP_TESTBENCH_PROG_TASK_NAME); fp << "(" << bl_addr_port.get_width() << "'b"; - VTR_ASSERT(bl_addr_port.get_width() == fabric_bitstream.bit_bl_address(bit_id).size()); - for (const char& addr_bit : fabric_bitstream.bit_bl_address(bit_id)) { - fp << addr_bit; - } + VTR_ASSERT(bl_addr_port.get_width() == addr_din_pair.first.first.length()); + fp << addr_din_pair.first.first; fp << ", "; fp << wl_addr_port.get_width() << "'b"; - VTR_ASSERT(wl_addr_port.get_width() == fabric_bitstream.bit_wl_address(bit_id).size()); - for (const char& addr_bit : fabric_bitstream.bit_wl_address(bit_id)) { - fp << addr_bit; - } + VTR_ASSERT(wl_addr_port.get_width() == addr_din_pair.first.second.length()); + fp << addr_din_pair.first.second; fp << ", "; - fp <<"1'b"; - if (true == fabric_bitstream.bit_din(bit_id)) { - fp << "1"; - } else { - VTR_ASSERT(false == fabric_bitstream.bit_din(bit_id)); - fp << "0"; + fp << din_port.get_width() << "'b"; + VTR_ASSERT(din_port.get_width() == addr_din_pair.second.size()); + for (const bool& din_value : addr_din_pair.second) { + if (true == din_value) { + fp << "1"; + } else { + VTR_ASSERT(false == din_value); + fp << "0"; + } } fp << ");" << std::endl; } diff --git a/openfpga/src/utils/module_manager_utils.cpp b/openfpga/src/utils/module_manager_utils.cpp index 929897721..7d56430b6 100644 --- a/openfpga/src/utils/module_manager_utils.cpp +++ b/openfpga/src/utils/module_manager_utils.cpp @@ -1793,6 +1793,7 @@ ModuleNetId create_module_source_pin_net(ModuleManager& module_manager, * - des_module should be the cur_module or a child of it * - src_instance should be valid and des_instance should be valid as well * - src port size should match the des port size + * 
*******************************************************************/ void add_module_bus_nets(ModuleManager& module_manager, const ModuleId& cur_module_id, diff --git a/openfpga_flow/openfpga_arch/k4_N4_40nm_multi_region_bank_openfpga.xml b/openfpga_flow/openfpga_arch/k4_N4_40nm_multi_region_bank_openfpga.xml new file mode 100644 index 000000000..3c074876d --- /dev/null +++ b/openfpga_flow/openfpga_arch/k4_N4_40nm_multi_region_bank_openfpga.xml @@ -0,0 +1,198 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10e-12 + + + 10e-12 + + + + + + + + + 10e-12 + + + 10e-12 + + + + + + + + + 10e-12 + + + 10e-12 + + + + + + + + + + + + + 10e-12 5e-12 5e-12 + + + 10e-12 5e-12 5e-12 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/openfpga_flow/openfpga_arch/k4_N4_40nm_multi_region_bank_use_both_set_reset_openfpga.xml b/openfpga_flow/openfpga_arch/k4_N4_40nm_multi_region_bank_use_both_set_reset_openfpga.xml new file mode 100644 index 000000000..816c3e0a4 --- /dev/null +++ b/openfpga_flow/openfpga_arch/k4_N4_40nm_multi_region_bank_use_both_set_reset_openfpga.xml @@ -0,0 +1,200 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10e-12 + + + 10e-12 + + + + + + + + + 10e-12 + + + 10e-12 + + + + + + + + + 10e-12 + + + 10e-12 + + + + + + + + + + + + + 10e-12 5e-12 5e-12 + + + 10e-12 5e-12 5e-12 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/openfpga_flow/tasks/basic_tests/full_testbench/multi_region_memory_bank/config/task.conf b/openfpga_flow/tasks/basic_tests/full_testbench/multi_region_memory_bank/config/task.conf new file mode 100644 index 
000000000..a78899a35 --- /dev/null +++ b/openfpga_flow/tasks/basic_tests/full_testbench/multi_region_memory_bank/config/task.conf @@ -0,0 +1,35 @@ +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +# Configuration file for running experiments +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +# timeout_each_job : FPGA Task script splits fpga flow into multiple jobs +# Each job execute fpga_flow script on combination of architecture & benchmark +# timeout_each_job is timeout for each job +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + +[GENERAL] +run_engine=openfpga_shell +power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml +power_analysis = true +spice_output=false +verilog_output=true +timeout_each_job = 20*60 +fpga_flow=yosys_vpr + +[OpenFPGA_SHELL] +openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/OpenFPGAShellScripts/fix_device_example_script.openfpga +openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_N4_40nm_multi_region_bank_openfpga.xml +openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml +openfpga_vpr_device_layout=2x2 + +[ARCHITECTURES] +arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_N4_tileable_40nm.xml + +[BENCHMARKS] +bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.v + +[SYNTHESIS_PARAM] +bench0_top = and2 +bench0_chan_width = 300 + +[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH] +end_flow_with_test= diff --git a/openfpga_flow/tasks/basic_tests/full_testbench/smart_fast_multi_region_memory_bank/config/task.conf b/openfpga_flow/tasks/basic_tests/full_testbench/smart_fast_multi_region_memory_bank/config/task.conf new file mode 100644 index 000000000..342c3ae94 --- /dev/null +++ b/openfpga_flow/tasks/basic_tests/full_testbench/smart_fast_multi_region_memory_bank/config/task.conf @@ -0,0 +1,34 @@ +# = = = = = = = = = 
= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +# Configuration file for running experiments +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +# timeout_each_job : FPGA Task script splits fpga flow into multiple jobs +# Each job execute fpga_flow script on combination of architecture & benchmark +# timeout_each_job is timeout for each job +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + +[GENERAL] +run_engine=openfpga_shell +power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml +power_analysis = true +spice_output=false +verilog_output=true +timeout_each_job = 20*60 +fpga_flow=yosys_vpr + +[OpenFPGA_SHELL] +openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/OpenFPGAShellScripts/fast_configuration_example_script.openfpga +openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_N4_40nm_multi_region_bank_use_both_set_reset_openfpga.xml +openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml + +[ARCHITECTURES] +arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_N4_tileable_40nm.xml + +[BENCHMARKS] +bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.v + +[SYNTHESIS_PARAM] +bench0_top = and2 +bench0_chan_width = 300 + +[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH] +end_flow_with_test=