From 987eccf586aa6b746cca7743b8dfaae3c459b6be Mon Sep 17 00:00:00 2001 From: tangxifan Date: Thu, 29 Oct 2020 16:26:45 -0600 Subject: [PATCH] [Tool] Bug fix in multi-region memory bank; Basic test passed --- .../src/fabric/build_top_module_memory.cpp | 74 +++++++----- .../fpga_bitstream/build_device_bitstream.cpp | 70 ++++++++---- .../fpga_bitstream/build_fabric_bitstream.cpp | 106 ++++++++++++------ .../fpga_verilog/verilog_module_writer.cpp | 2 +- openfpga/src/utils/module_manager_utils.cpp | 20 +--- openfpga/src/utils/module_manager_utils.h | 3 +- 6 files changed, 172 insertions(+), 103 deletions(-) diff --git a/openfpga/src/fabric/build_top_module_memory.cpp b/openfpga/src/fabric/build_top_module_memory.cpp index 913c99ed8..48eace3bd 100644 --- a/openfpga/src/fabric/build_top_module_memory.cpp +++ b/openfpga/src/fabric/build_top_module_memory.cpp @@ -811,7 +811,7 @@ void add_top_module_sram_ports(ModuleManager& module_manager, * * top_bl_addr[N-1:0] * ^ - * | local_bl_addr[M-1:0], N > M + * | local_bl_addr[N-1:0] * | * +-----+------------------+ * | | | @@ -820,6 +820,32 @@ void add_top_module_sram_ports(ModuleManager& module_manager, * | +-------------------+ | * | | * + * The BL/WL decoders should have the same circuit designs no matter what region + * they are placed even when the number of configuration bits are different + * from one region to another! + * This is designed to avoid any address collision between memory banks + * since they are programmed in the same clock cycle + * For example: + * - Memory Bank A has 36 memory cells. + * Its BL decoder has 3 address bit and 6 data output bit + * Its WL decoder has 3 address bit and 6 data output bit + * - Memory Bank B has 16 memory cells. + * Its BL decoder has 2 address bit and 4 data output bit + * Its WL decoder has 2 address bit and 4 data output bit + * - If we try to program the 36th memory cell in bank A + * the BL address will be 3'b110 + * the WL address will be 3'b110 + * the data input will be 1'b0 + * - If we try to program the 4th memory cell in bank A + * the BL address will be 3'b010 + * the WL address will be 3'b010 + * the data input will be 1'b1 + * However, in both cases, this will trigger a parasitic programming in bank B + * the BL address will be 2'b10 + * the WL address will be 2'b10 + * Assume the data input is expected to be 1'b1 for bank B + * but it will be overwritten to 1'b0 when programming the 36th cell in bank A! + * * Detailed schematic of each memory bank: * * WL_enable WL address @@ -884,35 +910,37 @@ void add_top_module_nets_cmos_memory_bank_config_bus(ModuleManager& module_manag BasicPort wl_addr_port_info = module_manager.module_port(top_module, wl_addr_port); /* Find the top-level number of BLs and WLs required to access each memory bit */ - size_t top_bl_addr_size = bl_addr_port_info.get_width(); - size_t top_wl_addr_size = wl_addr_port_info.get_width(); + size_t bl_addr_size = bl_addr_port_info.get_width(); + size_t wl_addr_size = wl_addr_port_info.get_width(); + + /* Each memory bank has a unified number of BL/WLs */ + size_t num_bls = 0; + for (const size_t& curr_config_bits : num_config_bits) { + num_bls = std::max(num_bls, find_memory_decoder_data_size(curr_config_bits)); + } + + size_t num_wls = 0; + for (const size_t& curr_config_bits : num_config_bits) { + num_wls = std::max(num_wls, find_memory_decoder_data_size(curr_config_bits)); + } /* Create separated memory bank circuitry, i.e., BL/WL decoders for each region */ for (const ConfigRegionId& config_region : module_manager.regions(top_module)) { - /* Find the number of BL/WLs and address sizes for the local decoders in the region */ - size_t num_bls = find_memory_decoder_data_size(num_config_bits[config_region]); - size_t num_wls = find_memory_decoder_data_size(num_config_bits[config_region]); - size_t local_bl_addr_size = find_memory_decoder_addr_size(num_config_bits[config_region]); - size_t local_wl_addr_size = find_memory_decoder_addr_size(num_config_bits[config_region]); - - VTR_ASSERT(top_bl_addr_size >= local_bl_addr_size); - VTR_ASSERT(top_wl_addr_size >= local_wl_addr_size); - /************************************************************** * Add the BL decoder module * Search the decoder library * If we find one, we use the module. * Otherwise, we create one and add it to the decoder library */ - DecoderId bl_decoder_id = decoder_lib.find_decoder(local_bl_addr_size, num_bls, + DecoderId bl_decoder_id = decoder_lib.find_decoder(bl_addr_size, num_bls, true, true, false); if (DecoderId::INVALID() == bl_decoder_id) { - bl_decoder_id = decoder_lib.add_decoder(local_bl_addr_size, num_bls, true, true, false); + bl_decoder_id = decoder_lib.add_decoder(bl_addr_size, num_bls, true, true, false); } VTR_ASSERT(DecoderId::INVALID() != bl_decoder_id); /* Create a module if not existed yet */ - std::string bl_decoder_module_name = generate_memory_decoder_with_data_in_subckt_name(local_bl_addr_size, num_bls); + std::string bl_decoder_module_name = generate_memory_decoder_with_data_in_subckt_name(bl_addr_size, num_bls); ModuleId bl_decoder_module = module_manager.find_module(bl_decoder_module_name); if (ModuleId::INVALID() == bl_decoder_module) { /* BL decoder has the same ports as the frame-based decoders @@ -932,15 +960,15 @@ void add_top_module_nets_cmos_memory_bank_config_bus(ModuleManager& module_manag * If we find one, we use the module. * Otherwise, we create one and add it to the decoder library */ - DecoderId wl_decoder_id = decoder_lib.find_decoder(local_wl_addr_size, num_wls, + DecoderId wl_decoder_id = decoder_lib.find_decoder(wl_addr_size, num_wls, true, false, false); if (DecoderId::INVALID() == wl_decoder_id) { - wl_decoder_id = decoder_lib.add_decoder(local_wl_addr_size, num_wls, true, false, false); + wl_decoder_id = decoder_lib.add_decoder(wl_addr_size, num_wls, true, false, false); } VTR_ASSERT(DecoderId::INVALID() != wl_decoder_id); /* Create a module if not existed yet */ - std::string wl_decoder_module_name = generate_memory_decoder_subckt_name(local_wl_addr_size, num_wls); + std::string wl_decoder_module_name = generate_memory_decoder_subckt_name(wl_addr_size, num_wls); ModuleId wl_decoder_module = module_manager.find_module(wl_decoder_module_name); if (ModuleId::INVALID() == wl_decoder_module) { /* BL decoder has the same ports as the frame-based decoders @@ -979,17 +1007,12 @@ void add_top_module_nets_cmos_memory_bank_config_bus(ModuleManager& module_manag add_module_bus_nets(module_manager, top_module, top_module, 0, bl_addr_port, - bl_decoder_module, curr_bl_decoder_instance_id, bl_decoder_addr_port, - true); + bl_decoder_module, curr_bl_decoder_instance_id, bl_decoder_addr_port); /* Top module data_in port -> BL Decoder data_in port: * Note that each region has independent data_in connection from the top-level module * The pin index is the configuration region index */ - add_module_bus_nets(module_manager, - top_module, - top_module, 0, din_port, - bl_decoder_module, curr_bl_decoder_instance_id, bl_decoder_din_port); ModuleNetId din_net = create_module_source_pin_net(module_manager, top_module, top_module, 0, din_port, @@ -1018,8 +1041,7 @@ void add_top_module_nets_cmos_memory_bank_config_bus(ModuleManager& module_manag add_module_bus_nets(module_manager, top_module, top_module, 0, wl_addr_port, - wl_decoder_module, curr_wl_decoder_instance_id, wl_decoder_addr_port, - true); + wl_decoder_module, curr_wl_decoder_instance_id, wl_decoder_addr_port); /************************************************************** * Add nets from BL data out to each configurable child diff --git a/openfpga/src/fpga_bitstream/build_device_bitstream.cpp b/openfpga/src/fpga_bitstream/build_device_bitstream.cpp index a2aa97f57..9229cce5f 100644 --- a/openfpga/src/fpga_bitstream/build_device_bitstream.cpp +++ b/openfpga/src/fpga_bitstream/build_device_bitstream.cpp @@ -62,37 +62,68 @@ size_t rec_estimate_device_bitstream_num_blocks(const ModuleManager& module_mana static size_t rec_estimate_device_bitstream_num_bits(const ModuleManager& module_manager, const ModuleId& top_module, + const ModuleId& parent_module, const e_config_protocol_type& config_protocol_type) { size_t num_bits = 0; /* If a child module has no configurable children, this is a leaf node * We can count it in. Otherwise, we should go recursively. */ - if (0 == module_manager.configurable_children(top_module).size()) { + if (0 == module_manager.configurable_children(parent_module).size()) { return 1; } - size_t num_configurable_children = module_manager.configurable_children(top_module).size(); - /* Frame-based configuration protocol will have 1 decoder - * if there are more than 1 configurable children + /* Two cases to walk through configurable children: + * - For top-level module: + * Iterate over the multiple regions and visit each configuration child under any region + * In each region, frame-based configuration protocol or memory bank protocol will contain + * decoders. We should bypass them when count the bitstream size + * - For other modules: + * Iterate over the configurable children regardless of regions */ - if ( (CONFIG_MEM_FRAME_BASED == config_protocol_type) - && (2 <= num_configurable_children)) { - num_configurable_children--; - } + if (parent_module == top_module) { + for (const ConfigRegionId& config_region : module_manager.regions(top_module)) { + size_t curr_region_num_config_child = module_manager.region_configurable_children(parent_module, config_region).size(); - /* Memory configuration protocol will have 2 decoders - * at the top-level - */ - if (CONFIG_MEM_MEMORY_BANK == config_protocol_type) { - std::string top_block_name = generate_fpga_top_module_name(); - if (top_module == module_manager.find_module(top_block_name)) { - num_configurable_children -= 2; + /* Frame-based configuration protocol will have 1 decoder + * if there are more than 1 configurable children + */ + if ( (CONFIG_MEM_FRAME_BASED == config_protocol_type) + && (2 <= curr_region_num_config_child)) { + curr_region_num_config_child--; + } + + /* Memory configuration protocol will have 2 decoders + * at the top-level + */ + if (CONFIG_MEM_MEMORY_BANK == config_protocol_type) { + VTR_ASSERT(2 <= curr_region_num_config_child); + curr_region_num_config_child -= 2; + } + + /* Visit all the children in a recursively way */ + for (size_t ichild = 0; ichild < curr_region_num_config_child; ++ichild) { + ModuleId child_module = module_manager.region_configurable_children(parent_module, config_region)[ichild]; + num_bits += rec_estimate_device_bitstream_num_bits(module_manager, top_module, child_module, config_protocol_type); + } + } + } else { + VTR_ASSERT_SAFE(parent_module == top_module); + + size_t num_configurable_children = module_manager.configurable_children(parent_module).size(); + + /* Frame-based configuration protocol will have 1 decoder + * if there are more than 1 configurable children + */ + if ( (CONFIG_MEM_FRAME_BASED == config_protocol_type) + && (2 <= num_configurable_children)) { + num_configurable_children--; + } + + for (size_t ichild = 0; ichild < num_configurable_children; ++ichild) { + ModuleId child_module = module_manager.configurable_children(parent_module)[ichild]; + num_bits += rec_estimate_device_bitstream_num_bits(module_manager, top_module, child_module, config_protocol_type); } - } - for (size_t ichild = 0; ichild < num_configurable_children; ++ichild) { - ModuleId child_module = module_manager.configurable_children(top_module)[ichild]; - num_bits += rec_estimate_device_bitstream_num_bits(module_manager, child_module, config_protocol_type); } return num_bits; @@ -141,6 +172,7 @@ BitstreamManager build_device_bitstream(const VprContext& vpr_ctx, /* Estimate the number of bits to be added to the database */ size_t num_bits_to_reserve = rec_estimate_device_bitstream_num_bits(openfpga_ctx.module_graph(), + top_module, top_module, openfpga_ctx.arch().config_protocol.type()); bitstream_manager.reserve_bits(num_bits_to_reserve); diff --git a/openfpga/src/fpga_bitstream/build_fabric_bitstream.cpp b/openfpga/src/fpga_bitstream/build_fabric_bitstream.cpp index ef2b7a3c8..04d7fe714 100644 --- a/openfpga/src/fpga_bitstream/build_fabric_bitstream.cpp +++ b/openfpga/src/fpga_bitstream/build_fabric_bitstream.cpp @@ -141,48 +141,80 @@ void rec_build_module_fabric_dependent_memory_bank_bitstream(const BitstreamMana * - Use regional configurable children * - we will skip the two decoders at the end of the configurable children list */ - std::vector configurable_children = module_manager.configurable_children(parent_module); - - size_t num_configurable_children = configurable_children.size(); if (parent_module == top_module) { - configurable_children = module_manager.region_configurable_children(parent_module, config_region); + std::vector configurable_children = module_manager.region_configurable_children(parent_module, config_region); VTR_ASSERT(2 <= configurable_children.size()); - num_configurable_children -= 2; - } + size_t num_configurable_children = configurable_children.size() - 2; - /* Early exit if there is no configurable children */ - if (0 == num_configurable_children) { - /* Ensure that there should be no configuration bits in the parent block */ - VTR_ASSERT(0 == bitstream_manager.block_bits(parent_block).size()); - return; - } - - for (size_t child_id = 0; child_id < num_configurable_children; ++child_id) { - ModuleId child_module = configurable_children[child_id]; - size_t child_instance = module_manager.configurable_child_instances(parent_module)[child_id]; - if (parent_module == top_module) { - child_instance = module_manager.region_configurable_child_instances(parent_module, config_region)[child_id]; + /* Early exit if there is no configurable children */ + if (0 == num_configurable_children) { + /* Ensure that there should be no configuration bits in the parent block */ + VTR_ASSERT(0 == bitstream_manager.block_bits(parent_block).size()); + return; } - /* Get the instance name and ensure it is not empty */ - std::string instance_name = module_manager.instance_name(parent_module, child_module, child_instance); - - /* Find the child block that matches the instance name! */ - ConfigBlockId child_block = bitstream_manager.find_child_block(parent_block, instance_name); - /* We must have one valid block id! */ - if (true != bitstream_manager.valid_block_id(child_block)) - VTR_ASSERT(true == bitstream_manager.valid_block_id(child_block)); + for (size_t child_id = 0; child_id < num_configurable_children; ++child_id) { + ModuleId child_module = configurable_children[child_id]; + size_t child_instance = module_manager.region_configurable_child_instances(parent_module, config_region)[child_id]; - /* Go recursively */ - rec_build_module_fabric_dependent_memory_bank_bitstream(bitstream_manager, child_block, - module_manager, top_module, child_module, - config_region, - bl_addr_size, wl_addr_size, - num_bls, num_wls, - cur_mem_index, - fabric_bitstream, - fabric_bitstream_region); + /* Get the instance name and ensure it is not empty */ + std::string instance_name = module_manager.instance_name(parent_module, child_module, child_instance); + + /* Find the child block that matches the instance name! */ + ConfigBlockId child_block = bitstream_manager.find_child_block(parent_block, instance_name); + /* We must have one valid block id! */ + VTR_ASSERT(true == bitstream_manager.valid_block_id(child_block)); + + /* Go recursively */ + rec_build_module_fabric_dependent_memory_bank_bitstream(bitstream_manager, child_block, + module_manager, top_module, child_module, + config_region, + bl_addr_size, wl_addr_size, + num_bls, num_wls, + cur_mem_index, + fabric_bitstream, + fabric_bitstream_region); + } + } else { + VTR_ASSERT(parent_module != top_module); + /* For other modules: + * - Use configurable children directly + * - no need to exclude decoders as they are not there + */ + std::vector configurable_children = module_manager.configurable_children(parent_module); + + size_t num_configurable_children = configurable_children.size(); + + /* Early exit if there is no configurable children */ + if (0 == num_configurable_children) { + /* Ensure that there should be no configuration bits in the parent block */ + VTR_ASSERT(0 == bitstream_manager.block_bits(parent_block).size()); + return; + } + + for (size_t child_id = 0; child_id < num_configurable_children; ++child_id) { + ModuleId child_module = configurable_children[child_id]; + size_t child_instance = module_manager.configurable_child_instances(parent_module)[child_id]; + + /* Get the instance name and ensure it is not empty */ + std::string instance_name = module_manager.instance_name(parent_module, child_module, child_instance); + + /* Find the child block that matches the instance name! */ + ConfigBlockId child_block = bitstream_manager.find_child_block(parent_block, instance_name); + /* We must have one valid block id! */ + VTR_ASSERT(true == bitstream_manager.valid_block_id(child_block)); + + /* Go recursively */ + rec_build_module_fabric_dependent_memory_bank_bitstream(bitstream_manager, child_block, + module_manager, top_module, child_module, + config_region, + bl_addr_size, wl_addr_size, + num_bls, num_wls, + cur_mem_index, + fabric_bitstream, + fabric_bitstream_region); + } } /* Ensure that there should be no configuration bits in the parent block */ VTR_ASSERT(0 == bitstream_manager.block_bits(parent_block).size()); @@ -475,10 +507,10 @@ void build_module_fabric_dependent_bitstream(const ConfigProtocol& config_protoc fabric_bitstream.set_wl_address_length(wl_addr_port_info.get_width()); fabric_bitstream.reserve_bits(bitstream_manager.num_bits()); - size_t cur_mem_index = 0; - /* Build bitstreams by region */ for (const ConfigRegionId& config_region : module_manager.regions(top_module)) { + size_t cur_mem_index = 0; + /* Find port information for local BL and WL decoder in this region */ std::vector configurable_children = module_manager.region_configurable_children(top_module, config_region); VTR_ASSERT(2 <= configurable_children.size()); diff --git a/openfpga/src/fpga_verilog/verilog_module_writer.cpp b/openfpga/src/fpga_verilog/verilog_module_writer.cpp index 7d6883236..08e1b3adb 100644 --- a/openfpga/src/fpga_verilog/verilog_module_writer.cpp +++ b/openfpga/src/fpga_verilog/verilog_module_writer.cpp @@ -40,7 +40,7 @@ std::string generate_verilog_undriven_local_wire_name(const ModuleManager& modul if (!module_manager.instance_name(parent, child, instance_id).empty()) { wire_name = module_manager.instance_name(parent, child, instance_id); } else { - wire_name = module_manager.module_name(parent) + std::string("_") + std::to_string(instance_id); + wire_name = module_manager.module_name(child) + std::string("_") + std::to_string(instance_id); wire_name += std::string("_"); } diff --git a/openfpga/src/utils/module_manager_utils.cpp b/openfpga/src/utils/module_manager_utils.cpp index 4117735a9..7d56430b6 100644 --- a/openfpga/src/utils/module_manager_utils.cpp +++ b/openfpga/src/utils/module_manager_utils.cpp @@ -1794,15 +1794,6 @@ ModuleNetId create_module_source_pin_net(ModuleManager& module_manager, * - src_instance should be valid and des_instance should be valid as well * - src port size should match the des port size * - * Power options: - * - align_to_lsb: This is by default turned off! - * When enabled, the source and destination ports - * will be connected pin-by-pin starting from - * the Least Significant Bit (LSB) - * The source and destination ports are not required - * to be same in sizes. - * BE CAREFUL! This may cause dangling pins! - * Use when you know what you are doing! *******************************************************************/ void add_module_bus_nets(ModuleManager& module_manager, const ModuleId& cur_module_id, @@ -1811,8 +1802,7 @@ void add_module_bus_nets(ModuleManager& module_manager, const ModulePortId& src_module_port_id, const ModuleId& des_module_id, const size_t& des_instance_id, - const ModulePortId& des_module_port_id, - const bool& align_to_lsb) { + const ModulePortId& des_module_port_id) { VTR_ASSERT(true == module_manager.valid_module_id(cur_module_id)); VTR_ASSERT(true == module_manager.valid_module_id(src_module_id)); @@ -1836,8 +1826,7 @@ void add_module_bus_nets(ModuleManager& module_manager, const BasicPort& src_port = module_manager.module_port(src_module_id, src_module_port_id); const BasicPort& des_port = module_manager.module_port(des_module_id, des_module_port_id); - if ( (false == align_to_lsb) - && (src_port.get_width() != des_port.get_width())) { + if (src_port.get_width() != des_port.get_width()) { VTR_LOGF_ERROR(__FILE__, __LINE__, "Unmatched port size: src_port %s is %lu while des_port %s is %lu!\n", src_port.get_name().c_str(), @@ -1849,11 +1838,6 @@ void add_module_bus_nets(ModuleManager& module_manager, /* Create a net for each pin */ for (size_t pin_id = 0; pin_id < src_port.pins().size(); ++pin_id) { - /* Create net only when des_port has a valid pin! */ - if (pin_id >= des_port.pins().size()) { - continue; - } - ModuleNetId net = create_module_source_pin_net(module_manager, cur_module_id, src_module_id, src_instance_id, src_module_port_id, src_port.pins()[pin_id]); diff --git a/openfpga/src/utils/module_manager_utils.h b/openfpga/src/utils/module_manager_utils.h index 6cdf27fe8..28913e673 100644 --- a/openfpga/src/utils/module_manager_utils.h +++ b/openfpga/src/utils/module_manager_utils.h @@ -165,8 +165,7 @@ void add_module_bus_nets(ModuleManager& module_manager, const ModulePortId& src_module_port_id, const ModuleId& des_module_id, const size_t& des_instance_id, - const ModulePortId& des_module_port_id, - const bool& align_to_lsb = false); + const ModulePortId& des_module_port_id); } /* end namespace openfpga */