From 39934f9d16742a101216467c2d01e5b46d09f50f Mon Sep 17 00:00:00 2001 From: Chung Shien Chai Date: Thu, 20 Jul 2023 22:34:18 -0700 Subject: [PATCH] Address issue 1256 --- .../openfpga_bitstream_command_template.h | 5 + .../src/base/openfpga_bitstream_template.h | 2 + .../build_fabric_bitstream_memory_bank.cpp | 68 +++++--- .../src/fpga_bitstream/fabric_bitstream.cpp | 154 ++++++++++++++++++ .../src/fpga_bitstream/fabric_bitstream.h | 96 +++++++++++ .../write_text_fabric_bitstream.cpp | 147 ++++++++++++++++- .../write_text_fabric_bitstream.h | 3 +- .../write_xml_fabric_bitstream.cpp | 63 ++++++- .../fpga_verilog/verilog_top_testbench.cpp | 13 ++ .../verilog_top_testbench_memory_bank.cpp | 14 +- 10 files changed, 531 insertions(+), 34 deletions(-) diff --git a/openfpga/src/base/openfpga_bitstream_command_template.h b/openfpga/src/base/openfpga_bitstream_command_template.h index b11c058cd..bc781dcdc 100644 --- a/openfpga/src/base/openfpga_bitstream_command_template.h +++ b/openfpga/src/base/openfpga_bitstream_command_template.h @@ -200,6 +200,11 @@ ShellCommandId add_write_fabric_bitstream_command_template( "Keep don't care bits in bitstream file; If not enabled, don't care bits " "are converted to logic '0' or '1'"); + /* Add an option '--wl_decremental_order' */ + shell_cmd.add_option( + "wl_decremental_order", false, + "Generate bitstream in WL decremental addressing order if supported"); + /* Add an option '--no_time_stamp' */ shell_cmd.add_option("no_time_stamp", false, "Do not print time stamp in output files"); diff --git a/openfpga/src/base/openfpga_bitstream_template.h b/openfpga/src/base/openfpga_bitstream_template.h index de01d1df9..196d9ef30 100644 --- a/openfpga/src/base/openfpga_bitstream_template.h +++ b/openfpga/src/base/openfpga_bitstream_template.h @@ -91,6 +91,7 @@ int write_fabric_bitstream_template(const T& openfpga_ctx, const Command& cmd, CommandOptionId opt_file_format = cmd.option("format"); CommandOptionId opt_fast_config = 
cmd.option("fast_configuration"); CommandOptionId opt_keep_dont_care_bits = cmd.option("keep_dont_care_bits"); + CommandOptionId opt_wl_decremental_order = cmd.option("wl_decremental_order"); CommandOptionId opt_no_time_stamp = cmd.option("no_time_stamp"); /* Write fabric bitstream if required */ @@ -127,6 +128,7 @@ int write_fabric_bitstream_template(const T& openfpga_ctx, const Command& cmd, cmd_context.option_value(cmd, opt_file), cmd_context.option_enable(cmd, opt_fast_config), cmd_context.option_enable(cmd, opt_keep_dont_care_bits), + !cmd_context.option_enable(cmd, opt_wl_decremental_order), !cmd_context.option_enable(cmd, opt_no_time_stamp), cmd_context.option_enable(cmd, opt_verbose)); } diff --git a/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp b/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp index 97fe97d90..1659e3b65 100644 --- a/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp +++ b/openfpga/src/fpga_bitstream/build_fabric_bitstream_memory_bank.cpp @@ -174,6 +174,11 @@ static void rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream( bitstream_manager.block_bits(parent_block)) { FabricBitId fabric_bit = fabric_bitstream.add_bit(config_bit); + /* + If both BL and WL protocols are Flatten, we will have new way of + storing information in fabric_bitstream. 
This will save high + memory usage, as well as fast processing + */ /* The BL address to be decoded depends on the protocol * - flatten BLs: use 1-hot decoding * - BL decoders: fully encoded @@ -181,38 +186,53 @@ static void rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream( */ size_t cur_bl_index = bl_start_index_per_tile.at(tile_coord.x()) + cur_mem_index[tile_coord] % num_bls_cur_tile; - std::vector bl_addr_bits_vec; - if (BLWL_PROTOCOL_DECODER == config_protocol.bl_protocol_type()) { - bl_addr_bits_vec = itobin_charvec(cur_bl_index, bl_addr_size); - } else if (BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type() || - BLWL_PROTOCOL_SHIFT_REGISTER == - config_protocol.bl_protocol_type()) { - bl_addr_bits_vec = - ito1hot_charvec(cur_bl_index, bl_addr_size, DONT_CARE_CHAR); + if (BLWL_PROTOCOL_FLATTEN != config_protocol.bl_protocol_type() || + BLWL_PROTOCOL_FLATTEN != config_protocol.wl_protocol_type()) { + // This is using old way + // We only do this kind of resource wasting storing if + // either protocol is not flatten + std::vector bl_addr_bits_vec; + if (BLWL_PROTOCOL_DECODER == config_protocol.bl_protocol_type()) { + bl_addr_bits_vec = itobin_charvec(cur_bl_index, bl_addr_size); + } else if (BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type() || + BLWL_PROTOCOL_SHIFT_REGISTER == + config_protocol.bl_protocol_type()) { + bl_addr_bits_vec = + ito1hot_charvec(cur_bl_index, bl_addr_size, DONT_CARE_CHAR); + } + /* Set BL address */ + fabric_bitstream.set_bit_bl_address( + fabric_bit, bl_addr_bits_vec, + BLWL_PROTOCOL_DECODER != config_protocol.bl_protocol_type()); } /* Find WL address */ size_t cur_wl_index = wl_start_index_per_tile.at(tile_coord.y()) + std::floor(cur_mem_index[tile_coord] / num_bls_cur_tile); - std::vector wl_addr_bits_vec; - if (BLWL_PROTOCOL_DECODER == config_protocol.wl_protocol_type()) { - wl_addr_bits_vec = itobin_charvec(cur_wl_index, wl_addr_size); - } else if (BLWL_PROTOCOL_FLATTEN == 
config_protocol.wl_protocol_type() || - BLWL_PROTOCOL_SHIFT_REGISTER == - config_protocol.wl_protocol_type()) { - wl_addr_bits_vec = ito1hot_charvec(cur_wl_index, wl_addr_size); + if (BLWL_PROTOCOL_FLATTEN != config_protocol.bl_protocol_type() || + BLWL_PROTOCOL_FLATTEN != config_protocol.wl_protocol_type()) { + // This is using old way + // We only do this kind of resource wasting storing if + // either protocol is not flatten + std::vector wl_addr_bits_vec; + if (BLWL_PROTOCOL_DECODER == config_protocol.wl_protocol_type()) { + wl_addr_bits_vec = itobin_charvec(cur_wl_index, wl_addr_size); + } else if (BLWL_PROTOCOL_FLATTEN == config_protocol.wl_protocol_type() || + BLWL_PROTOCOL_SHIFT_REGISTER == + config_protocol.wl_protocol_type()) { + wl_addr_bits_vec = ito1hot_charvec(cur_wl_index, wl_addr_size); + } + /* Set WL address */ + fabric_bitstream.set_bit_wl_address( + fabric_bit, wl_addr_bits_vec, + BLWL_PROTOCOL_DECODER != config_protocol.wl_protocol_type()); } - /* Set BL address */ - fabric_bitstream.set_bit_bl_address( - fabric_bit, bl_addr_bits_vec, - BLWL_PROTOCOL_DECODER != config_protocol.bl_protocol_type()); - - /* Set WL address */ - fabric_bitstream.set_bit_wl_address( - fabric_bit, wl_addr_bits_vec, - BLWL_PROTOCOL_DECODER != config_protocol.wl_protocol_type()); + /* New way of storing information in compact way*/ + fabric_bitstream.set_memory_bank_info( + fabric_bit, fabric_bitstream_region, cur_bl_index, cur_wl_index, + bl_addr_size, wl_addr_size, bitstream_manager.bit_value(config_bit)); /* Set data input */ fabric_bitstream.set_bit_din(fabric_bit, diff --git a/openfpga/src/fpga_bitstream/fabric_bitstream.cpp b/openfpga/src/fpga_bitstream/fabric_bitstream.cpp index 5649388b8..9778b76e6 100644 --- a/openfpga/src/fpga_bitstream/fabric_bitstream.cpp +++ b/openfpga/src/fpga_bitstream/fabric_bitstream.cpp @@ -11,6 +11,133 @@ /* begin namespace openfpga */ namespace openfpga { +/************************************************** + * 
FabricBitstreamMemoryBank + *************************************************/ +void FabricBitstreamMemoryBank::add_bit(const fabric_size_t& bit_id, + const fabric_size_t& region_id, + const fabric_size_t& bl, + const fabric_size_t& wl, + const fabric_size_t& bl_addr_size, + const fabric_size_t& wl_addr_size, + bool bit) { + // Fabric Bit is added in sequential manner and each bit is unique + VTR_ASSERT((size_t)(bit_id) == fabric_bit_datas.size()); + // Region is added in sequential manner but it is not unique from fabric bit + // perspective + VTR_ASSERT((size_t)(region_id) <= blwl_lengths.size()); + if ((size_t)(region_id) == blwl_lengths.size()) { + // Add if this is first time + blwl_lengths.push_back(fabric_blwl_length(bl_addr_size, wl_addr_size)); + } else { + // Otherwise if the region had been added, it must always be consistent + VTR_ASSERT(blwl_lengths[region_id].bl == bl_addr_size); + VTR_ASSERT(blwl_lengths[region_id].wl == wl_addr_size); + } + // The BL/WL index must be within respective length + VTR_ASSERT(bl < blwl_lengths[region_id].bl); + VTR_ASSERT(wl < blwl_lengths[region_id].wl); + // We might not need this at all to track the raw data + // But since it does not use a lot of memory, tracking for good + fabric_bit_datas.push_back(fabric_bit_data((fabric_size_t)(size_t)(region_id), + (fabric_size_t)(bl), + (fabric_size_t)(wl), bit)); + // This is real compact data + VTR_ASSERT(datas.size() == masks.size()); + while ((size_t)(region_id) >= datas.size()) { + datas.emplace_back(); + masks.emplace_back(); + } + VTR_ASSERT(datas[region_id].size() == masks[region_id].size()); + while ((size_t)(wl) >= datas[region_id].size()) { + datas[region_id].push_back(std::vector((bl_addr_size + 7) / 8, 0)); + masks[region_id].push_back(std::vector((bl_addr_size + 7) / 8, 0)); + } + // Same unique config bit cannot be set twice + VTR_ASSERT((masks[region_id][wl][bl >> 3] & (1 << (bl & 7))) == 0); + if (bit) { + // Mark the data value if bit (or din) is true + 
datas[region_id][wl][bl >> 3] |= (1 << (bl & 7)); + } + // Mark the mask to indicate we had used this bit + masks[region_id][wl][bl >> 3] |= (1 << (bl & 7)); +} + +// Rebuild wls_to_skip: per region, the WLs that fast configuration may omit. +// A WL is skipped when every used (masked) bit on it equals +// bit_value_to_skip; unused (don't care) bits are ignored. When fast is +// false, every region gets an empty skip list. +void FabricBitstreamMemoryBank::fast_configuration( + const bool& fast, const bool& bit_value_to_skip) { + // Always recompute from scratch: the result depends on both arguments, + // which may differ between calls. Clearing the outer vector also destroys + // the per-region lists, so no separate inner clear is needed. + wls_to_skip.clear(); + for (size_t region = 0; region < datas.size(); region++) { + wls_to_skip.emplace_back(); + if (fast) { + for (fabric_size_t wl = 0; wl < blwl_lengths[region].wl; wl++) { + VTR_ASSERT((size_t)(wl) < datas[region].size()); + // skip_wl in the loop condition stops the scan at the first mismatch + bool skip_wl = true; + for (fabric_size_t bl = 0; bl < blwl_lengths[region].bl && skip_wl; + bl++) { + // Only check the bits that are being used (marked in the mask), + // otherwise it is just a don't care, we can skip + if (masks[region][wl][bl >> 3] & (1 << (bl & 7))) { + if (datas[region][wl][bl >> 3] & (1 << (bl & 7))) { + // Din (recorded in datas) is 1: this bit allows skipping only + // when bit_value_to_skip is true + skip_wl = bit_value_to_skip; + } else { + skip_wl = !bit_value_to_skip; + } + } + } + if (skip_wl) { + // Record down that for this region, we will skip this WL + wls_to_skip[region].push_back(wl); + } + } + } + } +} + +fabric_size_t FabricBitstreamMemoryBank::get_lontest_effective_wl_addr_size() + const { + // This function checks the effective WL addr size, + // where effective WLs are the WLs that we want to program after considering + // fast configuration. Across all regions, it returns the longest + fabric_size_t longest_wl = 0; + for (size_t region = 0; region < datas.size(); region++) { + VTR_ASSERT((size_t)(region) < wls_to_skip.size()); + fabric_size_t current_wl = + (fabric_size_t)(datas[region].size() - wls_to_skip[region].size()); + if (current_wl > longest_wl) { + longest_wl = current_wl; + } + } + return longest_wl; +} + 
+fabric_size_t FabricBitstreamMemoryBank::get_total_bl_addr_size() const { + // Simply total up all the BL addr size + fabric_size_t bl = 0; + for (size_t region = 0; region < datas.size(); region++) { + bl += blwl_lengths[region].bl; + } + return bl; +} + +fabric_size_t FabricBitstreamMemoryBank::get_total_wl_addr_size() const { + // Simply total up all the WL addr size + fabric_size_t wl = 0; + for (size_t region = 0; region < datas.size(); region++) { + wl += blwl_lengths[region].wl; + } + return wl; +} + /************************************************** * Public Constructor *************************************************/ @@ -129,6 +256,12 @@ bool FabricBitstream::use_address() const { return use_address_; } bool FabricBitstream::use_wl_address() const { return use_wl_address_; } +const FabricBitstreamMemoryBank* FabricBitstream::memory_bank_info() const { + VTR_ASSERT(true == use_address_); + VTR_ASSERT(true == use_wl_address_); + return &memory_bank_data_; +} + /****************************************************************************** * Public Mutators ******************************************************************************/ @@ -243,6 +376,27 @@ void FabricBitstream::set_bl_address_length(const size_t& length) { set_address_length(length); } +void FabricBitstream::set_memory_bank_info(const FabricBitId& bit_id, + const FabricBitRegionId& region_id, + const size_t& bl, const size_t& wl, + const size_t& bl_addr_size, + const size_t& wl_addr_size, + bool bit) { + // Bit must be valid one + // We only support this in protocol that use BL and WL address + VTR_ASSERT(true == valid_bit_id(bit_id)); + VTR_ASSERT(true == use_address_); + VTR_ASSERT(true == use_wl_address_); + VTR_ASSERT(bl_addr_size); + VTR_ASSERT(wl_addr_size); + // All the basic checking had passed, we can add the data into + // memory_bank_data_ + memory_bank_data_.add_bit( + (fabric_size_t)(size_t)(bit_id), (fabric_size_t)(size_t)(region_id), + (fabric_size_t)(bl), 
(fabric_size_t)(wl), (fabric_size_t)(bl_addr_size), + (fabric_size_t)(wl_addr_size), bit); +} + void FabricBitstream::set_use_wl_address(const bool& enable) { /* Add a lock, only can be modified when num bits are zero*/ if (0 == num_bits_) { diff --git a/openfpga/src/fpga_bitstream/fabric_bitstream.h b/openfpga/src/fpga_bitstream/fabric_bitstream.h index 68a972636..dfe8840a2 100644 --- a/openfpga/src/fpga_bitstream/fabric_bitstream.h +++ b/openfpga/src/fpga_bitstream/fabric_bitstream.h @@ -41,6 +41,85 @@ /* begin namespace openfpga */ namespace openfpga { +// Use uint32_t (maximum of 4Gigs) is good enough, we have BL and WL, +// combination of both hold up to 18 quintillion of configuration bits (+ dont +// care) +typedef uint32_t fabric_size_t; +struct fabric_bit_data { + fabric_bit_data(fabric_size_t r, fabric_size_t b, fabric_size_t w, bool bi) + : region(r), bl(b), wl(w), bit(bi) {} + const fabric_size_t region = 0; + const fabric_size_t bl = 0; + const fabric_size_t wl = 0; + const bool bit = false; +}; +struct fabric_blwl_length { + fabric_blwl_length(fabric_size_t b, fabric_size_t w) : bl(b), wl(w) {} + const fabric_size_t bl = 0; + const fabric_size_t wl = 0; +}; + +/* + This class arrange Memory Bank databae in a compact way +*/ +struct FabricBitstreamMemoryBank { + void add_bit(const fabric_size_t& bit_id, const fabric_size_t& region_id, + const fabric_size_t& bl, const fabric_size_t& wl, + const fabric_size_t& bl_addr_size, + const fabric_size_t& wl_addr_size, bool bit); + void fast_configuration(const bool& fast, const bool& bit_value_to_skip); + fabric_size_t get_lontest_effective_wl_addr_size() const; + fabric_size_t get_total_bl_addr_size() const; + fabric_size_t get_total_wl_addr_size() const; + + /************************* + * All the database (except fabric_bit_datas) is sorted by region + * 1. The very first layer of vector is region + * For the datas and masks + * 1. They are sorted by WL, hence second layer is WL + * 2. 
Layer is BL data stored in vector of uint8_t + * 3. Each uint8_t will store up-to 8 configuration bit info + **************************/ + // Store the BL WL of each region + std::vector blwl_lengths; + // Store config ID raw data. Not used by bitstream generation + // Used by XML generation + /* + fabric_bit_datas[Bit #0] = (region, bl, wl) + fabric_bit_datas[Bit #1] = (region, bl, wl) + fabric_bit_datas[Bit #2] = (region, bl, wl) + */ + std::vector fabric_bit_datas; + // 100K LE FPGA only need few mega bytes + /* + datas represent the Din value of a given WL and BL (1bit) + datas[region #0][wl #0] = std::vector to represent BLs + where uint8_t #0 = MSB{ BL#7, BL#6, .... BL #1, BL #0 } LSB + where uint8_t #1 = MSB{ BL#15, BL#14, .... BL #9, BL #8 } LSB + datas[region #0][wl #1] = std::vector to represent BLs + datas[region #0][wl #2] = std::vector to represent BLs + ...... + datas[region #0][wl #n-1] = std::vector to represent BLs + ...... + datas[region #1][wl #0] = std::vector to represent BLs + datas[region #1][wl #1] = std::vector to represent BLs + ...... + */ + std::vector>> datas; + /* + masks has same structure as datas + but masks presents data that being used + for exampe: + if mask's uint8_t #0 value = 0x41 it means for this WL + a. BL #0 is being used, and its Din is recoreded in datas + b. BL #6 is being used, and its Din is recoreded in datas + c. 
Other BLs #1, 2, 3, 4, 5, 7 are don't care bit (not being used) + */ + std::vector>> masks; + // This track which WL to skip because of fast configuration + std::vector> wls_to_skip; +}; + class FabricBitstream { public: /* Type implementations */ /* @@ -144,6 +223,8 @@ class FabricBitstream { bool use_address() const; bool use_wl_address() const; + const FabricBitstreamMemoryBank* memory_bank_info() const; + public: /* Public Mutators */ /* Reserve config bits */ void reserve_bits(const size_t& num_bits); @@ -193,6 +274,18 @@ class FabricBitstream { void set_address_length(const size_t& length); void set_bl_address_length(const size_t& length); + /* + This is setting memory bank protocol in a more efficient way + Instead of building lengthy BL/WL bits of database (BL or Wl could be in + thousand bits of size), a small device like 100K LE (compared to other + vendors offer) might end up using tens of gig bytes. + */ + void set_memory_bank_info(const FabricBitId& bit_id, + const FabricBitRegionId& region_id, + const size_t& bl, const size_t& wl, + const size_t& bl_addr_size, + const size_t& wl_addr_size, bool bit); + /* Enable the use of WL-address related data * Same priniciple as the set_use_address() */ @@ -250,6 +343,9 @@ class FabricBitstream { /* Data input (Din) bits: this is designed for memory decoders */ vtr::vector bit_dins_; + + /* New way of dealing with memory bank protocol - fast and compact */ + FabricBitstreamMemoryBank memory_bank_data_; }; } /* end namespace openfpga */ diff --git a/openfpga/src/fpga_bitstream/write_text_fabric_bitstream.cpp b/openfpga/src/fpga_bitstream/write_text_fabric_bitstream.cpp index 9cb2a3f9c..2ade6174b 100644 --- a/openfpga/src/fpga_bitstream/write_text_fabric_bitstream.cpp +++ b/openfpga/src/fpga_bitstream/write_text_fabric_bitstream.cpp @@ -245,6 +245,142 @@ static int write_memory_bank_flatten_fabric_bitstream_to_text_file( return status; } +/******************************************************************** + * 
Write the fabric bitstream fitting a memory bank protocol + * to a plain text file in an efficient way + * + * The old function is write_memory_bank_flatten_fabric_bitstream_to_text_file() + * + * As compared to the original function, based on a 100K LE FPGA: + * 1. The original function used 600 seconds and needed 80G Bytes of memory + * 2. This new function only needs 1 second and 4M Bytes + * + * The old function only prints WL in decremental order. This is not intentional; + * it is a side effect of the map-key ordering + * In QL Memory Bank with Flatten BL/WL, data is stored by WL address, + * where we use WL string as map key + * WL #0 --- "1000000000000 .... 0000" + * WL #1 --- "0100000000000 .... 0000" + * WL #n-1 --- "0000000000000 .... 0001" + * In string comparison order, WL #n-1 will be first, and WL #0 will be last + * The sequence of WL does not really matter, but a defined ordering is + * preferable. Relying on map-key ordering cannot guarantee determinism + * + * This new way of writing the fabric bitstream guarantees a deterministic + * WL order: either incremental (default) or decremental + * + * Return: + * - 0 if succeed + * - 1 if critical errors occurred + *******************************************************************/ +static int fast_write_memory_bank_flatten_fabric_bitstream_to_text_file( + std::fstream& fp, const bool& fast_configuration, + const bool& bit_value_to_skip, const FabricBitstream& fabric_bitstream, + const bool& keep_dont_care_bits, const bool& wl_incremental_order) { + int status = 0; + + std::string dont_care_bit = "0"; + if (keep_dont_care_bits) { + dont_care_bit = "x"; + } + const FabricBitstreamMemoryBank* memory_bank = + fabric_bitstream.memory_bank_info(); + + // Must call this to prepare wls_to_skip + (const_cast(memory_bank)) + ->fast_configuration(fast_configuration, bit_value_to_skip); + + fabric_size_t lontest_effective_wl_addr_size = + memory_bank->get_lontest_effective_wl_addr_size(); + /* Output information about how to interpret 
the bitstream */ + fp << "// Bitstream length: " << lontest_effective_wl_addr_size << std::endl; + fp << "// Bitstream width (LSB -> MSB): "; + fp << "get_total_bl_addr_size() << " bits>"; + fp << "get_total_wl_addr_size() << " bits>"; + fp << std::endl; + + std::vector wl_indexes; + for (size_t region = 0; region < memory_bank->datas.size(); region++) { + if (wl_incremental_order) { + wl_indexes.push_back(0); + } else { + wl_indexes.push_back( + (fabric_size_t)(memory_bank->datas[region].size() - 1)); + } + } + for (size_t wl_index = 0; wl_index < lontest_effective_wl_addr_size; + wl_index++) { + /* Write BL address code */ + /* cascade region 0, 1, 2, 3 ... */ + for (size_t region = 0; region < memory_bank->datas.size(); region++) { + const fabric_blwl_length& lengths = memory_bank->blwl_lengths[region]; + fabric_size_t current_wl = wl_indexes[region]; + while (std::find(memory_bank->wls_to_skip[region].begin(), + memory_bank->wls_to_skip[region].end(), + current_wl) != memory_bank->wls_to_skip[region].end()) { + // We would like to skip this + if (wl_incremental_order) { + wl_indexes[region]++; + } else { + wl_indexes[region]--; + } + current_wl = wl_indexes[region]; + } + if (current_wl < memory_bank->datas[region].size()) { + const std::vector& data = + memory_bank->datas[region][current_wl]; + const std::vector& mask = + memory_bank->masks[region][current_wl]; + for (size_t bl = 0; bl < lengths.bl; bl++) { + if (mask[bl >> 3] & (1 << (bl & 7))) { + if (data[bl >> 3] & (1 << (bl & 7))) { + fp << "1"; + } else { + fp << "0"; + } + } else { + fp << dont_care_bit.c_str(); + } + } + } else { + /* However not all region has equal WL, for those that is shortest, + * print 'x' for all BL*/ + for (size_t bl = 0; bl < lengths.bl; bl++) { + fp << dont_care_bit.c_str(); + } + } + } + /* Write WL address code */ + /* cascade region 0, 1, 2, 3 ... 
*/ + for (size_t region = 0; region < memory_bank->datas.size(); region++) { + const fabric_blwl_length& lengths = memory_bank->blwl_lengths[region]; + fabric_size_t current_wl = wl_indexes[region]; + if (current_wl < memory_bank->datas[region].size()) { + for (size_t wl_temp = 0; wl_temp < lengths.wl; wl_temp++) { + if (wl_temp == current_wl) { + fp << "1"; + } else { + fp << "0"; + } + } + if (wl_incremental_order) { + wl_indexes[region]++; + } else { + wl_indexes[region]--; + } + } else { + /* However not all region has equal WL, for those that is shortest, + * print 'x' for all WL */ + for (size_t wl_temp = 0; wl_temp < lengths.wl; wl_temp++) { + fp << dont_care_bit.c_str(); + } + } + } + fp << std::endl; + } + return status; +} + /******************************************************************** * Write the fabric bitstream fitting a memory bank protocol * to a plain text file @@ -393,7 +529,8 @@ int write_fabric_bitstream_to_text_file( const ConfigProtocol& config_protocol, const FabricGlobalPortInfo& global_ports, const std::string& fname, const bool& fast_configuration, const bool& keep_dont_care_bits, - const bool& include_time_stamp, const bool& verbose) { + const bool& wl_incremental_order, const bool& include_time_stamp, + const bool& verbose) { /* Ensure that we have a valid file name */ if (true == fname.empty()) { VTR_LOG_ERROR( @@ -454,6 +591,14 @@ int write_fabric_bitstream_to_text_file( if (BLWL_PROTOCOL_DECODER == config_protocol.bl_protocol_type()) { status = write_memory_bank_fabric_bitstream_to_text_file( fp, apply_fast_configuration, bit_value_to_skip, fabric_bitstream); + } else if (BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type() && + BLWL_PROTOCOL_FLATTEN == config_protocol.wl_protocol_type()) { + // If both BL and WL protocols are flatten, use new way to write the + // bitstream + status = fast_write_memory_bank_flatten_fabric_bitstream_to_text_file( + fp, apply_fast_configuration, bit_value_to_skip, fabric_bitstream, + 
keep_dont_care_bits, wl_incremental_order); + } else if (BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type()) { status = write_memory_bank_flatten_fabric_bitstream_to_text_file( fp, apply_fast_configuration, bit_value_to_skip, fabric_bitstream, diff --git a/openfpga/src/fpga_bitstream/write_text_fabric_bitstream.h b/openfpga/src/fpga_bitstream/write_text_fabric_bitstream.h index 0d8682739..59f4774de 100644 --- a/openfpga/src/fpga_bitstream/write_text_fabric_bitstream.h +++ b/openfpga/src/fpga_bitstream/write_text_fabric_bitstream.h @@ -27,7 +27,8 @@ int write_fabric_bitstream_to_text_file( const ConfigProtocol& config_protocol, const FabricGlobalPortInfo& global_ports, const std::string& fname, const bool& fast_configuration, const bool& keep_dont_care_bits, - const bool& include_time_stamp, const bool& verbose); + const bool& wl_incremental_order, const bool& include_time_stamp, + const bool& verbose); } /* end namespace openfpga */ diff --git a/openfpga/src/fpga_bitstream/write_xml_fabric_bitstream.cpp b/openfpga/src/fpga_bitstream/write_xml_fabric_bitstream.cpp index ec8a036fd..786a3768e 100644 --- a/openfpga/src/fpga_bitstream/write_xml_fabric_bitstream.cpp +++ b/openfpga/src/fpga_bitstream/write_xml_fabric_bitstream.cpp @@ -71,7 +71,8 @@ static void write_fabric_bitstream_xml_file_head( static int write_fabric_config_bit_to_xml_file( std::fstream& fp, const BitstreamManager& bitstream_manager, const FabricBitstream& fabric_bitstream, const FabricBitId& fabric_bit, - const e_config_protocol_type& config_type, const int& xml_hierarchy_depth) { + const e_config_protocol_type& config_type, const int& xml_hierarchy_depth, + std::string& bl_addr, std::string& wl_addr) { if (false == valid_file_stream(fp)) { return 1; } @@ -106,7 +107,6 @@ static int write_fabric_config_bit_to_xml_file( case CONFIG_MEM_STANDALONE: case CONFIG_MEM_SCAN_CHAIN: break; - case CONFIG_MEM_QL_MEMORY_BANK: case CONFIG_MEM_MEMORY_BANK: { /* Bit line address */ write_tab_to_file(fp, 
xml_hierarchy_depth + 1); @@ -124,6 +124,41 @@ static int write_fabric_config_bit_to_xml_file( fp << "\"/>\n"; break; } + case CONFIG_MEM_QL_MEMORY_BANK: { + // New way of printing XML + // This is fast (less than 100s) as compared to original 1300s seen in + // 100K LE FPGA + const FabricBitstreamMemoryBank* memory_bank = + fabric_bitstream.memory_bank_info(); + /* Bit line address */ + write_tab_to_file(fp, xml_hierarchy_depth + 1); + const fabric_bit_data& bit = + memory_bank->fabric_bit_datas[(size_t)(fabric_bit)]; + const fabric_blwl_length& lengths = memory_bank->blwl_lengths[bit.region]; + if (bl_addr.size() == 0) { + VTR_ASSERT(wl_addr.size() == 0); + bl_addr.resize(lengths.bl); + wl_addr.resize(lengths.wl); + memset(&bl_addr[0], 'x', lengths.bl); + memset(&wl_addr[0], '0', lengths.wl); + } else { + VTR_ASSERT((fabric_size_t)(bl_addr.size()) == lengths.bl); + VTR_ASSERT((fabric_size_t)(wl_addr.size()) == lengths.wl); + } + fp << "\n"; + /* Word line address */ + write_tab_to_file(fp, xml_hierarchy_depth + 1); + fp << "\n"; + break; + } case CONFIG_MEM_FRAME_BASED: { write_tab_to_file(fp, xml_hierarchy_depth + 1); fp << "\n"; + size_t bit_index = 0; + size_t total_bits = fabric_bitstream.region_bits(fabric_region).size(); + size_t percentage = 0; for (const FabricBitId& fabric_bit : fabric_bitstream.region_bits(fabric_region)) { status = write_fabric_config_bit_to_xml_file( fp, bitstream_manager, fabric_bitstream, fabric_bit, config_type, - xml_hierarchy_depth + 1); + xml_hierarchy_depth + 1, bl_addr, wl_addr); if (1 == status) { return status; } + // Print progress each time the integer percentage changes ("%%" = literal %) + bit_index++; + size_t temp = (bit_index * 100) / total_bits; + if (temp != percentage) { + percentage = temp; + VTR_LOG(" Progress: %zu%%\r", percentage); + } } write_tab_to_file(fp, xml_hierarchy_depth); diff --git a/openfpga/src/fpga_verilog/verilog_top_testbench.cpp b/openfpga/src/fpga_verilog/verilog_top_testbench.cpp index 4146fa870..1ac006cca 100644 --- 
a/openfpga/src/fpga_verilog/verilog_top_testbench.cpp +++ b/openfpga/src/fpga_verilog/verilog_top_testbench.cpp @@ -1061,6 +1061,19 @@ static size_t calculate_num_config_clock_cycles( (float)full_num_config_clock_cycles - 1.)); } + } else if (BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type() && + BLWL_PROTOCOL_FLATTEN == config_protocol.wl_protocol_type()) { + // Only support new fast way if both BL/WL protocols are flatten + // Based on 100K LE FPGA, we are wasting a lot of time to build + // MemoryBankFlattenFabricBitstream + // just to get the effective WL addr size. So wasteful of the resource + const FabricBitstreamMemoryBank* memory_bank = + fabric_bitstream.memory_bank_info(); + // Must call this to prepare wls_to_skip + (const_cast(memory_bank)) + ->fast_configuration(fast_configuration, bit_value_to_skip); + num_config_clock_cycles = + 1 + memory_bank->get_lontest_effective_wl_addr_size(); } else if (BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type()) { num_config_clock_cycles = 1 + build_memory_bank_flatten_fabric_bitstream( diff --git a/openfpga/src/fpga_verilog/verilog_top_testbench_memory_bank.cpp b/openfpga/src/fpga_verilog/verilog_top_testbench_memory_bank.cpp index 8b70ef3fb..7b964be62 100644 --- a/openfpga/src/fpga_verilog/verilog_top_testbench_memory_bank.cpp +++ b/openfpga/src/fpga_verilog/verilog_top_testbench_memory_bank.cpp @@ -565,9 +565,15 @@ static void print_verilog_full_testbench_ql_memory_bank_flatten_bitstream( valid_file_stream(fp); /* Reorganize the fabric bitstream by the same address across regions */ - MemoryBankFlattenFabricBitstream fabric_bits_by_addr = - build_memory_bank_flatten_fabric_bitstream( - fabric_bitstream, fast_configuration, bit_value_to_skip); + // New way to get the effective WL addr size. + // Based on 100K LE FPGA, we are wasting a lot of time to build + // MemoryBankFlattenFabricBitstream just to get size(). 
So wasteful of the + // resource + const FabricBitstreamMemoryBank* memory_bank = + fabric_bitstream.memory_bank_info(); + // Must call this to prepare wls_to_skip + (const_cast(memory_bank)) + ->fast_configuration(fast_configuration, bit_value_to_skip); /* Feed address and data input pair one by one * Note: the first cycle is reserved for programming reset @@ -604,7 +610,7 @@ static void print_verilog_full_testbench_ql_memory_bank_flatten_bitstream( /* Define a constant for the bitstream length */ print_verilog_define_flag(fp, std::string(TOP_TB_BITSTREAM_LENGTH_VARIABLE), - fabric_bits_by_addr.size()); + memory_bank->get_lontest_effective_wl_addr_size()); print_verilog_define_flag(fp, std::string(TOP_TB_BITSTREAM_WIDTH_VARIABLE), bl_port_width + wl_port_width);