Merge pull request #1259 from chungshien/openfpga-issue-1256

Address issue 1256
tangxifan 2023-08-07 18:18:14 -07:00 committed by GitHub
commit 1064520103
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 587 additions and 49 deletions


@@ -200,6 +200,11 @@ ShellCommandId add_write_fabric_bitstream_command_template(
"Keep don't care bits in bitstream file; If not enabled, don't care bits "
"are converted to logic '0' or '1'");
/* Add an option '--wl_decremental_order' */
shell_cmd.add_option(
"wl_decremental_order", false,
"Generate bitstream in WL decremental addressing order if supported");
/* Add an option '--no_time_stamp' */
shell_cmd.add_option("no_time_stamp", false,
"Do not print time stamp in output files");


@@ -91,6 +91,7 @@ int write_fabric_bitstream_template(const T& openfpga_ctx, const Command& cmd,
CommandOptionId opt_file_format = cmd.option("format");
CommandOptionId opt_fast_config = cmd.option("fast_configuration");
CommandOptionId opt_keep_dont_care_bits = cmd.option("keep_dont_care_bits");
CommandOptionId opt_wl_decremental_order = cmd.option("wl_decremental_order");
CommandOptionId opt_no_time_stamp = cmd.option("no_time_stamp");
/* Write fabric bitstream if required */
@@ -127,6 +128,7 @@ int write_fabric_bitstream_template(const T& openfpga_ctx, const Command& cmd,
cmd_context.option_value(cmd, opt_file),
cmd_context.option_enable(cmd, opt_fast_config),
cmd_context.option_enable(cmd, opt_keep_dont_care_bits),
!cmd_context.option_enable(cmd, opt_wl_decremental_order),
!cmd_context.option_enable(cmd, opt_no_time_stamp),
cmd_context.option_enable(cmd, opt_verbose));
}


@@ -176,6 +176,11 @@ static void rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(
bitstream_manager.block_bits(parent_block)) {
FabricBitId fabric_bit = fabric_bitstream.add_bit(config_bit);
/*
If both BL and WL protocols are flatten, we use a new way of
storing information in fabric_bitstream. This avoids the previously
high memory usage and is much faster to process.
*/
/* The BL address to be decoded depends on the protocol
* - flatten BLs: use 1-hot decoding
* - BL decoders: fully encoded
@@ -183,38 +188,57 @@ static void rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(
*/
size_t cur_bl_index = bl_start_index_per_tile.at(tile_coord.x()) +
cur_mem_index[tile_coord] % num_bls_cur_tile;
std::vector<char> bl_addr_bits_vec;
if (BLWL_PROTOCOL_DECODER == config_protocol.bl_protocol_type()) {
bl_addr_bits_vec = itobin_charvec(cur_bl_index, bl_addr_size);
} else if (BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type() ||
BLWL_PROTOCOL_SHIFT_REGISTER ==
config_protocol.bl_protocol_type()) {
bl_addr_bits_vec =
ito1hot_charvec(cur_bl_index, bl_addr_size, DONT_CARE_CHAR);
if (BLWL_PROTOCOL_FLATTEN != config_protocol.bl_protocol_type() ||
BLWL_PROTOCOL_FLATTEN != config_protocol.wl_protocol_type()) {
// This is the old way
// We only fall back to this resource-wasting storage if
// either protocol is not flatten
std::vector<char> bl_addr_bits_vec;
if (BLWL_PROTOCOL_DECODER == config_protocol.bl_protocol_type()) {
bl_addr_bits_vec = itobin_charvec(cur_bl_index, bl_addr_size);
} else if (BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type() ||
BLWL_PROTOCOL_SHIFT_REGISTER ==
config_protocol.bl_protocol_type()) {
bl_addr_bits_vec =
ito1hot_charvec(cur_bl_index, bl_addr_size, DONT_CARE_CHAR);
}
/* Set BL address */
fabric_bitstream.set_bit_bl_address(
fabric_bit, bl_addr_bits_vec,
BLWL_PROTOCOL_DECODER != config_protocol.bl_protocol_type());
}
/* Find WL address */
size_t cur_wl_index =
wl_start_index_per_tile.at(tile_coord.y()) +
std::floor(cur_mem_index[tile_coord] / num_bls_cur_tile);
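/* Worked example (illustrative, not from this commit): with
 * num_bls_cur_tile = 4 and cur_mem_index[tile_coord] = 10, this bit maps to
 * BL offset 10 % 4 = 2 and WL offset 10 / 4 = 2 within the tile */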
std::vector<char> wl_addr_bits_vec;
if (BLWL_PROTOCOL_DECODER == config_protocol.wl_protocol_type()) {
wl_addr_bits_vec = itobin_charvec(cur_wl_index, wl_addr_size);
} else if (BLWL_PROTOCOL_FLATTEN == config_protocol.wl_protocol_type() ||
BLWL_PROTOCOL_SHIFT_REGISTER ==
config_protocol.wl_protocol_type()) {
wl_addr_bits_vec = ito1hot_charvec(cur_wl_index, wl_addr_size);
if (BLWL_PROTOCOL_FLATTEN != config_protocol.bl_protocol_type() ||
BLWL_PROTOCOL_FLATTEN != config_protocol.wl_protocol_type()) {
// This is the old way
// We only fall back to this resource-wasting storage if
// either protocol is not flatten
std::vector<char> wl_addr_bits_vec;
if (BLWL_PROTOCOL_DECODER == config_protocol.wl_protocol_type()) {
wl_addr_bits_vec = itobin_charvec(cur_wl_index, wl_addr_size);
} else if (BLWL_PROTOCOL_FLATTEN == config_protocol.wl_protocol_type() ||
BLWL_PROTOCOL_SHIFT_REGISTER ==
config_protocol.wl_protocol_type()) {
wl_addr_bits_vec = ito1hot_charvec(cur_wl_index, wl_addr_size);
}
/* Set WL address */
fabric_bitstream.set_bit_wl_address(
fabric_bit, wl_addr_bits_vec,
BLWL_PROTOCOL_DECODER != config_protocol.wl_protocol_type());
}
if (BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type() &&
BLWL_PROTOCOL_FLATTEN == config_protocol.wl_protocol_type()) {
// New way of storing the information compactly
// Only for the flatten protocol (shift register could easily be supported
// as well); the decoder protocol needs further assessment
fabric_bitstream.set_memory_bank_info(
fabric_bit, fabric_bitstream_region, cur_bl_index, cur_wl_index,
bl_addr_size, wl_addr_size, bitstream_manager.bit_value(config_bit));
}
/* Set BL address */
fabric_bitstream.set_bit_bl_address(
fabric_bit, bl_addr_bits_vec,
BLWL_PROTOCOL_DECODER != config_protocol.bl_protocol_type());
/* Set WL address */
fabric_bitstream.set_bit_wl_address(
fabric_bit, wl_addr_bits_vec,
BLWL_PROTOCOL_DECODER != config_protocol.wl_protocol_type());
/* Set data input */
fabric_bitstream.set_bit_din(fabric_bit,


@@ -11,6 +11,128 @@
/* begin namespace openfpga */
namespace openfpga {
/**************************************************
* FabricBitstreamMemoryBank
*************************************************/
void FabricBitstreamMemoryBank::add_bit(const fabric_size_t& bit_id,
const fabric_size_t& region_id,
const fabric_size_t& bl,
const fabric_size_t& wl,
const fabric_size_t& bl_addr_size,
const fabric_size_t& wl_addr_size,
bool bit) {
// Fabric bits are added sequentially and each bit is unique
VTR_ASSERT((size_t)(bit_id) == fabric_bit_datas.size());
// Regions are added sequentially, but a region is not unique from the
// fabric bit perspective
VTR_ASSERT((size_t)(region_id) <= blwl_lengths.size());
if ((size_t)(region_id) == blwl_lengths.size()) {
// Add the region if this is the first time we see it
blwl_lengths.push_back(fabric_blwl_length(bl_addr_size, wl_addr_size));
} else {
// Otherwise, if the region was added before, its sizes must stay consistent
VTR_ASSERT(blwl_lengths[region_id].bl == bl_addr_size);
VTR_ASSERT(blwl_lengths[region_id].wl == wl_addr_size);
}
// The BL/WL index must be within respective length
VTR_ASSERT(bl < blwl_lengths[region_id].bl);
VTR_ASSERT(wl < blwl_lengths[region_id].wl);
// We might not need to track the raw data at all,
// but since it does not use much memory, we track it anyway
fabric_bit_datas.push_back(fabric_bit_data((fabric_size_t)(size_t)(region_id),
(fabric_size_t)(bl),
(fabric_size_t)(wl), bit));
// This is the real compact data
VTR_ASSERT(datas.size() == masks.size());
while ((size_t)(region_id) >= datas.size()) {
datas.emplace_back();
masks.emplace_back();
}
VTR_ASSERT(datas[region_id].size() == masks[region_id].size());
while ((size_t)(wl) >= datas[region_id].size()) {
datas[region_id].push_back(std::vector<uint8_t>((bl_addr_size + 7) / 8, 0));
masks[region_id].push_back(std::vector<uint8_t>((bl_addr_size + 7) / 8, 0));
}
// The same unique config bit cannot be set twice
VTR_ASSERT((masks[region_id][wl][bl >> 3] & (1 << (bl & 7))) == 0);
if (bit) {
// Mark the data value if the bit (or din) is true
datas[region_id][wl][bl >> 3] |= (1 << (bl & 7));
}
// Mark the mask to indicate this bit is used
masks[region_id][wl][bl >> 3] |= (1 << (bl & 7));
}
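// Illustrative sketch (not part of this commit): the packed-row addressing
// used above, in isolation. 'row' stands in for datas[region][wl] or
// masks[region][wl]; the helper name is hypothetical.
static inline bool example_bl_bit_is_set(const std::vector<uint8_t>& row,
                                         size_t bl) {
  // (bl >> 3) selects the byte, (1 << (bl & 7)) selects the bit inside it
  return (row[bl >> 3] & (1 << (bl & 7))) != 0;
}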
void FabricBitstreamMemoryBank::fast_configuration(
const bool& fast, const bool& bit_value_to_skip) {
for (auto& wls : wls_to_skip) {
wls.clear();
}
wls_to_skip.clear();
for (size_t region = 0; region < datas.size(); region++) {
wls_to_skip.emplace_back();
if (fast) {
for (fabric_size_t wl = 0; wl < blwl_lengths[region].wl; wl++) {
VTR_ASSERT((size_t)(wl) < datas[region].size());
bool skip_wl = true;
for (fabric_size_t bl = 0; bl < blwl_lengths[region].bl && skip_wl;
bl++) {
// Only check bits that are being used (marked in the mask);
// otherwise the bit is a don't care and can be skipped
if (masks[region][wl][bl >> 3] & (1 << (bl & 7))) {
if (datas[region][wl][bl >> 3] & (1 << (bl & 7))) {
// If bit_value_to_skip=true and the din (recorded in
// datas) is also 1, then we can skip
skip_wl = bit_value_to_skip;
} else {
skip_wl = !bit_value_to_skip;
}
}
}
if (skip_wl) {
// Record that we will skip this WL for this region
wls_to_skip[region].push_back(wl);
}
}
}
}
}
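// Illustrative helper (not part of this commit): the skip rule above as a
// single predicate. A WL can be skipped iff every used (masked) bit holds
// bit_value_to_skip; the helper name is hypothetical.
static bool example_wl_can_be_skipped(const std::vector<uint8_t>& data,
                                      const std::vector<uint8_t>& mask,
                                      size_t num_bls, bool bit_value_to_skip) {
  for (size_t bl = 0; bl < num_bls; bl++) {
    if (mask[bl >> 3] & (1 << (bl & 7))) {
      bool din = (data[bl >> 3] & (1 << (bl & 7))) != 0;
      if (din != bit_value_to_skip) {
        return false;  // one mismatching bit forces us to program this WL
      }
    }
  }
  return true;  // all used bits already hold the value we skip
}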
fabric_size_t FabricBitstreamMemoryBank::get_longest_effective_wl_count()
const {
// This function checks the effective WL count of every region,
// where effective WLs are those we still need to program after
// considering fast configuration, and returns the longest count
fabric_size_t longest_wl = 0;
for (size_t region = 0; region < datas.size(); region++) {
VTR_ASSERT((size_t)(region) < wls_to_skip.size());
fabric_size_t current_wl =
(fabric_size_t)(datas[region].size() - wls_to_skip[region].size());
if (current_wl > longest_wl) {
longest_wl = current_wl;
}
}
return longest_wl;
}
fabric_size_t FabricBitstreamMemoryBank::get_total_bl_addr_size() const {
// Simply sum up the BL address sizes of all regions
fabric_size_t bl = 0;
for (size_t region = 0; region < datas.size(); region++) {
bl += blwl_lengths[region].bl;
}
return bl;
}
fabric_size_t FabricBitstreamMemoryBank::get_total_wl_addr_size() const {
// Simply sum up the WL address sizes of all regions
fabric_size_t wl = 0;
for (size_t region = 0; region < datas.size(); region++) {
wl += blwl_lengths[region].wl;
}
return wl;
}
/**************************************************
* Public Constructor
*************************************************/
@@ -129,6 +251,15 @@ bool FabricBitstream::use_address() const { return use_address_; }
bool FabricBitstream::use_wl_address() const { return use_wl_address_; }
const FabricBitstreamMemoryBank& FabricBitstream::memory_bank_info(
const bool& fast, const bool& bit_value_to_skip) const {
VTR_ASSERT(true == use_address_);
VTR_ASSERT(true == use_wl_address_);
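// Note: fast_configuration() refreshes the cached wls_to_skip data, so this
// const accessor casts away constness before returning the member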
(const_cast<FabricBitstreamMemoryBank*>(&memory_bank_data_))
->fast_configuration(fast, bit_value_to_skip);
return memory_bank_data_;
}
/******************************************************************************
* Public Mutators
******************************************************************************/
@@ -243,6 +374,27 @@ void FabricBitstream::set_bl_address_length(const size_t& length) {
set_address_length(length);
}
void FabricBitstream::set_memory_bank_info(const FabricBitId& bit_id,
const FabricBitRegionId& region_id,
const size_t& bl, const size_t& wl,
const size_t& bl_addr_size,
const size_t& wl_addr_size,
bool bit) {
// The bit must be a valid one
// We only support this in protocols that use BL and WL addresses
VTR_ASSERT(true == valid_bit_id(bit_id));
VTR_ASSERT(true == use_address_);
VTR_ASSERT(true == use_wl_address_);
VTR_ASSERT(bl_addr_size);
VTR_ASSERT(wl_addr_size);
// All the basic checks have passed; we can add the data into
// memory_bank_data_
memory_bank_data_.add_bit(
(fabric_size_t)(size_t)(bit_id), (fabric_size_t)(size_t)(region_id),
(fabric_size_t)(bl), (fabric_size_t)(wl), (fabric_size_t)(bl_addr_size),
(fabric_size_t)(wl_addr_size), bit);
}
void FabricBitstream::set_use_wl_address(const bool& enable) {
/* Add a lock: this can only be modified while the number of bits is zero */
if (0 == num_bits_) {


@@ -41,6 +41,85 @@
/* begin namespace openfpga */
namespace openfpga {
// uint32_t (maximum of ~4 billion) is good enough: with both BL and WL,
// their combination holds up to 18 quintillion configuration bits (+ don't
// cares)
typedef uint32_t fabric_size_t;
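// (Worked arithmetic, illustrative: 2^32 BLs x 2^32 WLs = 2^64, i.e. about
// 1.8e19, the "18 quintillion" configuration bits mentioned above)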
struct fabric_bit_data {
fabric_bit_data(fabric_size_t r, fabric_size_t b, fabric_size_t w, bool bi)
: region(r), bl(b), wl(w), bit(bi) {}
const fabric_size_t region = 0;
const fabric_size_t bl = 0;
const fabric_size_t wl = 0;
const bool bit = false;
};
struct fabric_blwl_length {
fabric_blwl_length(fabric_size_t b, fabric_size_t w) : bl(b), wl(w) {}
const fabric_size_t bl = 0;
const fabric_size_t wl = 0;
};
/*
This struct arranges the memory bank database in a compact way
*/
struct FabricBitstreamMemoryBank {
void add_bit(const fabric_size_t& bit_id, const fabric_size_t& region_id,
const fabric_size_t& bl, const fabric_size_t& wl,
const fabric_size_t& bl_addr_size,
const fabric_size_t& wl_addr_size, bool bit);
void fast_configuration(const bool& fast, const bool& bit_value_to_skip);
fabric_size_t get_longest_effective_wl_count() const;
fabric_size_t get_total_bl_addr_size() const;
fabric_size_t get_total_wl_addr_size() const;
/*************************
* All the databases (except fabric_bit_datas) are sorted by region:
* 1. The first vector layer is the region
* For datas and masks:
* 1. They are sorted by WL, hence the second layer is the WL
* 2. The last layer is the BL data, stored in a vector of uint8_t
* 3. Each uint8_t stores up to 8 configuration bits
**************************/
// Store the BL/WL address lengths of each region
std::vector<fabric_blwl_length> blwl_lengths;
// Store the raw data of each config bit ID. Not used by bitstream
// generation, only by XML generation
/*
fabric_bit_datas[Bit #0] = (region, bl, wl)
fabric_bit_datas[Bit #1] = (region, bl, wl)
fabric_bit_datas[Bit #2] = (region, bl, wl)
*/
std::vector<fabric_bit_data> fabric_bit_datas;
// A 100K LE FPGA only needs a few megabytes
/*
datas represents the din value (1 bit) of a given WL and BL
datas[region #0][wl #0] = std::vector<uint8_t> to represent BLs
where uint8_t #0 = MSB{ BL#7, BL#6, .... BL #1, BL #0 } LSB
where uint8_t #1 = MSB{ BL#15, BL#14, .... BL #9, BL #8 } LSB
datas[region #0][wl #1] = std::vector<uint8_t> to represent BLs
datas[region #0][wl #2] = std::vector<uint8_t> to represent BLs
......
datas[region #0][wl #n-1] = std::vector<uint8_t> to represent BLs
......
datas[region #1][wl #0] = std::vector<uint8_t> to represent BLs
datas[region #1][wl #1] = std::vector<uint8_t> to represent BLs
......
*/
std::vector<std::vector<std::vector<uint8_t>>> datas;
/*
masks has the same structure as datas,
but masks records which data bits are being used
for example:
if a mask's uint8_t #0 value = 0x41, it means that for this WL
a. BL #0 is being used, and its din is recorded in datas
b. BL #6 is being used, and its din is recorded in datas
c. The other BLs #1, 2, 3, 4, 5, 7 are don't care bits (not being used)
*/
std::vector<std::vector<std::vector<uint8_t>>> masks;
// This tracks which WLs to skip because of fast configuration
std::vector<std::vector<fabric_size_t>> wls_to_skip;
};
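/*
Usage sketch (illustrative, not from this commit), following the add_bit()
checks above: bit IDs are sequential, and every WL row up to the largest WL
used must exist before fast_configuration() walks the full WL range.
  FabricBitstreamMemoryBank bank;
  // bit #0 of region #0 at BL 5, WL 7; address sizes 16 (BL) and 8 (WL)
  bank.add_bit(0, 0, 5, 7, 16, 8, true);   // din = 1, rows WL 0..7 created
  bank.fast_configuration(true, false);    // skip WLs whose used bits are all 0
  // WLs 0..6 have no used bits -> skipped; WL 7 has din = 1 -> kept
  fabric_size_t n = bank.get_longest_effective_wl_count();  // n == 1
*/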
class FabricBitstream {
public: /* Type implementations */
/*
@@ -144,6 +223,9 @@ class FabricBitstream {
bool use_address() const;
bool use_wl_address() const;
const FabricBitstreamMemoryBank& memory_bank_info(
const bool& fast = false, const bool& bit_value_to_skip = false) const;
public: /* Public Mutators */
/* Reserve config bits */
void reserve_bits(const size_t& num_bits);
@@ -193,6 +275,18 @@ class FabricBitstream {
void set_address_length(const size_t& length);
void set_bl_address_length(const size_t& length);
/*
This sets the memory bank protocol in a more efficient way.
Building the full BL/WL bit database instead (a BL or WL address can be
thousands of bits long) means even a small device like 100K LE (small
compared to other vendors' offerings) might end up using tens of gigabytes.
*/
void set_memory_bank_info(const FabricBitId& bit_id,
const FabricBitRegionId& region_id,
const size_t& bl, const size_t& wl,
const size_t& bl_addr_size,
const size_t& wl_addr_size, bool bit);
/* Enable the use of WL-address related data
* Same principle as set_use_address()
*/
@@ -250,6 +344,9 @@ class FabricBitstream {
/* Data input (Din) bits: this is designed for memory decoders */
vtr::vector<FabricBitId, char> bit_dins_;
/* New way of dealing with memory bank protocol - fast and compact */
FabricBitstreamMemoryBank memory_bank_data_;
};
} /* end namespace openfpga */


@@ -245,6 +245,177 @@ static int write_memory_bank_flatten_fabric_bitstream_to_text_file(
return status;
}
/********************************************************************
* Write the fabric bitstream fitting a memory bank protocol
* to a plain text file using an efficient method
*
* The old function is write_memory_bank_flatten_fabric_bitstream_to_text_file()
*
* Compared to the original function, based on a 100K LE FPGA:
* 1. The original function took 600 seconds and needed a lot of memory
* 2. This new function needs only 1 second and 4 MBytes
*
* The old function only prints WLs in decremental order. This is not
* intentional; it is a consequence of the map-key ordering.
* In QL Memory Bank with flatten BL/WL, data is stored by WL address,
* where we use the WL string as the map key
* WL #0 --- "1000000000000 .... 0000"
* WL #1 --- "0100000000000 .... 0000"
* WL #n-1 --- "0000000000000 .... 0001"
* String-comparison-wise, WL #n-1 comes first and WL #0 comes last
* The WL sequence does not really matter, but some ordering is preferable,
* and relying on the map key cannot guarantee a deterministic one
*
* This new way of writing the fabric bitstream guarantees a 100%
* deterministic WL order: either incremental (default) or decremental
*
* Return:
* - 0 if succeed
* - 1 if critical errors occurred
*******************************************************************/
static int fast_write_memory_bank_flatten_fabric_bitstream_to_text_file(
std::fstream& fp, const bool& fast_configuration,
const bool& bit_value_to_skip, const FabricBitstream& fabric_bitstream,
const bool& keep_dont_care_bits, const bool& wl_incremental_order) {
int status = 0;
std::string dont_care_bit = "0";
if (keep_dont_care_bits) {
dont_care_bit = "x";
}
const FabricBitstreamMemoryBank& memory_bank =
fabric_bitstream.memory_bank_info(fast_configuration, bit_value_to_skip);
fabric_size_t longest_effective_wl_count =
memory_bank.get_longest_effective_wl_count();
/* Output information about how to interpret the bitstream */
fp << "// Bitstream length: " << longest_effective_wl_count << std::endl;
fp << "// Bitstream width (LSB -> MSB): ";
fp << "<bl_address " << memory_bank.get_total_bl_addr_size() << " bits>";
fp << "<wl_address " << memory_bank.get_total_wl_addr_size() << " bits>";
fp << std::endl;
// Step 1
// Initialize wl_indexes for every region
// The initialization depends on the ordering of the WLs
// It could either be 0 (if wl_incremental_order=true) or
// last WL index (if wl_incremental_order=false)
std::vector<fabric_size_t> wl_indexes;
for (size_t region = 0; region < memory_bank.datas.size(); region++) {
if (wl_incremental_order) {
wl_indexes.push_back(0);
} else {
wl_indexes.push_back(
(fabric_size_t)(memory_bank.datas[region].size() - 1));
}
}
// Step 2
// Loop through total WL count that we would like to configure
for (size_t wl_index = 0; wl_index < longest_effective_wl_count; wl_index++) {
// Step 3
// Write BL address
// We cascade all regions: 0, 1, 2 ...
for (size_t region = 0; region < memory_bank.datas.size(); region++) {
// Step 3a
// The WL configuration sequence differs from region to region,
// since the WLs to skip are not the same for each region
// If the current WL that we are about to program happens to be
// one of the WLs (stored in wls_to_skip) that we had determined
// to skip, we increment or decrement to the next one,
// depending on wl_incremental_order
const fabric_blwl_length& lengths = memory_bank.blwl_lengths[region];
fabric_size_t current_wl = wl_indexes[region];
while (std::find(memory_bank.wls_to_skip[region].begin(),
memory_bank.wls_to_skip[region].end(),
current_wl) != memory_bank.wls_to_skip[region].end()) {
// We would like to skip this
if (wl_incremental_order) {
wl_indexes[region]++;
} else {
wl_indexes[region]--;
}
current_wl = wl_indexes[region];
}
// Step 3b
// If the current WL is still within the valid range, we print the BLs
// Otherwise the index has either
// overflowed (wl_incremental_order=true) or
// underflowed (wrapped to the maximum value when wl_incremental_order=false)
// Since the index type is unsigned, an underflow to -1 shows up as an
// overflow as well
// On overflow/underflow, we just print don't care bits
if (current_wl < memory_bank.datas[region].size()) {
const std::vector<uint8_t>& data =
memory_bank.datas[region][current_wl];
const std::vector<uint8_t>& mask =
memory_bank.masks[region][current_wl];
// Step 3c
// The real code that prints the BL data we had stored
// mask tells you whether each BL is valid;
// for an invalid BL, we print a don't care bit
// data tells you the real din value
// (bl >> 3) - finds the byte index of the BL
// (1 << (bl & 7)) - finds the bit index of the BL
// within that byte
// ANDing both tells us whether that BL is set or unset
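// Worked example (illustrative): bl = 13 selects byte 13 >> 3 = 1 and
// bit mask 1 << (13 & 7) = 1 << 5 = 0x20 within that byte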
for (size_t bl = 0; bl < lengths.bl; bl++) {
if (mask[bl >> 3] & (1 << (bl & 7))) {
if (data[bl >> 3] & (1 << (bl & 7))) {
fp << "1";
} else {
fp << "0";
}
} else {
fp << dont_care_bit.c_str();
}
}
} else {
/* However, not all regions have an equal WL count; for those that are
* shorter, print the don't care bit for all BLs */
for (size_t bl = 0; bl < lengths.bl; bl++) {
fp << dont_care_bit.c_str();
}
}
}
// Step 4
// Write WL address
// We cascade all regions: 0, 1, 2 ...
for (size_t region = 0; region < memory_bank.datas.size(); region++) {
const fabric_blwl_length& lengths = memory_bank.blwl_lengths[region];
fabric_size_t current_wl = wl_indexes[region];
// Step 4a
// If the current WL is still within the valid range, we print the WL
// Otherwise it has overflowed/underflowed and we print don't care bits
if (current_wl < memory_bank.datas[region].size()) {
// Step 4b
// One hot printing
for (size_t wl_temp = 0; wl_temp < lengths.wl; wl_temp++) {
if (wl_temp == current_wl) {
fp << "1";
} else {
fp << "0";
}
}
// Step 4c
// Increment or decrement to the next WL depending on wl_incremental_order
if (wl_incremental_order) {
wl_indexes[region]++;
} else {
wl_indexes[region]--;
}
} else {
/* However, not all regions have an equal WL count; for those that are
* shorter, print the don't care bit for all WLs */
for (size_t wl_temp = 0; wl_temp < lengths.wl; wl_temp++) {
fp << dont_care_bit.c_str();
}
}
}
fp << std::endl;
}
return status;
}
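// Illustrative sketch (not part of this commit) of the map-key ordering
// issue described in the function comment above: one-hot WL strings used as
// std::map keys sort in reverse WL order. Assumes <map> is available; the
// function name is hypothetical.
static void example_map_key_wl_ordering() {
  std::map<std::string, size_t> wl_map;
  wl_map["100"] = 0;  // WL #0
  wl_map["010"] = 1;  // WL #1
  wl_map["001"] = 2;  // WL #2
  // String comparison orders "001" < "010" < "100", so iteration visits
  // WL #2, WL #1, WL #0: decremental, purely as a map-key side effect
  for (const auto& kv : wl_map) {
    VTR_LOG("WL string %s -> WL #%zu\n", kv.first.c_str(), kv.second);
  }
}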
/********************************************************************
* Write the fabric bitstream fitting a memory bank protocol
* to a plain text file
@@ -393,7 +564,8 @@ int write_fabric_bitstream_to_text_file(
const ConfigProtocol& config_protocol,
const FabricGlobalPortInfo& global_ports, const std::string& fname,
const bool& fast_configuration, const bool& keep_dont_care_bits,
const bool& include_time_stamp, const bool& verbose) {
const bool& wl_incremental_order, const bool& include_time_stamp,
const bool& verbose) {
/* Ensure that we have a valid file name */
if (true == fname.empty()) {
VTR_LOG_ERROR(
@@ -454,6 +626,14 @@ int write_fabric_bitstream_to_text_file(
if (BLWL_PROTOCOL_DECODER == config_protocol.bl_protocol_type()) {
status = write_memory_bank_fabric_bitstream_to_text_file(
fp, apply_fast_configuration, bit_value_to_skip, fabric_bitstream);
} else if (BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type() &&
BLWL_PROTOCOL_FLATTEN == config_protocol.wl_protocol_type()) {
// If both BL and WL protocols are flatten, use the new way to write the
// bitstream
status = fast_write_memory_bank_flatten_fabric_bitstream_to_text_file(
fp, apply_fast_configuration, bit_value_to_skip, fabric_bitstream,
keep_dont_care_bits, wl_incremental_order);
} else if (BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type()) {
status = write_memory_bank_flatten_fabric_bitstream_to_text_file(
fp, apply_fast_configuration, bit_value_to_skip, fabric_bitstream,


@@ -27,7 +27,8 @@ int write_fabric_bitstream_to_text_file(
const ConfigProtocol& config_protocol,
const FabricGlobalPortInfo& global_ports, const std::string& fname,
const bool& fast_configuration, const bool& keep_dont_care_bits,
const bool& include_time_stamp, const bool& verbose);
const bool& wl_incremental_order, const bool& include_time_stamp,
const bool& verbose);
} /* end namespace openfpga */


@@ -71,7 +71,8 @@ static void write_fabric_bitstream_xml_file_head(
static int write_fabric_config_bit_to_xml_file(
std::fstream& fp, const BitstreamManager& bitstream_manager,
const FabricBitstream& fabric_bitstream, const FabricBitId& fabric_bit,
const e_config_protocol_type& config_type, const int& xml_hierarchy_depth) {
const e_config_protocol_type& config_type, bool fast_xml,
const int& xml_hierarchy_depth, std::string& bl_addr, std::string& wl_addr) {
if (false == valid_file_stream(fp)) {
return 1;
}
@@ -106,22 +107,60 @@
case CONFIG_MEM_STANDALONE:
case CONFIG_MEM_SCAN_CHAIN:
break;
case CONFIG_MEM_QL_MEMORY_BANK:
case CONFIG_MEM_MEMORY_BANK: {
/* Bit line address */
write_tab_to_file(fp, xml_hierarchy_depth + 1);
fp << "<bl address=\"";
for (const char& addr_bit : fabric_bitstream.bit_bl_address(fabric_bit)) {
fp << addr_bit;
}
fp << "\"/>\n";
case CONFIG_MEM_MEMORY_BANK:
case CONFIG_MEM_QL_MEMORY_BANK: {
if (fast_xml) {
// New way of printing the XML
// This is fast (less than 100 s) compared to the original 1300 s seen on
// a 100K LE FPGA
const FabricBitstreamMemoryBank& memory_bank =
fabric_bitstream.memory_bank_info();
/* Bit line address */
write_tab_to_file(fp, xml_hierarchy_depth + 1);
const fabric_bit_data& bit =
memory_bank.fabric_bit_datas[(size_t)(fabric_bit)];
const fabric_blwl_length& lengths =
memory_bank.blwl_lengths[bit.region];
if (bl_addr.size() == 0) {
VTR_ASSERT(wl_addr.size() == 0);
bl_addr.resize(lengths.bl);
wl_addr.resize(lengths.wl);
bl_addr.assign(lengths.bl, 'x');
wl_addr.assign(lengths.wl, '0');
} else {
VTR_ASSERT((fabric_size_t)(bl_addr.size()) == lengths.bl);
VTR_ASSERT((fabric_size_t)(wl_addr.size()) == lengths.wl);
}
fp << "<bl address=\"";
memset(&bl_addr[bit.bl], '1', 1);
fp << bl_addr.c_str();
memset(&bl_addr[bit.bl], 'x', 1);
fp << "\"/>\n";
/* Word line address */
write_tab_to_file(fp, xml_hierarchy_depth + 1);
fp << "<wl address=\"";
memset(&wl_addr[bit.wl], '1', 1);
fp << wl_addr.c_str();
memset(&wl_addr[bit.wl], '0', 1);
fp << "\"/>\n";
} else {
/* Bit line address */
write_tab_to_file(fp, xml_hierarchy_depth + 1);
fp << "<bl address=\"";
for (const char& addr_bit :
fabric_bitstream.bit_bl_address(fabric_bit)) {
fp << addr_bit;
}
fp << "\"/>\n";
write_tab_to_file(fp, xml_hierarchy_depth + 1);
fp << "<wl address=\"";
for (const char& addr_bit : fabric_bitstream.bit_wl_address(fabric_bit)) {
fp << addr_bit;
write_tab_to_file(fp, xml_hierarchy_depth + 1);
fp << "<wl address=\"";
for (const char& addr_bit :
fabric_bitstream.bit_wl_address(fabric_bit)) {
fp << addr_bit;
}
fp << "\"/>\n";
}
fp << "\"/>\n";
break;
}
case CONFIG_MEM_FRAME_BASED: {
@@ -156,13 +195,25 @@ static int write_fabric_regional_config_bit_to_xml_file(
std::fstream& fp, const BitstreamManager& bitstream_manager,
const FabricBitstream& fabric_bitstream,
const FabricBitRegionId& fabric_region,
const e_config_protocol_type& config_type, const int& xml_hierarchy_depth) {
const e_config_protocol_type& config_type, bool fast_xml,
const int& xml_hierarchy_depth) {
if (false == valid_file_stream(fp)) {
return 1;
}
int status = 0;
// Use a string to print, instead of printing char by char
// This is for the flatten BL/WL protocol
// You will find this much faster than printing char by char
// We do not need to rebuild the string for every BL/WL:
// the addresses are one-hot and sequential
// We start with all '0' (WL) or 'x' (BL)
// By setting '1' and resetting ('0' or 'x') at the appropriate bit position
// we can create each one-hot string much faster
// Using a 100K LE FPGA as an example: the old way needs 1300 seconds to
// write an 85 GB XML; the new way only needs 80 seconds to write an
// identical XML
std::string bl_addr = "";
std::string wl_addr = "";
write_tab_to_file(fp, xml_hierarchy_depth);
fp << "<region ";
fp << "id=\"";
@@ -170,14 +221,24 @@
fp << "\"";
fp << ">\n";
size_t bit_index = 0;
size_t total_bits = fabric_bitstream.region_bits(fabric_region).size();
size_t percentage = 0;
for (const FabricBitId& fabric_bit :
fabric_bitstream.region_bits(fabric_region)) {
status = write_fabric_config_bit_to_xml_file(
fp, bitstream_manager, fabric_bitstream, fabric_bit, config_type,
xml_hierarchy_depth + 1);
fast_xml, xml_hierarchy_depth + 1, bl_addr, wl_addr);
if (1 == status) {
return status;
}
// Misc: print the progress percentage
bit_index++;
size_t temp = (bit_index * 100) / total_bits;
if (temp != percentage) {
percentage = temp;
VTR_LOG(" Progress: %lu%%\r", percentage);
}
}
write_tab_to_file(fp, xml_hierarchy_depth);
@@ -231,6 +292,8 @@ int write_fabric_bitstream_to_xml_file(
for (const FabricBitRegionId& region : fabric_bitstream.regions()) {
status = write_fabric_regional_config_bit_to_xml_file(
fp, bitstream_manager, fabric_bitstream, region, config_protocol.type(),
BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type() &&
BLWL_PROTOCOL_FLATTEN == config_protocol.wl_protocol_type(),
xml_hierarchy_depth + 1);
if (1 == status) {
break;

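// Illustrative sketch (not part of this commit) of the set/print/reset trick
// described above: keep one pre-filled string and flip a single character per
// bit instead of rebuilding the one-hot address string every time. The
// function name is hypothetical.
static void example_print_one_hot_wl_addresses(std::fstream& fp,
                                               size_t wl_len) {
  std::string wl_addr(wl_len, '0');  // start with all '0'
  for (size_t wl = 0; wl < wl_len; wl++) {
    wl_addr[wl] = '1';               // set the single hot position
    fp << wl_addr.c_str() << "\n";
    wl_addr[wl] = '0';               // reset it for the next address
  }
}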

@@ -1061,6 +1061,17 @@ static size_t calculate_num_config_clock_cycles(
(float)full_num_config_clock_cycles -
1.));
}
} else if (BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type() &&
BLWL_PROTOCOL_FLATTEN == config_protocol.wl_protocol_type()) {
// Only support the new fast way if both BL/WL protocols are flatten
// Based on a 100K LE FPGA, we were wasting a lot of time building
// MemoryBankFlattenFabricBitstream
// just to get the effective WL address size; a waste of resources
const FabricBitstreamMemoryBank& memory_bank =
fabric_bitstream.memory_bank_info(fast_configuration,
bit_value_to_skip);
num_config_clock_cycles =
1 + memory_bank.get_longest_effective_wl_count();
} else if (BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type()) {
num_config_clock_cycles =
1 + build_memory_bank_flatten_fabric_bitstream(


@@ -565,9 +565,12 @@ static void print_verilog_full_testbench_ql_memory_bank_flatten_bitstream(
valid_file_stream(fp);
/* Reorganize the fabric bitstream by the same address across regions */
MemoryBankFlattenFabricBitstream fabric_bits_by_addr =
build_memory_bank_flatten_fabric_bitstream(
fabric_bitstream, fast_configuration, bit_value_to_skip);
// New way to get the effective WL address size.
// Based on a 100K LE FPGA, we were wasting a lot of time building
// MemoryBankFlattenFabricBitstream just to get its size(); a waste of
// resources
const FabricBitstreamMemoryBank& memory_bank =
fabric_bitstream.memory_bank_info(fast_configuration, bit_value_to_skip);
/* Feed address and data input pair one by one
* Note: the first cycle is reserved for programming reset
@@ -604,7 +607,7 @@
/* Define a constant for the bitstream length */
print_verilog_define_flag(fp, std::string(TOP_TB_BITSTREAM_LENGTH_VARIABLE),
fabric_bits_by_addr.size());
memory_bank.get_longest_effective_wl_count());
print_verilog_define_flag(fp, std::string(TOP_TB_BITSTREAM_WIDTH_VARIABLE),
bl_port_width + wl_port_width);