Merge pull request #1259 from chungshien/openfpga-issue-1256
Address issue 1256
commit 1064520103
@@ -200,6 +200,11 @@ ShellCommandId add_write_fabric_bitstream_command_template(
    "Keep don't care bits in bitstream file; If not enabled, don't care bits "
    "are converted to logic '0' or '1'");

  /* Add an option '--wl_decremental_order' */
  shell_cmd.add_option(
    "wl_decremental_order", false,
    "Generate bitstream in WL decremental addressing order if supported");

  /* Add an option '--no_time_stamp' */
  shell_cmd.add_option("no_time_stamp", false,
                       "Do not print time stamp in output files");
@@ -91,6 +91,7 @@ int write_fabric_bitstream_template(const T& openfpga_ctx, const Command& cmd,
  CommandOptionId opt_file_format = cmd.option("format");
  CommandOptionId opt_fast_config = cmd.option("fast_configuration");
  CommandOptionId opt_keep_dont_care_bits = cmd.option("keep_dont_care_bits");
  CommandOptionId opt_wl_decremental_order = cmd.option("wl_decremental_order");
  CommandOptionId opt_no_time_stamp = cmd.option("no_time_stamp");

  /* Write fabric bitstream if required */
@@ -127,6 +128,7 @@ int write_fabric_bitstream_template(const T& openfpga_ctx, const Command& cmd,
      cmd_context.option_value(cmd, opt_file),
      cmd_context.option_enable(cmd, opt_fast_config),
      cmd_context.option_enable(cmd, opt_keep_dont_care_bits),
      !cmd_context.option_enable(cmd, opt_wl_decremental_order),
      !cmd_context.option_enable(cmd, opt_no_time_stamp),
      cmd_context.option_enable(cmd, opt_verbose));
  }
@@ -176,6 +176,11 @@ static void rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(
       bitstream_manager.block_bits(parent_block)) {
    FabricBitId fabric_bit = fabric_bitstream.add_bit(config_bit);

    /*
       If both BL and WL protocols are Flatten, we use a new way of storing
       information in fabric_bitstream. This saves a lot of memory and is
       much faster to process.
    */
    /* The BL address to be decoded depends on the protocol
     * - flatten BLs: use 1-hot decoding
     * - BL decoders: fully encoded
@@ -183,38 +188,57 @@ static void rec_build_module_fabric_dependent_ql_memory_bank_regional_bitstream(
     */
    size_t cur_bl_index = bl_start_index_per_tile.at(tile_coord.x()) +
                          cur_mem_index[tile_coord] % num_bls_cur_tile;
    std::vector<char> bl_addr_bits_vec;
    if (BLWL_PROTOCOL_DECODER == config_protocol.bl_protocol_type()) {
      bl_addr_bits_vec = itobin_charvec(cur_bl_index, bl_addr_size);
    } else if (BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type() ||
               BLWL_PROTOCOL_SHIFT_REGISTER ==
                 config_protocol.bl_protocol_type()) {
      bl_addr_bits_vec =
        ito1hot_charvec(cur_bl_index, bl_addr_size, DONT_CARE_CHAR);
    if (BLWL_PROTOCOL_FLATTEN != config_protocol.bl_protocol_type() ||
        BLWL_PROTOCOL_FLATTEN != config_protocol.wl_protocol_type()) {
      // This is the old way
      // We only do this kind of resource-wasting storage if
      // either protocol is not flatten
      std::vector<char> bl_addr_bits_vec;
      if (BLWL_PROTOCOL_DECODER == config_protocol.bl_protocol_type()) {
        bl_addr_bits_vec = itobin_charvec(cur_bl_index, bl_addr_size);
      } else if (BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type() ||
                 BLWL_PROTOCOL_SHIFT_REGISTER ==
                   config_protocol.bl_protocol_type()) {
        bl_addr_bits_vec =
          ito1hot_charvec(cur_bl_index, bl_addr_size, DONT_CARE_CHAR);
      }
      /* Set BL address */
      fabric_bitstream.set_bit_bl_address(
        fabric_bit, bl_addr_bits_vec,
        BLWL_PROTOCOL_DECODER != config_protocol.bl_protocol_type());
    }

    /* Find WL address */
    size_t cur_wl_index =
      wl_start_index_per_tile.at(tile_coord.y()) +
      std::floor(cur_mem_index[tile_coord] / num_bls_cur_tile);
    std::vector<char> wl_addr_bits_vec;
    if (BLWL_PROTOCOL_DECODER == config_protocol.wl_protocol_type()) {
      wl_addr_bits_vec = itobin_charvec(cur_wl_index, wl_addr_size);
    } else if (BLWL_PROTOCOL_FLATTEN == config_protocol.wl_protocol_type() ||
               BLWL_PROTOCOL_SHIFT_REGISTER ==
                 config_protocol.wl_protocol_type()) {
      wl_addr_bits_vec = ito1hot_charvec(cur_wl_index, wl_addr_size);
    if (BLWL_PROTOCOL_FLATTEN != config_protocol.bl_protocol_type() ||
        BLWL_PROTOCOL_FLATTEN != config_protocol.wl_protocol_type()) {
      // This is the old way
      // We only do this kind of resource-wasting storage if
      // either protocol is not flatten
      std::vector<char> wl_addr_bits_vec;
      if (BLWL_PROTOCOL_DECODER == config_protocol.wl_protocol_type()) {
        wl_addr_bits_vec = itobin_charvec(cur_wl_index, wl_addr_size);
      } else if (BLWL_PROTOCOL_FLATTEN == config_protocol.wl_protocol_type() ||
                 BLWL_PROTOCOL_SHIFT_REGISTER ==
                   config_protocol.wl_protocol_type()) {
        wl_addr_bits_vec = ito1hot_charvec(cur_wl_index, wl_addr_size);
      }
      /* Set WL address */
      fabric_bitstream.set_bit_wl_address(
        fabric_bit, wl_addr_bits_vec,
        BLWL_PROTOCOL_DECODER != config_protocol.wl_protocol_type());
    }
    if (BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type() &&
        BLWL_PROTOCOL_FLATTEN == config_protocol.wl_protocol_type()) {
      // New way of storing the information in a compact form
      // Only for the Flatten protocol (could easily support shift register as well)
      // Need to understand the decoder better to make an assessment there
      fabric_bitstream.set_memory_bank_info(
        fabric_bit, fabric_bitstream_region, cur_bl_index, cur_wl_index,
        bl_addr_size, wl_addr_size, bitstream_manager.bit_value(config_bit));
    }

    /* Set BL address */
    fabric_bitstream.set_bit_bl_address(
      fabric_bit, bl_addr_bits_vec,
      BLWL_PROTOCOL_DECODER != config_protocol.bl_protocol_type());

    /* Set WL address */
    fabric_bitstream.set_bit_wl_address(
      fabric_bit, wl_addr_bits_vec,
      BLWL_PROTOCOL_DECODER != config_protocol.wl_protocol_type());

    /* Set data input */
    fabric_bitstream.set_bit_din(fabric_bit,
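For readers unfamiliar with the two encodings chosen above, the following standalone sketch shows what a binary (decoder) address and a one-hot (flatten) address look like for the same index. The helper names and the bit ordering are illustrative assumptions, not OpenFPGA's actual itobin_charvec()/ito1hot_charvec() implementations.

#include <iostream>
#include <string>

// Binary encoding, as used with BL/WL decoders: index 5 in a 4-bit address -> "0101"
std::string to_binary_addr(size_t index, size_t width) {
  std::string bits(width, '0');
  for (size_t i = 0; i < width; ++i) {
    if (index & (1u << i)) {
      bits[width - 1 - i] = '1';  // MSB first; the ordering is an assumption for illustration
    }
  }
  return bits;
}

// One-hot encoding, as used with flatten BL/WL: index 5 in an 8-bit address,
// with every other position left as the don't-care character
std::string to_one_hot_addr(size_t index, size_t width, char dont_care) {
  std::string bits(width, dont_care);
  bits[index] = '1';
  return bits;
}

int main() {
  std::cout << to_binary_addr(5, 4) << std::endl;        // prints "0101"
  std::cout << to_one_hot_addr(5, 8, 'x') << std::endl;  // prints "xxxxx1xx"
  return 0;
}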
@@ -11,6 +11,128 @@
/* begin namespace openfpga */
namespace openfpga {

/**************************************************
 * FabricBitstreamMemoryBank
 *************************************************/
void FabricBitstreamMemoryBank::add_bit(const fabric_size_t& bit_id,
                                        const fabric_size_t& region_id,
                                        const fabric_size_t& bl,
                                        const fabric_size_t& wl,
                                        const fabric_size_t& bl_addr_size,
                                        const fabric_size_t& wl_addr_size,
                                        bool bit) {
  // Fabric bits are added in a sequential manner and each bit is unique
  VTR_ASSERT((size_t)(bit_id) == fabric_bit_datas.size());
  // Regions are added in a sequential manner, but a region is not unique from
  // the fabric bit perspective
  VTR_ASSERT((size_t)(region_id) <= blwl_lengths.size());
  if ((size_t)(region_id) == blwl_lengths.size()) {
    // Add it if this is the first time we see this region
    blwl_lengths.push_back(fabric_blwl_length(bl_addr_size, wl_addr_size));
  } else {
    // Otherwise, if the region had been added, it must always be consistent
    VTR_ASSERT(blwl_lengths[region_id].bl == bl_addr_size);
    VTR_ASSERT(blwl_lengths[region_id].wl == wl_addr_size);
  }
  // The BL/WL index must be within the respective length
  VTR_ASSERT(bl < blwl_lengths[region_id].bl);
  VTR_ASSERT(wl < blwl_lengths[region_id].wl);
  // We might not need this at all to track the raw data,
  // but since it does not use a lot of memory, we track it anyway
  fabric_bit_datas.push_back(fabric_bit_data((fabric_size_t)(size_t)(region_id),
                                             (fabric_size_t)(bl),
                                             (fabric_size_t)(wl), bit));
  // This is the real compact data
  VTR_ASSERT(datas.size() == masks.size());
  while ((size_t)(region_id) >= datas.size()) {
    datas.emplace_back();
    masks.emplace_back();
  }
  VTR_ASSERT(datas[region_id].size() == masks[region_id].size());
  while ((size_t)(wl) >= datas[region_id].size()) {
    datas[region_id].push_back(std::vector<uint8_t>((bl_addr_size + 7) / 8, 0));
    masks[region_id].push_back(std::vector<uint8_t>((bl_addr_size + 7) / 8, 0));
  }
  // The same unique config bit cannot be set twice
  VTR_ASSERT((masks[region_id][wl][bl >> 3] & (1 << (bl & 7))) == 0);
  if (bit) {
    // Mark the data value if bit (or din) is true
    datas[region_id][wl][bl >> 3] |= (1 << (bl & 7));
  }
  // Mark the mask to indicate we have used this bit
  masks[region_id][wl][bl >> 3] |= (1 << (bl & 7));
}

void FabricBitstreamMemoryBank::fast_configuration(
  const bool& fast, const bool& bit_value_to_skip) {
  for (auto& wls : wls_to_skip) {
    wls.clear();
  }
  wls_to_skip.clear();
  for (size_t region = 0; region < datas.size(); region++) {
    wls_to_skip.emplace_back();
    if (fast) {
      for (fabric_size_t wl = 0; wl < blwl_lengths[region].wl; wl++) {
        VTR_ASSERT((size_t)(wl) < datas[region].size());
        bool skip_wl = true;
        for (fabric_size_t bl = 0; bl < blwl_lengths[region].bl && skip_wl;
             bl++) {
          // Only check the bits that are being used (marked in the mask);
          // otherwise the bit is just a don't care and we can skip it
          if (masks[region][wl][bl >> 3] & (1 << (bl & 7))) {
            if (datas[region][wl][bl >> 3] & (1 << (bl & 7))) {
              // If bit_value_to_skip=true, and the din (recorded in
              // datas) is also 1, then we can skip
              skip_wl = bit_value_to_skip;
            } else {
              skip_wl = !bit_value_to_skip;
            }
          }
        }
        if (skip_wl) {
          // Record that, for this region, we will skip this WL
          wls_to_skip[region].push_back(wl);
        }
      }
    }
  }
}

fabric_size_t FabricBitstreamMemoryBank::get_longest_effective_wl_count()
  const {
  // This function checks the effective WL count,
  // where an effective WL is a WL that we still want to program after
  // considering fast configuration; across all regions, it returns the longest
  fabric_size_t longest_wl = 0;
  for (size_t region = 0; region < datas.size(); region++) {
    VTR_ASSERT((size_t)(region) < wls_to_skip.size());
    fabric_size_t current_wl =
      (fabric_size_t)(datas[region].size() - wls_to_skip[region].size());
    if (current_wl > longest_wl) {
      longest_wl = current_wl;
    }
  }
  return longest_wl;
}

fabric_size_t FabricBitstreamMemoryBank::get_total_bl_addr_size() const {
  // Simply total up all the BL addr sizes
  fabric_size_t bl = 0;
  for (size_t region = 0; region < datas.size(); region++) {
    bl += blwl_lengths[region].bl;
  }
  return bl;
}

fabric_size_t FabricBitstreamMemoryBank::get_total_wl_addr_size() const {
  // Simply total up all the WL addr sizes
  fabric_size_t wl = 0;
  for (size_t region = 0; region < datas.size(); region++) {
    wl += blwl_lengths[region].wl;
  }
  return wl;
}

/**************************************************
 * Public Constructor
 *************************************************/
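To make the packing in add_bit() concrete: the bit for (region, wl, bl) lands in byte bl >> 3 of the row datas[region][wl], at bit position bl & 7, and the same position in masks marks it as used. A hypothetical read-back helper that inverts this packing could look like the sketch below; no such accessor exists in the patch, and the type alias is only for illustration.

#include <cstdint>
#include <utility>
#include <vector>

using BitPlane = std::vector<std::vector<std::vector<uint8_t>>>;  // [region][wl][byte]

// Returns {used, din} for the configuration bit at (region, wl, bl),
// assuming the datas/masks layout built by FabricBitstreamMemoryBank::add_bit().
std::pair<bool, bool> read_bit(const BitPlane& datas, const BitPlane& masks,
                               size_t region, size_t wl, size_t bl) {
  const uint8_t bit_mask = 1 << (bl & 7);  // bit position inside the byte
  const uint8_t used = masks[region][wl][bl >> 3] & bit_mask;
  const uint8_t value = datas[region][wl][bl >> 3] & bit_mask;
  return {used != 0, (used & value) != 0};
}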
@@ -129,6 +251,15 @@ bool FabricBitstream::use_address() const { return use_address_; }

bool FabricBitstream::use_wl_address() const { return use_wl_address_; }

const FabricBitstreamMemoryBank& FabricBitstream::memory_bank_info(
  const bool& fast, const bool& bit_value_to_skip) const {
  VTR_ASSERT(true == use_address_);
  VTR_ASSERT(true == use_wl_address_);
  (const_cast<FabricBitstreamMemoryBank*>(&memory_bank_data_))
    ->fast_configuration(fast, bit_value_to_skip);
  return memory_bank_data_;
}

/******************************************************************************
 * Public Mutators
 ******************************************************************************/
@@ -243,6 +374,27 @@ void FabricBitstream::set_bl_address_length(const size_t& length) {
  set_address_length(length);
}

void FabricBitstream::set_memory_bank_info(const FabricBitId& bit_id,
                                           const FabricBitRegionId& region_id,
                                           const size_t& bl, const size_t& wl,
                                           const size_t& bl_addr_size,
                                           const size_t& wl_addr_size,
                                           bool bit) {
  // The bit must be a valid one
  // We only support this in protocols that use BL and WL addresses
  VTR_ASSERT(true == valid_bit_id(bit_id));
  VTR_ASSERT(true == use_address_);
  VTR_ASSERT(true == use_wl_address_);
  VTR_ASSERT(bl_addr_size);
  VTR_ASSERT(wl_addr_size);
  // All the basic checks have passed; we can add the data into
  // memory_bank_data_
  memory_bank_data_.add_bit(
    (fabric_size_t)(size_t)(bit_id), (fabric_size_t)(size_t)(region_id),
    (fabric_size_t)(bl), (fabric_size_t)(wl), (fabric_size_t)(bl_addr_size),
    (fabric_size_t)(wl_addr_size), bit);
}

void FabricBitstream::set_use_wl_address(const bool& enable) {
  /* Add a lock: this can only be modified when the number of bits is zero */
  if (0 == num_bits_) {
@@ -41,6 +41,85 @@
/* begin namespace openfpga */
namespace openfpga {

// Using uint32_t (a maximum of ~4G) is good enough; we have BL and WL, and the
// combination of both holds up to 18 quintillion configuration bits (+ don't
// care)
typedef uint32_t fabric_size_t;
struct fabric_bit_data {
  fabric_bit_data(fabric_size_t r, fabric_size_t b, fabric_size_t w, bool bi)
    : region(r), bl(b), wl(w), bit(bi) {}
  const fabric_size_t region = 0;
  const fabric_size_t bl = 0;
  const fabric_size_t wl = 0;
  const bool bit = false;
};
struct fabric_blwl_length {
  fabric_blwl_length(fabric_size_t b, fabric_size_t w) : bl(b), wl(w) {}
  const fabric_size_t bl = 0;
  const fabric_size_t wl = 0;
};

/*
  This class arranges the Memory Bank database in a compact way
*/
struct FabricBitstreamMemoryBank {
  void add_bit(const fabric_size_t& bit_id, const fabric_size_t& region_id,
               const fabric_size_t& bl, const fabric_size_t& wl,
               const fabric_size_t& bl_addr_size,
               const fabric_size_t& wl_addr_size, bool bit);
  void fast_configuration(const bool& fast, const bool& bit_value_to_skip);
  fabric_size_t get_longest_effective_wl_count() const;
  fabric_size_t get_total_bl_addr_size() const;
  fabric_size_t get_total_wl_addr_size() const;

  /*************************
   * All of the database (except fabric_bit_datas) is sorted by region
   * 1. The very first layer of each vector is the region
   * For datas and masks
   * 1. They are sorted by WL, hence the second layer is the WL
   * 2. The last layer is the BL data, stored in a vector of uint8_t
   * 3. Each uint8_t stores up to 8 configuration bits
   **************************/
  // Store the BL/WL lengths of each region
  std::vector<fabric_blwl_length> blwl_lengths;
  // Store config ID raw data. Not used by bitstream generation;
  // used by XML generation
  /*
    fabric_bit_datas[Bit #0] = (region, bl, wl)
    fabric_bit_datas[Bit #1] = (region, bl, wl)
    fabric_bit_datas[Bit #2] = (region, bl, wl)
  */
  std::vector<fabric_bit_data> fabric_bit_datas;
  // A 100K LE FPGA only needs a few megabytes
  /*
    datas represents the Din value of a given WL and BL (1 bit)
    datas[region #0][wl #0] = std::vector<uint8_t> to represent BLs
      where uint8_t #0 = MSB{ BL#7, BL#6, .... BL #1, BL #0 } LSB
      where uint8_t #1 = MSB{ BL#15, BL#14, .... BL #9, BL #8 } LSB
    datas[region #0][wl #1] = std::vector<uint8_t> to represent BLs
    datas[region #0][wl #2] = std::vector<uint8_t> to represent BLs
    ......
    datas[region #0][wl #n-1] = std::vector<uint8_t> to represent BLs
    ......
    datas[region #1][wl #0] = std::vector<uint8_t> to represent BLs
    datas[region #1][wl #1] = std::vector<uint8_t> to represent BLs
    ......
  */
  std::vector<std::vector<std::vector<uint8_t>>> datas;
  /*
    masks has the same structure as datas,
    but masks indicates which data bits are being used
    for example:
      if a mask's uint8_t #0 value = 0x41, it means that for this WL
        a. BL #0 is being used, and its Din is recorded in datas
        b. BL #6 is being used, and its Din is recorded in datas
        c. The other BLs #1, 2, 3, 4, 5, 7 are don't care bits (not being used)
  */
  std::vector<std::vector<std::vector<uint8_t>>> masks;
  // This tracks which WLs to skip because of fast configuration
  std::vector<std::vector<fabric_size_t>> wls_to_skip;
};

class FabricBitstream {
 public: /* Type implementations */
  /*
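The 0x41 example in the comment above can be checked with a few self-contained lines; this is only an illustration of the packing convention, not code from the patch.

#include <cstdint>
#include <iostream>

int main() {
  const uint8_t mask_byte = 0x41;  // 0b01000001, the value used in the comment above
  for (size_t bl = 0; bl < 8; ++bl) {
    if (mask_byte & (1 << (bl & 7))) {  // same bit test as add_bit()/fast_configuration()
      std::cout << "BL #" << bl << " is used\n";  // prints BL #0 and BL #6
    } else {
      std::cout << "BL #" << bl << " is a don't care\n";
    }
  }
  return 0;
}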
@@ -144,6 +223,9 @@ class FabricBitstream {
  bool use_address() const;
  bool use_wl_address() const;

  const FabricBitstreamMemoryBank& memory_bank_info(
    const bool& fast = false, const bool& bit_value_to_skip = false) const;

 public: /* Public Mutators */
  /* Reserve config bits */
  void reserve_bits(const size_t& num_bits);
@@ -193,6 +275,18 @@ class FabricBitstream {
  void set_address_length(const size_t& length);
  void set_bl_address_length(const size_t& length);

  /*
    This sets the memory bank protocol data in a more efficient way.
    It avoids building a lengthy database of BL/WL bits (a BL or WL address
    could be thousands of bits in size), with which even a small device like a
    100K LE part (small compared to what other vendors offer) might end up
    using tens of gigabytes.
  */
  void set_memory_bank_info(const FabricBitId& bit_id,
                            const FabricBitRegionId& region_id,
                            const size_t& bl, const size_t& wl,
                            const size_t& bl_addr_size,
                            const size_t& wl_addr_size, bool bit);

  /* Enable the use of WL-address related data
   * Same principle as set_use_address()
   */
@@ -250,6 +344,9 @@ class FabricBitstream {

  /* Data input (Din) bits: this is designed for memory decoders */
  vtr::vector<FabricBitId, char> bit_dins_;

  /* New way of dealing with the memory bank protocol - fast and compact */
  FabricBitstreamMemoryBank memory_bank_data_;
};

} /* end namespace openfpga */
@@ -245,6 +245,177 @@ static int write_memory_bank_flatten_fabric_bitstream_to_text_file(
  return status;
}

/********************************************************************
 * Write the fabric bitstream fitting a memory bank protocol
 * to a plain text file in an efficient manner
 *
 * The old function is write_memory_bank_flatten_fabric_bitstream_to_text_file()
 *
 * Compared to the original function, based on a 100K LE FPGA:
 * 1. The original function took 600 seconds and needed a lot of memory
 * 2. This new function only needs 1 second and 4 MBytes
 *
 * The old function only prints WLs in decremental order. That is not
 * intentional; it is a consequence of the map-key ordering.
 * In a QL Memory Bank with Flatten BL/WL, data is stored by WL address,
 * where we use the WL string as the map key
 *   WL #0   --- "1000000000000 .... 0000"
 *   WL #1   --- "0100000000000 .... 0000"
 *   WL #n-1 --- "0000000000000 .... 0001"
 * In string-comparison terms, WL #n-1 comes first and WL #0 comes last.
 * The sequence of WLs does not really matter, but some ordering is
 * preferable. Using the map key as the ordering cannot guarantee determinism.
 *
 * This new way of writing the fabric bitstream guarantees the WL order in a
 * 100% deterministic way: either incremental (default) or decremental
 *
 * Return:
 *  - 0 if it succeeds
 *  - 1 if critical errors occurred
 *******************************************************************/
static int fast_write_memory_bank_flatten_fabric_bitstream_to_text_file(
  std::fstream& fp, const bool& fast_configuration,
  const bool& bit_value_to_skip, const FabricBitstream& fabric_bitstream,
  const bool& keep_dont_care_bits, const bool& wl_incremental_order) {
  int status = 0;

  std::string dont_care_bit = "0";
  if (keep_dont_care_bits) {
    dont_care_bit = "x";
  }
  const FabricBitstreamMemoryBank& memory_bank =
    fabric_bitstream.memory_bank_info(fast_configuration, bit_value_to_skip);

  fabric_size_t longest_effective_wl_count =
    memory_bank.get_longest_effective_wl_count();
  /* Output information about how to interpret the bitstream */
  fp << "// Bitstream length: " << longest_effective_wl_count << std::endl;
  fp << "// Bitstream width (LSB -> MSB): ";
  fp << "<bl_address " << memory_bank.get_total_bl_addr_size() << " bits>";
  fp << "<wl_address " << memory_bank.get_total_wl_addr_size() << " bits>";
  fp << std::endl;

  // Step 1
  // Initialize wl_indexes for every region
  // The initialization depends on the ordering of the WLs:
  // it is either 0 (if wl_incremental_order=true) or
  // the last WL index (if wl_incremental_order=false)
  std::vector<fabric_size_t> wl_indexes;
  for (size_t region = 0; region < memory_bank.datas.size(); region++) {
    if (wl_incremental_order) {
      wl_indexes.push_back(0);
    } else {
      wl_indexes.push_back(
        (fabric_size_t)(memory_bank.datas[region].size() - 1));
    }
  }
  // Step 2
  // Loop through the total WL count that we would like to configure
  for (size_t wl_index = 0; wl_index < longest_effective_wl_count; wl_index++) {
    // Step 3
    // Write the BL address
    // We cascade all regions: 0, 1, 2 ...
    for (size_t region = 0; region < memory_bank.datas.size(); region++) {
      // Step 3a
      // The configuration sequence of each region's WLs is not the same,
      // since the WLs to skip for each region are not the same.
      // If it happens that the current WL we are going to program is
      // one of the WLs (stored in wls_to_skip) that we had determined
      // to skip, then we increment or decrement to the next one,
      // depending on wl_incremental_order
      const fabric_blwl_length& lengths = memory_bank.blwl_lengths[region];
      fabric_size_t current_wl = wl_indexes[region];
      while (std::find(memory_bank.wls_to_skip[region].begin(),
                       memory_bank.wls_to_skip[region].end(),
                       current_wl) != memory_bank.wls_to_skip[region].end()) {
        // We would like to skip this one
        if (wl_incremental_order) {
          wl_indexes[region]++;
        } else {
          wl_indexes[region]--;
        }
        current_wl = wl_indexes[region];
      }
      // Step 3b
      // If the current WL is still within the valid range, we print the BLs
      // Otherwise it is either an
      //   overflow (wl_incremental_order=true) or an
      //   underflow (wrapping to the max fabric_size_t when wl_incremental_order=false)
      // Since fabric_size_t is unsigned, an underflow to -1 is
      // treated as an overflow too
      // If it is an overflow/underflow, then we just print don't care
      if (current_wl < memory_bank.datas[region].size()) {
        const std::vector<uint8_t>& data =
          memory_bank.datas[region][current_wl];
        const std::vector<uint8_t>& mask =
          memory_bank.masks[region][current_wl];
        // Step 3c
        // The real code to print the BL data that we had stored
        // mask tells you whether each BL is valid;
        // for an invalid BL, we print don't care
        // data tells you the real din value
        // (bl >> 3)       - This finds the byte index of the BL
        // (1 << (bl & 7)) - This finds the bit index of the BL
        //                   within that byte
        // When we '&' both, we know whether that BL is set or unset
        for (size_t bl = 0; bl < lengths.bl; bl++) {
          if (mask[bl >> 3] & (1 << (bl & 7))) {
            if (data[bl >> 3] & (1 << (bl & 7))) {
              fp << "1";
            } else {
              fp << "0";
            }
          } else {
            fp << dont_care_bit.c_str();
          }
        }
      } else {
        /* However, not all regions have an equal WL count; for those that are
         * shorter, print 'x' for all BLs */
        for (size_t bl = 0; bl < lengths.bl; bl++) {
          fp << dont_care_bit.c_str();
        }
      }
    }
    // Step 4
    // Write the WL address
    // We cascade all regions: 0, 1, 2 ...
    for (size_t region = 0; region < memory_bank.datas.size(); region++) {
      const fabric_blwl_length& lengths = memory_bank.blwl_lengths[region];
      fabric_size_t current_wl = wl_indexes[region];
      // Step 4a
      // If the current WL is still within the valid range, we print the WL
      // Otherwise it is an overflow/underflow and we print don't care
      if (current_wl < memory_bank.datas[region].size()) {
        // Step 4b
        // One-hot printing
        for (size_t wl_temp = 0; wl_temp < lengths.wl; wl_temp++) {
          if (wl_temp == current_wl) {
            fp << "1";
          } else {
            fp << "0";
          }
        }
        // Step 4c
        // Increment or decrement to the next WL, depending on wl_incremental_order
        if (wl_incremental_order) {
          wl_indexes[region]++;
        } else {
          wl_indexes[region]--;
        }
      } else {
        /* However, not all regions have an equal WL count; for those that are
         * shorter, print 'x' for all WLs */
        for (size_t wl_temp = 0; wl_temp < lengths.wl; wl_temp++) {
          fp << dont_care_bit.c_str();
        }
      }
    }
    fp << std::endl;
  }
  return status;
}

/********************************************************************
 * Write the fabric bitstream fitting a memory bank protocol
 * to a plain text file
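To visualize the line format this writer emits (the cascaded BL bits of every region, followed by the cascaded one-hot WL bits), here is a toy, self-contained sketch for a single made-up region of 4 BLs by 3 WLs; the sizes and values are invented for illustration and are not taken from the patch.

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

int main() {
  // One region, 4 BLs x 3 WLs. On WL #1, BL #0 is programmed to 1 and BL #2 to 0;
  // BL #1 and BL #3 are untouched (don't care).
  const size_t bl_len = 4, wl_len = 3, current_wl = 1;
  const std::vector<uint8_t> data = {0x01};  // din of BL #0 is 1
  const std::vector<uint8_t> mask = {0x05};  // BL #0 and BL #2 are used
  std::string line;
  for (size_t bl = 0; bl < bl_len; ++bl) {  // Step 3: BL data bits
    if (mask[bl >> 3] & (1 << (bl & 7))) {
      line += (data[bl >> 3] & (1 << (bl & 7))) ? '1' : '0';
    } else {
      line += 'x';  // as written when don't care bits are kept
    }
  }
  for (size_t wl = 0; wl < wl_len; ++wl) {  // Step 4: one-hot WL bits
    line += (wl == current_wl) ? '1' : '0';
  }
  std::cout << line << std::endl;  // prints "1x0x010"
  return 0;
}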
@@ -393,7 +564,8 @@ int write_fabric_bitstream_to_text_file(
  const ConfigProtocol& config_protocol,
  const FabricGlobalPortInfo& global_ports, const std::string& fname,
  const bool& fast_configuration, const bool& keep_dont_care_bits,
  const bool& include_time_stamp, const bool& verbose) {
  const bool& wl_incremental_order, const bool& include_time_stamp,
  const bool& verbose) {
  /* Ensure that we have a valid file name */
  if (true == fname.empty()) {
    VTR_LOG_ERROR(
@@ -454,6 +626,14 @@ int write_fabric_bitstream_to_text_file(
    if (BLWL_PROTOCOL_DECODER == config_protocol.bl_protocol_type()) {
      status = write_memory_bank_fabric_bitstream_to_text_file(
        fp, apply_fast_configuration, bit_value_to_skip, fabric_bitstream);
    } else if (BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type() &&
               BLWL_PROTOCOL_FLATTEN == config_protocol.wl_protocol_type()) {
      // If both BL and WL protocols are flatten, use the new way to write the
      // bitstream
      status = fast_write_memory_bank_flatten_fabric_bitstream_to_text_file(
        fp, apply_fast_configuration, bit_value_to_skip, fabric_bitstream,
        keep_dont_care_bits, wl_incremental_order);

    } else if (BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type()) {
      status = write_memory_bank_flatten_fabric_bitstream_to_text_file(
        fp, apply_fast_configuration, bit_value_to_skip, fabric_bitstream,
@@ -27,7 +27,8 @@ int write_fabric_bitstream_to_text_file(
  const ConfigProtocol& config_protocol,
  const FabricGlobalPortInfo& global_ports, const std::string& fname,
  const bool& fast_configuration, const bool& keep_dont_care_bits,
  const bool& include_time_stamp, const bool& verbose);
  const bool& wl_incremental_order, const bool& include_time_stamp,
  const bool& verbose);

} /* end namespace openfpga */
@@ -71,7 +71,8 @@ static void write_fabric_bitstream_xml_file_head(
static int write_fabric_config_bit_to_xml_file(
  std::fstream& fp, const BitstreamManager& bitstream_manager,
  const FabricBitstream& fabric_bitstream, const FabricBitId& fabric_bit,
  const e_config_protocol_type& config_type, const int& xml_hierarchy_depth) {
  const e_config_protocol_type& config_type, bool fast_xml,
  const int& xml_hierarchy_depth, std::string& bl_addr, std::string& wl_addr) {
  if (false == valid_file_stream(fp)) {
    return 1;
  }
@@ -106,22 +107,60 @@ static int write_fabric_config_bit_to_xml_file(
    case CONFIG_MEM_STANDALONE:
    case CONFIG_MEM_SCAN_CHAIN:
      break;
    case CONFIG_MEM_QL_MEMORY_BANK:
    case CONFIG_MEM_MEMORY_BANK: {
      /* Bit line address */
      write_tab_to_file(fp, xml_hierarchy_depth + 1);
      fp << "<bl address=\"";
      for (const char& addr_bit : fabric_bitstream.bit_bl_address(fabric_bit)) {
        fp << addr_bit;
      }
      fp << "\"/>\n";
    case CONFIG_MEM_MEMORY_BANK:
    case CONFIG_MEM_QL_MEMORY_BANK: {
      if (fast_xml) {
        // New way of printing the XML
        // This is fast (less than 100s), as compared to the original 1300s seen
        // on a 100K LE FPGA
        const FabricBitstreamMemoryBank& memory_bank =
          fabric_bitstream.memory_bank_info();
        /* Bit line address */
        write_tab_to_file(fp, xml_hierarchy_depth + 1);
        const fabric_bit_data& bit =
          memory_bank.fabric_bit_datas[(size_t)(fabric_bit)];
        const fabric_blwl_length& lengths =
          memory_bank.blwl_lengths[bit.region];
        if (bl_addr.size() == 0) {
          VTR_ASSERT(wl_addr.size() == 0);
          bl_addr.resize(lengths.bl);
          wl_addr.resize(lengths.wl);
          bl_addr.assign(lengths.bl, 'x');
          wl_addr.assign(lengths.wl, '0');
        } else {
          VTR_ASSERT((fabric_size_t)(bl_addr.size()) == lengths.bl);
          VTR_ASSERT((fabric_size_t)(wl_addr.size()) == lengths.wl);
        }
        fp << "<bl address=\"";
        memset(&bl_addr[bit.bl], '1', 1);
        fp << bl_addr.c_str();
        memset(&bl_addr[bit.bl], 'x', 1);
        fp << "\"/>\n";
        /* Word line address */
        write_tab_to_file(fp, xml_hierarchy_depth + 1);
        fp << "<wl address=\"";
        memset(&wl_addr[bit.wl], '1', 1);
        fp << wl_addr.c_str();
        memset(&wl_addr[bit.wl], '0', 1);
        fp << "\"/>\n";
      } else {
        /* Bit line address */
        write_tab_to_file(fp, xml_hierarchy_depth + 1);
        fp << "<bl address=\"";
        for (const char& addr_bit :
             fabric_bitstream.bit_bl_address(fabric_bit)) {
          fp << addr_bit;
        }
        fp << "\"/>\n";

        write_tab_to_file(fp, xml_hierarchy_depth + 1);
        fp << "<wl address=\"";
        for (const char& addr_bit : fabric_bitstream.bit_wl_address(fabric_bit)) {
          fp << addr_bit;
        write_tab_to_file(fp, xml_hierarchy_depth + 1);
        fp << "<wl address=\"";
        for (const char& addr_bit :
             fabric_bitstream.bit_wl_address(fabric_bit)) {
          fp << addr_bit;
        }
        fp << "\"/>\n";
      }
      fp << "\"/>\n";
      break;
    }
    case CONFIG_MEM_FRAME_BASED: {
@@ -156,13 +195,25 @@ static int write_fabric_regional_config_bit_to_xml_file(
  std::fstream& fp, const BitstreamManager& bitstream_manager,
  const FabricBitstream& fabric_bitstream,
  const FabricBitRegionId& fabric_region,
  const e_config_protocol_type& config_type, const int& xml_hierarchy_depth) {
  const e_config_protocol_type& config_type, bool fast_xml,
  const int& xml_hierarchy_depth) {
  if (false == valid_file_stream(fp)) {
    return 1;
  }

  int status = 0;

  // Use strings to print, instead of printing char by char.
  // This is for the Flatten BL/WL protocol.
  // You will find this much faster than printing char by char.
  // We do not need to build the string for every BL/WL:
  // the addresses are one-hot and sequential.
  // We start with all '0' (WL) or 'x' (BL).
  // By setting '1' and resetting ('0' or 'x') at the appropriate bit position,
  // we can create the one-hot string much faster.
  // Using a 100K LE FPGA as an example: the old way needs 1300 seconds to write
  // an 85 GB XML; the new way only needs 80 seconds to write an identical XML.
  std::string bl_addr = "";
  std::string wl_addr = "";
  write_tab_to_file(fp, xml_hierarchy_depth);
  fp << "<region ";
  fp << "id=\"";
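The speed-up described above comes from mutating one pre-built string in place rather than rebuilding it for every bit. A minimal sketch of that pattern follows; the length is made up and this is not code from the patch.

#include <iostream>
#include <string>

int main() {
  const size_t wl_len = 8;
  std::string wl_addr(wl_len, '0');  // build the all-'0' template once
  for (size_t wl = 0; wl < wl_len; ++wl) {
    wl_addr[wl] = '1';               // set the one-hot position: O(1) per bit
    std::cout << wl_addr << "\n";    // e.g. "00010000" when wl == 3
    wl_addr[wl] = '0';               // reset it for the next bit
  }
  return 0;
}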
@@ -170,14 +221,24 @@ static int write_fabric_regional_config_bit_to_xml_file(
  fp << "\"";
  fp << ">\n";

  size_t bit_index = 0;
  size_t total_bits = fabric_bitstream.region_bits(fabric_region).size();
  size_t percentage = 0;
  for (const FabricBitId& fabric_bit :
       fabric_bitstream.region_bits(fabric_region)) {
    status = write_fabric_config_bit_to_xml_file(
      fp, bitstream_manager, fabric_bitstream, fabric_bit, config_type,
      xml_hierarchy_depth + 1);
      fast_xml, xml_hierarchy_depth + 1, bl_addr, wl_addr);
    if (1 == status) {
      return status;
    }
    // Misc: print the percentage progress of the process
    bit_index++;
    size_t temp = (bit_index * 100) / total_bits;
    if (temp != percentage) {
      VTR_LOG("  Progress: %lu%\r", percentage);
      percentage = temp;
    }
  }

  write_tab_to_file(fp, xml_hierarchy_depth);
@@ -231,6 +292,8 @@ int write_fabric_bitstream_to_xml_file(
  for (const FabricBitRegionId& region : fabric_bitstream.regions()) {
    status = write_fabric_regional_config_bit_to_xml_file(
      fp, bitstream_manager, fabric_bitstream, region, config_protocol.type(),
      BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type() &&
        BLWL_PROTOCOL_FLATTEN == config_protocol.wl_protocol_type(),
      xml_hierarchy_depth + 1);
    if (1 == status) {
      break;
@@ -1061,6 +1061,17 @@ static size_t calculate_num_config_clock_cycles(
          (float)full_num_config_clock_cycles -
          1.));
    }
  } else if (BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type() &&
             BLWL_PROTOCOL_FLATTEN == config_protocol.wl_protocol_type()) {
    // Only support the new fast way if both BL/WL protocols are flatten
    // Based on a 100K LE FPGA, we were wasting a lot of time building a
    // MemoryBankFlattenFabricBitstream just to get the effective WL addr size.
    // Such a waste of resources
    const FabricBitstreamMemoryBank& memory_bank =
      fabric_bitstream.memory_bank_info(fast_configuration,
                                        bit_value_to_skip);
    num_config_clock_cycles =
      1 + memory_bank.get_longest_effective_wl_count();
  } else if (BLWL_PROTOCOL_FLATTEN == config_protocol.bl_protocol_type()) {
    num_config_clock_cycles =
      1 + build_memory_bank_flatten_fabric_bitstream(
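To illustrate the formula with made-up numbers: if a design has two regions whose datas hold 120 and 100 WLs, and fast configuration marks 20 and 5 WLs to skip respectively, the effective counts are 100 and 95, so get_longest_effective_wl_count() returns 100 and num_config_clock_cycles = 1 + 100 = 101, the extra cycle being the programming-reset cycle mentioned in the testbench code below.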
@@ -565,9 +565,12 @@ static void print_verilog_full_testbench_ql_memory_bank_flatten_bitstream(
  valid_file_stream(fp);

  /* Reorganize the fabric bitstream by the same address across regions */
  MemoryBankFlattenFabricBitstream fabric_bits_by_addr =
    build_memory_bank_flatten_fabric_bitstream(
      fabric_bitstream, fast_configuration, bit_value_to_skip);
  // New way to get the effective WL addr size.
  // Based on a 100K LE FPGA, we were wasting a lot of time building a
  // MemoryBankFlattenFabricBitstream just to get size(). Such a waste of
  // resources
  const FabricBitstreamMemoryBank& memory_bank =
    fabric_bitstream.memory_bank_info(fast_configuration, bit_value_to_skip);

  /* Feed address and data input pair one by one
   * Note: the first cycle is reserved for programming reset
@@ -604,7 +607,7 @@ static void print_verilog_full_testbench_ql_memory_bank_flatten_bitstream(

  /* Define a constant for the bitstream length */
  print_verilog_define_flag(fp, std::string(TOP_TB_BITSTREAM_LENGTH_VARIABLE),
                            fabric_bits_by_addr.size());
                            memory_bank.get_longest_effective_wl_count());
  print_verilog_define_flag(fp, std::string(TOP_TB_BITSTREAM_WIDTH_VARIABLE),
                            bl_port_width + wl_port_width);