mirror of https://github.com/YosysHQ/yosys.git
cxxrtl: speed up bit repeats (sign extends, etc.).
On Minerva SoC SRAM, depending on the compiler, this change improves overall time by 4-7%.
This commit is contained in:
parent
40ca9d038b
commit
b9721bedf0
|
@ -317,6 +317,14 @@ struct value : public expr_base<value<Bits>> {
|
|||
return sext_cast<NewBits>()(*this);
|
||||
}
|
||||
|
||||
// Bit replication is far more efficient than the equivalent concatenation.
|
||||
template<size_t Count>
|
||||
CXXRTL_ALWAYS_INLINE
|
||||
value<Bits * Count> repeat() const {
|
||||
static_assert(Bits == 1, "repeat() is implemented only for 1-bit values");
|
||||
return *this ? value<Bits * Count>().bit_not() : value<Bits * Count>();
|
||||
}
|
||||
|
||||
// Operations with run-time parameters (offsets, amounts, etc).
|
||||
//
|
||||
// These operations are used for computations.
|
||||
|
|
|
@ -832,11 +832,26 @@ struct CxxrtlWorker {
|
|||
} else if (sig.is_chunk()) {
|
||||
return dump_sigchunk(sig.as_chunk(), is_lhs, for_debug);
|
||||
} else {
|
||||
dump_sigchunk(*sig.chunks().rbegin(), is_lhs, for_debug);
|
||||
for (auto it = sig.chunks().rbegin() + 1; it != sig.chunks().rend(); ++it) {
|
||||
f << ".concat(";
|
||||
dump_sigchunk(*it, is_lhs, for_debug);
|
||||
f << ")";
|
||||
bool first = true;
|
||||
auto chunks = sig.chunks();
|
||||
for (auto it = chunks.rbegin(); it != chunks.rend(); it++) {
|
||||
if (!first)
|
||||
f << ".concat(";
|
||||
bool is_complex = dump_sigchunk(*it, is_lhs, for_debug);
|
||||
if (!is_lhs && it->width == 1) {
|
||||
size_t repeat = 1;
|
||||
while ((it + repeat) != chunks.rend() && *(it + repeat) == *it)
|
||||
repeat++;
|
||||
if (repeat > 1) {
|
||||
if (is_complex)
|
||||
f << ".val()";
|
||||
f << ".repeat<" << repeat << ">()";
|
||||
}
|
||||
it += repeat - 1;
|
||||
}
|
||||
if (!first)
|
||||
f << ")";
|
||||
first = false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue