cxxrtl: speed up bit repeats (sign extends, etc).

On Minerva SoC SRAM, depending on the compiler, this change improves
overall time by 4-7%.
This commit is contained in:
whitequark 2020-12-21 02:15:55 +00:00
parent 40ca9d038b
commit b9721bedf0
2 changed files with 28 additions and 5 deletions

View File

@ -317,6 +317,14 @@ struct value : public expr_base<value<Bits>> {
return sext_cast<NewBits>()(*this);
}
// Bit replication is far more efficient than the equivalent concatenation.
template<size_t Count>
CXXRTL_ALWAYS_INLINE
value<Bits * Count> repeat() const {
static_assert(Bits == 1, "repeat() is implemented only for 1-bit values");
return *this ? value<Bits * Count>().bit_not() : value<Bits * Count>();
}
// Operations with run-time parameters (offsets, amounts, etc).
//
// These operations are used for computations.

View File

@ -832,11 +832,26 @@ struct CxxrtlWorker {
} else if (sig.is_chunk()) {
return dump_sigchunk(sig.as_chunk(), is_lhs, for_debug);
} else {
dump_sigchunk(*sig.chunks().rbegin(), is_lhs, for_debug);
for (auto it = sig.chunks().rbegin() + 1; it != sig.chunks().rend(); ++it) {
f << ".concat(";
dump_sigchunk(*it, is_lhs, for_debug);
f << ")";
bool first = true;
auto chunks = sig.chunks();
for (auto it = chunks.rbegin(); it != chunks.rend(); it++) {
if (!first)
f << ".concat(";
bool is_complex = dump_sigchunk(*it, is_lhs, for_debug);
if (!is_lhs && it->width == 1) {
size_t repeat = 1;
while ((it + repeat) != chunks.rend() && *(it + repeat) == *it)
repeat++;
if (repeat > 1) {
if (is_complex)
f << ".val()";
f << ".repeat<" << repeat << ">()";
}
it += repeat - 1;
}
if (!first)
f << ")";
first = false;
}
return true;
}