diff --git a/backends/cxxrtl/cxxrtl.h b/backends/cxxrtl/cxxrtl.h index ce21cc1e6..d4a94fcb4 100644 --- a/backends/cxxrtl/cxxrtl.h +++ b/backends/cxxrtl/cxxrtl.h @@ -17,6 +17,11 @@ */ // This file is included by the designs generated with `write_cxxrtl`. It is not used in Yosys itself. +// +// The CXXRTL support library implements compile-time specialized arbitrary-width arithmetic, and provides +// composite lvalues made out of bit slices and concatenations of lvalues. This allows the `write_cxxrtl` pass +// to perform a straightforward translation of RTLIL structures to readable C++, relying on the C++ compiler +// to unwrap the abstraction and generate efficient code. #ifndef CXXRTL_H #define CXXRTL_H @@ -35,10 +40,19 @@ #include -// The CXXRTL support library implements compile time specialized arbitrary width arithmetics, as well as provides -// composite lvalues made out of bit slices and concatenations of lvalues. This allows the `write_cxxrtl` pass -// to perform a straightforward translation of RTLIL structures to readable C++, relying on the C++ compiler -// to unwrap the abstraction and generate efficient code. +// CXXRTL essentially uses the C++ compiler as a hygienic macro engine that feeds an instruction selector. +// It generates a lot of specialized template functions with relatively large bodies that, when inlined +// into the caller and (for those with loops) unrolled, often expose many new optimization opportunities. +// Because of this, most of the CXXRTL runtime must always be inlined for best performance. +#ifndef __has_attribute +# define __has_attribute(x) 0 +#endif +#if __has_attribute(always_inline) +#define CXXRTL_ALWAYS_INLINE inline __attribute__((__always_inline__)) +#else +#define CXXRTL_ALWAYS_INLINE inline +#endif + namespace cxxrtl { // All arbitrary-width values in CXXRTL are backed by arrays of unsigned integers called chunks.
The chunk size @@ -85,6 +99,7 @@ struct value : public expr_base> { value &operator=(const value &) = default; // A (no-op) helper that forces the cast to value<>. + CXXRTL_ALWAYS_INLINE const value &val() const { return *this; } @@ -101,6 +116,7 @@ struct value : public expr_base> { // The trunc, zext and sext operations add or remove most significant bits (i.e. on the left); // the rtrunc and rzext operations add or remove least significant bits (i.e. on the right). template + CXXRTL_ALWAYS_INLINE value trunc() const { static_assert(NewBits <= Bits, "trunc() may not increase width"); value result; @@ -111,6 +127,7 @@ struct value : public expr_base> { } template + CXXRTL_ALWAYS_INLINE value zext() const { static_assert(NewBits >= Bits, "zext() may not decrease width"); value result; @@ -120,6 +137,7 @@ struct value : public expr_base> { } template + CXXRTL_ALWAYS_INLINE value sext() const { static_assert(NewBits >= Bits, "sext() may not decrease width"); value result; @@ -135,6 +153,7 @@ struct value : public expr_base> { } template + CXXRTL_ALWAYS_INLINE value rtrunc() const { static_assert(NewBits <= Bits, "rtrunc() may not increase width"); value result; @@ -154,6 +173,7 @@ struct value : public expr_base> { } template + CXXRTL_ALWAYS_INLINE value rzext() const { static_assert(NewBits >= Bits, "rzext() may not decrease width"); value result; @@ -172,6 +192,7 @@ struct value : public expr_base> { // Bit blit operation, i.e. a partial read-modify-write. template + CXXRTL_ALWAYS_INLINE value blit(const value &source) const { static_assert(Stop >= Start, "blit() may not reverse bit order"); constexpr chunk::type start_mask = ~(chunk::mask << (Start % chunk::bits)); @@ -196,6 +217,7 @@ struct value : public expr_base> { // than the operand. In C++17 these can be replaced with `if constexpr`. 
template struct zext_cast { + CXXRTL_ALWAYS_INLINE value operator()(const value &val) { return val.template zext(); } @@ -203,6 +225,7 @@ struct value : public expr_base> { template struct zext_cast::type> { + CXXRTL_ALWAYS_INLINE value operator()(const value &val) { return val.template trunc(); } @@ -210,6 +233,7 @@ struct value : public expr_base> { template struct sext_cast { + CXXRTL_ALWAYS_INLINE value operator()(const value &val) { return val.template sext(); } @@ -217,17 +241,20 @@ struct value : public expr_base> { template struct sext_cast::type> { + CXXRTL_ALWAYS_INLINE value operator()(const value &val) { return val.template trunc(); } }; template + CXXRTL_ALWAYS_INLINE value zcast() const { return zext_cast()(*this); } template + CXXRTL_ALWAYS_INLINE value scast() const { return sext_cast()(*this); } @@ -439,12 +466,14 @@ struct slice_expr : public expr_base> { slice_expr(T &expr) : expr(expr) {} slice_expr(const slice_expr &) = delete; + CXXRTL_ALWAYS_INLINE operator value() const { return static_cast &>(expr) .template rtrunc() .template trunc(); } + CXXRTL_ALWAYS_INLINE slice_expr &operator=(const value &rhs) { // Generic partial assignment implemented using a read-modify-write operation on the sliced expression. expr = static_cast &>(expr) @@ -453,6 +482,7 @@ struct slice_expr : public expr_base> { } // A helper that forces the cast to value<>, which allows deduction to work. 
+ CXXRTL_ALWAYS_INLINE value val() const { return static_cast &>(*this); } @@ -469,6 +499,7 @@ struct concat_expr : public expr_base> { concat_expr(T &ms_expr, U &ls_expr) : ms_expr(ms_expr), ls_expr(ls_expr) {} concat_expr(const concat_expr &) = delete; + CXXRTL_ALWAYS_INLINE operator value() const { value ms_shifted = static_cast &>(ms_expr) .template rzext(); @@ -477,6 +508,7 @@ struct concat_expr : public expr_base> { return ms_shifted.bit_or(ls_extended); } + CXXRTL_ALWAYS_INLINE concat_expr &operator=(const value &rhs) { ms_expr = rhs.template rtrunc(); ls_expr = rhs.template trunc(); @@ -484,6 +516,7 @@ struct concat_expr : public expr_base> { } // A helper that forces the cast to value<>, which allows deduction to work. + CXXRTL_ALWAYS_INLINE value val() const { return static_cast &>(*this); } @@ -508,21 +541,25 @@ struct concat_expr : public expr_base> { template struct expr_base { template + CXXRTL_ALWAYS_INLINE slice_expr slice() const { return {*static_cast(this)}; } template + CXXRTL_ALWAYS_INLINE slice_expr slice() { return {*static_cast(this)}; } template + CXXRTL_ALWAYS_INLINE concat_expr::type> concat(const U &other) const { return {*static_cast(this), other}; } template + CXXRTL_ALWAYS_INLINE concat_expr::type> concat(U &&other) { return {*static_cast(this), other}; } @@ -851,271 +888,322 @@ using namespace cxxrtl; // std::max isn't constexpr until C++14 for no particular reason (it's an oversight), so we define our own. template +CXXRTL_ALWAYS_INLINE constexpr T max(const T &a, const T &b) { return a > b ? a : b; } // Logic operations template +CXXRTL_ALWAYS_INLINE value logic_not(const value &a) { return value { a ? 0u : 1u }; } template +CXXRTL_ALWAYS_INLINE value logic_and(const value &a, const value &b) { return value { (bool(a) & bool(b)) ? 1u : 0u }; } template +CXXRTL_ALWAYS_INLINE value logic_or(const value &a, const value &b) { return value { (bool(a) | bool(b)) ? 
1u : 0u }; } // Reduction operations template +CXXRTL_ALWAYS_INLINE value reduce_and(const value &a) { return value { a.bit_not().is_zero() ? 1u : 0u }; } template +CXXRTL_ALWAYS_INLINE value reduce_or(const value &a) { return value { a ? 1u : 0u }; } template +CXXRTL_ALWAYS_INLINE value reduce_xor(const value &a) { return value { (a.ctpop() % 2) ? 1u : 0u }; } template +CXXRTL_ALWAYS_INLINE value reduce_xnor(const value &a) { return value { (a.ctpop() % 2) ? 0u : 1u }; } template +CXXRTL_ALWAYS_INLINE value reduce_bool(const value &a) { return value { a ? 1u : 0u }; } // Bitwise operations template +CXXRTL_ALWAYS_INLINE value not_u(const value &a) { return a.template zcast().bit_not(); } template +CXXRTL_ALWAYS_INLINE value not_s(const value &a) { return a.template scast().bit_not(); } template +CXXRTL_ALWAYS_INLINE value and_uu(const value &a, const value &b) { return a.template zcast().bit_and(b.template zcast()); } template +CXXRTL_ALWAYS_INLINE value and_ss(const value &a, const value &b) { return a.template scast().bit_and(b.template scast()); } template +CXXRTL_ALWAYS_INLINE value or_uu(const value &a, const value &b) { return a.template zcast().bit_or(b.template zcast()); } template +CXXRTL_ALWAYS_INLINE value or_ss(const value &a, const value &b) { return a.template scast().bit_or(b.template scast()); } template +CXXRTL_ALWAYS_INLINE value xor_uu(const value &a, const value &b) { return a.template zcast().bit_xor(b.template zcast()); } template +CXXRTL_ALWAYS_INLINE value xor_ss(const value &a, const value &b) { return a.template scast().bit_xor(b.template scast()); } template +CXXRTL_ALWAYS_INLINE value xnor_uu(const value &a, const value &b) { return a.template zcast().bit_xor(b.template zcast()).bit_not(); } template +CXXRTL_ALWAYS_INLINE value xnor_ss(const value &a, const value &b) { return a.template scast().bit_xor(b.template scast()).bit_not(); } template +CXXRTL_ALWAYS_INLINE value shl_uu(const value &a, const value &b) { return a.template 
zcast().template shl(b); } template +CXXRTL_ALWAYS_INLINE value shl_su(const value &a, const value &b) { return a.template scast().template shl(b); } template +CXXRTL_ALWAYS_INLINE value sshl_uu(const value &a, const value &b) { return a.template zcast().template shl(b); } template +CXXRTL_ALWAYS_INLINE value sshl_su(const value &a, const value &b) { return a.template scast().template shl(b); } template +CXXRTL_ALWAYS_INLINE value shr_uu(const value &a, const value &b) { return a.template shr(b).template zcast(); } template +CXXRTL_ALWAYS_INLINE value shr_su(const value &a, const value &b) { return a.template shr(b).template scast(); } template +CXXRTL_ALWAYS_INLINE value sshr_uu(const value &a, const value &b) { return a.template shr(b).template zcast(); } template +CXXRTL_ALWAYS_INLINE value sshr_su(const value &a, const value &b) { return a.template sshr(b).template scast(); } template +CXXRTL_ALWAYS_INLINE value shift_uu(const value &a, const value &b) { return shr_uu(a, b); } template +CXXRTL_ALWAYS_INLINE value shift_su(const value &a, const value &b) { return shr_su(a, b); } template +CXXRTL_ALWAYS_INLINE value shift_us(const value &a, const value &b) { return b.is_neg() ? shl_uu(a, b.template sext().neg()) : shr_uu(a, b); } template +CXXRTL_ALWAYS_INLINE value shift_ss(const value &a, const value &b) { return b.is_neg() ? 
shl_su(a, b.template sext().neg()) : shr_su(a, b); } template +CXXRTL_ALWAYS_INLINE value shiftx_uu(const value &a, const value &b) { return shift_uu(a, b); } template +CXXRTL_ALWAYS_INLINE value shiftx_su(const value &a, const value &b) { return shift_su(a, b); } template +CXXRTL_ALWAYS_INLINE value shiftx_us(const value &a, const value &b) { return shift_us(a, b); } template +CXXRTL_ALWAYS_INLINE value shiftx_ss(const value &a, const value &b) { return shift_ss(a, b); } // Comparison operations template +CXXRTL_ALWAYS_INLINE value eq_uu(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value{ a.template zext() == b.template zext() ? 1u : 0u }; } template +CXXRTL_ALWAYS_INLINE value eq_ss(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value{ a.template sext() == b.template sext() ? 1u : 0u }; } template +CXXRTL_ALWAYS_INLINE value ne_uu(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value{ a.template zext() != b.template zext() ? 1u : 0u }; } template +CXXRTL_ALWAYS_INLINE value ne_ss(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value{ a.template sext() != b.template sext() ? 1u : 0u }; } template +CXXRTL_ALWAYS_INLINE value eqx_uu(const value &a, const value &b) { return eq_uu(a, b); } template +CXXRTL_ALWAYS_INLINE value eqx_ss(const value &a, const value &b) { return eq_ss(a, b); } template +CXXRTL_ALWAYS_INLINE value nex_uu(const value &a, const value &b) { return ne_uu(a, b); } template +CXXRTL_ALWAYS_INLINE value nex_ss(const value &a, const value &b) { return ne_ss(a, b); } template +CXXRTL_ALWAYS_INLINE value gt_uu(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value { b.template zext().ucmp(a.template zext()) ? 
1u : 0u }; } template +CXXRTL_ALWAYS_INLINE value gt_ss(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value { b.template sext().scmp(a.template sext()) ? 1u : 0u }; } template +CXXRTL_ALWAYS_INLINE value ge_uu(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value { !a.template zext().ucmp(b.template zext()) ? 1u : 0u }; } template +CXXRTL_ALWAYS_INLINE value ge_ss(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value { !a.template sext().scmp(b.template sext()) ? 1u : 0u }; } template +CXXRTL_ALWAYS_INLINE value lt_uu(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value { a.template zext().ucmp(b.template zext()) ? 1u : 0u }; } template +CXXRTL_ALWAYS_INLINE value lt_ss(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value { a.template sext().scmp(b.template sext()) ? 1u : 0u }; } template +CXXRTL_ALWAYS_INLINE value le_uu(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value { !b.template zext().ucmp(a.template zext()) ? 1u : 0u }; } template +CXXRTL_ALWAYS_INLINE value le_ss(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value { !b.template sext().scmp(a.template sext()) ? 
1u : 0u }; @@ -1123,46 +1211,55 @@ value le_ss(const value &a, const value &b) { // Arithmetic operations template +CXXRTL_ALWAYS_INLINE value pos_u(const value &a) { return a.template zcast(); } template +CXXRTL_ALWAYS_INLINE value pos_s(const value &a) { return a.template scast(); } template +CXXRTL_ALWAYS_INLINE value neg_u(const value &a) { return a.template zcast().neg(); } template +CXXRTL_ALWAYS_INLINE value neg_s(const value &a) { return a.template scast().neg(); } template +CXXRTL_ALWAYS_INLINE value add_uu(const value &a, const value &b) { return a.template zcast().add(b.template zcast()); } template +CXXRTL_ALWAYS_INLINE value add_ss(const value &a, const value &b) { return a.template scast().add(b.template scast()); } template +CXXRTL_ALWAYS_INLINE value sub_uu(const value &a, const value &b) { return a.template zcast().sub(b.template zcast()); } template +CXXRTL_ALWAYS_INLINE value sub_ss(const value &a, const value &b) { return a.template scast().sub(b.template scast()); } template +CXXRTL_ALWAYS_INLINE value mul_uu(const value &a, const value &b) { value product; value multiplicand = a.template zcast(); @@ -1180,6 +1277,7 @@ value mul_uu(const value &a, const value &b) { } template +CXXRTL_ALWAYS_INLINE value mul_ss(const value &a, const value &b) { value ub = b.template sext(); if (ub.is_neg()) ub = ub.neg(); @@ -1188,6 +1286,7 @@ value mul_ss(const value &a, const value &b) { } template +CXXRTL_ALWAYS_INLINE std::pair, value> divmod_uu(const value &a, const value &b) { constexpr size_t Bits = max(BitsY, max(BitsA, BitsB)); value quotient; @@ -1209,6 +1308,7 @@ std::pair, value> divmod_uu(const value &a, const val } template +CXXRTL_ALWAYS_INLINE std::pair, value> divmod_ss(const value &a, const value &b) { value ua = a.template sext(); value ub = b.template sext(); @@ -1222,21 +1322,25 @@ std::pair, value> divmod_ss(const value &a, const val } template +CXXRTL_ALWAYS_INLINE value div_uu(const value &a, const value &b) { return divmod_uu(a, 
b).first; } template +CXXRTL_ALWAYS_INLINE value div_ss(const value &a, const value &b) { return divmod_ss(a, b).first; } template +CXXRTL_ALWAYS_INLINE value mod_uu(const value &a, const value &b) { return divmod_uu(a, b).second; } template +CXXRTL_ALWAYS_INLINE value mod_ss(const value &a, const value &b) { return divmod_ss(a, b).second; }