/*
 * yosys -- Yosys Open SYnthesis Suite
 *
 * Copyright (C) 2019-2020 whitequark
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 */

// This file is included by the designs generated with `write_cxxrtl`. It is not used in Yosys itself.
//
// The CXXRTL support library implements compile-time specialized arbitrary-width arithmetic, and provides
// composite lvalues made out of bit slices and concatenations of lvalues. This allows the `write_cxxrtl` pass
// to perform a straightforward translation of RTLIL structures to readable C++, relying on the C++ compiler
// to unwrap the abstraction and generate efficient code.

#ifndef CXXRTL_H
#define CXXRTL_H

#include <cstddef>
#include <cstdint>
#include <cassert>
#include <limits>
#include <type_traits>
#include <tuple>
#include <vector>
#include <map>
#include <algorithm>
#include <memory>
#include <functional>
#include <sstream>

#include <backends/cxxrtl/cxxrtl_capi.h>

#ifndef __has_attribute
#	define __has_attribute(x) 0
#endif

// CXXRTL essentially uses the C++ compiler as a hygienic macro engine that feeds an instruction selector.
// It generates a lot of specialized template functions with relatively large bodies that, when inlined
// into the caller and (for those with loops) unrolled, often expose many new optimization opportunities.
// Because of this, most of the CXXRTL runtime must be always inlined for best performance.
#if __has_attribute(always_inline)
#define CXXRTL_ALWAYS_INLINE inline __attribute__((__always_inline__))
#else
#define CXXRTL_ALWAYS_INLINE inline
#endif

// Conversely, some functions in the generated code are extremely large yet very cold, with both of these
// properties being extreme enough to confuse C++ compilers into spending pathological amounts of time
// on a futile (the code becomes worse) attempt to optimize the least important parts of code.
#if __has_attribute(optnone)
#define CXXRTL_EXTREMELY_COLD __attribute__((__optnone__))
#elif __has_attribute(optimize)
#define CXXRTL_EXTREMELY_COLD __attribute__((__optimize__(0)))
#else
#define CXXRTL_EXTREMELY_COLD
#endif

// CXXRTL uses assert() to check for C++ contract violations (which may result in e.g. undefined behavior
// of the simulation code itself), and CXXRTL_ASSERT to check for RTL contract violations (which may at
// most result in undefined simulation results).
//
// Though by default, CXXRTL_ASSERT() expands to assert(), it may be overridden e.g. when integrating
// the simulation into another process that should survive violating RTL contracts.
#ifndef CXXRTL_ASSERT
#ifndef CXXRTL_NDEBUG
#define CXXRTL_ASSERT(x) assert(x)
#else
#define CXXRTL_ASSERT(x)
#endif
#endif

namespace cxxrtl {

// All arbitrary-width values in CXXRTL are backed by arrays of unsigned integers called chunks. The chunk size
// is the same regardless of the value width to simplify manipulating values via FFI interfaces, e.g. driving
// and introspecting the simulation in Python.
//
// It is practical to use chunk sizes between 32 bits and the platform register size, because when arithmetic on
// narrower integer types is legalized by the C++ compiler, it inserts code to clear the high bits of the register.
// However, (a) most of our operations do not change those bits in the first place because of invariants that are
// invisible to the compiler, and (b) we often operate on non-power-of-2 values and have to clear the high bits anyway.
// Therefore, using relatively wide chunks and clearing the high bits explicitly and only when we know they may be
// clobbered results in simpler generated code.
typedef uint32_t chunk_t;
typedef uint64_t wide_chunk_t;

template<typename T>
struct chunk_traits {
	static_assert(std::is_integral<T>::value && std::is_unsigned<T>::value,
	              "chunk type must be an unsigned integral type");
	using type = T;
	static constexpr size_t bits = std::numeric_limits<T>::digits;
	static constexpr T mask = std::numeric_limits<T>::max();
};

template<class T>
struct expr_base;

template<size_t Bits>
struct value : public expr_base<value<Bits>> {
	static constexpr size_t bits = Bits;

	using chunk = chunk_traits<chunk_t>;
	static constexpr chunk::type msb_mask = (Bits % chunk::bits == 0) ? chunk::mask
		: chunk::mask >> (chunk::bits - (Bits % chunk::bits));

	static constexpr size_t chunks = (Bits + chunk::bits - 1) / chunk::bits;
	chunk::type data[chunks] = {};

	value() = default;
	template<typename... Init>
	explicit constexpr value(Init ...init) : data{init...} {}

	value(const value<Bits> &) = default;
	value<Bits> &operator=(const value<Bits> &) = default;

	value(value<Bits> &&) = default;
	value<Bits> &operator=(value<Bits> &&) = default;

	// A (no-op) helper that forces the cast to value<>.
	CXXRTL_ALWAYS_INLINE
	const value<Bits> &val() const {
		return *this;
	}

	std::string str() const {
		std::stringstream ss;
		ss << *this;
		return ss.str();
	}

	// Conversion operations.
	//
	// These functions ensure that a conversion is never out of range, and should be always used, if at all
	// possible, instead of direct manipulation of the `data` member. For very large types, .slice() and
	// .concat() can be used to split them into more manageable parts.
	template<class IntegerT>
	CXXRTL_ALWAYS_INLINE
	IntegerT get() const {
		static_assert(std::numeric_limits<IntegerT>::is_integer && !std::numeric_limits<IntegerT>::is_signed,
		              "get<T>() requires T to be an unsigned integral type");
		static_assert(std::numeric_limits<IntegerT>::digits >= Bits,
		              "get<T>() requires T to be at least as wide as the value is");
		IntegerT result = 0;
		for (size_t n = 0; n < chunks; n++)
			result |= IntegerT(data[n]) << (n * chunk::bits);
		return result;
	}

	template<class IntegerT>
	CXXRTL_ALWAYS_INLINE
	void set(IntegerT other) {
		static_assert(std::numeric_limits<IntegerT>::is_integer && !std::numeric_limits<IntegerT>::is_signed,
		              "set<T>() requires T to be an unsigned integral type");
		static_assert(std::numeric_limits<IntegerT>::digits >= Bits,
		              "set<T>() requires the value to be at least as wide as T is");
		for (size_t n = 0; n < chunks; n++)
			data[n] = (other >> (n * chunk::bits)) & chunk::mask;
	}

	// Operations with compile-time parameters.
	//
	// These operations are used to implement slicing, concatenation, and blitting.
	// The trunc, zext and sext operations add or remove most significant bits (i.e. on the left);
	// the rtrunc and rzext operations add or remove least significant bits (i.e. on the right).
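	//
	// For example (an informal sketch; the widths and constants below are purely illustrative):
	//
	//   value<8>  v { 0xa5u };
	//   value<4>  lo = v.trunc<4>();   // 0x5:   keeps the 4 least significant bits
	//   value<12> zx = v.zext<12>();   // 0x0a5: zero-extends on the left
	//   value<12> sx = v.sext<12>();   // 0xfa5: sign-extends on the left, since bit 7 of v is set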
template CXXRTL_ALWAYS_INLINE value trunc() const { static_assert(NewBits <= Bits, "trunc() may not increase width"); value result; for (size_t n = 0; n < result.chunks; n++) result.data[n] = data[n]; result.data[result.chunks - 1] &= result.msb_mask; return result; } template CXXRTL_ALWAYS_INLINE value zext() const { static_assert(NewBits >= Bits, "zext() may not decrease width"); value result; for (size_t n = 0; n < chunks; n++) result.data[n] = data[n]; return result; } template CXXRTL_ALWAYS_INLINE value sext() const { static_assert(NewBits >= Bits, "sext() may not decrease width"); value result; for (size_t n = 0; n < chunks; n++) result.data[n] = data[n]; if (is_neg()) { result.data[chunks - 1] |= ~msb_mask; for (size_t n = chunks; n < result.chunks; n++) result.data[n] = chunk::mask; result.data[result.chunks - 1] &= result.msb_mask; } return result; } template CXXRTL_ALWAYS_INLINE value rtrunc() const { static_assert(NewBits <= Bits, "rtrunc() may not increase width"); value result; constexpr size_t shift_chunks = (Bits - NewBits) / chunk::bits; constexpr size_t shift_bits = (Bits - NewBits) % chunk::bits; chunk::type carry = 0; if (shift_chunks + result.chunks < chunks) { carry = (shift_bits == 0) ? 0 : data[shift_chunks + result.chunks] << (chunk::bits - shift_bits); } for (size_t n = result.chunks; n > 0; n--) { result.data[n - 1] = carry | (data[shift_chunks + n - 1] >> shift_bits); carry = (shift_bits == 0) ? 0 : data[shift_chunks + n - 1] << (chunk::bits - shift_bits); } return result; } template CXXRTL_ALWAYS_INLINE value rzext() const { static_assert(NewBits >= Bits, "rzext() may not decrease width"); value result; constexpr size_t shift_chunks = (NewBits - Bits) / chunk::bits; constexpr size_t shift_bits = (NewBits - Bits) % chunk::bits; chunk::type carry = 0; for (size_t n = 0; n < chunks; n++) { result.data[shift_chunks + n] = (data[n] << shift_bits) | carry; carry = (shift_bits == 0) ? 0 : data[n] >> (chunk::bits - shift_bits); } if (shift_chunks + chunks < result.chunks) result.data[shift_chunks + chunks] = carry; return result; } // Bit blit operation, i.e. a partial read-modify-write. template CXXRTL_ALWAYS_INLINE value blit(const value &source) const { static_assert(Stop >= Start, "blit() may not reverse bit order"); constexpr chunk::type start_mask = ~(chunk::mask << (Start % chunk::bits)); constexpr chunk::type stop_mask = (Stop % chunk::bits + 1 == chunk::bits) ? 0 : (chunk::mask << (Stop % chunk::bits + 1)); value masked = *this; if (Start / chunk::bits == Stop / chunk::bits) { masked.data[Start / chunk::bits] &= stop_mask | start_mask; } else { masked.data[Start / chunk::bits] &= start_mask; for (size_t n = Start / chunk::bits + 1; n < Stop / chunk::bits; n++) masked.data[n] = 0; masked.data[Stop / chunk::bits] &= stop_mask; } value shifted = source .template rzext() .template zext(); return masked.bit_or(shifted); } // Helpers for selecting extending or truncating operation depending on whether the result is wider or narrower // than the operand. In C++17 these can be replaced with `if constexpr`. 
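	//
	// For illustration only: under C++17, the zcast()/scast() helpers below could be written roughly as
	// (a hypothetical sketch, not the actual code in this header):
	//
	//   template<size_t NewBits>
	//   value<NewBits> zcast() const {
	//       if constexpr (NewBits >= Bits)
	//           return zext<NewBits>();
	//       else
	//           return trunc<NewBits>();
	//   }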
template struct zext_cast { CXXRTL_ALWAYS_INLINE value operator()(const value &val) { return val.template zext(); } }; template struct zext_cast::type> { CXXRTL_ALWAYS_INLINE value operator()(const value &val) { return val.template trunc(); } }; template struct sext_cast { CXXRTL_ALWAYS_INLINE value operator()(const value &val) { return val.template sext(); } }; template struct sext_cast::type> { CXXRTL_ALWAYS_INLINE value operator()(const value &val) { return val.template trunc(); } }; template CXXRTL_ALWAYS_INLINE value zcast() const { return zext_cast()(*this); } template CXXRTL_ALWAYS_INLINE value scast() const { return sext_cast()(*this); } // Bit replication is far more efficient than the equivalent concatenation. template CXXRTL_ALWAYS_INLINE value repeat() const { static_assert(Bits == 1, "repeat() is implemented only for 1-bit values"); return *this ? value().bit_not() : value(); } // Operations with run-time parameters (offsets, amounts, etc). // // These operations are used for computations. bool bit(size_t offset) const { return data[offset / chunk::bits] & (1 << (offset % chunk::bits)); } void set_bit(size_t offset, bool value = true) { size_t offset_chunks = offset / chunk::bits; size_t offset_bits = offset % chunk::bits; data[offset_chunks] &= ~(1 << offset_bits); data[offset_chunks] |= value ? 1 << offset_bits : 0; } explicit operator bool() const { return !is_zero(); } bool is_zero() const { for (size_t n = 0; n < chunks; n++) if (data[n] != 0) return false; return true; } bool is_neg() const { return data[chunks - 1] & (1 << ((Bits - 1) % chunk::bits)); } bool operator ==(const value &other) const { for (size_t n = 0; n < chunks; n++) if (data[n] != other.data[n]) return false; return true; } bool operator !=(const value &other) const { return !(*this == other); } value bit_not() const { value result; for (size_t n = 0; n < chunks; n++) result.data[n] = ~data[n]; result.data[chunks - 1] &= msb_mask; return result; } value bit_and(const value &other) const { value result; for (size_t n = 0; n < chunks; n++) result.data[n] = data[n] & other.data[n]; return result; } value bit_or(const value &other) const { value result; for (size_t n = 0; n < chunks; n++) result.data[n] = data[n] | other.data[n]; return result; } value bit_xor(const value &other) const { value result; for (size_t n = 0; n < chunks; n++) result.data[n] = data[n] ^ other.data[n]; return result; } value update(const value &val, const value &mask) const { return bit_and(mask.bit_not()).bit_or(val.bit_and(mask)); } template value shl(const value &amount) const { // Ensure our early return is correct by prohibiting values larger than 4 Gbit. static_assert(Bits <= chunk::mask, "shl() of unreasonably large values is not supported"); // Detect shifts definitely large than Bits early. for (size_t n = 1; n < amount.chunks; n++) if (amount.data[n] != 0) return {}; // Past this point we can use the least significant chunk as the shift size. size_t shift_chunks = amount.data[0] / chunk::bits; size_t shift_bits = amount.data[0] % chunk::bits; if (shift_chunks >= chunks) return {}; value result; chunk::type carry = 0; for (size_t n = 0; n < chunks - shift_chunks; n++) { result.data[shift_chunks + n] = (data[n] << shift_bits) | carry; carry = (shift_bits == 0) ? 0 : data[n] >> (chunk::bits - shift_bits); } return result; } template value shr(const value &amount) const { // Ensure our early return is correct by prohibiting values larger than 4 Gbit. 
static_assert(Bits <= chunk::mask, "shr() of unreasonably large values is not supported"); // Detect shifts definitely large than Bits early. for (size_t n = 1; n < amount.chunks; n++) if (amount.data[n] != 0) return {}; // Past this point we can use the least significant chunk as the shift size. size_t shift_chunks = amount.data[0] / chunk::bits; size_t shift_bits = amount.data[0] % chunk::bits; if (shift_chunks >= chunks) return {}; value result; chunk::type carry = 0; for (size_t n = 0; n < chunks - shift_chunks; n++) { result.data[chunks - shift_chunks - 1 - n] = carry | (data[chunks - 1 - n] >> shift_bits); carry = (shift_bits == 0) ? 0 : data[chunks - 1 - n] << (chunk::bits - shift_bits); } if (Signed && is_neg()) { size_t top_chunk_idx = (Bits - shift_bits) / chunk::bits; size_t top_chunk_bits = (Bits - shift_bits) % chunk::bits; for (size_t n = top_chunk_idx + 1; n < chunks; n++) result.data[n] = chunk::mask; if (shift_bits != 0) result.data[top_chunk_idx] |= chunk::mask << top_chunk_bits; } return result; } template value sshr(const value &amount) const { return shr(amount); } template value bmux(const value &sel) const { static_assert(ResultBits << SelBits == Bits, "invalid sizes used in bmux()"); size_t amount = sel.data[0] * ResultBits; size_t shift_chunks = amount / chunk::bits; size_t shift_bits = amount % chunk::bits; value result; chunk::type carry = 0; if (ResultBits % chunk::bits + shift_bits > chunk::bits) carry = data[result.chunks + shift_chunks] << (chunk::bits - shift_bits); for (size_t n = 0; n < result.chunks; n++) { result.data[result.chunks - 1 - n] = carry | (data[result.chunks + shift_chunks - 1 - n] >> shift_bits); carry = (shift_bits == 0) ? 0 : data[result.chunks + shift_chunks - 1 - n] << (chunk::bits - shift_bits); } return result; } template value demux(const value &sel) const { static_assert(Bits << SelBits == ResultBits, "invalid sizes used in demux()"); size_t amount = sel.data[0] * Bits; size_t shift_chunks = amount / chunk::bits; size_t shift_bits = amount % chunk::bits; value result; chunk::type carry = 0; for (size_t n = 0; n < chunks; n++) { result.data[shift_chunks + n] = (data[n] << shift_bits) | carry; carry = (shift_bits == 0) ? 0 : data[n] >> (chunk::bits - shift_bits); } if (Bits % chunk::bits + shift_bits > chunk::bits) result.data[shift_chunks + chunks] = carry; return result; } size_t ctpop() const { size_t count = 0; for (size_t n = 0; n < chunks; n++) { // This loop implements the population count idiom as recognized by LLVM and GCC. for (chunk::type x = data[n]; x != 0; count++) x = x & (x - 1); } return count; } size_t ctlz() const { size_t count = 0; for (size_t n = 0; n < chunks; n++) { chunk::type x = data[chunks - 1 - n]; if (x == 0) { count += (n == 0 ? Bits % chunk::bits : chunk::bits); } else { // This loop implements the find first set idiom as recognized by LLVM. for (; x != 0; count++) x >>= 1; } } return count; } size_t chunks_used() const { for (size_t n = chunks; n > 0; n--) { if (data[n - 1] != 0) return n; } return 0; } template std::pair, bool /*CarryOut*/> alu(const value &other) const { value result; bool carry = CarryIn; for (size_t n = 0; n < result.chunks; n++) { result.data[n] = data[n] + (Invert ? 
~other.data[n] : other.data[n]) + carry; if (result.chunks - 1 == n) result.data[result.chunks - 1] &= result.msb_mask; carry = (result.data[n] < data[n]) || (result.data[n] == data[n] && carry); } return {result, carry}; } value add(const value &other) const { return alu(other).first; } value sub(const value &other) const { return alu(other).first; } value neg() const { return value { 0u }.sub(*this); } bool ucmp(const value &other) const { bool carry; std::tie(std::ignore, carry) = alu(other); return !carry; // a.ucmp(b) ≡ a u< b } bool scmp(const value &other) const { value result; bool carry; std::tie(result, carry) = alu(other); bool overflow = (is_neg() == !other.is_neg()) && (is_neg() != result.is_neg()); return result.is_neg() ^ overflow; // a.scmp(b) ≡ a s< b } template value mul(const value &other) const { value result; wide_chunk_t wide_result[result.chunks + 1] = {}; for (size_t n = 0; n < chunks; n++) { for (size_t m = 0; m < chunks && n + m < result.chunks; m++) { wide_result[n + m] += wide_chunk_t(data[n]) * wide_chunk_t(other.data[m]); wide_result[n + m + 1] += wide_result[n + m] >> chunk::bits; wide_result[n + m] &= chunk::mask; } } for (size_t n = 0; n < result.chunks; n++) { result.data[n] = wide_result[n]; } result.data[result.chunks - 1] &= result.msb_mask; return result; } // parallel to BigUnsigned::divideWithRemainder; quotient is stored in q, // *this is left with the remainder. See that function for commentary describing // how/why this works. void divideWithRemainder(const value &b, value &q) { assert(this != &q); if (this == &b || &q == &b) { value tmpB(b); divideWithRemainder(tmpB, q); return; } q = value {0u}; size_t blen = b.chunks_used(); if (blen == 0) { return; } size_t len = chunks_used(); if (len < blen) { return; } size_t i, j, k; size_t i2; chunk_t temp; bool borrowIn, borrowOut; size_t origLen = len; len++; chunk::type blk[len]; std::copy(data, data + origLen, blk); blk[origLen] = 0; chunk::type subtractBuf[len]; std::fill(subtractBuf, subtractBuf + len, 0); size_t qlen = origLen - blen + 1; i = qlen; while (i > 0) { i--; i2 = chunk::bits; while (i2 > 0) { i2--; for (j = 0, k = i, borrowIn = false; j <= blen; j++, k++) { temp = blk[k] - getShiftedBlock(b, j, i2); borrowOut = (temp > blk[k]); if (borrowIn) { borrowOut |= (temp == 0); temp--; } subtractBuf[k] = temp; borrowIn = borrowOut; } for (; k < origLen && borrowIn; k++) { borrowIn = (blk[k] == 0); subtractBuf[k] = blk[k] - 1; } if (!borrowIn) { q.data[i] |= (chunk::type(1) << i2); while (k > i) { k--; blk[k] = subtractBuf[k]; } } } } std::copy(blk, blk + origLen, data); } static chunk::type getShiftedBlock(const value &num, size_t x, size_t y) { chunk::type part1 = (x == 0 || y == 0) ? 0 : (num.data[x - 1] >> (chunk::bits - y)); chunk::type part2 = (x == num.chunks) ? 0 : (num.data[x] << y); return part1 | part2; } }; // Expression template for a slice, usable as lvalue or rvalue, and composable with other expression templates here. 
template struct slice_expr : public expr_base> { static_assert(Stop >= Start, "slice_expr() may not reverse bit order"); static_assert(Start < T::bits && Stop < T::bits, "slice_expr() must be within bounds"); static constexpr size_t bits = Stop - Start + 1; T &expr; slice_expr(T &expr) : expr(expr) {} slice_expr(const slice_expr &) = delete; CXXRTL_ALWAYS_INLINE operator value() const { return static_cast &>(expr) .template rtrunc() .template trunc(); } CXXRTL_ALWAYS_INLINE slice_expr &operator=(const value &rhs) { // Generic partial assignment implemented using a read-modify-write operation on the sliced expression. expr = static_cast &>(expr) .template blit(rhs); return *this; } // A helper that forces the cast to value<>, which allows deduction to work. CXXRTL_ALWAYS_INLINE value val() const { return static_cast &>(*this); } }; // Expression template for a concatenation, usable as lvalue or rvalue, and composable with other expression templates here. template struct concat_expr : public expr_base> { static constexpr size_t bits = T::bits + U::bits; T &ms_expr; U &ls_expr; concat_expr(T &ms_expr, U &ls_expr) : ms_expr(ms_expr), ls_expr(ls_expr) {} concat_expr(const concat_expr &) = delete; CXXRTL_ALWAYS_INLINE operator value() const { value ms_shifted = static_cast &>(ms_expr) .template rzext(); value ls_extended = static_cast &>(ls_expr) .template zext(); return ms_shifted.bit_or(ls_extended); } CXXRTL_ALWAYS_INLINE concat_expr &operator=(const value &rhs) { ms_expr = rhs.template rtrunc(); ls_expr = rhs.template trunc(); return *this; } // A helper that forces the cast to value<>, which allows deduction to work. CXXRTL_ALWAYS_INLINE value val() const { return static_cast &>(*this); } }; // Base class for expression templates, providing helper methods for operations that are valid on both rvalues and lvalues. // // Note that expression objects (slices and concatenations) constructed in this way should NEVER be captured because // they refer to temporaries that will, in general, only live until the end of the statement. For example, both of // these snippets perform use-after-free: // // const auto &a = val.slice<7,0>().slice<1>(); // value<1> b = a; // // auto &&c = val.slice<7,0>().slice<1>(); // c = value<1>{1u}; // // An easy way to write code using slices and concatenations safely is to follow two simple rules: // * Never explicitly name any type except `value` or `const value &`. // * Never use a `const auto &` or `auto &&` in any such expression. // Then, any code that compiles will be well-defined. 
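//
// For example, the following is a sketch of safe usage (hypothetical values, for illustration only):
//
//   value<8> v { 0x81u };
//   value<4> hi = v.slice<7,4>().val();          // read bits 7..4 of v
//   v.slice<3,0>() = value<4> { 0xfu };          // write bits 3..0 of v
//   value<9> w = v.concat(v.slice<0>()).val();   // 9-bit concatenation used as an rvalue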
template struct expr_base { template CXXRTL_ALWAYS_INLINE slice_expr slice() const { return {*static_cast(this)}; } template CXXRTL_ALWAYS_INLINE slice_expr slice() { return {*static_cast(this)}; } template CXXRTL_ALWAYS_INLINE concat_expr::type> concat(const U &other) const { return {*static_cast(this), other}; } template CXXRTL_ALWAYS_INLINE concat_expr::type> concat(U &&other) { return {*static_cast(this), other}; } }; template std::ostream &operator<<(std::ostream &os, const value &val) { auto old_flags = os.flags(std::ios::right); auto old_width = os.width(0); auto old_fill = os.fill('0'); os << val.bits << '\'' << std::hex; for (size_t n = val.chunks - 1; n != (size_t)-1; n--) { if (n == val.chunks - 1 && Bits % value::chunk::bits != 0) os.width((Bits % value::chunk::bits + 3) / 4); else os.width((value::chunk::bits + 3) / 4); os << val.data[n]; } os.fill(old_fill); os.width(old_width); os.flags(old_flags); return os; } template struct value_formatted { const value &val; bool character; bool justify_left; char padding; int width; int base; bool signed_; bool plus; value_formatted(const value &val, bool character, bool justify_left, char padding, int width, int base, bool signed_, bool plus) : val(val), character(character), justify_left(justify_left), padding(padding), width(width), base(base), signed_(signed_), plus(plus) {} value_formatted(const value_formatted &) = delete; value_formatted &operator=(const value_formatted &rhs) = delete; }; template std::ostream &operator<<(std::ostream &os, const value_formatted &vf) { value val = vf.val; std::string buf; // We might want to replace some of these bit() calls with direct // chunk access if it turns out to be slow enough to matter. if (!vf.character) { size_t width = Bits; if (vf.base != 10) { width = 0; for (size_t index = 0; index < Bits; index++) if (val.bit(index)) width = index + 1; } if (vf.base == 2) { for (size_t i = width; i > 0; i--) buf += (val.bit(i - 1) ? '1' : '0'); } else if (vf.base == 8 || vf.base == 16) { size_t step = (vf.base == 16) ? 4 : 3; for (size_t index = 0; index < width; index += step) { uint8_t value = val.bit(index) | (val.bit(index + 1) << 1) | (val.bit(index + 2) << 2); if (step == 4) value |= val.bit(index + 3) << 3; buf += "0123456789abcdef"[value]; } std::reverse(buf.begin(), buf.end()); } else if (vf.base == 10) { bool negative = vf.signed_ && val.is_neg(); if (negative) val = val.neg(); if (val.is_zero()) buf += '0'; while (!val.is_zero()) { value quotient; val.divideWithRemainder(value{10u}, quotient); buf += '0' + val.template trunc<(Bits > 4 ? 4 : Bits)>().val().template get(); val = quotient; } if (negative || vf.plus) buf += negative ? 
'-' : '+'; std::reverse(buf.begin(), buf.end()); } else assert(false); } else { buf.reserve(Bits/8); for (int i = 0; i < Bits; i += 8) { char ch = 0; for (int j = 0; j < 8 && i + j < int(Bits); j++) if (val.bit(i + j)) ch |= 1 << j; if (ch != 0) buf.append({ch}); } std::reverse(buf.begin(), buf.end()); } assert(vf.width == 0 || vf.padding != '\0'); if (!vf.justify_left && buf.size() < vf.width) { size_t pad_width = vf.width - buf.size(); if (vf.padding == '0' && (buf.front() == '+' || buf.front() == '-')) { os << buf.front(); buf.erase(0, 1); } os << std::string(pad_width, vf.padding); } os << buf; if (vf.justify_left && buf.size() < vf.width) os << std::string(vf.width - buf.size(), vf.padding); return os; } template struct wire { static constexpr size_t bits = Bits; value curr; value next; wire() = default; explicit constexpr wire(const value &init) : curr(init), next(init) {} template explicit constexpr wire(Init ...init) : curr{init...}, next{init...} {} // Copying and copy-assigning values is natural. If, however, a value is replaced with a wire, // e.g. because a module is built with a different optimization level, then existing code could // unintentionally copy a wire instead, which would create a subtle but serious bug. To make sure // this doesn't happen, prohibit copying and copy-assigning wires. wire(const wire &) = delete; wire &operator=(const wire &) = delete; wire(wire &&) = default; wire &operator=(wire &&) = default; template CXXRTL_ALWAYS_INLINE IntegerT get() const { return curr.template get(); } template CXXRTL_ALWAYS_INLINE void set(IntegerT other) { next.template set(other); } bool commit() { if (curr != next) { curr = next; return true; } return false; } }; template std::ostream &operator<<(std::ostream &os, const wire &val) { os << val.curr; return os; } template struct memory { const size_t depth; std::unique_ptr[]> data; explicit memory(size_t depth) : depth(depth), data(new value[depth]) {} memory(const memory &) = delete; memory &operator=(const memory &) = delete; memory(memory &&) = default; memory &operator=(memory &&other) { assert(depth == other.depth); data = std::move(other.data); write_queue = std::move(other.write_queue); return *this; } // An operator for direct memory reads. May be used at any time during the simulation. const value &operator [](size_t index) const { assert(index < depth); return data[index]; } // An operator for direct memory writes. May only be used before the simulation is started. If used // after the simulation is started, the design may malfunction. value &operator [](size_t index) { assert(index < depth); return data[index]; } // A simple way to make a writable memory would be to use an array of wires instead of an array of values. // However, there are two significant downsides to this approach: first, it has large overhead (2× space // overhead, and O(depth) time overhead during commit); second, it does not simplify handling write port // priorities. Although in principle write ports could be ordered or conditionally enabled in generated // code based on their priorities and selected addresses, the feedback arc set problem is computationally // expensive, and the heuristic based algorithms are not easily modified to guarantee (rather than prefer) // a particular write port evaluation order. // // The approach used here instead is to queue writes into a buffer during the eval phase, then perform // the writes during the commit phase in the priority order. 
This approach has low overhead, with both space // and time proportional to the amount of write ports. Because virtually every memory in a practical design // has at most two write ports, linear search is used on every write, being the fastest and simplest approach. struct write { size_t index; value val; value mask; int priority; }; std::vector write_queue; void update(size_t index, const value &val, const value &mask, int priority = 0) { assert(index < depth); // Queue up the write while keeping the queue sorted by priority. write_queue.insert( std::upper_bound(write_queue.begin(), write_queue.end(), priority, [](const int a, const write& b) { return a < b.priority; }), write { index, val, mask, priority }); } bool commit() { bool changed = false; for (const write &entry : write_queue) { value elem = data[entry.index]; elem = elem.update(entry.val, entry.mask); changed |= (data[entry.index] != elem); data[entry.index] = elem; } write_queue.clear(); return changed; } }; struct metadata { const enum { MISSING = 0, UINT = 1, SINT = 2, STRING = 3, DOUBLE = 4, } value_type; // In debug mode, using the wrong .as_*() function will assert. // In release mode, using the wrong .as_*() function will safely return a default value. const unsigned uint_value = 0; const signed sint_value = 0; const std::string string_value = ""; const double double_value = 0.0; metadata() : value_type(MISSING) {} metadata(unsigned value) : value_type(UINT), uint_value(value) {} metadata(signed value) : value_type(SINT), sint_value(value) {} metadata(const std::string &value) : value_type(STRING), string_value(value) {} metadata(const char *value) : value_type(STRING), string_value(value) {} metadata(double value) : value_type(DOUBLE), double_value(value) {} metadata(const metadata &) = default; metadata &operator=(const metadata &) = delete; unsigned as_uint() const { assert(value_type == UINT); return uint_value; } signed as_sint() const { assert(value_type == SINT); return sint_value; } const std::string &as_string() const { assert(value_type == STRING); return string_value; } double as_double() const { assert(value_type == DOUBLE); return double_value; } }; typedef std::map metadata_map; // Tag class to disambiguate values/wires and their aliases. struct debug_alias {}; // Tag declaration to disambiguate values and debug outlines. using debug_outline = ::_cxxrtl_outline; // This structure is intended for consumption via foreign function interfaces, like Python's ctypes. // Because of this it uses a C-style layout that is easy to parse rather than more idiomatic C++. // // To avoid violating strict aliasing rules, this structure has to be a subclass of the one used // in the C API, or it would not be possible to cast between the pointers to these. struct debug_item : ::cxxrtl_object { // Object types. enum : uint32_t { VALUE = CXXRTL_VALUE, WIRE = CXXRTL_WIRE, MEMORY = CXXRTL_MEMORY, ALIAS = CXXRTL_ALIAS, OUTLINE = CXXRTL_OUTLINE, }; // Object flags. 
enum : uint32_t { INPUT = CXXRTL_INPUT, OUTPUT = CXXRTL_OUTPUT, INOUT = CXXRTL_INOUT, DRIVEN_SYNC = CXXRTL_DRIVEN_SYNC, DRIVEN_COMB = CXXRTL_DRIVEN_COMB, UNDRIVEN = CXXRTL_UNDRIVEN, }; debug_item(const ::cxxrtl_object &object) : cxxrtl_object(object) {} template debug_item(value &item, size_t lsb_offset = 0, uint32_t flags_ = 0) { static_assert(sizeof(item) == value::chunks * sizeof(chunk_t), "value is not compatible with C layout"); type = VALUE; flags = flags_; width = Bits; lsb_at = lsb_offset; depth = 1; zero_at = 0; curr = item.data; next = item.data; outline = nullptr; } template debug_item(const value &item, size_t lsb_offset = 0) { static_assert(sizeof(item) == value::chunks * sizeof(chunk_t), "value is not compatible with C layout"); type = VALUE; flags = DRIVEN_COMB; width = Bits; lsb_at = lsb_offset; depth = 1; zero_at = 0; curr = const_cast(item.data); next = nullptr; outline = nullptr; } template debug_item(wire &item, size_t lsb_offset = 0, uint32_t flags_ = 0) { static_assert(sizeof(item.curr) == value::chunks * sizeof(chunk_t) && sizeof(item.next) == value::chunks * sizeof(chunk_t), "wire is not compatible with C layout"); type = WIRE; flags = flags_; width = Bits; lsb_at = lsb_offset; depth = 1; zero_at = 0; curr = item.curr.data; next = item.next.data; outline = nullptr; } template debug_item(memory &item, size_t zero_offset = 0) { static_assert(sizeof(item.data[0]) == value::chunks * sizeof(chunk_t), "memory is not compatible with C layout"); type = MEMORY; flags = 0; width = Width; lsb_at = 0; depth = item.depth; zero_at = zero_offset; curr = item.data ? item.data[0].data : nullptr; next = nullptr; outline = nullptr; } template debug_item(debug_alias, const value &item, size_t lsb_offset = 0) { static_assert(sizeof(item) == value::chunks * sizeof(chunk_t), "value is not compatible with C layout"); type = ALIAS; flags = DRIVEN_COMB; width = Bits; lsb_at = lsb_offset; depth = 1; zero_at = 0; curr = const_cast(item.data); next = nullptr; outline = nullptr; } template debug_item(debug_alias, const wire &item, size_t lsb_offset = 0) { static_assert(sizeof(item.curr) == value::chunks * sizeof(chunk_t) && sizeof(item.next) == value::chunks * sizeof(chunk_t), "wire is not compatible with C layout"); type = ALIAS; flags = DRIVEN_COMB; width = Bits; lsb_at = lsb_offset; depth = 1; zero_at = 0; curr = const_cast(item.curr.data); next = nullptr; outline = nullptr; } template debug_item(debug_outline &group, const value &item, size_t lsb_offset = 0) { static_assert(sizeof(item) == value::chunks * sizeof(chunk_t), "value is not compatible with C layout"); type = OUTLINE; flags = DRIVEN_COMB; width = Bits; lsb_at = lsb_offset; depth = 1; zero_at = 0; curr = const_cast(item.data); next = nullptr; outline = &group; } template IntegerT get() const { assert(width == Bits && depth == 1); value item; std::copy(curr, curr + value::chunks, item.data); return item.template get(); } template void set(IntegerT other) const { assert(width == Bits && depth == 1); value item; item.template set(other); std::copy(item.data, item.data + value::chunks, next); } }; static_assert(std::is_standard_layout::value, "debug_item is not compatible with C layout"); struct debug_items { std::map> table; void add(const std::string &name, debug_item &&item) { std::vector &parts = table[name]; parts.emplace_back(item); std::sort(parts.begin(), parts.end(), [](const debug_item &a, const debug_item &b) { return a.lsb_at < b.lsb_at; }); } size_t count(const std::string &name) const { if (table.count(name) == 0) return 
0; return table.at(name).size(); } const std::vector &parts_at(const std::string &name) const { return table.at(name); } const debug_item &at(const std::string &name) const { const std::vector &parts = table.at(name); assert(parts.size() == 1); return parts.at(0); } const debug_item &operator [](const std::string &name) const { return at(name); } }; // Tag class to disambiguate the default constructor used by the toplevel module that calls reset(), // and the constructor of interior modules that should not call it. struct interior {}; struct module { module() {} virtual ~module() {} // Modules with black boxes cannot be copied. Although not all designs include black boxes, // delete the copy constructor and copy assignment operator to make sure that any downstream // code that manipulates modules doesn't accidentally depend on their availability. module(const module &) = delete; module &operator=(const module &) = delete; module(module &&) = default; module &operator=(module &&) = default; virtual void reset() = 0; virtual bool eval() = 0; virtual bool commit() = 0; unsigned int steps = 0; size_t step() { ++steps; size_t deltas = 0; bool converged = false; do { converged = eval(); deltas++; } while (commit() && !converged); return deltas; } virtual void debug_info(debug_items &items, std::string path = "") { (void)items, (void)path; } }; } // namespace cxxrtl // Internal structures used to communicate with the implementation of the C interface. typedef struct _cxxrtl_toplevel { std::unique_ptr module; } *cxxrtl_toplevel; typedef struct _cxxrtl_outline { std::function eval; } *cxxrtl_outline; // Definitions of internal Yosys cells. Other than the functions in this namespace, CXXRTL is fully generic // and indepenent of Yosys implementation details. // // The `write_cxxrtl` pass translates internal cells (cells with names that start with `$`) to calls of these // functions. All of Yosys arithmetic and logical cells perform sign or zero extension on their operands, // whereas basic operations on arbitrary width values require operands to be of the same width. These functions // bridge the gap by performing the necessary casts. They are named similar to `cell_A[B]`, where A and B are `u` // if the corresponding operand is unsigned, and `s` if it is signed. namespace cxxrtl_yosys { using namespace cxxrtl; // std::max isn't constexpr until C++14 for no particular reason (it's an oversight), so we define our own. template CXXRTL_ALWAYS_INLINE constexpr T max(const T &a, const T &b) { return a > b ? a : b; } // Logic operations template CXXRTL_ALWAYS_INLINE value logic_not(const value &a) { return value { a ? 0u : 1u }; } template CXXRTL_ALWAYS_INLINE value logic_and(const value &a, const value &b) { return value { (bool(a) && bool(b)) ? 1u : 0u }; } template CXXRTL_ALWAYS_INLINE value logic_or(const value &a, const value &b) { return value { (bool(a) || bool(b)) ? 1u : 0u }; } // Reduction operations template CXXRTL_ALWAYS_INLINE value reduce_and(const value &a) { return value { a.bit_not().is_zero() ? 1u : 0u }; } template CXXRTL_ALWAYS_INLINE value reduce_or(const value &a) { return value { a ? 1u : 0u }; } template CXXRTL_ALWAYS_INLINE value reduce_xor(const value &a) { return value { (a.ctpop() % 2) ? 1u : 0u }; } template CXXRTL_ALWAYS_INLINE value reduce_xnor(const value &a) { return value { (a.ctpop() % 2) ? 0u : 1u }; } template CXXRTL_ALWAYS_INLINE value reduce_bool(const value &a) { return value { a ? 
1u : 0u }; } // Bitwise operations template CXXRTL_ALWAYS_INLINE value not_u(const value &a) { return a.template zcast().bit_not(); } template CXXRTL_ALWAYS_INLINE value not_s(const value &a) { return a.template scast().bit_not(); } template CXXRTL_ALWAYS_INLINE value and_uu(const value &a, const value &b) { return a.template zcast().bit_and(b.template zcast()); } template CXXRTL_ALWAYS_INLINE value and_ss(const value &a, const value &b) { return a.template scast().bit_and(b.template scast()); } template CXXRTL_ALWAYS_INLINE value or_uu(const value &a, const value &b) { return a.template zcast().bit_or(b.template zcast()); } template CXXRTL_ALWAYS_INLINE value or_ss(const value &a, const value &b) { return a.template scast().bit_or(b.template scast()); } template CXXRTL_ALWAYS_INLINE value xor_uu(const value &a, const value &b) { return a.template zcast().bit_xor(b.template zcast()); } template CXXRTL_ALWAYS_INLINE value xor_ss(const value &a, const value &b) { return a.template scast().bit_xor(b.template scast()); } template CXXRTL_ALWAYS_INLINE value xnor_uu(const value &a, const value &b) { return a.template zcast().bit_xor(b.template zcast()).bit_not(); } template CXXRTL_ALWAYS_INLINE value xnor_ss(const value &a, const value &b) { return a.template scast().bit_xor(b.template scast()).bit_not(); } template CXXRTL_ALWAYS_INLINE value shl_uu(const value &a, const value &b) { return a.template zcast().shl(b); } template CXXRTL_ALWAYS_INLINE value shl_su(const value &a, const value &b) { return a.template scast().shl(b); } template CXXRTL_ALWAYS_INLINE value sshl_uu(const value &a, const value &b) { return a.template zcast().shl(b); } template CXXRTL_ALWAYS_INLINE value sshl_su(const value &a, const value &b) { return a.template scast().shl(b); } template CXXRTL_ALWAYS_INLINE value shr_uu(const value &a, const value &b) { return a.shr(b).template zcast(); } template CXXRTL_ALWAYS_INLINE value shr_su(const value &a, const value &b) { return a.shr(b).template scast(); } template CXXRTL_ALWAYS_INLINE value sshr_uu(const value &a, const value &b) { return a.shr(b).template zcast(); } template CXXRTL_ALWAYS_INLINE value sshr_su(const value &a, const value &b) { return a.sshr(b).template scast(); } template CXXRTL_ALWAYS_INLINE value shift_uu(const value &a, const value &b) { return shr_uu(a, b); } template CXXRTL_ALWAYS_INLINE value shift_su(const value &a, const value &b) { return shr_su(a, b); } template CXXRTL_ALWAYS_INLINE value shift_us(const value &a, const value &b) { return b.is_neg() ? shl_uu(a, b.template sext().neg()) : shr_uu(a, b); } template CXXRTL_ALWAYS_INLINE value shift_ss(const value &a, const value &b) { return b.is_neg() ? shl_su(a, b.template sext().neg()) : shr_su(a, b); } template CXXRTL_ALWAYS_INLINE value shiftx_uu(const value &a, const value &b) { return shift_uu(a, b); } template CXXRTL_ALWAYS_INLINE value shiftx_su(const value &a, const value &b) { return shift_su(a, b); } template CXXRTL_ALWAYS_INLINE value shiftx_us(const value &a, const value &b) { return shift_us(a, b); } template CXXRTL_ALWAYS_INLINE value shiftx_ss(const value &a, const value &b) { return shift_ss(a, b); } // Comparison operations template CXXRTL_ALWAYS_INLINE value eq_uu(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value{ a.template zext() == b.template zext() ? 1u : 0u }; } template CXXRTL_ALWAYS_INLINE value eq_ss(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value{ a.template sext() == b.template sext() ? 
1u : 0u }; } template CXXRTL_ALWAYS_INLINE value ne_uu(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value{ a.template zext() != b.template zext() ? 1u : 0u }; } template CXXRTL_ALWAYS_INLINE value ne_ss(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value{ a.template sext() != b.template sext() ? 1u : 0u }; } template CXXRTL_ALWAYS_INLINE value eqx_uu(const value &a, const value &b) { return eq_uu(a, b); } template CXXRTL_ALWAYS_INLINE value eqx_ss(const value &a, const value &b) { return eq_ss(a, b); } template CXXRTL_ALWAYS_INLINE value nex_uu(const value &a, const value &b) { return ne_uu(a, b); } template CXXRTL_ALWAYS_INLINE value nex_ss(const value &a, const value &b) { return ne_ss(a, b); } template CXXRTL_ALWAYS_INLINE value gt_uu(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value { b.template zext().ucmp(a.template zext()) ? 1u : 0u }; } template CXXRTL_ALWAYS_INLINE value gt_ss(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value { b.template sext().scmp(a.template sext()) ? 1u : 0u }; } template CXXRTL_ALWAYS_INLINE value ge_uu(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value { !a.template zext().ucmp(b.template zext()) ? 1u : 0u }; } template CXXRTL_ALWAYS_INLINE value ge_ss(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value { !a.template sext().scmp(b.template sext()) ? 1u : 0u }; } template CXXRTL_ALWAYS_INLINE value lt_uu(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value { a.template zext().ucmp(b.template zext()) ? 1u : 0u }; } template CXXRTL_ALWAYS_INLINE value lt_ss(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value { a.template sext().scmp(b.template sext()) ? 1u : 0u }; } template CXXRTL_ALWAYS_INLINE value le_uu(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value { !b.template zext().ucmp(a.template zext()) ? 1u : 0u }; } template CXXRTL_ALWAYS_INLINE value le_ss(const value &a, const value &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value { !b.template sext().scmp(a.template sext()) ? 1u : 0u }; } // Arithmetic operations template CXXRTL_ALWAYS_INLINE value pos_u(const value &a) { return a.template zcast(); } template CXXRTL_ALWAYS_INLINE value pos_s(const value &a) { return a.template scast(); } template CXXRTL_ALWAYS_INLINE value neg_u(const value &a) { return a.template zcast().neg(); } template CXXRTL_ALWAYS_INLINE value neg_s(const value &a) { return a.template scast().neg(); } template CXXRTL_ALWAYS_INLINE value add_uu(const value &a, const value &b) { return a.template zcast().add(b.template zcast()); } template CXXRTL_ALWAYS_INLINE value add_ss(const value &a, const value &b) { return a.template scast().add(b.template scast()); } template CXXRTL_ALWAYS_INLINE value sub_uu(const value &a, const value &b) { return a.template zcast().sub(b.template zcast()); } template CXXRTL_ALWAYS_INLINE value sub_ss(const value &a, const value &b) { return a.template scast().sub(b.template scast()); } template CXXRTL_ALWAYS_INLINE value mul_uu(const value &a, const value &b) { constexpr size_t BitsM = BitsA >= BitsB ? 
BitsA : BitsB; return a.template zcast().template mul(b.template zcast()); } template CXXRTL_ALWAYS_INLINE value mul_ss(const value &a, const value &b) { return a.template scast().template mul(b.template scast()); } template CXXRTL_ALWAYS_INLINE std::pair, value> divmod_uu(const value &a, const value &b) { constexpr size_t Bits = max(BitsY, max(BitsA, BitsB)); value quotient; value dividend = a.template zext(); value divisor = b.template zext(); if (dividend.ucmp(divisor)) return {/*quotient=*/value { 0u }, /*remainder=*/dividend.template trunc()}; uint32_t divisor_shift = dividend.ctlz() - divisor.ctlz(); divisor = divisor.shl(value<32> { divisor_shift }); for (size_t step = 0; step <= divisor_shift; step++) { quotient = quotient.shl(value<1> { 1u }); if (!dividend.ucmp(divisor)) { dividend = dividend.sub(divisor); quotient.set_bit(0, true); } divisor = divisor.shr(value<1> { 1u }); } return {quotient.template trunc(), /*remainder=*/dividend.template trunc()}; } template CXXRTL_ALWAYS_INLINE std::pair, value> divmod_ss(const value &a, const value &b) { value ua = a.template sext(); value ub = b.template sext(); if (ua.is_neg()) ua = ua.neg(); if (ub.is_neg()) ub = ub.neg(); value y, r; std::tie(y, r) = divmod_uu(ua, ub); if (a.is_neg() != b.is_neg()) y = y.neg(); if (a.is_neg()) r = r.neg(); return {y, r}; } template CXXRTL_ALWAYS_INLINE value div_uu(const value &a, const value &b) { return divmod_uu(a, b).first; } template CXXRTL_ALWAYS_INLINE value div_ss(const value &a, const value &b) { return divmod_ss(a, b).first; } template CXXRTL_ALWAYS_INLINE value mod_uu(const value &a, const value &b) { return divmod_uu(a, b).second; } template CXXRTL_ALWAYS_INLINE value mod_ss(const value &a, const value &b) { return divmod_ss(a, b).second; } template CXXRTL_ALWAYS_INLINE value modfloor_uu(const value &a, const value &b) { return divmod_uu(a, b).second; } // GHDL Modfloor operator. Returns r=a mod b, such that r has the same sign as b and // a=b*N+r where N is some integer // In practical terms, when a and b have different signs and the remainder returned by divmod_ss is not 0 // then return the remainder + b template CXXRTL_ALWAYS_INLINE value modfloor_ss(const value &a, const value &b) { value r; r = divmod_ss(a, b).second; if((b.is_neg() != a.is_neg()) && !r.is_zero()) return add_ss(b, r); return r; } template CXXRTL_ALWAYS_INLINE value divfloor_uu(const value &a, const value &b) { return divmod_uu(a, b).first; } // Divfloor. Similar to above: returns q=a//b, where q has the sign of a*b and a=b*q+N. // In other words, returns (truncating) a/b, except if a and b have different signs // and there's non-zero remainder, subtract one more towards floor. template CXXRTL_ALWAYS_INLINE value divfloor_ss(const value &a, const value &b) { value q, r; std::tie(q, r) = divmod_ss(a, b); if ((b.is_neg() != a.is_neg()) && !r.is_zero()) return sub_uu(q, value<1> { 1u }); return q; } // Memory helper struct memory_index { bool valid; size_t index; template memory_index(const value &addr, size_t offset, size_t depth) { static_assert(value::chunks <= 1, "memory address is too wide"); size_t offset_index = addr.data[0]; valid = (offset_index >= offset && offset_index < offset + depth); index = offset_index - offset; } }; } // namespace cxxrtl_yosys #endif
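
// A minimal driver sketch (informal and hypothetical; `cxxrtl_design::p_top` and the port name `p_clk` stand in
// for whatever names `write_cxxrtl` generates for a particular design):
//
//   cxxrtl_design::p_top top;
//   for (int cycle = 0; cycle < 8; cycle++) {
//       top.p_clk.set<bool>(false); top.step();
//       top.p_clk.set<bool>(true);  top.step();
//   }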