From 82fca5030974f77a63b2cbd197a63ebadfeb9671 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Sat, 6 Jan 2024 16:44:36 +0100 Subject: [PATCH 01/13] write_verilog: Handle edge case with non-pruned processes This change only matters for processes that weren't processed by `proc_rmdead` for which follow-up cases after a default case are treated differently in Verilog and RTLIL semantics. --- backends/verilog/verilog_backend.cc | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/backends/verilog/verilog_backend.cc b/backends/verilog/verilog_backend.cc index 735672a43..fd70695d3 100644 --- a/backends/verilog/verilog_backend.cc +++ b/backends/verilog/verilog_backend.cc @@ -1988,12 +1988,10 @@ void dump_proc_switch(std::ostream &f, std::string indent, RTLIL::SwitchRule *sw dump_sigspec(f, sw->signal); f << stringf(")\n"); - bool got_default = false; for (auto it = sw->cases.begin(); it != sw->cases.end(); ++it) { + bool got_default = false; dump_attributes(f, indent + " ", (*it)->attributes, '\n', /*modattr=*/false, /*regattr=*/false, /*as_comment=*/true); if ((*it)->compare.size() == 0) { - if (got_default) - continue; f << stringf("%s default", indent.c_str()); got_default = true; } else { @@ -2006,6 +2004,14 @@ void dump_proc_switch(std::ostream &f, std::string indent, RTLIL::SwitchRule *sw } f << stringf(":\n"); dump_case_body(f, indent + " ", *it); + + if (got_default) { + // If we followed up the default with more cases the Verilog + // semantics would be to match those *before* the default, but + // the RTLIL semantics are to match those *after* the default + // (so they can never be selected). Exit now. 
+ break; + } } if (sw->cases.empty()) { From 22370ad21e0308a912f0a443d4ae4064beeb9411 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 9 Jan 2024 00:16:54 +0000 Subject: [PATCH 02/13] Bump version --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3da89690f..946c323e7 100644 --- a/Makefile +++ b/Makefile @@ -141,7 +141,7 @@ LDLIBS += -lrt endif endif -YOSYS_VER := 0.36+67 +YOSYS_VER := 0.36+72 # Note: We arrange for .gitcommit to contain the (short) commit hash in # tarballs generated with git-archive(1) using .gitattributes. The git repo From c045c9a5c9ecff143fcac7095c93c0c1e8a72588 Mon Sep 17 00:00:00 2001 From: Miodrag Milanovic Date: Tue, 9 Jan 2024 10:58:31 +0100 Subject: [PATCH 03/13] Update macOS to Ventura --- .github/workflows/test-macos.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml index 048457234..2b48b7252 100644 --- a/.github/workflows/test-macos.yml +++ b/.github/workflows/test-macos.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: os: - - { id: macos-11, name: 'Big Sur' } + - { id: macos-13, name: 'Ventura' } cpp_std: - 'c++11' - 'c++17' From 5aaf1f1d398686a28192d5488da4337303a329df Mon Sep 17 00:00:00 2001 From: Catherine Date: Fri, 5 Jan 2024 21:31:08 +0000 Subject: [PATCH 04/13] cxxrtl: implement `value.get()` and `value.set()` for signed types. 
--- backends/cxxrtl/runtime/cxxrtl/cxxrtl.h | 26 +++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/backends/cxxrtl/runtime/cxxrtl/cxxrtl.h b/backends/cxxrtl/runtime/cxxrtl/cxxrtl.h index 183fbb2c7..3f8247226 100644 --- a/backends/cxxrtl/runtime/cxxrtl/cxxrtl.h +++ b/backends/cxxrtl/runtime/cxxrtl/cxxrtl.h @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -145,7 +146,7 @@ struct value : public expr_base> { // These functions ensure that a conversion is never out of range, and should be always used, if at all // possible, instead of direct manipulation of the `data` member. For very large types, .slice() and // .concat() can be used to split them into more manageable parts. - template + template::value, int>::type = 0> CXXRTL_ALWAYS_INLINE IntegerT get() const { static_assert(std::numeric_limits::is_integer && !std::numeric_limits::is_signed, @@ -158,15 +159,32 @@ struct value : public expr_base> { return result; } - template + template::value, int>::type = 0> CXXRTL_ALWAYS_INLINE - void set(IntegerT other) { + IntegerT get() const { + auto unsigned_result = get::type>(); + IntegerT result; + memcpy(&result, &unsigned_result, sizeof(IntegerT)); + return result; + } + + template::value, int>::type = 0> + CXXRTL_ALWAYS_INLINE + void set(IntegerT value) { static_assert(std::numeric_limits::is_integer && !std::numeric_limits::is_signed, "set() requires T to be an unsigned integral type"); static_assert(std::numeric_limits::digits >= Bits, "set() requires the value to be at least as wide as T is"); for (size_t n = 0; n < chunks; n++) - data[n] = (other >> (n * chunk::bits)) & chunk::mask; + data[n] = (value >> (n * chunk::bits)) & chunk::mask; + } + + template::value, int>::type = 0> + CXXRTL_ALWAYS_INLINE + void set(IntegerT value) { + typename std::make_unsigned::type unsigned_value; + memcpy(&unsigned_value, &value, sizeof(IntegerT)); + set(unsigned_value); } // Operations with compile-time parameters. 
From a59d477098f4b533ac7cc95abdacb3943792334c Mon Sep 17 00:00:00 2001 From: Catherine Date: Fri, 5 Jan 2024 20:09:49 +0000 Subject: [PATCH 05/13] cxxrtl: improve robustness of `cxxrtl::time`. Avoid overflow during conversion for any representable raw value. --- backends/cxxrtl/runtime/cxxrtl/cxxrtl_time.h | 36 +++++++++++--------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/backends/cxxrtl/runtime/cxxrtl/cxxrtl_time.h b/backends/cxxrtl/runtime/cxxrtl/cxxrtl_time.h index 51f59321e..f37c2b656 100644 --- a/backends/cxxrtl/runtime/cxxrtl/cxxrtl_time.h +++ b/backends/cxxrtl/runtime/cxxrtl/cxxrtl_time.h @@ -26,17 +26,19 @@ namespace cxxrtl { -// A timestamp or a difference in time, stored as a 96-bit number of femtoseconds (10e-15 s). The dynamic range and -// resolution of this format can represent any VCD timestamp within 136 years, without the need for a timescale. +// A timestamp or a difference in time, stored as a 96-bit number of femtoseconds (1e-15 s). The range and resolution +// of this format can represent any VCD timestamp within approx. ±1255321.2 years, without the need for a timescale. class time { public: static constexpr size_t bits = 96; // 3 chunks private: + static constexpr size_t resolution_digits = 15; + + static_assert(sizeof(chunk_t) == 4, "a chunk is expected to be 32-bit"); static constexpr value resolution = value { chunk_t(1000000000000000ull & 0xffffffffull), chunk_t(1000000000000000ull >> 32), 0u }; - static constexpr size_t resolution_digits = 15; // Signed number of femtoseconds from the beginning of time.
value raw; @@ -51,11 +53,11 @@ public: return time(value { 0xffffffffu, 0xffffffffu, 0x7fffffffu }); } - time(int32_t secs, int64_t femtos) { - value<32> secs_val; - secs_val.set((uint32_t)secs); + time(int64_t secs, int64_t femtos) { + value<64> secs_val; + secs_val.set(secs); value<64> femtos_val; - femtos_val.set((uint64_t)femtos); + femtos_val.set(femtos); raw = secs_val.sext().mul(resolution).add(femtos_val.sext()); } @@ -68,14 +70,14 @@ public: return raw.is_neg(); } - // Extracts the absolute number of whole seconds. Negative if the value is negative. - int32_t secs() const { - return raw.sdivmod(resolution).first.trunc<32>().get(); + // Extracts the number of whole seconds. Negative if the value is negative. + int64_t secs() const { + return raw.sdivmod(resolution).first.trunc<64>().get(); } - // Extracts the absolute number of femtoseconds in the fractional second. Negative if the value is negative. + // Extracts the number of femtoseconds in the fractional second. Negative if the value is negative. int64_t femtos() const { - return raw.sdivmod(resolution).second.trunc<64>().get(); + return raw.sdivmod(resolution).second.trunc<64>().get(); } bool operator==(const time &other) const { @@ -125,10 +127,10 @@ public: } operator std::string() const { - char buf[38]; // len(f"-{2**64}.{10**15-1}") + 1 == 38 - int32_t secs = this->secs(); + char buf[48]; // x=2**95; len(f"-{x/1_000_000_000_000_000}.{x^1_000_000_000_000_000}") == 48 + int64_t secs = this->secs(); int64_t femtos = this->femtos(); - snprintf(buf, sizeof(buf), "%s%" PRIi32 ".%015" PRIi64, + snprintf(buf, sizeof(buf), "%s%" PRIi64 ".%015" PRIi64, is_negative() ? "-" : "", secs >= 0 ? secs : -secs, femtos >= 0 ? 
femtos : -femtos); return buf; } @@ -143,7 +145,7 @@ public: parse_fractional, } state = parse_sign_opt; bool negative = false; - int32_t integral = 0; + int64_t integral = 0; int64_t fractional = 0; size_t frac_digits = 0; for (auto chr : str) { @@ -199,7 +201,7 @@ std::ostream &operator<<(std::ostream &os, const time &val) { namespace time_literals { time operator""_s(unsigned long long seconds) { - return time { (int32_t)seconds, 0 }; + return time { (int64_t)seconds, 0 }; } time operator""_ms(unsigned long long milliseconds) { From f6e36f0e540bedc82d98cfda6560ab0ee04e887e Mon Sep 17 00:00:00 2001 From: Catherine Date: Fri, 1 Dec 2023 16:01:28 +0000 Subject: [PATCH 06/13] cxxrtl: implement a generic record/replay interface. This commit adds a reader/writer implementation for a file format optimized for fast, single-pass storage and retrieval of design state changes, as well as a recorder/replayer that integrate with the eval and commit simulation steps to create replay logs and reproduce them later. This feature makes it possible to run a simulation once, recording the stimulus as well as changes to the registers, and navigate to a past time point in the simulation later without rerunning it. Both the changes in inputs (stimulus) and changes in state are saved so that navigation does not require calling `eval()` or `commit()`; only a series of memory copy operations. On a representative example of a SoC netlist, saving the replay log while simulating it takes 150% of the time it would take to simulate the same design without logging, which is a much lower overhead than writing an equivalent full view (including memories) VCD waveform dump. The replay log is also several times smaller than the VCD dump, and more space savings are available as low hanging fruit. Replaying the log has not been optimized and currently takes about the same time as running the simulation in first place. 
However, it is still useful since it provides fast navigation to an arbitrary time point, something that rerunning the simulation does not allow for. The current file format should be considered preliminary. It is not very space-efficient, and my testing shows that a lot of time is spent in the write() syscall in the kernel. Most likely, compression and/or writing in another thread could improve performance by 10-20%. This may be done at a later time. --- .../cxxrtl/runtime/cxxrtl/cxxrtl_replay.h | 785 ++++++++++++++++++ 1 file changed, 785 insertions(+) create mode 100644 backends/cxxrtl/runtime/cxxrtl/cxxrtl_replay.h diff --git a/backends/cxxrtl/runtime/cxxrtl/cxxrtl_replay.h b/backends/cxxrtl/runtime/cxxrtl/cxxrtl_replay.h new file mode 100644 index 000000000..94f59bb0d --- /dev/null +++ b/backends/cxxrtl/runtime/cxxrtl/cxxrtl_replay.h @@ -0,0 +1,785 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2023 Catherine + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#ifndef CXXRTL_REPLAY_H +#define CXXRTL_REPLAY_H + +#if !defined(WIN32) +#include +#define O_BINARY 0 +#else +#include +#endif + +#include +#include +#include +#include +#include + +#include +#include + +// Theory of operation +// =================== +// +// Log format +// ---------- +// +// The replay log is a simple data format based on a sequence of 32-bit words. 
The following BNF-like grammar describes +// enough detail to understand the overall structure of the log data and be able to read hex dumps. For a greater +// degree of detail see the source code. The format is considered fully internal to CXXRTL and is subject to change +// without notice. +// +// ::= + +// ::= 0x52585843 0x00004c54 +// ::= * +// ::= * +// ::= 0xc0000000 ... +// ::= 0xc0000001 ... +// ::= 0x0??????? + | 0x1??????? + | 0x2??????? | 0x3??????? +// , ::= 0x???????? +// ::= 0xFFFFFFFF +// +// The replay log contains sample data, however, it does not cover the entire design. Rather, it only contains sample +// data for the subset of debug items containing _design state_: inputs and registers/latches. This keeps its size to +// a minimum, and recording speed to a maximum. The player samples any missing data by setting the design state items +// to the same values they had during recording, and re-evaluating the design. +// +// Limits +// ------ +// +// The log may contain: +// +// * Up to 2**28-1 debug items containing design state. +// * Up to 2**32 chunks per debug item. +// * Up to 2**32 rows per memory. +// * Up to 2**32 samples. +// +// Of these limits, the last two are most likely to be eventually exceeded by practical recordings. However, other +// performance considerations will likely limit the size of such practical recordings first, so the log data format +// will undergo a breaking change at that point. +// +// Operations +// ---------- +// +// As suggested by the name "replay log", this format is designed for recording (writing) once and playing (reading) +// many times afterwards, such that reading the format can be done linearly and quickly. The log format is designed to +// support three primary read operations: +// +// 1. Initialization +// 2. Rewinding (to time T) +// 3. Replaying (for N samples) +// +// During initialization, the player establishes the mapping between debug item names and their 28-bit identifiers in +// the log. 
It is done once. +// +// During rewinding, the player begins reading at the latest non-incremental sample that still lies before the requested +// sample time. It continues reading incremental samples after that point until it reaches the requested sample time. +// This process is very cheap as the design is not evaluated; it is essentially a (convoluted) memory copy operation. +// +// During replaying, the player evaluates the design at the current time, which causes all debug items to assume +// the values they had before recording. This process is expensive. Once done, the player advances to the next state +// by reading the next (complete or incremental) sample, as above. Since a range of samples is replayed, this process +// is repeated several times in a row. +// +// In principle, when replaying, the player could only read the state of the inputs and the time delta and use a normal +// eval/commit loop to progress the simulation, which is fully deterministic so its calculated design state should be +// exactly the same as the recorded design state. In practice, it is both faster and more reliable (in presence of e.g. +// user-defined black boxes) to read the recorded values instead of calculating them. +// +// Note: The operations described above are conceptual and do not correspond exactly to methods on `cxxrtl::player`. +// The `cxxrtl::player::replay()` method does not evaluate the design. This is so that delta cycles could be ignored +// if they are not of interest while replaying. + +namespace cxxrtl { + +// A spool stores CXXRTL design state changes in a file. +class spool { +public: + // Unique pointer to a specific sample within a replay log. (Timestamps are not unique.) + typedef uint32_t pointer_t; + + // Numeric identifier assigned to a debug item within a replay log. Range limited to [1, MAXIMUM_IDENT]. 
+ typedef uint32_t ident_t; + + static constexpr uint16_t VERSION = 0x0400; + + static constexpr uint64_t HEADER_MAGIC = 0x00004c5452585843; + static constexpr uint64_t VERSION_MASK = 0xffff000000000000; + + static constexpr uint32_t PACKET_DEFINE = 0xc0000000; + + static constexpr uint32_t PACKET_SAMPLE = 0xc0000001; + enum sample_flag : uint32_t { + EMPTY = 0, + INCREMENTAL = 1, + }; + + static constexpr uint32_t MAXIMUM_IDENT = 0x0fffffff; + static constexpr uint32_t CHANGE_MASK = 0x30000000; + + static constexpr uint32_t PACKET_CHANGE = 0x00000000/* | ident */; + static constexpr uint32_t PACKET_CHANGEI = 0x10000000/* | ident */; + static constexpr uint32_t PACKET_CHANGEL = 0x20000000/* | ident */; + static constexpr uint32_t PACKET_CHANGEH = 0x30000000/* | ident */; + + static constexpr uint32_t PACKET_END = 0xffffffff; + + // Writing spools. + + class writer { + int fd; + size_t position; + std::vector buffer; + + // These functions aren't overloaded because of implicit numeric conversions. + + void emit_word(uint32_t word) { + if (position + 1 == buffer.size()) + flush(); + buffer[position++] = word; + } + + void emit_dword(uint64_t dword) { + emit_word(dword >> 0); + emit_word(dword >> 32); + } + + void emit_ident(ident_t ident) { + assert(ident <= MAXIMUM_IDENT); + emit_word(ident); + } + + void emit_size(size_t size) { + assert(size <= std::numeric_limits::max()); + emit_word(size); + } + + // Same implementation as `emit_size()`, different declared intent. + void emit_index(size_t index) { + assert(index <= std::numeric_limits::max()); + emit_word(index); + } + + void emit_string(std::string str) { + // Align to a word boundary, and add at least one terminating \0. 
+ str.resize(str.size() + (sizeof(uint32_t) - (str.size() + sizeof(uint32_t)) % sizeof(uint32_t))); + for (size_t index = 0; index < str.size(); index += sizeof(uint32_t)) { + uint32_t word; + memcpy(&word, &str[index], sizeof(uint32_t)); + emit_word(word); + } + } + + void emit_time(const time &timestamp) { + const value &raw_timestamp(timestamp); + emit_word(raw_timestamp.data[0]); + emit_word(raw_timestamp.data[1]); + emit_word(raw_timestamp.data[2]); + } + + public: + // Creates a writer, and transfers ownership of `fd`, which must be open for appending. + // + // The buffer size is currently fixed to a "reasonably large" size, determined empirically by measuring writer + // performance on a representative design; large but not so large it would e.g. cause address space exhaustion + // on 32-bit platforms. + writer(spool &spool) : fd(spool.take_write()), position(0), buffer(32 * 1024 * 1024) { + assert(fd != -1); +#if !defined(WIN32) + int result = ftruncate(fd, 0); +#else + int result = _chsize_s(fd, 0); +#endif + assert(result == 0); + } + + writer(writer &&moved) : fd(moved.fd), position(moved.position), buffer(std::move(moved.buffer)) { + moved.fd = -1; // the moved-from writer gives up the descriptor; its staging buffer was moved above, not copied + moved.position = 0; + } + + writer(const writer &) = delete; + writer &operator=(const writer &) = delete; + + // Both write() calls and fwrite() calls are too expensive to perform implicitly. The API consumer must determine + // the optimal time to flush the writer and do that explicitly for best performance. + void flush() { + assert(fd != -1); + size_t data_size = position * sizeof(uint32_t); + size_t data_written = write(fd, buffer.data(), data_size); + assert(data_size == data_written); + position = 0; + } + + ~writer() { + if (fd != -1) { + flush(); + close(fd); + } + } + + void write_magic() { + // `CXXRTL` followed by version in binary. This header will read backwards on big-endian machines, which allows + // detection of this case, both visually and programmatically.
+ emit_dword(((uint64_t)VERSION << 48) | HEADER_MAGIC); + } + + void write_define(ident_t ident, const std::string &name, size_t part_index, size_t chunks, size_t depth) { + emit_word(PACKET_DEFINE); + emit_ident(ident); + emit_string(name); + emit_index(part_index); + emit_size(chunks); + emit_size(depth); + } + + void write_sample(bool incremental, pointer_t pointer, const time &timestamp) { + uint32_t flags = (incremental ? sample_flag::INCREMENTAL : 0); + emit_word(PACKET_SAMPLE); + emit_word(flags); + emit_word(pointer); + emit_time(timestamp); + } + + void write_change(ident_t ident, size_t chunks, const chunk_t *data) { + assert(ident <= MAXIMUM_IDENT); + + if (chunks == 1 && *data == 0) { + emit_word(PACKET_CHANGEL | ident); + } else if (chunks == 1 && *data == 1) { + emit_word(PACKET_CHANGEH | ident); + } else { + emit_word(PACKET_CHANGE | ident); + for (size_t offset = 0; offset < chunks; offset++) + emit_word(data[offset]); + } + } + + void write_change(ident_t ident, size_t chunks, const chunk_t *data, size_t index) { + assert(ident <= MAXIMUM_IDENT); + + emit_word(PACKET_CHANGEI | ident); + emit_index(index); + for (size_t offset = 0; offset < chunks; offset++) + emit_word(data[offset]); + } + + void write_end() { + emit_word(PACKET_END); + } + }; + + // Reading spools. + + class reader { + FILE *f; + + uint32_t absorb_word() { + // If we're at end of file, `fread` will not write to `word`, and `PACKET_END` will be returned.
+ uint32_t word = PACKET_END; + fread(&word, sizeof(word), 1, f); + return word; + } + + uint64_t absorb_dword() { + uint32_t lo = absorb_word(); + uint32_t hi = absorb_word(); + return ((uint64_t)hi << 32) | lo; + } + + ident_t absorb_ident() { + ident_t ident = absorb_word(); + assert(ident <= MAXIMUM_IDENT); + return ident; + } + + size_t absorb_size() { + return absorb_word(); + } + + size_t absorb_index() { + return absorb_word(); + } + + std::string absorb_string() { + std::string str; + do { + size_t end = str.size(); + str.resize(end + 4); + uint32_t word = absorb_word(); + memcpy(&str[end], &word, sizeof(uint32_t)); + } while (str.back() != '\0'); + // Strings have no embedded zeroes besides the terminating one(s). + return str.substr(0, str.find('\0')); + } + + time absorb_time() { + value raw_timestamp; + raw_timestamp.data[0] = absorb_word(); + raw_timestamp.data[1] = absorb_word(); + raw_timestamp.data[2] = absorb_word(); + return time(raw_timestamp); + } + + public: + typedef uint64_t pos_t; + + // Creates a reader, and transfers ownership of `fd`, which must be open for reading. 
+ reader(spool &spool) : f(fdopen(spool.take_read(), "r")) { + assert(f != nullptr); + } + + reader(reader &&moved) : f(moved.f) { + moved.f = nullptr; + } + + reader(const reader &) = delete; + reader &operator=(const reader &) = delete; + + ~reader() { + if (f != nullptr) + fclose(f); + } + + pos_t position() { + return ftell(f); + } + + void rewind(pos_t position) { + fseek(f, position, SEEK_SET); + } + + void read_magic() { + uint64_t magic = absorb_dword(); + assert((magic & ~VERSION_MASK) == HEADER_MAGIC); + assert((magic >> 48) == VERSION); + } + + bool read_define(ident_t &ident, std::string &name, size_t &part_index, size_t &chunks, size_t &depth) { + uint32_t header = absorb_word(); + if (header == PACKET_END) + return false; + assert(header == PACKET_DEFINE); + ident = absorb_ident(); + name = absorb_string(); + part_index = absorb_index(); + chunks = absorb_size(); + depth = absorb_size(); + return true; + } + + bool read_sample(bool &incremental, pointer_t &pointer, time &timestamp) { + uint32_t header = absorb_word(); + if (header == PACKET_END) + return false; + assert(header == PACKET_SAMPLE); + uint32_t flags = absorb_word(); + incremental = (flags & sample_flag::INCREMENTAL); + pointer = absorb_word(); + timestamp = absorb_time(); + return true; + } + + bool read_change_header(uint32_t &header, ident_t &ident) { + header = absorb_word(); + if (header == PACKET_END) + return false; + assert((header & ~(CHANGE_MASK | MAXIMUM_IDENT)) == 0); + ident = header & MAXIMUM_IDENT; + return true; + } + + void read_change_data(uint32_t header, size_t chunks, size_t depth, chunk_t *data) { + uint32_t index = 0; + switch (header & CHANGE_MASK) { + case PACKET_CHANGEL: + *data = 0; + return; + case PACKET_CHANGEH: + *data = 1; + return; + case PACKET_CHANGE: + break; + case PACKET_CHANGEI: + index = absorb_word(); + assert(index < depth); + break; + default: + assert(false && "Unrecognized change packet"); + } + for (size_t offset = 0; offset < chunks; offset++) +
data[chunks * index + offset] = absorb_word(); + } + }; + + // Opening spools. For certain uses of the record/replay mechanism, two distinct open files (two open files, i.e. + // two distinct file pointers, and not just file descriptors, which share the file pointer if duplicated) are used, + // for a reader and writer thread. This class manages the lifetime of the descriptors for these files. When only + // one of them is used, the other is closed harmlessly when the spool is destroyed. +private: + std::atomic writefd; + std::atomic readfd; + +public: + spool(const std::string &filename) + : writefd(open(filename.c_str(), O_CREAT|O_BINARY|O_WRONLY|O_APPEND, 0644)), + readfd(open(filename.c_str(), O_BINARY|O_RDONLY)) { + assert(writefd.load() != -1 && readfd.load() != -1); + } + + spool(spool &&moved) : writefd(moved.writefd.exchange(-1)), readfd(moved.readfd.exchange(-1)) {} + + spool(const spool &) = delete; + spool &operator=(const spool &) = delete; + + ~spool() { + // -1 marks a descriptor that was already taken or moved away; every other value (0 included) is still owned. + int fd; + if ((fd = writefd.exchange(-1)) != -1) close(fd); + if ((fd = readfd.exchange(-1)) != -1) close(fd); + } + + // Atomically acquire a write file descriptor for the spool. Can be called once, and will return -1 the next time + // it is called. Thread-safe. + int take_write() { + return writefd.exchange(-1); + } + + // Atomically acquire a read file descriptor for the spool. Can be called once, and will return -1 the next time + // it is called. Thread-safe. + int take_read() { + return readfd.exchange(-1); + } +}; + +// A CXXRTL recorder samples design state, producing complete or incremental updates, and writes them to a spool.
+class recorder { + struct variable { + spool::ident_t ident; /* <= spool::MAXIMUM_IDENT */ + size_t chunks; + size_t depth; /* == 1 for wires */ + chunk_t *curr; + bool memory; + }; + + spool::writer writer; + std::vector variables; + std::vector inputs; // values of inputs must be recorded explicitly, as their changes are not observed + std::unordered_map ident_lookup; + bool streaming = false; // whether variable definitions have been written + spool::pointer_t pointer = 0; + time timestamp; + +public: + template + recorder(Args &&...args) : writer(std::forward(args)...) {} + + void start(module &module) { + debug_items items; + module.debug_info(items); + start(items); + } + + void start(const debug_items &items) { + assert(!streaming); + + writer.write_magic(); + for (auto item : items.table) + for (size_t part_index = 0; part_index < item.second.size(); part_index++) { + auto &part = item.second[part_index]; + if ((part.flags & debug_item::INPUT) || (part.flags & debug_item::DRIVEN_SYNC) || + (part.type == debug_item::MEMORY)) { + variable var; + var.ident = variables.size() + 1; + var.chunks = (part.width + sizeof(chunk_t) * 8 - 1) / (sizeof(chunk_t) * 8); + var.depth = part.depth; + var.curr = part.curr; + var.memory = (part.type == debug_item::MEMORY); + ident_lookup[var.curr] = var.ident; + + assert(variables.size() < spool::MAXIMUM_IDENT); + if (part.flags & debug_item::INPUT) + inputs.push_back(variables.size()); + variables.push_back(var); + + writer.write_define(var.ident, item.first, part_index, var.chunks, var.depth); + } + } + writer.write_end(); + streaming = true; + } + + const time &latest_time() { + return timestamp; + } + + const time &advance_time(const time &delta) { + assert(!delta.is_negative()); + timestamp += delta; + return timestamp; + } + + void record_complete() { + assert(streaming); + + writer.write_sample(/*incremental=*/false, pointer++, timestamp); + for (auto var : variables) { + assert(var.ident != 0); + if (!var.memory) + 
writer.write_change(var.ident, var.chunks, var.curr); + else + for (size_t index = 0; index < var.depth; index++) + writer.write_change(var.ident, var.chunks, &var.curr[var.chunks * index], index); + } + writer.write_end(); + } + + // This function is generic over ModuleT to encourage observer callbacks to be inlined into the commit function. + template + bool record_incremental(ModuleT &module) { + assert(streaming); + + struct : public observer { + std::unordered_map *ident_lookup; + spool::writer *writer; + + CXXRTL_ALWAYS_INLINE + void on_commit(size_t chunks, const chunk_t *base, const chunk_t *value) override { + writer->write_change(ident_lookup->at(base), chunks, value); + } + + CXXRTL_ALWAYS_INLINE + void on_commit(size_t chunks, const chunk_t *base, const chunk_t *value, size_t index) override { + writer->write_change(ident_lookup->at(base), chunks, value, index); + } + } record_observer; + record_observer.ident_lookup = &ident_lookup; + record_observer.writer = &writer; + + writer.write_sample(/*incremental=*/true, pointer++, timestamp); + for (auto input_index : inputs) { + variable &var = variables.at(input_index); + assert(!var.memory); + writer.write_change(var.ident, var.chunks, var.curr); + } + bool changed = module.commit(record_observer); + writer.write_end(); + return changed; + } + + void flush() { + writer.flush(); + } +}; + +// A CXXRTL player reads samples from a spool, and changes the design state accordingly. To start reading samples, +// a spool must have been initialized: the recorder must have been started and an initial complete sample must have +// been written. 
+class player { + struct variable { + size_t chunks; + size_t depth; /* == 1 for wires */ + chunk_t *curr; + }; + + spool::reader reader; + std::unordered_map variables; + bool streaming = false; // whether variable definitions have been read + bool initialized = false; // whether a sample has ever been read + spool::pointer_t pointer = 0; + time timestamp; + + std::map> index_by_pointer; + std::map> index_by_timestamp; + + bool peek_sample(spool::pointer_t &pointer, time &timestamp) { + bool incremental; + auto position = reader.position(); + bool success = reader.read_sample(incremental, pointer, timestamp); + reader.rewind(position); + return success; + } + +public: + template + player(Args &&...args) : reader(std::forward(args)...) {} + + void start(module &module) { + debug_items items; + module.debug_info(items); + start(items); + } + + void start(const debug_items &items) { + assert(!streaming); + + reader.read_magic(); + while (true) { + spool::ident_t ident; + std::string name; + size_t part_index; + size_t chunks; + size_t depth; + if (!reader.read_define(ident, name, part_index, chunks, depth)) + break; + assert(variables.count(ident) == 0); + assert(items.count(name) != 0); + assert(part_index < items.count(name)); + + const debug_item &part = items.parts_at(name).at(part_index); + assert(chunks == (part.width + sizeof(chunk_t) * 8 - 1) / (sizeof(chunk_t) * 8)); + assert(depth == part.depth); + + variable &var = variables[ident]; + var.chunks = chunks; + var.depth = depth; + var.curr = part.curr; + } + assert(variables.size() > 0); + streaming = true; + + // Establish the initial state of the design. + initialized = replay(); + assert(initialized); + } + + // Returns the pointer of the current sample. + spool::pointer_t current_pointer() { + assert(initialized); + return pointer; + } + + // Returns the time of the current sample.
+ const time &current_time() { + assert(initialized); + return timestamp; + } + + // Returns `true` if there is a next sample to read, and sets `pointer` to its pointer if there is. + bool get_next_pointer(spool::pointer_t &pointer) { + assert(streaming); + time timestamp; + return peek_sample(pointer, timestamp); + } + + // Returns `true` if there is a next sample to read, and sets `timestamp` to its time if there is. + bool get_next_time(time &timestamp) { + assert(streaming); + uint32_t pointer; + return peek_sample(pointer, timestamp); + } + + // If this function returns `true`, then `current_pointer() == at_pointer`, and the module contains values that + // correspond to this pointer in the replay log. To obtain a valid pointer, call `current_pointer()`; while pointers + // are monotonically increasing for each consecutive sample, using arithmetic operations to create a new pointer is + // not allowed. + bool rewind_to(spool::pointer_t at_pointer) { + assert(initialized); + + // The pointers in the replay log start from one that is greater than `at_pointer`. In this case the pointer will + // never be reached. + assert(index_by_pointer.size() > 0); + if (at_pointer < index_by_pointer.rbegin()->first) + return false; + + // Find the last complete sample whose pointer is less than or equal to `at_pointer`. Note that the comparison + // function used here is `std::greater`, inverting the direction of `lower_bound`. + auto position_it = index_by_pointer.lower_bound(at_pointer); + assert(position_it != index_by_pointer.end()); + reader.rewind(position_it->second); + + // Replay samples until eventually arriving to `at_pointer` or encountering end of file. + while(replay()) { + if (pointer == at_pointer) + return true; + } + return false; + } + + // If this function returns `true`, then `current_time() <= at_or_before_timestamp`, and the module contains values + // that correspond to `current_time()` in the replay log.
If `current_time() == at_or_before_timestamp` and there + // are several consecutive samples with the same time, the module contains values that correspond to the first of + // these samples. + bool rewind_to_or_before(const time &at_or_before_timestamp) { + assert(initialized); + + // The timestamps in the replay log start from one that is greater than `at_or_before_timestamp`. In this case + // the timestamp will never be reached. Otherwise, this function will always succeed. + assert(index_by_timestamp.size() > 0); + if (at_or_before_timestamp < index_by_timestamp.rbegin()->first) + return false; + + // Find the last complete sample whose timestamp is less than or equal to `at_or_before_timestamp`. Note that + // the comparison function used here is `std::greater`, inverting the direction of `lower_bound`. + auto position_it = index_by_timestamp.lower_bound(at_or_before_timestamp); + assert(position_it != index_by_timestamp.end()); + reader.rewind(position_it->second); + + // Replay samples until eventually arriving to or past `at_or_before_timestamp` or encountering end of file. + while (replay()) { + if (timestamp == at_or_before_timestamp) + break; + + time next_timestamp; + if (!get_next_time(next_timestamp)) + break; + if (next_timestamp > at_or_before_timestamp) + break; + } + return true; + } + + // If this function returns `true`, then `current_pointer()` and `current_time()` are updated for the next sample + // and the module now contains values that correspond to that sample. If it returns `false`, there was no next sample + // to read. + bool replay() { + assert(streaming); + + bool incremental; + auto position = reader.position(); + if (!reader.read_sample(incremental, pointer, timestamp)) + return false; + + // The very first sample that is read must be a complete sample. This is required for the rewind functions to work. 
+ assert(initialized || !incremental); + + // It is possible (though not very useful) to have several complete samples with the same timestamp in a row. + // Ensure that we associate the timestamp with the position of the first such complete sample. (This condition + // works because the player never jumps over a sample.) + if (!incremental && !index_by_pointer.count(pointer)) { + assert(!index_by_timestamp.count(timestamp)); + index_by_pointer[pointer] = position; + index_by_timestamp[timestamp] = position; + } + + uint32_t header; + spool::ident_t ident; + variable var; + while (reader.read_change_header(header, ident)) { + variable &var = variables.at(ident); + reader.read_change_data(header, var.chunks, var.depth, var.curr); + } + return true; + } +}; + +} + +#endif From bc9206f0f58b04804716cbffe400ecd04535a614 Mon Sep 17 00:00:00 2001 From: Catherine Date: Thu, 21 Dec 2023 02:02:39 +0000 Subject: [PATCH 07/13] write_verilog: emit `casez` as `if/elif/else` whenever possible. --- backends/verilog/verilog_backend.cc | 87 +++++++++++++++++++++++------ 1 file changed, 70 insertions(+), 17 deletions(-) diff --git a/backends/verilog/verilog_backend.cc b/backends/verilog/verilog_backend.cc index fd70695d3..9ff2c5c86 100644 --- a/backends/verilog/verilog_backend.cc +++ b/backends/verilog/verilog_backend.cc @@ -376,7 +376,7 @@ void dump_sigspec(std::ostream &f, const RTLIL::SigSpec &sig) } } -void dump_attributes(std::ostream &f, std::string indent, dict &attributes, char term = '\n', bool modattr = false, bool regattr = false, bool as_comment = false) +void dump_attributes(std::ostream &f, std::string indent, dict &attributes, std::string term = "\n", bool modattr = false, bool regattr = false, bool as_comment = false) { if (noattr) return; @@ -392,13 +392,13 @@ void dump_attributes(std::ostream &f, std::string indent, dictsecond, -1, 0, false, as_comment); - f << stringf(" %s%c", as_comment ? "*/" : "*)", term); + f << stringf(" %s%s", as_comment ? 
"*/" : "*)", term.c_str()); } } void dump_wire(std::ostream &f, std::string indent, RTLIL::Wire *wire) { - dump_attributes(f, indent, wire->attributes, '\n', /*modattr=*/false, /*regattr=*/reg_wires.count(wire->name)); + dump_attributes(f, indent, wire->attributes, "\n", /*modattr=*/false, /*regattr=*/reg_wires.count(wire->name)); #if 0 if (wire->port_input && !wire->port_output) f << stringf("%s" "input %s", indent.c_str(), reg_wires.count(wire->name) ? "reg " : ""); @@ -989,7 +989,7 @@ void dump_cell_expr_uniop(std::ostream &f, std::string indent, RTLIL::Cell *cell f << stringf("%s" "assign ", indent.c_str()); dump_sigspec(f, cell->getPort(ID::Y)); f << stringf(" = %s ", op.c_str()); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); dump_cell_expr_port(f, cell, "A", true); f << stringf(";\n"); } @@ -1001,7 +1001,7 @@ void dump_cell_expr_binop(std::ostream &f, std::string indent, RTLIL::Cell *cell f << stringf(" = "); dump_cell_expr_port(f, cell, "A", true); f << stringf(" %s ", op.c_str()); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); dump_cell_expr_port(f, cell, "B", true); f << stringf(";\n"); } @@ -1048,7 +1048,7 @@ bool dump_cell_expr(std::ostream &f, std::string indent, RTLIL::Cell *cell) dump_sigspec(f, cell->getPort(ID::Y)); f << stringf(" = "); f << stringf("~"); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); dump_cell_expr_port(f, cell, "A", false); f << stringf(";\n"); return true; @@ -1068,7 +1068,7 @@ bool dump_cell_expr(std::ostream &f, std::string indent, RTLIL::Cell *cell) f << stringf("|"); if (cell->type.in(ID($_XOR_), ID($_XNOR_))) f << stringf("^"); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); f << stringf(" "); if (cell->type.in(ID($_ANDNOT_), ID($_ORNOT_))) f << stringf("~("); @@ -1085,7 +1085,7 @@ bool dump_cell_expr(std::ostream &f, 
std::string indent, RTLIL::Cell *cell) f << stringf(" = "); dump_cell_expr_port(f, cell, "S", false); f << stringf(" ? "); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); dump_cell_expr_port(f, cell, "B", false); f << stringf(" : "); dump_cell_expr_port(f, cell, "A", false); @@ -1099,7 +1099,7 @@ bool dump_cell_expr(std::ostream &f, std::string indent, RTLIL::Cell *cell) f << stringf(" = !("); dump_cell_expr_port(f, cell, "S", false); f << stringf(" ? "); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); dump_cell_expr_port(f, cell, "B", false); f << stringf(" : "); dump_cell_expr_port(f, cell, "A", false); @@ -1115,7 +1115,7 @@ bool dump_cell_expr(std::ostream &f, std::string indent, RTLIL::Cell *cell) f << stringf(cell->type == ID($_AOI3_) ? " & " : " | "); dump_cell_expr_port(f, cell, "B", false); f << stringf(cell->type == ID($_AOI3_) ? ") |" : ") &"); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); f << stringf(" "); dump_cell_expr_port(f, cell, "C", false); f << stringf(");\n"); @@ -1130,7 +1130,7 @@ bool dump_cell_expr(std::ostream &f, std::string indent, RTLIL::Cell *cell) f << stringf(cell->type == ID($_AOI4_) ? " & " : " | "); dump_cell_expr_port(f, cell, "B", false); f << stringf(cell->type == ID($_AOI4_) ? ") |" : ") &"); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); f << stringf(" ("); dump_cell_expr_port(f, cell, "C", false); f << stringf(cell->type == ID($_AOI4_) ? 
" & " : " | "); @@ -1232,7 +1232,7 @@ bool dump_cell_expr(std::ostream &f, std::string indent, RTLIL::Cell *cell) f << stringf("%s" "assign ", indent.c_str()); dump_sigspec(f, cell->getPort(ID::Y)); f << stringf(" = $signed(%s) / ", buf_num.c_str()); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); f << stringf("$signed(%s);\n", buf_b.c_str()); return true; } else { @@ -1255,7 +1255,7 @@ bool dump_cell_expr(std::ostream &f, std::string indent, RTLIL::Cell *cell) f << stringf("%s" "wire [%d:0] %s = ", indent.c_str(), GetSize(cell->getPort(ID::A))-1, temp_id.c_str()); dump_cell_expr_port(f, cell, "A", true); f << stringf(" %% "); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); dump_cell_expr_port(f, cell, "B", true); f << stringf(";\n"); @@ -1330,7 +1330,7 @@ bool dump_cell_expr(std::ostream &f, std::string indent, RTLIL::Cell *cell) f << stringf(" = "); dump_sigspec(f, cell->getPort(ID::S)); f << stringf(" ? 
"); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); dump_sigspec(f, cell->getPort(ID::B)); f << stringf(" : "); dump_sigspec(f, cell->getPort(ID::A)); @@ -1439,7 +1439,7 @@ bool dump_cell_expr(std::ostream &f, std::string indent, RTLIL::Cell *cell) f << stringf(" = "); dump_const(f, cell->parameters.at(ID::LUT)); f << stringf(" >> "); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); dump_sigspec(f, cell->getPort(ID::A)); f << stringf(";\n"); return true; @@ -1971,6 +1971,56 @@ void dump_case_body(std::ostream &f, std::string indent, RTLIL::CaseRule *cs, bo f << stringf("%s" "end\n", indent.c_str()); } +bool dump_proc_switch_ifelse(std::ostream &f, std::string indent, RTLIL::SwitchRule *sw) +{ + for (auto it = sw->cases.begin(); it != sw->cases.end(); ++it) { + if ((*it)->compare.size() == 0) { + break; + } else if ((*it)->compare.size() == 1) { + int case_index = it - sw->cases.begin(); + SigSpec compare = (*it)->compare.at(0); + if (case_index >= compare.size()) + return false; + if (compare[case_index] != State::S1) + return false; + for (int bit_index = 0; bit_index < compare.size(); bit_index++) + if (bit_index != case_index && compare[bit_index] != State::Sa) + return false; + } else { + return false; + } + } + + f << indent; + auto sig_it = sw->signal.begin(); + for (auto it = sw->cases.begin(); it != sw->cases.end(); ++it, ++sig_it) { + bool had_newline = true; + if (it != sw->cases.begin()) { + if ((*it)->compare.empty()) { + f << indent << "else\n"; + had_newline = true; + } else { + f << indent << "else "; + had_newline = false; + } + } + if (!(*it)->compare.empty()) { + if (!(*it)->attributes.empty()) { + if (!had_newline) + f << "\n" << indent; + dump_attributes(f, "", (*it)->attributes, "\n" + indent); + } + f << stringf("if ("); + dump_sigspec(f, *sig_it); + f << stringf(")\n"); + } + dump_case_body(f, indent, *it); + if ((*it)->compare.empty()) + break; 
+ } + return true; +} + void dump_proc_switch(std::ostream &f, std::string indent, RTLIL::SwitchRule *sw) { if (sw->signal.size() == 0) { @@ -1983,6 +2033,9 @@ void dump_proc_switch(std::ostream &f, std::string indent, RTLIL::SwitchRule *sw return; } + if (dump_proc_switch_ifelse(f, indent, sw)) + return; + dump_attributes(f, indent, sw->attributes); f << stringf("%s" "casez (", indent.c_str()); dump_sigspec(f, sw->signal); @@ -1990,7 +2043,7 @@ void dump_proc_switch(std::ostream &f, std::string indent, RTLIL::SwitchRule *sw for (auto it = sw->cases.begin(); it != sw->cases.end(); ++it) { bool got_default = false; - dump_attributes(f, indent + " ", (*it)->attributes, '\n', /*modattr=*/false, /*regattr=*/false, /*as_comment=*/true); + dump_attributes(f, indent + " ", (*it)->attributes, "\n", /*modattr=*/false, /*regattr=*/false, /*as_comment=*/true); if ((*it)->compare.size() == 0) { f << stringf("%s default", indent.c_str()); got_default = true; @@ -2173,7 +2226,7 @@ void dump_module(std::ostream &f, std::string indent, RTLIL::Module *module) } } - dump_attributes(f, indent, module->attributes, '\n', /*modattr=*/true); + dump_attributes(f, indent, module->attributes, "\n", /*modattr=*/true); f << stringf("%s" "module %s(", indent.c_str(), id(module->name, false).c_str()); bool keep_running = true; int cnt = 0; From e131a7895a8b0bd854f94be5d8440c66f737c6c6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 10 Jan 2024 00:16:19 +0000 Subject: [PATCH 08/13] Bump version --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 946c323e7..c6d117fc5 100644 --- a/Makefile +++ b/Makefile @@ -141,7 +141,7 @@ LDLIBS += -lrt endif endif -YOSYS_VER := 0.36+72 +YOSYS_VER := 0.36+79 # Note: We arrange for .gitcommit to contain the (short) commit hash in # tarballs generated with git-archive(1) using .gitattributes. 
The git repo From 2cab4ff17368156a73d9357f1ec5bd39d75ebdd6 Mon Sep 17 00:00:00 2001 From: Dag Lem Date: Fri, 4 Aug 2023 23:45:47 +0200 Subject: [PATCH 09/13] Correction and optimization of nowrshmsk This makes tests/verilog/dynamic_range_lhs.v pass, after ensuring that nowrshmsk is actually tested. Stride is extracted from indexing of two-dimensional packed arrays and variable slices of the form dst[i*stride +: width] = src, and is used to optimize the generated CASE block. Also uses less confusing variable names for indexing of lhs wires. --- frontends/ast/simplify.cc | 156 +++++++++++++++++++++--------- tests/verilog/dynamic_range_lhs.v | 2 +- 2 files changed, 113 insertions(+), 45 deletions(-) diff --git a/frontends/ast/simplify.cc b/frontends/ast/simplify.cc index dfa1ed6af..98a922ff4 100644 --- a/frontends/ast/simplify.cc +++ b/frontends/ast/simplify.cc @@ -35,12 +35,25 @@ #include #include #include +// For std::gcd in C++17 +// #include <numeric> YOSYS_NAMESPACE_BEGIN using namespace AST; using namespace AST_INTERNAL; +// gcd computed by Euclidean division. +// To be replaced by C++17 std::gcd +template<class I> I gcd(I a, I b) { + while (b != 0) { + I tmp = b; + b = a%b; + a = tmp; + } + return std::abs(a); +} + void AstNode::set_in_lvalue_flag(bool flag, bool no_descend) { if (flag != in_lvalue_from_above) { @@ -2818,27 +2831,12 @@ bool AstNode::simplify(bool const_fold, int stage, int width_hint, bool sign_hin if (!children[0]->id2ast->range_valid) goto skip_dynamic_range_lvalue_expansion; - int source_width = children[0]->id2ast->range_left - children[0]->id2ast->range_right + 1; - int source_offset = children[0]->id2ast->range_right; - int result_width = 1; - int stride = 1; AST::AstNode *member_node = get_struct_member(children[0]); - if (member_node) { - // Clamp chunk to range of member within struct/union.
- log_assert(!source_offset && !children[0]->id2ast->range_swapped); - source_width = member_node->range_left - member_node->range_right + 1; - - // When the (* nowrshmsk *) attribute is set, a CASE block is generated below - // to select the indexed bit slice. When a multirange array is indexed, the - // start of each possible slice is separated by the bit stride of the last - // index dimension, and we can optimize the CASE block accordingly. - // The dimension of the original array expression is saved in the 'integer' field. - int dims = children[0]->integer; - stride = source_width; - for (int dim = 0; dim < dims; dim++) { - stride /= get_struct_range_width(member_node, dim); - } - } + int wire_width = member_node ? + member_node->range_left - member_node->range_right + 1 : + children[0]->id2ast->range_left - children[0]->id2ast->range_right + 1; + int wire_offset = children[0]->id2ast->range_right; + int result_width = 1; AstNode *shift_expr = NULL; AstNode *range = children[0]->children[0]; @@ -2851,30 +2849,102 @@ bool AstNode::simplify(bool const_fold, int stage, int width_hint, bool sign_hin else shift_expr = range->children[0]->clone(); - bool use_case_method = false; - - if (children[0]->id2ast->attributes.count(ID::nowrshmsk)) { - AstNode *node = children[0]->id2ast->attributes.at(ID::nowrshmsk); - while (node->simplify(true, stage, -1, false)) { } - if (node->type != AST_CONSTANT) - input_error("Non-constant value for `nowrshmsk' attribute on `%s'!\n", children[0]->id2ast->str.c_str()); - if (node->asAttrConst().as_bool()) - use_case_method = true; - } + bool use_case_method = children[0]->id2ast->get_bool_attribute(ID::nowrshmsk); if (!use_case_method && current_always->detect_latch(children[0]->str)) use_case_method = true; - if (use_case_method) - { + if (use_case_method) { // big case block + int stride = 1; + long long bitno_div = stride; + + int case_width_hint; + bool case_sign_hint; + shift_expr->detectSignWidth(case_width_hint, 
case_sign_hint); + int max_width = case_width_hint; + + if (member_node) { // Member in packed struct/union + // Clamp chunk to range of member within struct/union. + log_assert(!wire_offset && !children[0]->id2ast->range_swapped); + + // When the (* nowrshmsk *) attribute is set, a CASE block is generated below + // to select the indexed bit slice. When a multirange array is indexed, the + // start of each possible slice is separated by the bit stride of the last + // index dimension, and we can optimize the CASE block accordingly. + // The dimension of the original array expression is saved in the 'integer' field. + int dims = children[0]->integer; + stride = wire_width; + for (int dim = 0; dim < dims; dim++) { + stride /= get_struct_range_width(member_node, dim); + } + bitno_div = stride; + } else { + // Extract (index)*(width) from non_opt_range pattern ((@selfsz@((index)*(width)))+(0)). + AstNode *lsb_expr = + shift_expr->type == AST_ADD && shift_expr->children[0]->type == AST_SELFSZ && + shift_expr->children[1]->type == AST_CONSTANT && shift_expr->children[1]->integer == 0 ? + shift_expr->children[0]->children[0] : + shift_expr; + + // Extract stride from indexing of two-dimensional packed arrays and + // variable slices on the form dst[i*stride +: width] = src. + if (lsb_expr->type == AST_MUL && + (lsb_expr->children[0]->type == AST_CONSTANT || + lsb_expr->children[1]->type == AST_CONSTANT)) + { + int stride_ix = lsb_expr->children[1]->type == AST_CONSTANT; + stride = (int)lsb_expr->children[stride_ix]->integer; + bitno_div = stride != 0 ? stride : 1; + + // Check whether i*stride can overflow. + int i_width; + bool i_sign; + lsb_expr->children[1 - stride_ix]->detectSignWidth(i_width, i_sign); + int stride_width; + bool stride_sign; + lsb_expr->children[stride_ix]->detectSignWidth(stride_width, stride_sign); + max_width = std::max(i_width, stride_width); + // Stride width calculated from actual stride value. 
+ stride_width = std::ceil(std::log2(std::abs(stride))); + + if (i_width + stride_width > max_width) { + // For (truncated) i*stride to be within the range of dst, the following must hold: + // i*stride ≡ bitno (mod shift_mod), i.e. + // i*stride = k*shift_mod + bitno + // + // The Diophantine equation on the form ax + by = c: + // stride*i - shift_mod*k = bitno + // has solutions iff c is a multiple of d = gcd(a, b), i.e. + // bitno mod gcd(stride, shift_mod) = 0 + // + // long long is at least 64 bits in C++11 + long long shift_mod = 1ll << (max_width - case_sign_hint); + // std::gcd requires C++17 + // bitno_div = std::gcd(stride, shift_mod); + bitno_div = gcd((long long)stride, shift_mod); + } + } + } + + // long long is at least 64 bits in C++11 + long long max_offset = (1ll << (max_width - case_sign_hint)) - 1; + long long min_offset = case_sign_hint ? -(1ll << (max_width - 1)) : 0; + did_something = true; newNode = new AstNode(AST_CASE, shift_expr); - for (int i = 0; i < source_width; i += stride) { - int start_bit = source_offset + i; - int end_bit = std::min(start_bit+result_width,source_width) - 1; - AstNode *cond = new AstNode(AST_COND, mkconst_int(start_bit, true)); + for (int i = 1 - result_width; i < wire_width; i++) { + // Out of range indexes are handled in genrtlil.cc + int start_bit = wire_offset + i; + int end_bit = start_bit + result_width - 1; + // Check whether the current index can be generated by shift_expr. 
+ if (start_bit < min_offset || start_bit > max_offset) + continue; + if (start_bit%bitno_div != 0 || (stride == 0 && start_bit != 0)) + continue; + + AstNode *cond = new AstNode(AST_COND, mkconst_int(start_bit, case_sign_hint, max_width)); AstNode *lvalue = children[0]->clone(); lvalue->delete_children(); if (member_node) @@ -2884,19 +2954,17 @@ bool AstNode::simplify(bool const_fold, int stage, int width_hint, bool sign_hin cond->children.push_back(new AstNode(AST_BLOCK, new AstNode(type, lvalue, children[1]->clone()))); newNode->children.push_back(cond); } - } - else - { - // mask and shift operations, disabled for now + } else { + // mask and shift operations - AstNode *wire_mask = new AstNode(AST_WIRE, new AstNode(AST_RANGE, mkconst_int(source_width-1, true), mkconst_int(0, true))); + AstNode *wire_mask = new AstNode(AST_WIRE, new AstNode(AST_RANGE, mkconst_int(wire_width-1, true), mkconst_int(0, true))); wire_mask->str = stringf("$bitselwrite$mask$%s:%d$%d", RTLIL::encode_filename(filename).c_str(), location.first_line, autoidx++); wire_mask->set_attribute(ID::nosync, AstNode::mkconst_int(1, false)); wire_mask->is_logic = true; while (wire_mask->simplify(true, 1, -1, false)) { } current_ast_mod->children.push_back(wire_mask); - AstNode *wire_data = new AstNode(AST_WIRE, new AstNode(AST_RANGE, mkconst_int(source_width-1, true), mkconst_int(0, true))); + AstNode *wire_data = new AstNode(AST_WIRE, new AstNode(AST_RANGE, mkconst_int(wire_width-1, true), mkconst_int(0, true))); wire_data->str = stringf("$bitselwrite$data$%s:%d$%d", RTLIL::encode_filename(filename).c_str(), location.first_line, autoidx++); wire_data->set_attribute(ID::nosync, AstNode::mkconst_int(1, false)); wire_data->is_logic = true; @@ -2952,12 +3020,12 @@ bool AstNode::simplify(bool const_fold, int stage, int width_hint, bool sign_hin shamt = new AstNode(AST_TO_SIGNED, shamt); // offset the shift amount by the lower bound of the dimension - int start_bit = source_offset; + int start_bit = 
wire_offset; shamt = new AstNode(AST_SUB, shamt, mkconst_int(start_bit, true)); // reflect the shift amount if the dimension is swapped if (children[0]->id2ast->range_swapped) - shamt = new AstNode(AST_SUB, mkconst_int(source_width - result_width, true), shamt); + shamt = new AstNode(AST_SUB, mkconst_int(wire_width - result_width, true), shamt); // AST_SHIFT uses negative amounts for shifting left shamt = new AstNode(AST_NEG, shamt); diff --git a/tests/verilog/dynamic_range_lhs.v b/tests/verilog/dynamic_range_lhs.v index ae291374d..56fe3ef3b 100644 --- a/tests/verilog/dynamic_range_lhs.v +++ b/tests/verilog/dynamic_range_lhs.v @@ -1,6 +1,6 @@ module gate( - output reg [`LEFT:`RIGHT] out_u, out_s, (* nowrshmsk = `ALT *) + output reg [`LEFT:`RIGHT] out_u, out_s, input wire data, input wire [1:0] sel1, sel2 ); From a105d2c050eda59a2bbc7af14190ed2b050dc061 Mon Sep 17 00:00:00 2001 From: Dag Lem Date: Wed, 22 Nov 2023 06:52:04 +0100 Subject: [PATCH 10/13] Add torture test for (* nowrshmsk *) stride optimization --- tests/various/dynamic_part_select.ys | 18 +++++++++++++++++ .../forloop_select_nowrshmsk.v | 20 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 tests/various/dynamic_part_select/forloop_select_nowrshmsk.v diff --git a/tests/various/dynamic_part_select.ys b/tests/various/dynamic_part_select.ys index 2dc061e89..9e303b9db 100644 --- a/tests/various/dynamic_part_select.ys +++ b/tests/various/dynamic_part_select.ys @@ -69,6 +69,24 @@ design -copy-from gate -as gate gate miter -equiv -make_assert -make_outcmp -flatten gold gate equiv sat -prove-asserts -seq 10 -show-public -verify -set-init-zero equiv +### For-loop select, one dynamic input, (* nowrshmsk *) +design -reset +read_verilog ./dynamic_part_select/forloop_select_nowrshmsk.v +proc +rename -top gold +design -stash gold + +read_verilog ./dynamic_part_select/forloop_select_gate.v +proc +rename -top gate +design -stash gate + +design -copy-from gold -as gold gold +design -copy-from 
gate -as gate gate + +miter -equiv -make_assert -make_outcmp -flatten gold gate equiv +sat -prove-asserts -seq 10 -show-public -verify -set-init-zero equiv + #### Double loop (part-select, reset) ### design -reset read_verilog ./dynamic_part_select/reset_test.v diff --git a/tests/various/dynamic_part_select/forloop_select_nowrshmsk.v b/tests/various/dynamic_part_select/forloop_select_nowrshmsk.v new file mode 100644 index 000000000..75415c313 --- /dev/null +++ b/tests/various/dynamic_part_select/forloop_select_nowrshmsk.v @@ -0,0 +1,20 @@ +`default_nettype none +module forloop_select #(parameter WIDTH=16, SELW=4, CTRLW=$clog2(WIDTH), DINW=2**SELW) + (input wire clk, + input wire [CTRLW-1:0] ctrl, + input wire [DINW-1:0] din, + input wire en, + (* nowrshmsk *) + output reg [WIDTH-1:0] dout); + + reg [SELW:0] sel; + localparam SLICE = WIDTH/(SELW**2); + + always @(posedge clk) + begin + if (en) begin + for (sel = 0; sel <= 4'hf; sel=sel+1'b1) + dout[(ctrl*sel)+:SLICE] <= din; + end + end +endmodule From dbec704b49aebd8f1ac5ff9ed36b357cd9b99973 Mon Sep 17 00:00:00 2001 From: Dag Lem Date: Thu, 7 Dec 2023 13:45:56 +0100 Subject: [PATCH 11/13] Include x bits in test of lhs dynamic part-select --- tests/verilog/dynamic_range_lhs.sh | 2 +- tests/verilog/dynamic_range_lhs.v | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/verilog/dynamic_range_lhs.sh b/tests/verilog/dynamic_range_lhs.sh index 77b4a2918..f36c74bd2 100755 --- a/tests/verilog/dynamic_range_lhs.sh +++ b/tests/verilog/dynamic_range_lhs.sh @@ -15,7 +15,7 @@ run() { -p "read_verilog dynamic_range_lhs.v" \ -p "proc" \ -p "equiv_make gold gate equiv" \ - -p "equiv_simple" \ + -p "equiv_simple -undef" \ -p "equiv_status -assert" } diff --git a/tests/verilog/dynamic_range_lhs.v b/tests/verilog/dynamic_range_lhs.v index 56fe3ef3b..6eb952165 100644 --- a/tests/verilog/dynamic_range_lhs.v +++ b/tests/verilog/dynamic_range_lhs.v @@ -5,8 +5,8 @@ module gate( input wire [1:0] sel1, sel2 ); 
always @* begin - out_u = 0; - out_s = 0; + out_u = 'x; + out_s = 'x; case (`SPAN) 1: begin out_u[sel1*sel2] = data; @@ -43,8 +43,8 @@ task set; out_s[b] = data; endtask always @* begin - out_u = 0; - out_s = 0; + out_u = 'x; + out_s = 'x; case (sel1*sel2) 2'b00: set(0, 0); 2'b01: set(1, 1); From 1a2b4759e85928d305f9b082d30f0b8a2fc46e0e Mon Sep 17 00:00:00 2001 From: Dag Lem Date: Fri, 8 Dec 2023 20:47:43 +0100 Subject: [PATCH 12/13] Assign from rvalue via temporary register in nowrshmsk CASE Avoid repeating complex rvalue expressions for each condition. --- frontends/ast/ast.cc | 19 +++++++++++++++++++ frontends/ast/ast.h | 3 +++ frontends/ast/simplify.cc | 16 +++++++++++++--- 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/frontends/ast/ast.cc b/frontends/ast/ast.cc index 34e624993..4b2b7a822 100644 --- a/frontends/ast/ast.cc +++ b/frontends/ast/ast.cc @@ -850,6 +850,25 @@ AstNode *AstNode::mkconst_str(const std::string &str) return node; } +// create a temporary register +AstNode *AstNode::mktemp_logic(const std::string &name, AstNode *mod, bool nosync, int range_left, int range_right, bool is_signed) +{ + AstNode *wire = new AstNode(AST_WIRE, new AstNode(AST_RANGE, mkconst_int(range_left, true), mkconst_int(range_right, true))); + wire->str = stringf("%s%s:%d$%d", name.c_str(), RTLIL::encode_filename(filename).c_str(), location.first_line, autoidx++); + if (nosync) + wire->set_attribute(ID::nosync, AstNode::mkconst_int(1, false)); + wire->is_signed = is_signed; + wire->is_logic = true; + mod->children.push_back(wire); + while (wire->simplify(true, 1, -1, false)) { } + + AstNode *ident = new AstNode(AST_IDENTIFIER); + ident->str = wire->str; + ident->id2ast = wire; + + return ident; +} + bool AstNode::bits_only_01() const { for (auto bit : bits) diff --git a/frontends/ast/ast.h b/frontends/ast/ast.h index f789e930b..97903d0a0 100644 --- a/frontends/ast/ast.h +++ b/frontends/ast/ast.h @@ -321,6 +321,9 @@ namespace AST static AstNode 
*mkconst_str(const std::vector &v); static AstNode *mkconst_str(const std::string &str); + // helper function to create an AST node for a temporary register + AstNode *mktemp_logic(const std::string &name, AstNode *mod, bool nosync, int range_left, int range_right, bool is_signed); + // helper function for creating sign-extended const objects RTLIL::Const bitsAsConst(int width, bool is_signed); RTLIL::Const bitsAsConst(int width = -1); diff --git a/frontends/ast/simplify.cc b/frontends/ast/simplify.cc index 98a922ff4..945f286a1 100644 --- a/frontends/ast/simplify.cc +++ b/frontends/ast/simplify.cc @@ -2932,8 +2932,18 @@ bool AstNode::simplify(bool const_fold, int stage, int width_hint, bool sign_hin long long max_offset = (1ll << (max_width - case_sign_hint)) - 1; long long min_offset = case_sign_hint ? -(1ll << (max_width - 1)) : 0; + // A temporary register holds the result of the (possibly complex) rvalue expression, + // avoiding repetition in each AST_COND below. + int rvalue_width; + bool rvalue_sign; + children[1]->detectSignWidth(rvalue_width, rvalue_sign); + AstNode *rvalue = mktemp_logic("$bitselwrite$rvalue$", current_ast_mod, true, rvalue_width - 1, 0, rvalue_sign); + AstNode *caseNode = new AstNode(AST_CASE, shift_expr); + newNode = new AstNode(AST_BLOCK, + new AstNode(AST_ASSIGN_EQ, rvalue, children[1]->clone()), + caseNode); + did_something = true; - newNode = new AstNode(AST_CASE, shift_expr); for (int i = 1 - result_width; i < wire_width; i++) { // Out of range indexes are handled in genrtlil.cc int start_bit = wire_offset + i; @@ -2951,8 +2961,8 @@ bool AstNode::simplify(bool const_fold, int stage, int width_hint, bool sign_hin lvalue->set_attribute(ID::wiretype, member_node->clone()); lvalue->children.push_back(new AstNode(AST_RANGE, mkconst_int(end_bit, true), mkconst_int(start_bit, true))); - cond->children.push_back(new AstNode(AST_BLOCK, new AstNode(type, lvalue, children[1]->clone()))); - newNode->children.push_back(cond); + 
cond->children.push_back(new AstNode(AST_BLOCK, new AstNode(type, lvalue, rvalue->clone()))); + caseNode->children.push_back(cond); } } else { // mask and shift operations From f26495e54d936830e067e66b91bfac824011897c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 11 Jan 2024 00:16:28 +0000 Subject: [PATCH 13/13] Bump version --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c6d117fc5..8bde98a1c 100644 --- a/Makefile +++ b/Makefile @@ -141,7 +141,7 @@ LDLIBS += -lrt endif endif -YOSYS_VER := 0.36+79 +YOSYS_VER := 0.36+85 # Note: We arrange for .gitcommit to contain the (short) commit hash in # tarballs generated with git-archive(1) using .gitattributes. The git repo