diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml index 048457234..2b48b7252 100644 --- a/.github/workflows/test-macos.yml +++ b/.github/workflows/test-macos.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: os: - - { id: macos-11, name: 'Big Sur' } + - { id: macos-13, name: 'Ventura' } cpp_std: - 'c++11' - 'c++17' diff --git a/Makefile b/Makefile index 3da89690f..8bde98a1c 100644 --- a/Makefile +++ b/Makefile @@ -141,7 +141,7 @@ LDLIBS += -lrt endif endif -YOSYS_VER := 0.36+67 +YOSYS_VER := 0.36+85 # Note: We arrange for .gitcommit to contain the (short) commit hash in # tarballs generated with git-archive(1) using .gitattributes. The git repo diff --git a/backends/cxxrtl/runtime/cxxrtl/cxxrtl.h b/backends/cxxrtl/runtime/cxxrtl/cxxrtl.h index 183fbb2c7..3f8247226 100644 --- a/backends/cxxrtl/runtime/cxxrtl/cxxrtl.h +++ b/backends/cxxrtl/runtime/cxxrtl/cxxrtl.h @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -145,7 +146,7 @@ struct value : public expr_base> { // These functions ensure that a conversion is never out of range, and should be always used, if at all // possible, instead of direct manipulation of the `data` member. For very large types, .slice() and // .concat() can be used to split them into more manageable parts. 
- template + template::value, int>::type = 0> CXXRTL_ALWAYS_INLINE IntegerT get() const { static_assert(std::numeric_limits::is_integer && !std::numeric_limits::is_signed, @@ -158,15 +159,32 @@ struct value : public expr_base> { return result; } - template + template::value, int>::type = 0> CXXRTL_ALWAYS_INLINE - void set(IntegerT other) { + IntegerT get() const { + auto unsigned_result = get::type>(); + IntegerT result; + memcpy(&result, &unsigned_result, sizeof(IntegerT)); + return result; + } + + template::value, int>::type = 0> + CXXRTL_ALWAYS_INLINE + void set(IntegerT value) { static_assert(std::numeric_limits::is_integer && !std::numeric_limits::is_signed, "set() requires T to be an unsigned integral type"); static_assert(std::numeric_limits::digits >= Bits, "set() requires the value to be at least as wide as T is"); for (size_t n = 0; n < chunks; n++) - data[n] = (other >> (n * chunk::bits)) & chunk::mask; + data[n] = (value >> (n * chunk::bits)) & chunk::mask; + } + + template::value, int>::type = 0> + CXXRTL_ALWAYS_INLINE + void set(IntegerT value) { + typename std::make_unsigned::type unsigned_value; + memcpy(&unsigned_value, &value, sizeof(IntegerT)); + set(unsigned_value); } // Operations with compile-time parameters. diff --git a/backends/cxxrtl/runtime/cxxrtl/cxxrtl_replay.h b/backends/cxxrtl/runtime/cxxrtl/cxxrtl_replay.h new file mode 100644 index 000000000..94f59bb0d --- /dev/null +++ b/backends/cxxrtl/runtime/cxxrtl/cxxrtl_replay.h @@ -0,0 +1,785 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2023 Catherine + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#ifndef CXXRTL_REPLAY_H +#define CXXRTL_REPLAY_H + +#if !defined(WIN32) +#include +#define O_BINARY 0 +#else +#include +#endif + +#include +#include +#include +#include +#include + +#include +#include + +// Theory of operation +// =================== +// +// Log format +// ---------- +// +// The replay log is a simple data format based on a sequence of 32-bit words. The following BNF-like grammar describes +// enough detail to understand the overall structure of the log data and be able to read hex dumps. For a greater +// degree of detail see the source code. The format is considered fully internal to CXXRTL and is subject to change +// without notice. +//
+//   <file>           ::= <file-header> <definitions> <sample>+
+//   <file-header>    ::= 0x52585843 0x00004c54
+//   <definitions>    ::= <packet-define>* <packet-end>
+//   <sample>         ::= <packet-sample> <packet-change>* <packet-end>
+//   <packet-define>  ::= 0xc0000000 ...
+//   <packet-sample>  ::= 0xc0000001 ...
+//   <packet-change>  ::= 0x0??????? <chunk>+ | 0x1??????? <index> <chunk>+ | 0x2??????? | 0x3???????
+//   <chunk>, <index> ::= 0x????????
+//   <packet-end>     ::= 0xFFFFFFFF
+// +// The replay log contains sample data, however, it does not cover the entire design. Rather, it only contains sample +// data for the subset of debug items containing _design state_: inputs and registers/latches. This keeps its size to +// a minimum, and recording speed to a maximum. The player samples any missing data by setting the design state items +// to the same values they had during recording, and re-evaluating the design. +// +// Limits +// ------ +// +// The log may contain: +// +// * Up to 2**28-1 debug items containing design state. +// * Up to 2**32 chunks per debug item. +// * Up to 2**32 rows per memory. +// * Up to 2**32 samples. +// +// Of these limits, the last two are most likely to be eventually exceeded by practical recordings.
However, other +// performance considerations will likely limit the size of such practical recordings first, so the log data format +// will undergo a breaking change at that point. +// +// Operations +// ---------- +// +// As suggested by the name "replay log", this format is designed for recording (writing) once and playing (reading) +// many times afterwards, such that reading the format can be done linearly and quickly. The log format is designed to +// support three primary read operations: +// +// 1. Initialization +// 2. Rewinding (to time T) +// 3. Replaying (for N samples) +// +// During initialization, the player establishes the mapping between debug item names and their 28-bit identifiers in +// the log. It is done once. +// +// During rewinding, the player begins reading at the latest non-incremental sample that still lies before the requested +// sample time. It continues reading incremental samples after that point until it reaches the requested sample time. +// This process is very cheap as the design is not evaluated; it is essentially a (convoluted) memory copy operation. +// +// During replaying, the player evaluates the design at the current time, which causes all debug items to assume +// the values they had before recording. This process is expensive. Once done, the player advances to the next state +// by reading the next (complete or incremental) sample, as above. Since a range of samples is replayed, this process +// is repeated several times in a row. +// +// In principle, when replaying, the player could only read the state of the inputs and the time delta and use a normal +// eval/commit loop to progress the simulation, which is fully deterministic so its calculated design state should be +// exactly the same as the recorded design state. In practice, it is both faster and more reliable (in presence of e.g. +// user-defined black boxes) to read the recorded values instead of calculating them. 
+//
+// Note: The operations described above are conceptual and do not correspond exactly to methods on `cxxrtl::player`.
+// The `cxxrtl::player::replay()` method does not evaluate the design. This is so that delta cycles could be ignored
+// if they are not of interest while replaying.
+
+namespace cxxrtl {
+
+// A spool stores CXXRTL design state changes in a file.
+//
+// The spool object itself only owns two file descriptors for the same file: one opened for appending and one
+// opened for reading. The nested `writer` and `reader` classes each take ownership of one of them (through
+// `take_write()` / `take_read()`) and implement the word-level packet encoding defined by the constants below.
+class spool {
+public:
+	// Unique pointer to a specific sample within a replay log. (Timestamps are not unique.)
+	typedef uint32_t pointer_t;
+
+	// Numeric identifier assigned to a debug item within a replay log. Range limited to [1, MAXIMUM_IDENT].
+	typedef uint32_t ident_t;
+
+	static constexpr uint16_t VERSION = 0x0400;
+
+	// The file header is a 64-bit word: the magic in the low 48 bits, `VERSION` in the high 16 bits.
+	static constexpr uint64_t HEADER_MAGIC = 0x00004c5452585843;
+	static constexpr uint64_t VERSION_MASK = 0xffff000000000000;
+
+	static constexpr uint32_t PACKET_DEFINE = 0xc0000000;
+
+	static constexpr uint32_t PACKET_SAMPLE = 0xc0000001;
+	enum sample_flag : uint32_t {
+		EMPTY = 0,
+		INCREMENTAL = 1,
+	};
+
+	static constexpr uint32_t MAXIMUM_IDENT = 0x0fffffff;
+	static constexpr uint32_t CHANGE_MASK = 0x30000000;
+
+	// Change packets carry the identifier in the low 28 bits and the packet kind in bits 28..29:
+	// CHANGE is followed by all chunks of the item, CHANGEI by a row index and the chunks of that row, while
+	// CHANGEL/CHANGEH have no payload and stand for a single-chunk value of 0/1 respectively.
+	static constexpr uint32_t PACKET_CHANGE = 0x00000000/* | ident */;
+	static constexpr uint32_t PACKET_CHANGEI = 0x10000000/* | ident */;
+	static constexpr uint32_t PACKET_CHANGEL = 0x20000000/* | ident */;
+	static constexpr uint32_t PACKET_CHANGEH = 0x30000000/* | ident */;
+
+	static constexpr uint32_t PACKET_END = 0xffffffff;
+
+	// Writing spools.
+
+	// Buffers 32-bit words in memory and writes them to the spool's write descriptor on `flush()`.
+	class writer {
+		int fd;
+		size_t position; // number of buffered words not yet written to `fd`
+		std::vector<uint32_t> buffer;
+
+		// These functions aren't overloaded because of implicit numeric conversions.
+
+		void emit_word(uint32_t word) {
+			// Flush only once every slot of the buffer has been filled.
+			if (position == buffer.size())
+				flush();
+			buffer[position++] = word;
+		}
+
+		// Emits a 64-bit value as two words, low word first.
+		void emit_dword(uint64_t dword) {
+			emit_word(dword >> 0);
+			emit_word(dword >> 32);
+		}
+
+		void emit_ident(ident_t ident) {
+			assert(ident <= MAXIMUM_IDENT);
+			emit_word(ident);
+		}
+
+		void emit_size(size_t size) {
+			assert(size <= std::numeric_limits<uint32_t>::max());
+			emit_word(size);
+		}
+
+		// Same implementation as `emit_size()`, different declared intent.
+		void emit_index(size_t index) {
+			assert(index <= std::numeric_limits<uint32_t>::max());
+			emit_word(index);
+		}
+
+		void emit_string(std::string str) {
+			// Align to a word boundary, and add at least one terminating \0.
+			str.resize(str.size() + (sizeof(uint32_t) - (str.size() + sizeof(uint32_t)) % sizeof(uint32_t)));
+			for (size_t index = 0; index < str.size(); index += sizeof(uint32_t)) {
+				uint32_t word;
+				memcpy(&word, &str[index], sizeof(uint32_t));
+				emit_word(word);
+			}
+		}
+
+		// Emits the three chunks of the raw timestamp, least significant chunk first.
+		void emit_time(const time &timestamp) {
+			const value<time::bits> &raw_timestamp(timestamp);
+			emit_word(raw_timestamp.data[0]);
+			emit_word(raw_timestamp.data[1]);
+			emit_word(raw_timestamp.data[2]);
+		}
+
+	public:
+		// Creates a writer, and transfers ownership of `fd`, which must be open for appending.
+		//
+		// The buffer size is currently fixed to a "reasonably large" size, determined empirically by measuring writer
+		// performance on a representative design; large but not so large it would e.g. cause address space exhaustion
+		// on 32-bit platforms.
+		//
+		// Any existing contents of the file are discarded.
+		writer(spool &spool) : fd(spool.take_write()), position(0), buffer(32 * 1024 * 1024) {
+			assert(fd != -1);
+#if !defined(WIN32)
+			int result = ftruncate(fd, 0);
+#else
+			int result = _chsize_s(fd, 0);
+#endif
+			assert(result == 0);
+			(void)result; // only inspected by the assertion above
+		}
+
+		// Moves the descriptor and the buffer out of `moved`. The buffer is moved rather than copied: it holds
+		// 32 Mi words, so a copy would allocate and duplicate that much memory for no benefit.
+		writer(writer &&moved) : fd(moved.fd), position(moved.position), buffer(std::move(moved.buffer)) {
+			moved.fd = -1;
+			moved.position = 0;
+		}
+
+		writer(const writer &) = delete;
+		writer &operator=(const writer &) = delete;
+
+		// Both write() calls and fwrite() calls are too expensive to perform implicitly. The API consumer must determine
+		// the optimal time to flush the writer and do that explicitly for best performance.
+		void flush() {
+			assert(fd != -1);
+			const char *cursor = reinterpret_cast<const char *>(buffer.data());
+			size_t remaining = position * sizeof(uint32_t);
+			// `write()` is allowed to transfer fewer bytes than requested; keep going until the whole buffer is out.
+			while (remaining > 0) {
+				auto written = write(fd, cursor, remaining);
+				assert(written > 0);
+				if (written <= 0)
+					break;
+				cursor += written;
+				remaining -= (size_t)written;
+			}
+			position = 0;
+		}
+
+		// Flushes any buffered words and closes the descriptor, unless ownership was moved away.
+		~writer() {
+			if (fd != -1) {
+				flush();
+				close(fd);
+			}
+		}
+
+		void write_magic() {
+			// `CXXRTL` followed by version in binary. This header will read backwards on big-endian machines, which allows
+			// detection of this case, both visually and programmatically.
+			emit_dword(((uint64_t)VERSION << 48) | HEADER_MAGIC);
+		}
+
+		// Writes a definition packet that associates `ident` with part `part_index` of the debug item `name`.
+		void write_define(ident_t ident, const std::string &name, size_t part_index, size_t chunks, size_t depth) {
+			emit_word(PACKET_DEFINE);
+			emit_ident(ident);
+			emit_string(name);
+			emit_index(part_index);
+			emit_size(chunks);
+			emit_size(depth);
+		}
+
+		// Writes the header of a sample; change packets and an end packet are expected to follow.
+		void write_sample(bool incremental, pointer_t pointer, const time &timestamp) {
+			uint32_t flags = (incremental ? sample_flag::INCREMENTAL : 0);
+			emit_word(PACKET_SAMPLE);
+			emit_word(flags);
+			emit_word(pointer);
+			emit_time(timestamp);
+		}
+
+		// Writes a change of a whole (non-indexed) item. Single-chunk values 0 and 1 are encoded without payload.
+		void write_change(ident_t ident, size_t chunks, const chunk_t *data) {
+			assert(ident <= MAXIMUM_IDENT);
+
+			if (chunks == 1 && *data == 0) {
+				emit_word(PACKET_CHANGEL | ident);
+			} else if (chunks == 1 && *data == 1) {
+				emit_word(PACKET_CHANGEH | ident);
+			} else {
+				emit_word(PACKET_CHANGE | ident);
+				for (size_t offset = 0; offset < chunks; offset++)
+					emit_word(data[offset]);
+			}
+		}
+
+		// Writes a change of row `index` of an indexed item; `data` points to the `chunks` chunks of that row.
+		void write_change(ident_t ident, size_t chunks, const chunk_t *data, size_t index) {
+			assert(ident <= MAXIMUM_IDENT);
+
+			emit_word(PACKET_CHANGEI | ident);
+			emit_index(index);
+			for (size_t offset = 0; offset < chunks; offset++)
+				emit_word(data[offset]);
+		}
+
+		void write_end() {
+			emit_word(PACKET_END);
+		}
+	};
+
+	// Reading spools.
+
+	// Reads 32-bit words from the spool's read descriptor through a stdio stream.
+	class reader {
+		FILE *f;
+
+		uint32_t absorb_word() {
+			// If we're at end of file, `fread` will not write to `word`, and `PACKET_END` will be returned.
+			uint32_t word = PACKET_END;
+			fread(&word, sizeof(word), 1, f);
+			return word;
+		}
+
+		// Reads a 64-bit value stored as two words, low word first.
+		uint64_t absorb_dword() {
+			uint32_t lo = absorb_word();
+			uint32_t hi = absorb_word();
+			return ((uint64_t)hi << 32) | lo;
+		}
+
+		ident_t absorb_ident() {
+			ident_t ident = absorb_word();
+			assert(ident <= MAXIMUM_IDENT);
+			return ident;
+		}
+
+		size_t absorb_size() {
+			return absorb_word();
+		}
+
+		size_t absorb_index() {
+			return absorb_word();
+		}
+
+		std::string absorb_string() {
+			std::string str;
+			do {
+				uint32_t word = absorb_word();
+				// At end of file (or after a read error) `absorb_word()` keeps returning `PACKET_END`, which has no
+				// zero byte in it; stop here instead of growing the string without bound on a truncated log.
+				if (feof(f) || ferror(f))
+					break;
+				size_t end = str.size();
+				str.resize(end + 4);
+				memcpy(&str[end], &word, sizeof(uint32_t));
+			} while (str.back() != '\0');
+			// Strings have no embedded zeroes besides the terminating one(s).
+			return str.substr(0, str.find('\0'));
+		}
+
+		// Reads the three chunks of a raw timestamp, least significant chunk first.
+		time absorb_time() {
+			value<time::bits> raw_timestamp;
+			raw_timestamp.data[0] = absorb_word();
+			raw_timestamp.data[1] = absorb_word();
+			raw_timestamp.data[2] = absorb_word();
+			return time(raw_timestamp);
+		}
+
+	public:
+		typedef uint64_t pos_t;
+
+		// Creates a reader, and transfers ownership of `fd`, which must be open for reading.
+		//
+		// The stream is opened in binary mode: the log is raw words, and a text-mode stream (the default on
+		// Windows when "b" is omitted) would translate bytes while reading. On POSIX "b" has no effect.
+		reader(spool &spool) : f(fdopen(spool.take_read(), "rb")) {
+			assert(f != nullptr);
+		}
+
+		reader(reader &&moved) : f(moved.f) {
+			moved.f = nullptr;
+		}
+
+		reader(const reader &) = delete;
+		reader &operator=(const reader &) = delete;
+
+		~reader() {
+			if (f != nullptr)
+				fclose(f);
+		}
+
+		// Returns the current offset in the file; it can be passed to `rewind()` later.
+		pos_t position() {
+			return ftell(f);
+		}
+
+		// Seeks to an offset previously returned by `position()`.
+		void rewind(pos_t position) {
+			fseek(f, position, SEEK_SET);
+		}
+
+		void read_magic() {
+			uint64_t magic = absorb_dword();
+			assert((magic & ~VERSION_MASK) == HEADER_MAGIC);
+			assert((magic >> 48) == VERSION);
+		}
+
+		// Reads one definition packet. Returns `false` (leaving the outputs untouched) on an end packet.
+		bool read_define(ident_t &ident, std::string &name, size_t &part_index, size_t &chunks, size_t &depth) {
+			uint32_t header = absorb_word();
+			if (header == PACKET_END)
+				return false;
+			assert(header == PACKET_DEFINE);
+			ident = absorb_ident();
+			name = absorb_string();
+			part_index = absorb_index();
+			chunks = absorb_size();
+			depth = absorb_size();
+			return true;
+		}
+
+		// Reads one sample header. Returns `false` (leaving the outputs untouched) on an end packet, which is
+		// also what `absorb_word()` produces at end of file.
+		bool read_sample(bool &incremental, pointer_t &pointer, time &timestamp) {
+			uint32_t header = absorb_word();
+			if (header == PACKET_END)
+				return false;
+			assert(header == PACKET_SAMPLE);
+			uint32_t flags = absorb_word();
+			incremental = (flags & sample_flag::INCREMENTAL);
+			pointer = absorb_word();
+			timestamp = absorb_time();
+			return true;
+		}
+
+		// Reads the first word of a change packet and extracts the identifier from it. Returns `false` on an end
+		// packet. `header` must be passed on to `read_change_data()` to consume the payload.
+		bool read_change_header(uint32_t &header, ident_t &ident) {
+			header = absorb_word();
+			if (header == PACKET_END)
+				return false;
+			assert((header & ~(CHANGE_MASK | MAXIMUM_IDENT)) == 0);
+			ident = header & MAXIMUM_IDENT;
+			return true;
+		}
+
+		// Reads the payload of the change packet whose first word is `header` and stores it into `data`, which
+		// points to the first chunk of an item with `chunks` chunks per row and `depth` rows.
+		void read_change_data(uint32_t header, size_t chunks, size_t depth, chunk_t *data) {
+			uint32_t index = 0;
+			switch (header & CHANGE_MASK) {
+				case PACKET_CHANGEL:
+					*data = 0;
+					return;
+				case PACKET_CHANGEH:
+					*data = 1;
+					return;
+				case PACKET_CHANGE:
+					break;
+				case PACKET_CHANGEI:
+					index = absorb_word();
+					assert(index < depth);
+					break;
+				default:
+					assert(false && "Unrecognized change packet");
+			}
+			for (size_t offset = 0; offset < chunks; offset++)
+				data[chunks * index + offset] = absorb_word();
+		}
+	};
+
+	// Opening spools. For certain uses of the record/replay mechanism, two distinct open files (two open files, i.e.
+	// two distinct file pointers, and not just file descriptors, which share the file pointer if duplicated) are used,
+	// for a reader and writer thread. This class manages the lifetime of the descriptors for these files. When only
+	// one of them is used, the other is closed harmlessly when the spool is destroyed.
+private:
+	std::atomic<int> writefd;
+	std::atomic<int> readfd;
+
+public:
+	spool(const std::string &filename)
+		: writefd(open(filename.c_str(), O_CREAT|O_BINARY|O_WRONLY|O_APPEND, 0644)),
+		  readfd(open(filename.c_str(), O_BINARY|O_RDONLY)) {
+		assert(writefd.load() != -1 && readfd.load() != -1);
+	}
+
+	spool(spool &&moved) : writefd(moved.writefd.exchange(-1)), readfd(moved.readfd.exchange(-1)) {}
+
+	spool(const spool &) = delete;
+	spool &operator=(const spool &) = delete;
+
+	// Closes whichever descriptors are still owned by the spool. `exchange(-1)` yields -1 for a descriptor that
+	// was already taken or moved away, so the comparison must be against -1 and not against zero: testing for
+	// a non-zero value would call `close(-1)` and would never close a descriptor that happens to be 0.
+	~spool() {
+		int fd;
+		if ((fd = writefd.exchange(-1)) != -1)
+			close(fd);
+		if ((fd = readfd.exchange(-1)) != -1)
+			close(fd);
+	}
+
+	// Atomically acquire a write file descriptor for the spool. Can be called once, and will return -1 the next time
+	// it is called. Thread-safe.
+	int take_write() {
+		return writefd.exchange(-1);
+	}
+
+	// Atomically acquire a read file descriptor for the spool. Can be called once, and will return -1 the next time
+	// it is called. Thread-safe.
+	int take_read() {
+		return readfd.exchange(-1);
+	}
+};
+
+// A CXXRTL recorder samples design state, producing complete or incremental updates, and writes them to a spool.
+class recorder {
+	// One recorded part of a debug item.
+	struct variable {
+		spool::ident_t ident; /* <= spool::MAXIMUM_IDENT */
+		size_t chunks;
+		size_t depth; /* == 1 for wires */
+		chunk_t *curr;
+		bool memory;
+	};
+
+	spool::writer writer;
+	std::vector<variable> variables;
+	std::vector<size_t> inputs; // values of inputs must be recorded explicitly, as their changes are not observed
+	std::unordered_map<const chunk_t*, spool::ident_t> ident_lookup;
+	bool streaming = false; // whether variable definitions have been written
+	spool::pointer_t pointer = 0;
+	time timestamp;
+
+public:
+	// All arguments are forwarded to the constructor of `spool::writer`.
+	template<typename ...Args>
+	recorder(Args &&...args) : writer(std::forward<Args>(args)...) {}
+
+	// Collects the debug items of `module` and starts recording them.
+	void start(module &module) {
+		debug_items items;
+		module.debug_info(items);
+		start(items);
+	}
+
+	// Writes the file header and one definition per recorded part: every part that is an input, is flagged
+	// `DRIVEN_SYNC`, or is a memory. Identifiers are assigned sequentially starting from 1. May be called once.
+	void start(const debug_items &items) {
+		assert(!streaming);
+
+		writer.write_magic();
+		// Iterate by reference: each table entry holds a name and a vector of parts, and copying it per item
+		// would be pure overhead.
+		for (const auto &item : items.table)
+			for (size_t part_index = 0; part_index < item.second.size(); part_index++) {
+				const auto &part = item.second[part_index];
+				if ((part.flags & debug_item::INPUT) || (part.flags & debug_item::DRIVEN_SYNC) ||
+						(part.type == debug_item::MEMORY)) {
+					variable var;
+					var.ident = variables.size() + 1;
+					// Number of chunks needed to hold `part.width` bits, rounded up.
+					var.chunks = (part.width + sizeof(chunk_t) * 8 - 1) / (sizeof(chunk_t) * 8);
+					var.depth = part.depth;
+					var.curr = part.curr;
+					var.memory = (part.type == debug_item::MEMORY);
+					ident_lookup[var.curr] = var.ident;
+
+					assert(variables.size() < spool::MAXIMUM_IDENT);
+					if (part.flags & debug_item::INPUT)
+						inputs.push_back(variables.size());
+					variables.push_back(var);
+
+					writer.write_define(var.ident, item.first, part_index, var.chunks, var.depth);
+				}
+			}
+		writer.write_end();
+		streaming = true;
+	}
+
+	// Returns the timestamp that will be attached to the next recorded sample.
+	const time &latest_time() {
+		return timestamp;
+	}
+
+	// Advances the timestamp by a non-negative `delta` and returns the new timestamp.
+	const time &advance_time(const time &delta) {
+		assert(!delta.is_negative());
+		timestamp += delta;
+		return timestamp;
+	}
+
+	// Writes a complete sample: the current value of every recorded variable, row by row for memories.
+	void record_complete() {
+		assert(streaming);
+
+		writer.write_sample(/*incremental=*/false, pointer++, timestamp);
+		for (const auto &var : variables) {
+			assert(var.ident != 0);
+			if (!var.memory)
+				writer.write_change(var.ident, var.chunks, var.curr);
+			else
+				for (size_t index = 0; index < var.depth; index++)
+					writer.write_change(var.ident, var.chunks, &var.curr[var.chunks * index], index);
+		}
+		writer.write_end();
+	}
+
+	// Writes an incremental sample: the current value of every input, followed by the changes reported to the
+	// observer while `module.commit()` runs. Returns the result of `module.commit()`.
+	//
+	// This function is generic over ModuleT to encourage observer callbacks to be inlined into the commit function.
+	template<class ModuleT>
+	bool record_incremental(ModuleT &module) {
+		assert(streaming);
+
+		struct : public observer {
+			std::unordered_map<const chunk_t*, spool::ident_t> *ident_lookup;
+			spool::writer *writer;
+
+			CXXRTL_ALWAYS_INLINE
+			void on_commit(size_t chunks, const chunk_t *base, const chunk_t *value) override {
+				writer->write_change(ident_lookup->at(base), chunks, value);
+			}
+
+			CXXRTL_ALWAYS_INLINE
+			void on_commit(size_t chunks, const chunk_t *base, const chunk_t *value, size_t index) override {
+				writer->write_change(ident_lookup->at(base), chunks, value, index);
+			}
+		} record_observer;
+		record_observer.ident_lookup = &ident_lookup;
+		record_observer.writer = &writer;
+
+		writer.write_sample(/*incremental=*/true, pointer++, timestamp);
+		for (auto input_index : inputs) {
+			variable &var = variables.at(input_index);
+			assert(!var.memory);
+			writer.write_change(var.ident, var.chunks, var.curr);
+		}
+		bool changed = module.commit(record_observer);
+		writer.write_end();
+		return changed;
+	}
+
+	// Writes out everything buffered by the underlying `spool::writer`.
+	void flush() {
+		writer.flush();
+	}
+};
+
+// A CXXRTL player reads samples from a spool, and changes the design state accordingly. To start reading samples,
+// a spool must have been initialized: the recorder must have been started and an initial complete sample must have
+// been written.
+class player {
+	// One part of a debug item that the replay log carries data for.
+	struct variable {
+		size_t chunks;
+		size_t depth; /* == 1 for wires */
+		chunk_t *curr;
+	};
+
+	spool::reader reader;
+	std::unordered_map<spool::ident_t, variable> variables;
+	bool streaming = false; // whether variable definitions have been read
+	bool initialized = false; // whether a sample has ever been read
+	spool::pointer_t pointer = 0;
+	time timestamp;
+
+	// File positions of the complete samples seen so far. Both maps are ordered with `std::greater`, so that
+	// `lower_bound(key)` yields the entry with the largest key that is less than or equal to `key`.
+	std::map<spool::pointer_t, spool::reader::pos_t, std::greater<spool::pointer_t>> index_by_pointer;
+	std::map<time, spool::reader::pos_t, std::greater<time>> index_by_timestamp;
+
+	// Reads the header of the next sample without consuming it: the read position is restored afterwards.
+	bool peek_sample(spool::pointer_t &pointer, time &timestamp) {
+		bool incremental;
+		auto position = reader.position();
+		bool success = reader.read_sample(incremental, pointer, timestamp);
+		reader.rewind(position);
+		return success;
+	}
+
+public:
+	// All arguments are forwarded to the constructor of `spool::reader`.
+	template<typename ...Args>
+	player(Args &&...args) : reader(std::forward<Args>(args)...) {}
+
+	// Collects the debug items of `module` and starts replaying into them.
+	void start(module &module) {
+		debug_items items;
+		module.debug_info(items);
+		start(items);
+	}
+
+	// Reads the file header and the definitions, binds every defined identifier to the matching part in `items`
+	// (checking that its chunk count and depth agree with the log), then reads the first sample. May be called once.
+	void start(const debug_items &items) {
+		assert(!streaming);
+
+		reader.read_magic();
+		while (true) {
+			spool::ident_t ident;
+			std::string name;
+			size_t part_index;
+			size_t chunks;
+			size_t depth;
+			if (!reader.read_define(ident, name, part_index, chunks, depth))
+				break;
+			assert(variables.count(ident) == 0);
+			assert(items.count(name) != 0);
+			assert(part_index < items.count(name));
+
+			const debug_item &part = items.parts_at(name).at(part_index);
+			assert(chunks == (part.width + sizeof(chunk_t) * 8 - 1) / (sizeof(chunk_t) * 8));
+			assert(depth == part.depth);
+
+			variable &var = variables[ident];
+			var.chunks = chunks;
+			var.depth = depth;
+			var.curr = part.curr;
+		}
+		assert(variables.size() > 0);
+		streaming = true;
+
+		// Establish the initial state of the design.
+		initialized = replay();
+		assert(initialized);
+	}
+
+	// Returns the pointer of the current sample.
+	spool::pointer_t current_pointer() {
+		assert(initialized);
+		return pointer;
+	}
+
+	// Returns the time of the current sample.
+	const time &current_time() {
+		assert(initialized);
+		return timestamp;
+	}
+
+	// Returns `true` if there is a next sample to read, and sets `pointer` to its pointer if there is.
+	bool get_next_pointer(spool::pointer_t &pointer) {
+		assert(streaming);
+		time timestamp;
+		return peek_sample(pointer, timestamp);
+	}
+
+	// Returns `true` if there is a next sample to read, and sets `timestamp` to its time if there is.
+	bool get_next_time(time &timestamp) {
+		assert(streaming);
+		spool::pointer_t pointer;
+		return peek_sample(pointer, timestamp);
+	}
+
+	// If this function returns `true`, then `current_pointer() == at_pointer`, and the module contains values that
+	// correspond to this pointer in the replay log. To obtain a valid pointer, call `current_pointer()`; while pointers
+	// are monotonically increasing for each consecutive sample, using arithmetic operations to create a new pointer is
+	// not allowed.
+	bool rewind_to(spool::pointer_t at_pointer) {
+		assert(initialized);
+
+		// The pointers in the replay log start from one that is greater than `at_pointer`. In this case the pointer will
+		// never be reached.
+		assert(index_by_pointer.size() > 0);
+		if (at_pointer < index_by_pointer.rbegin()->first)
+			return false;
+
+		// Find the last complete sample whose pointer is less than or equal to `at_pointer`. Note that the comparison
+		// function used here is `std::greater`, inverting the direction of `lower_bound`.
+		auto position_it = index_by_pointer.lower_bound(at_pointer);
+		assert(position_it != index_by_pointer.end());
+		reader.rewind(position_it->second);
+
+		// Replay samples until eventually arriving to `at_pointer` or encountering end of file.
+		while (replay()) {
+			if (pointer == at_pointer)
+				return true;
+		}
+		return false;
+	}
+
+	// If this function returns `true`, then `current_time() <= at_or_before_timestamp`, and the module contains values
+	// that correspond to `current_time()` in the replay log. If `current_time() == at_or_before_timestamp` and there
+	// are several consecutive samples with the same time, the module contains values that correspond to the first of
+	// these samples.
+	bool rewind_to_or_before(const time &at_or_before_timestamp) {
+		assert(initialized);
+
+		// The timestamps in the replay log start from one that is greater than `at_or_before_timestamp`. In this case
+		// the timestamp will never be reached. Otherwise, this function will always succeed.
+		assert(index_by_timestamp.size() > 0);
+		if (at_or_before_timestamp < index_by_timestamp.rbegin()->first)
+			return false;
+
+		// Find the last complete sample whose timestamp is less than or equal to `at_or_before_timestamp`. Note that
+		// the comparison function used here is `std::greater`, inverting the direction of `lower_bound`.
+		auto position_it = index_by_timestamp.lower_bound(at_or_before_timestamp);
+		assert(position_it != index_by_timestamp.end());
+		reader.rewind(position_it->second);
+
+		// Replay samples until eventually arriving to or past `at_or_before_timestamp` or encountering end of file.
+		while (replay()) {
+			if (timestamp == at_or_before_timestamp)
+				break;
+
+			time next_timestamp;
+			if (!get_next_time(next_timestamp))
+				break;
+			if (next_timestamp > at_or_before_timestamp)
+				break;
+		}
+		return true;
+	}
+
+	// If this function returns `true`, then `current_pointer()` and `current_time()` are updated for the next sample
+	// and the module now contains values that correspond to that sample. If it returns `false`, there was no next sample
+	// to read.
+	bool replay() {
+		assert(streaming);
+
+		bool incremental;
+		auto position = reader.position();
+		if (!reader.read_sample(incremental, pointer, timestamp))
+			return false;
+
+		// The very first sample that is read must be a complete sample. This is required for the rewind functions to work.
+		assert(initialized || !incremental);
+
+		// Index every complete sample by its pointer the first time it is read.
+		//
+		// It is possible (though not very useful) to have several complete samples with the same timestamp in a row.
+		// Ensure that we associate the timestamp with the position of the first such complete sample: a timestamp
+		// that is already indexed is left alone rather than asserted absent or overwritten. (This condition works
+		// because the player never jumps over a sample.)
+		if (!incremental && !index_by_pointer.count(pointer)) {
+			index_by_pointer[pointer] = position;
+			if (!index_by_timestamp.count(timestamp))
+				index_by_timestamp[timestamp] = position;
+		}
+
+		// Apply every change packet of this sample directly to the storage of the corresponding debug item.
+		uint32_t header;
+		spool::ident_t ident;
+		while (reader.read_change_header(header, ident)) {
+			variable &var = variables.at(ident);
+			reader.read_change_data(header, var.chunks, var.depth, var.curr);
+		}
+		return true;
+	}
+};
+
+}
+
+#endif
diff --git a/backends/cxxrtl/runtime/cxxrtl/cxxrtl_time.h b/backends/cxxrtl/runtime/cxxrtl/cxxrtl_time.h index 51f59321e..f37c2b656 100644 --- a/backends/cxxrtl/runtime/cxxrtl/cxxrtl_time.h +++ b/backends/cxxrtl/runtime/cxxrtl/cxxrtl_time.h @@ -26,17 +26,19 @@ namespace cxxrtl { -// A timestamp or a difference in time, stored as a 96-bit number of femtoseconds (10e-15 s). The dynamic range and -// resolution of this format can represent any VCD timestamp within 136 years, without the need for a timescale. +// A timestamp or a difference in time, stored as a 96-bit number of femtoseconds (10e-15 s). The range and resolution +// of this format can represent any VCD timestamp within approx. ±1255321.2 years, without the need for a timescale. class time { public: static constexpr size_t bits = 96; // 3 chunks private: + static constexpr size_t resolution_digits = 15; + + static_assert(sizeof(chunk_t) == 4, "a chunk is expected to be 32-bit"); static constexpr value resolution = value { chunk_t(1000000000000000ull & 0xffffffffull), chunk_t(1000000000000000ull >> 32), 0u }; - static constexpr size_t resolution_digits = 15; // Signed number of femtoseconds from the beginning of time.
value raw; @@ -51,11 +53,11 @@ public: return time(value { 0xffffffffu, 0xffffffffu, 0x7fffffffu }); } - time(int32_t secs, int64_t femtos) { - value<32> secs_val; - secs_val.set((uint32_t)secs); + time(int64_t secs, int64_t femtos) { + value<64> secs_val; + secs_val.set(secs); value<64> femtos_val; - femtos_val.set((uint64_t)femtos); + femtos_val.set(femtos); raw = secs_val.sext().mul(resolution).add(femtos_val.sext()); } @@ -68,14 +70,14 @@ public: return raw.is_neg(); } - // Extracts the absolute number of whole seconds. Negative if the value is negative. - int32_t secs() const { - return raw.sdivmod(resolution).first.trunc<32>().get(); + // Extracts the number of whole seconds. Negative if the value is negative. + int64_t secs() const { + return raw.sdivmod(resolution).first.trunc<64>().get(); } - // Extracts the absolute number of femtoseconds in the fractional second. Negative if the value is negative. + // Extracts the number of femtoseconds in the fractional second. Negative if the value is negative. int64_t femtos() const { - return raw.sdivmod(resolution).second.trunc<64>().get(); + return raw.sdivmod(resolution).second.trunc<64>().get(); } bool operator==(const time &other) const { @@ -125,10 +127,10 @@ public: } operator std::string() const { - char buf[38]; // len(f"-{2**64}.{10**15-1}") + 1 == 38 - int32_t secs = this->secs(); + char buf[48]; // x=2**95; len(f"-{x/1_000_000_000_000_000}.{x^1_000_000_000_000_000}") == 48 + int64_t secs = this->secs(); int64_t femtos = this->femtos(); - snprintf(buf, sizeof(buf), "%s%" PRIi32 ".%015" PRIi64, + snprintf(buf, sizeof(buf), "%s%" PRIi64 ".%015" PRIi64, is_negative() ? "-" : "", secs >= 0 ? secs : -secs, femtos >= 0 ? 
femtos : -femtos); return buf; } @@ -143,7 +145,7 @@ public: parse_fractional, } state = parse_sign_opt; bool negative = false; - int32_t integral = 0; + int64_t integral = 0; int64_t fractional = 0; size_t frac_digits = 0; for (auto chr : str) { @@ -199,7 +201,7 @@ std::ostream &operator<<(std::ostream &os, const time &val) { namespace time_literals { time operator""_s(unsigned long long seconds) { - return time { (int32_t)seconds, 0 }; + return time { (int64_t)seconds, 0 }; } time operator""_ms(unsigned long long milliseconds) { diff --git a/backends/verilog/verilog_backend.cc b/backends/verilog/verilog_backend.cc index 735672a43..9ff2c5c86 100644 --- a/backends/verilog/verilog_backend.cc +++ b/backends/verilog/verilog_backend.cc @@ -376,7 +376,7 @@ void dump_sigspec(std::ostream &f, const RTLIL::SigSpec &sig) } } -void dump_attributes(std::ostream &f, std::string indent, dict &attributes, char term = '\n', bool modattr = false, bool regattr = false, bool as_comment = false) +void dump_attributes(std::ostream &f, std::string indent, dict &attributes, std::string term = "\n", bool modattr = false, bool regattr = false, bool as_comment = false) { if (noattr) return; @@ -392,13 +392,13 @@ void dump_attributes(std::ostream &f, std::string indent, dictsecond, -1, 0, false, as_comment); - f << stringf(" %s%c", as_comment ? "*/" : "*)", term); + f << stringf(" %s%s", as_comment ? "*/" : "*)", term.c_str()); } } void dump_wire(std::ostream &f, std::string indent, RTLIL::Wire *wire) { - dump_attributes(f, indent, wire->attributes, '\n', /*modattr=*/false, /*regattr=*/reg_wires.count(wire->name)); + dump_attributes(f, indent, wire->attributes, "\n", /*modattr=*/false, /*regattr=*/reg_wires.count(wire->name)); #if 0 if (wire->port_input && !wire->port_output) f << stringf("%s" "input %s", indent.c_str(), reg_wires.count(wire->name) ? 
"reg " : ""); @@ -989,7 +989,7 @@ void dump_cell_expr_uniop(std::ostream &f, std::string indent, RTLIL::Cell *cell f << stringf("%s" "assign ", indent.c_str()); dump_sigspec(f, cell->getPort(ID::Y)); f << stringf(" = %s ", op.c_str()); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); dump_cell_expr_port(f, cell, "A", true); f << stringf(";\n"); } @@ -1001,7 +1001,7 @@ void dump_cell_expr_binop(std::ostream &f, std::string indent, RTLIL::Cell *cell f << stringf(" = "); dump_cell_expr_port(f, cell, "A", true); f << stringf(" %s ", op.c_str()); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); dump_cell_expr_port(f, cell, "B", true); f << stringf(";\n"); } @@ -1048,7 +1048,7 @@ bool dump_cell_expr(std::ostream &f, std::string indent, RTLIL::Cell *cell) dump_sigspec(f, cell->getPort(ID::Y)); f << stringf(" = "); f << stringf("~"); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); dump_cell_expr_port(f, cell, "A", false); f << stringf(";\n"); return true; @@ -1068,7 +1068,7 @@ bool dump_cell_expr(std::ostream &f, std::string indent, RTLIL::Cell *cell) f << stringf("|"); if (cell->type.in(ID($_XOR_), ID($_XNOR_))) f << stringf("^"); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); f << stringf(" "); if (cell->type.in(ID($_ANDNOT_), ID($_ORNOT_))) f << stringf("~("); @@ -1085,7 +1085,7 @@ bool dump_cell_expr(std::ostream &f, std::string indent, RTLIL::Cell *cell) f << stringf(" = "); dump_cell_expr_port(f, cell, "S", false); f << stringf(" ? 
"); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); dump_cell_expr_port(f, cell, "B", false); f << stringf(" : "); dump_cell_expr_port(f, cell, "A", false); @@ -1099,7 +1099,7 @@ bool dump_cell_expr(std::ostream &f, std::string indent, RTLIL::Cell *cell) f << stringf(" = !("); dump_cell_expr_port(f, cell, "S", false); f << stringf(" ? "); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); dump_cell_expr_port(f, cell, "B", false); f << stringf(" : "); dump_cell_expr_port(f, cell, "A", false); @@ -1115,7 +1115,7 @@ bool dump_cell_expr(std::ostream &f, std::string indent, RTLIL::Cell *cell) f << stringf(cell->type == ID($_AOI3_) ? " & " : " | "); dump_cell_expr_port(f, cell, "B", false); f << stringf(cell->type == ID($_AOI3_) ? ") |" : ") &"); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); f << stringf(" "); dump_cell_expr_port(f, cell, "C", false); f << stringf(");\n"); @@ -1130,7 +1130,7 @@ bool dump_cell_expr(std::ostream &f, std::string indent, RTLIL::Cell *cell) f << stringf(cell->type == ID($_AOI4_) ? " & " : " | "); dump_cell_expr_port(f, cell, "B", false); f << stringf(cell->type == ID($_AOI4_) ? ") |" : ") &"); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); f << stringf(" ("); dump_cell_expr_port(f, cell, "C", false); f << stringf(cell->type == ID($_AOI4_) ? 
" & " : " | "); @@ -1232,7 +1232,7 @@ bool dump_cell_expr(std::ostream &f, std::string indent, RTLIL::Cell *cell) f << stringf("%s" "assign ", indent.c_str()); dump_sigspec(f, cell->getPort(ID::Y)); f << stringf(" = $signed(%s) / ", buf_num.c_str()); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); f << stringf("$signed(%s);\n", buf_b.c_str()); return true; } else { @@ -1255,7 +1255,7 @@ bool dump_cell_expr(std::ostream &f, std::string indent, RTLIL::Cell *cell) f << stringf("%s" "wire [%d:0] %s = ", indent.c_str(), GetSize(cell->getPort(ID::A))-1, temp_id.c_str()); dump_cell_expr_port(f, cell, "A", true); f << stringf(" %% "); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); dump_cell_expr_port(f, cell, "B", true); f << stringf(";\n"); @@ -1330,7 +1330,7 @@ bool dump_cell_expr(std::ostream &f, std::string indent, RTLIL::Cell *cell) f << stringf(" = "); dump_sigspec(f, cell->getPort(ID::S)); f << stringf(" ? 
"); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); dump_sigspec(f, cell->getPort(ID::B)); f << stringf(" : "); dump_sigspec(f, cell->getPort(ID::A)); @@ -1439,7 +1439,7 @@ bool dump_cell_expr(std::ostream &f, std::string indent, RTLIL::Cell *cell) f << stringf(" = "); dump_const(f, cell->parameters.at(ID::LUT)); f << stringf(" >> "); - dump_attributes(f, "", cell->attributes, ' '); + dump_attributes(f, "", cell->attributes, " "); dump_sigspec(f, cell->getPort(ID::A)); f << stringf(";\n"); return true; @@ -1971,6 +1971,56 @@ void dump_case_body(std::ostream &f, std::string indent, RTLIL::CaseRule *cs, bo f << stringf("%s" "end\n", indent.c_str()); } +bool dump_proc_switch_ifelse(std::ostream &f, std::string indent, RTLIL::SwitchRule *sw) +{ + for (auto it = sw->cases.begin(); it != sw->cases.end(); ++it) { + if ((*it)->compare.size() == 0) { + break; + } else if ((*it)->compare.size() == 1) { + int case_index = it - sw->cases.begin(); + SigSpec compare = (*it)->compare.at(0); + if (case_index >= compare.size()) + return false; + if (compare[case_index] != State::S1) + return false; + for (int bit_index = 0; bit_index < compare.size(); bit_index++) + if (bit_index != case_index && compare[bit_index] != State::Sa) + return false; + } else { + return false; + } + } + + f << indent; + auto sig_it = sw->signal.begin(); + for (auto it = sw->cases.begin(); it != sw->cases.end(); ++it, ++sig_it) { + bool had_newline = true; + if (it != sw->cases.begin()) { + if ((*it)->compare.empty()) { + f << indent << "else\n"; + had_newline = true; + } else { + f << indent << "else "; + had_newline = false; + } + } + if (!(*it)->compare.empty()) { + if (!(*it)->attributes.empty()) { + if (!had_newline) + f << "\n" << indent; + dump_attributes(f, "", (*it)->attributes, "\n" + indent); + } + f << stringf("if ("); + dump_sigspec(f, *sig_it); + f << stringf(")\n"); + } + dump_case_body(f, indent, *it); + if ((*it)->compare.empty()) + break; 
+ } + return true; +} + void dump_proc_switch(std::ostream &f, std::string indent, RTLIL::SwitchRule *sw) { if (sw->signal.size() == 0) { @@ -1983,17 +2033,18 @@ void dump_proc_switch(std::ostream &f, std::string indent, RTLIL::SwitchRule *sw return; } + if (dump_proc_switch_ifelse(f, indent, sw)) + return; + dump_attributes(f, indent, sw->attributes); f << stringf("%s" "casez (", indent.c_str()); dump_sigspec(f, sw->signal); f << stringf(")\n"); - bool got_default = false; for (auto it = sw->cases.begin(); it != sw->cases.end(); ++it) { - dump_attributes(f, indent + " ", (*it)->attributes, '\n', /*modattr=*/false, /*regattr=*/false, /*as_comment=*/true); + bool got_default = false; + dump_attributes(f, indent + " ", (*it)->attributes, "\n", /*modattr=*/false, /*regattr=*/false, /*as_comment=*/true); if ((*it)->compare.size() == 0) { - if (got_default) - continue; f << stringf("%s default", indent.c_str()); got_default = true; } else { @@ -2006,6 +2057,14 @@ void dump_proc_switch(std::ostream &f, std::string indent, RTLIL::SwitchRule *sw } f << stringf(":\n"); dump_case_body(f, indent + " ", *it); + + if (got_default) { + // If we followed up the default with more cases the Verilog + // semantics would be to match those *before* the default, but + // the RTLIL semantics are to match those *after* the default + // (so they can never be selected). Exit now. 
+ break; + } } if (sw->cases.empty()) { @@ -2167,7 +2226,7 @@ void dump_module(std::ostream &f, std::string indent, RTLIL::Module *module) } } - dump_attributes(f, indent, module->attributes, '\n', /*modattr=*/true); + dump_attributes(f, indent, module->attributes, "\n", /*modattr=*/true); f << stringf("%s" "module %s(", indent.c_str(), id(module->name, false).c_str()); bool keep_running = true; int cnt = 0; diff --git a/frontends/ast/ast.cc b/frontends/ast/ast.cc index 34e624993..4b2b7a822 100644 --- a/frontends/ast/ast.cc +++ b/frontends/ast/ast.cc @@ -850,6 +850,25 @@ AstNode *AstNode::mkconst_str(const std::string &str) return node; } +// create a temporary register +AstNode *AstNode::mktemp_logic(const std::string &name, AstNode *mod, bool nosync, int range_left, int range_right, bool is_signed) +{ + AstNode *wire = new AstNode(AST_WIRE, new AstNode(AST_RANGE, mkconst_int(range_left, true), mkconst_int(range_right, true))); + wire->str = stringf("%s%s:%d$%d", name.c_str(), RTLIL::encode_filename(filename).c_str(), location.first_line, autoidx++); + if (nosync) + wire->set_attribute(ID::nosync, AstNode::mkconst_int(1, false)); + wire->is_signed = is_signed; + wire->is_logic = true; + mod->children.push_back(wire); + while (wire->simplify(true, 1, -1, false)) { } + + AstNode *ident = new AstNode(AST_IDENTIFIER); + ident->str = wire->str; + ident->id2ast = wire; + + return ident; +} + bool AstNode::bits_only_01() const { for (auto bit : bits) diff --git a/frontends/ast/ast.h b/frontends/ast/ast.h index f789e930b..97903d0a0 100644 --- a/frontends/ast/ast.h +++ b/frontends/ast/ast.h @@ -321,6 +321,9 @@ namespace AST static AstNode *mkconst_str(const std::vector &v); static AstNode *mkconst_str(const std::string &str); + // helper function to create an AST node for a temporary register + AstNode *mktemp_logic(const std::string &name, AstNode *mod, bool nosync, int range_left, int range_right, bool is_signed); + // helper function for creating sign-extended const 
objects RTLIL::Const bitsAsConst(int width, bool is_signed); RTLIL::Const bitsAsConst(int width = -1); diff --git a/frontends/ast/simplify.cc b/frontends/ast/simplify.cc index dfa1ed6af..945f286a1 100644 --- a/frontends/ast/simplify.cc +++ b/frontends/ast/simplify.cc @@ -35,12 +35,25 @@ #include #include #include +// For std::gcd in C++17 +// #include YOSYS_NAMESPACE_BEGIN using namespace AST; using namespace AST_INTERNAL; +// gcd computed by Euclidian division. +// To be replaced by C++17 std::gcd +template I gcd(I a, I b) { + while (b != 0) { + I tmp = b; + b = a%b; + a = tmp; + } + return std::abs(a); +} + void AstNode::set_in_lvalue_flag(bool flag, bool no_descend) { if (flag != in_lvalue_from_above) { @@ -2818,27 +2831,12 @@ bool AstNode::simplify(bool const_fold, int stage, int width_hint, bool sign_hin if (!children[0]->id2ast->range_valid) goto skip_dynamic_range_lvalue_expansion; - int source_width = children[0]->id2ast->range_left - children[0]->id2ast->range_right + 1; - int source_offset = children[0]->id2ast->range_right; - int result_width = 1; - int stride = 1; AST::AstNode *member_node = get_struct_member(children[0]); - if (member_node) { - // Clamp chunk to range of member within struct/union. - log_assert(!source_offset && !children[0]->id2ast->range_swapped); - source_width = member_node->range_left - member_node->range_right + 1; - - // When the (* nowrshmsk *) attribute is set, a CASE block is generated below - // to select the indexed bit slice. When a multirange array is indexed, the - // start of each possible slice is separated by the bit stride of the last - // index dimension, and we can optimize the CASE block accordingly. - // The dimension of the original array expression is saved in the 'integer' field. - int dims = children[0]->integer; - stride = source_width; - for (int dim = 0; dim < dims; dim++) { - stride /= get_struct_range_width(member_node, dim); - } - } + int wire_width = member_node ? 
+ member_node->range_left - member_node->range_right + 1 : + children[0]->id2ast->range_left - children[0]->id2ast->range_right + 1; + int wire_offset = children[0]->id2ast->range_right; + int result_width = 1; AstNode *shift_expr = NULL; AstNode *range = children[0]->children[0]; @@ -2851,52 +2849,132 @@ bool AstNode::simplify(bool const_fold, int stage, int width_hint, bool sign_hin else shift_expr = range->children[0]->clone(); - bool use_case_method = false; - - if (children[0]->id2ast->attributes.count(ID::nowrshmsk)) { - AstNode *node = children[0]->id2ast->attributes.at(ID::nowrshmsk); - while (node->simplify(true, stage, -1, false)) { } - if (node->type != AST_CONSTANT) - input_error("Non-constant value for `nowrshmsk' attribute on `%s'!\n", children[0]->id2ast->str.c_str()); - if (node->asAttrConst().as_bool()) - use_case_method = true; - } + bool use_case_method = children[0]->id2ast->get_bool_attribute(ID::nowrshmsk); if (!use_case_method && current_always->detect_latch(children[0]->str)) use_case_method = true; - if (use_case_method) - { + if (use_case_method) { // big case block + int stride = 1; + long long bitno_div = stride; + + int case_width_hint; + bool case_sign_hint; + shift_expr->detectSignWidth(case_width_hint, case_sign_hint); + int max_width = case_width_hint; + + if (member_node) { // Member in packed struct/union + // Clamp chunk to range of member within struct/union. + log_assert(!wire_offset && !children[0]->id2ast->range_swapped); + + // When the (* nowrshmsk *) attribute is set, a CASE block is generated below + // to select the indexed bit slice. When a multirange array is indexed, the + // start of each possible slice is separated by the bit stride of the last + // index dimension, and we can optimize the CASE block accordingly. + // The dimension of the original array expression is saved in the 'integer' field. 
+ int dims = children[0]->integer; + stride = wire_width; + for (int dim = 0; dim < dims; dim++) { + stride /= get_struct_range_width(member_node, dim); + } + bitno_div = stride; + } else { + // Extract (index)*(width) from non_opt_range pattern ((@selfsz@((index)*(width)))+(0)). + AstNode *lsb_expr = + shift_expr->type == AST_ADD && shift_expr->children[0]->type == AST_SELFSZ && + shift_expr->children[1]->type == AST_CONSTANT && shift_expr->children[1]->integer == 0 ? + shift_expr->children[0]->children[0] : + shift_expr; + + // Extract stride from indexing of two-dimensional packed arrays and + // variable slices on the form dst[i*stride +: width] = src. + if (lsb_expr->type == AST_MUL && + (lsb_expr->children[0]->type == AST_CONSTANT || + lsb_expr->children[1]->type == AST_CONSTANT)) + { + int stride_ix = lsb_expr->children[1]->type == AST_CONSTANT; + stride = (int)lsb_expr->children[stride_ix]->integer; + bitno_div = stride != 0 ? stride : 1; + + // Check whether i*stride can overflow. + int i_width; + bool i_sign; + lsb_expr->children[1 - stride_ix]->detectSignWidth(i_width, i_sign); + int stride_width; + bool stride_sign; + lsb_expr->children[stride_ix]->detectSignWidth(stride_width, stride_sign); + max_width = std::max(i_width, stride_width); + // Stride width calculated from actual stride value. + stride_width = std::ceil(std::log2(std::abs(stride))); + + if (i_width + stride_width > max_width) { + // For (truncated) i*stride to be within the range of dst, the following must hold: + // i*stride ≡ bitno (mod shift_mod), i.e. + // i*stride = k*shift_mod + bitno + // + // The Diophantine equation on the form ax + by = c: + // stride*i - shift_mod*k = bitno + // has solutions iff c is a multiple of d = gcd(a, b), i.e. 
+ // bitno mod gcd(stride, shift_mod) = 0 + // + // long long is at least 64 bits in C++11 + long long shift_mod = 1ll << (max_width - case_sign_hint); + // std::gcd requires C++17 + // bitno_div = std::gcd(stride, shift_mod); + bitno_div = gcd((long long)stride, shift_mod); + } + } + } + + // long long is at least 64 bits in C++11 + long long max_offset = (1ll << (max_width - case_sign_hint)) - 1; + long long min_offset = case_sign_hint ? -(1ll << (max_width - 1)) : 0; + + // A temporary register holds the result of the (possibly complex) rvalue expression, + // avoiding repetition in each AST_COND below. + int rvalue_width; + bool rvalue_sign; + children[1]->detectSignWidth(rvalue_width, rvalue_sign); + AstNode *rvalue = mktemp_logic("$bitselwrite$rvalue$", current_ast_mod, true, rvalue_width - 1, 0, rvalue_sign); + AstNode *caseNode = new AstNode(AST_CASE, shift_expr); + newNode = new AstNode(AST_BLOCK, + new AstNode(AST_ASSIGN_EQ, rvalue, children[1]->clone()), + caseNode); + did_something = true; - newNode = new AstNode(AST_CASE, shift_expr); - for (int i = 0; i < source_width; i += stride) { - int start_bit = source_offset + i; - int end_bit = std::min(start_bit+result_width,source_width) - 1; - AstNode *cond = new AstNode(AST_COND, mkconst_int(start_bit, true)); + for (int i = 1 - result_width; i < wire_width; i++) { + // Out of range indexes are handled in genrtlil.cc + int start_bit = wire_offset + i; + int end_bit = start_bit + result_width - 1; + // Check whether the current index can be generated by shift_expr. 
+ if (start_bit < min_offset || start_bit > max_offset) + continue; + if (start_bit%bitno_div != 0 || (stride == 0 && start_bit != 0)) + continue; + + AstNode *cond = new AstNode(AST_COND, mkconst_int(start_bit, case_sign_hint, max_width)); AstNode *lvalue = children[0]->clone(); lvalue->delete_children(); if (member_node) lvalue->set_attribute(ID::wiretype, member_node->clone()); lvalue->children.push_back(new AstNode(AST_RANGE, mkconst_int(end_bit, true), mkconst_int(start_bit, true))); - cond->children.push_back(new AstNode(AST_BLOCK, new AstNode(type, lvalue, children[1]->clone()))); - newNode->children.push_back(cond); + cond->children.push_back(new AstNode(AST_BLOCK, new AstNode(type, lvalue, rvalue->clone()))); + caseNode->children.push_back(cond); } - } - else - { - // mask and shift operations, disabled for now + } else { + // mask and shift operations - AstNode *wire_mask = new AstNode(AST_WIRE, new AstNode(AST_RANGE, mkconst_int(source_width-1, true), mkconst_int(0, true))); + AstNode *wire_mask = new AstNode(AST_WIRE, new AstNode(AST_RANGE, mkconst_int(wire_width-1, true), mkconst_int(0, true))); wire_mask->str = stringf("$bitselwrite$mask$%s:%d$%d", RTLIL::encode_filename(filename).c_str(), location.first_line, autoidx++); wire_mask->set_attribute(ID::nosync, AstNode::mkconst_int(1, false)); wire_mask->is_logic = true; while (wire_mask->simplify(true, 1, -1, false)) { } current_ast_mod->children.push_back(wire_mask); - AstNode *wire_data = new AstNode(AST_WIRE, new AstNode(AST_RANGE, mkconst_int(source_width-1, true), mkconst_int(0, true))); + AstNode *wire_data = new AstNode(AST_WIRE, new AstNode(AST_RANGE, mkconst_int(wire_width-1, true), mkconst_int(0, true))); wire_data->str = stringf("$bitselwrite$data$%s:%d$%d", RTLIL::encode_filename(filename).c_str(), location.first_line, autoidx++); wire_data->set_attribute(ID::nosync, AstNode::mkconst_int(1, false)); wire_data->is_logic = true; @@ -2952,12 +3030,12 @@ bool AstNode::simplify(bool const_fold, 
int stage, int width_hint, bool sign_hin shamt = new AstNode(AST_TO_SIGNED, shamt); // offset the shift amount by the lower bound of the dimension - int start_bit = source_offset; + int start_bit = wire_offset; shamt = new AstNode(AST_SUB, shamt, mkconst_int(start_bit, true)); // reflect the shift amount if the dimension is swapped if (children[0]->id2ast->range_swapped) - shamt = new AstNode(AST_SUB, mkconst_int(source_width - result_width, true), shamt); + shamt = new AstNode(AST_SUB, mkconst_int(wire_width - result_width, true), shamt); // AST_SHIFT uses negative amounts for shifting left shamt = new AstNode(AST_NEG, shamt); diff --git a/tests/various/dynamic_part_select.ys b/tests/various/dynamic_part_select.ys index 2dc061e89..9e303b9db 100644 --- a/tests/various/dynamic_part_select.ys +++ b/tests/various/dynamic_part_select.ys @@ -69,6 +69,24 @@ design -copy-from gate -as gate gate miter -equiv -make_assert -make_outcmp -flatten gold gate equiv sat -prove-asserts -seq 10 -show-public -verify -set-init-zero equiv +### For-loop select, one dynamic input, (* nowrshmsk *) +design -reset +read_verilog ./dynamic_part_select/forloop_select_nowrshmsk.v +proc +rename -top gold +design -stash gold + +read_verilog ./dynamic_part_select/forloop_select_gate.v +proc +rename -top gate +design -stash gate + +design -copy-from gold -as gold gold +design -copy-from gate -as gate gate + +miter -equiv -make_assert -make_outcmp -flatten gold gate equiv +sat -prove-asserts -seq 10 -show-public -verify -set-init-zero equiv + #### Double loop (part-select, reset) ### design -reset read_verilog ./dynamic_part_select/reset_test.v diff --git a/tests/various/dynamic_part_select/forloop_select_nowrshmsk.v b/tests/various/dynamic_part_select/forloop_select_nowrshmsk.v new file mode 100644 index 000000000..75415c313 --- /dev/null +++ b/tests/various/dynamic_part_select/forloop_select_nowrshmsk.v @@ -0,0 +1,20 @@ +`default_nettype none +module forloop_select #(parameter WIDTH=16, SELW=4, 
CTRLW=$clog2(WIDTH), DINW=2**SELW) + (input wire clk, + input wire [CTRLW-1:0] ctrl, + input wire [DINW-1:0] din, + input wire en, + (* nowrshmsk *) + output reg [WIDTH-1:0] dout); + + reg [SELW:0] sel; + localparam SLICE = WIDTH/(SELW**2); + + always @(posedge clk) + begin + if (en) begin + for (sel = 0; sel <= 4'hf; sel=sel+1'b1) + dout[(ctrl*sel)+:SLICE] <= din; + end + end +endmodule diff --git a/tests/verilog/dynamic_range_lhs.sh b/tests/verilog/dynamic_range_lhs.sh index 77b4a2918..f36c74bd2 100755 --- a/tests/verilog/dynamic_range_lhs.sh +++ b/tests/verilog/dynamic_range_lhs.sh @@ -15,7 +15,7 @@ run() { -p "read_verilog dynamic_range_lhs.v" \ -p "proc" \ -p "equiv_make gold gate equiv" \ - -p "equiv_simple" \ + -p "equiv_simple -undef" \ -p "equiv_status -assert" } diff --git a/tests/verilog/dynamic_range_lhs.v b/tests/verilog/dynamic_range_lhs.v index ae291374d..6eb952165 100644 --- a/tests/verilog/dynamic_range_lhs.v +++ b/tests/verilog/dynamic_range_lhs.v @@ -1,12 +1,12 @@ module gate( - output reg [`LEFT:`RIGHT] out_u, out_s, (* nowrshmsk = `ALT *) + output reg [`LEFT:`RIGHT] out_u, out_s, input wire data, input wire [1:0] sel1, sel2 ); always @* begin - out_u = 0; - out_s = 0; + out_u = 'x; + out_s = 'x; case (`SPAN) 1: begin out_u[sel1*sel2] = data; @@ -43,8 +43,8 @@ task set; out_s[b] = data; endtask always @* begin - out_u = 0; - out_s = 0; + out_u = 'x; + out_s = 'x; case (sel1*sel2) 2'b00: set(0, 0); 2'b01: set(1, 1);