From 080f311040a0c1bb5fd877e4ad06fc613fdc3314 Mon Sep 17 00:00:00 2001 From: whitequark Date: Sat, 12 Dec 2020 20:50:37 +0000 Subject: [PATCH 1/9] kernel: make IdString::isPublic() const. --- kernel/rtlil.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rtlil.h b/kernel/rtlil.h index a03e8933c..cd966b815 100644 --- a/kernel/rtlil.h +++ b/kernel/rtlil.h @@ -376,7 +376,7 @@ namespace RTLIL bool in(const std::string &rhs) const { return *this == rhs; } bool in(const pool &rhs) const { return rhs.count(*this) != 0; } - bool isPublic() { return begins_with("\\"); } + bool isPublic() const { return begins_with("\\"); } }; namespace ID { From ac1a78923af80cec3760cfbd0f6f4a96d180cce8 Mon Sep 17 00:00:00 2001 From: whitequark Date: Sun, 13 Dec 2020 00:54:12 +0000 Subject: [PATCH 2/9] cxxrtl: use IdString::isPublic(). NFC. --- backends/cxxrtl/cxxrtl_backend.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backends/cxxrtl/cxxrtl_backend.cc b/backends/cxxrtl/cxxrtl_backend.cc index c193d78e9..f1ed47541 100644 --- a/backends/cxxrtl/cxxrtl_backend.cc +++ b/backends/cxxrtl/cxxrtl_backend.cc @@ -211,7 +211,7 @@ bool is_ff_cell(RTLIL::IdString type) bool is_internal_cell(RTLIL::IdString type) { - return type[0] == '$' && !type.begins_with("$paramod"); + return !type.isPublic() && !type.begins_with("$paramod"); } bool is_cxxrtl_blackbox_cell(const RTLIL::Cell *cell) @@ -1665,7 +1665,7 @@ struct CxxrtlWorker { inc_indent(); f << indent << "assert(path.empty() || path[path.size() - 1] == ' ');\n"; for (auto wire : module->wires()) { - if (wire->name[0] != '\\') + if (!wire->name.isPublic()) continue; if (module->get_bool_attribute(ID(cxxrtl_blackbox)) && (wire->port_id == 0)) continue; @@ -1743,7 +1743,7 @@ struct CxxrtlWorker { } if (!module->get_bool_attribute(ID(cxxrtl_blackbox))) { for (auto &memory_it : module->memories) { - if (memory_it.first[0] != '\\') + if (!memory_it.first.isPublic()) continue; f << indent << 
"items.add(path + " << escape_cxx_string(get_hdl_name(memory_it.second)); f << ", debug_item(" << mangle(memory_it.second) << ", "; @@ -2338,7 +2338,7 @@ struct CxxrtlWorker { // Note that the information collected here can't be used for optimizing the netlist: debug information queries // are pure and run on a design in a stable state, which allows assumptions that do not otherwise hold. for (auto wire : module->wires()) { - if (wire->name[0] != '\\') + if (!wire->name.isPublic()) continue; if (!unbuffered_wires[wire]) continue; From 57759c3d1fa573c99552533758a0c198c0641b51 Mon Sep 17 00:00:00 2001 From: whitequark Date: Sat, 12 Dec 2020 20:24:53 +0000 Subject: [PATCH 3/9] cxxrtl: fix outdated comment. NFC. --- backends/cxxrtl/cxxrtl_backend.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/cxxrtl/cxxrtl_backend.cc b/backends/cxxrtl/cxxrtl_backend.cc index f1ed47541..8e51f3043 100644 --- a/backends/cxxrtl/cxxrtl_backend.cc +++ b/backends/cxxrtl/cxxrtl_backend.cc @@ -1157,8 +1157,8 @@ struct CxxrtlWorker { } // The generated code has two bounds checks; one in an assertion, and another that guards the read. // This is done so that the code does not invoke undefined behavior under any conditions, but nevertheless - // loudly crashes if an illegal condition is encountered. The assert may be turned off with -DNDEBUG not - // just for release builds, but also to make sure the simulator (which is presumably embedded in some + // loudly crashes if an illegal condition is encountered. The assert may be turned off with -DCXXRTL_NDEBUG + // not only for release builds, but also to make sure the simulator (which is presumably embedded in some // larger program) will never crash the code that calls into it. // // If assertions are disabled, out of bounds reads are defined to return zero. 
From 3b5a1314cd02d093cb1328d7c2f7abced876a514 Mon Sep 17 00:00:00 2001 From: whitequark Date: Sun, 13 Dec 2020 00:34:32 +0000 Subject: [PATCH 4/9] cxxrtl: rename "elision" to "inlining". NFC. "Elision" in this context is an unusual and not very descriptive term whereas "inlining" is common and straightforward. Also, introducing "inlining" makes it easier to introduce its dual under the obvious name "outlining". --- backends/cxxrtl/cxxrtl_backend.cc | 154 +++++++++++++++--------------- 1 file changed, 77 insertions(+), 77 deletions(-) diff --git a/backends/cxxrtl/cxxrtl_backend.cc b/backends/cxxrtl/cxxrtl_backend.cc index 8e51f3043..ca7f3a3cc 100644 --- a/backends/cxxrtl/cxxrtl_backend.cc +++ b/backends/cxxrtl/cxxrtl_backend.cc @@ -195,7 +195,7 @@ bool is_extending_cell(RTLIL::IdString type) ID($reduce_and), ID($reduce_or), ID($reduce_xor), ID($reduce_xnor), ID($reduce_bool)); } -bool is_elidable_cell(RTLIL::IdString type) +bool is_inlinable_cell(RTLIL::IdString type) { return is_unary_cell(type) || is_binary_cell(type) || type.in( ID($mux), ID($concat), ID($slice), ID($pmux)); @@ -273,7 +273,7 @@ struct FlowGraph { std::vector nodes; dict> wire_comb_defs, wire_sync_defs, wire_uses; - dict wire_def_elidable, wire_use_elidable; + dict wire_def_inlinable, wire_use_inlinable; dict bit_has_state; ~FlowGraph() @@ -282,7 +282,7 @@ struct FlowGraph { delete node; } - void add_defs(Node *node, const RTLIL::SigSpec &sig, bool is_ff, bool elidable) + void add_defs(Node *node, const RTLIL::SigSpec &sig, bool is_ff, bool inlinable) { for (auto chunk : sig.chunks()) if (chunk.wire) { @@ -298,9 +298,9 @@ struct FlowGraph { } for (auto bit : sig.bits()) bit_has_state[bit] |= is_ff; - // Only comb defs of an entire wire in the right order can be elided. + // Only comb defs of an entire wire in the right order can be inlined. 
if (!is_ff && sig.is_wire()) - wire_def_elidable[sig.as_wire()] = elidable; + wire_def_inlinable[sig.as_wire()] = inlinable; } void add_uses(Node *node, const RTLIL::SigSpec &sig) @@ -308,26 +308,26 @@ struct FlowGraph { for (auto chunk : sig.chunks()) if (chunk.wire) { wire_uses[chunk.wire].insert(node); - // Only a single use of an entire wire in the right order can be elided. + // Only a single use of an entire wire in the right order can be inlined. // (But the use can include other chunks.) - if (!wire_use_elidable.count(chunk.wire)) - wire_use_elidable[chunk.wire] = true; + if (!wire_use_inlinable.count(chunk.wire)) + wire_use_inlinable[chunk.wire] = true; else - wire_use_elidable[chunk.wire] = false; + wire_use_inlinable[chunk.wire] = false; } } - bool is_elidable(const RTLIL::Wire *wire) const + bool is_inlinable(const RTLIL::Wire *wire) const { - if (wire_def_elidable.count(wire) && wire_use_elidable.count(wire)) - return wire_def_elidable.at(wire) && wire_use_elidable.at(wire); + if (wire_def_inlinable.count(wire) && wire_use_inlinable.count(wire)) + return wire_def_inlinable.at(wire) && wire_use_inlinable.at(wire); return false; } // Connections void add_connect_defs_uses(Node *node, const RTLIL::SigSig &conn) { - add_defs(node, conn.first, /*is_ff=*/false, /*elidable=*/true); + add_defs(node, conn.first, /*is_ff=*/false, /*inlinable=*/true); add_uses(node, conn.second); } @@ -373,8 +373,8 @@ struct FlowGraph { for (auto conn : cell->connections()) if (cell->output(conn.first)) if (is_cxxrtl_sync_port(cell, conn.first)) { - // See note regarding elidability below. - add_defs(node, conn.second, /*is_ff=*/false, /*elidable=*/false); + // See note regarding inlinability below. 
+ add_defs(node, conn.second, /*is_ff=*/false, /*inlinable=*/false); } } @@ -382,19 +382,19 @@ struct FlowGraph { { for (auto conn : cell->connections()) { if (cell->output(conn.first)) { - if (is_elidable_cell(cell->type)) - add_defs(node, conn.second, /*is_ff=*/false, /*elidable=*/true); + if (is_inlinable_cell(cell->type)) + add_defs(node, conn.second, /*is_ff=*/false, /*inlinable=*/true); else if (is_ff_cell(cell->type) || (cell->type == ID($memrd) && cell->getParam(ID::CLK_ENABLE).as_bool())) - add_defs(node, conn.second, /*is_ff=*/true, /*elidable=*/false); + add_defs(node, conn.second, /*is_ff=*/true, /*inlinable=*/false); else if (is_internal_cell(cell->type)) - add_defs(node, conn.second, /*is_ff=*/false, /*elidable=*/false); + add_defs(node, conn.second, /*is_ff=*/false, /*inlinable=*/false); else if (!is_cxxrtl_sync_port(cell, conn.first)) { - // Although at first it looks like outputs of user-defined cells may always be elided, the reality is - // more complex. Fully sync outputs produce no defs and so don't participate in elision. Fully comb + // Although at first it looks like outputs of user-defined cells may always be inlined, the reality is + // more complex. Fully sync outputs produce no defs and so don't participate in inlining. Fully comb // outputs are assigned in a different way depending on whether the cell's eval() immediately converged. - // Unknown/mixed outputs could be elided, but should be rare in practical designs and don't justify - // the infrastructure required to elide outputs of cells with many of them. - add_defs(node, conn.second, /*is_ff=*/false, /*elidable=*/false); + // Unknown/mixed outputs could be inlined, but should be rare in practical designs and don't justify + // the infrastructure required to inline outputs of cells with many of them. 
+ add_defs(node, conn.second, /*is_ff=*/false, /*inlinable=*/false); } } if (cell->input(conn.first)) @@ -432,7 +432,7 @@ struct FlowGraph { void add_case_defs_uses(Node *node, const RTLIL::CaseRule *case_) { for (auto &action : case_->actions) { - add_defs(node, action.first, /*is_ff=*/false, /*elidable=*/false); + add_defs(node, action.first, /*is_ff=*/false, /*inlinable=*/false); add_uses(node, action.second); } for (auto sub_switch : case_->switches) { @@ -451,9 +451,9 @@ struct FlowGraph { for (auto sync : process->syncs) for (auto action : sync->actions) { if (sync->type == RTLIL::STp || sync->type == RTLIL::STn || sync->type == RTLIL::STe) - add_defs(node, action.first, /*is_ff=*/true, /*elidable=*/false); + add_defs(node, action.first, /*is_ff=*/true, /*inlinable=*/false); else - add_defs(node, action.first, /*is_ff=*/false, /*elidable=*/false); + add_defs(node, action.first, /*is_ff=*/false, /*inlinable=*/false); add_uses(node, action.second); } } @@ -535,8 +535,8 @@ struct CxxrtlWorker { bool unbuffer_public = false; bool localize_internal = false; bool localize_public = false; - bool elide_internal = false; - bool elide_public = false; + bool inline_internal = false; + bool inline_public = false; bool debug_info = false; @@ -549,10 +549,10 @@ struct CxxrtlWorker { dict edge_types; pool writable_memories; dict> transparent_for; - dict elided_wires; dict> schedule; pool unbuffered_wires; pool localized_wires; + dict inlined_wires; dict debug_alias_wires; dict debug_const_wires; dict bit_has_state; @@ -792,16 +792,16 @@ struct CxxrtlWorker { dump_const(chunk.data, chunk.width, chunk.offset); return false; } else { - if (elided_wires.count(chunk.wire)) { + if (inlined_wires.count(chunk.wire)) { log_assert(!is_lhs); - const FlowGraph::Node &node = elided_wires[chunk.wire]; + const FlowGraph::Node &node = inlined_wires[chunk.wire]; switch (node.type) { case FlowGraph::Node::Type::CONNECT: - dump_connect_elided(node.connect); + dump_connect_expr(node.connect); 
break; case FlowGraph::Node::Type::CELL_EVAL: - log_assert(is_elidable_cell(node.cell->type)); - dump_cell_elided(node.cell); + log_assert(is_inlinable_cell(node.cell->type)); + dump_cell_expr(node.cell); break; default: log_assert(false); @@ -858,10 +858,10 @@ struct CxxrtlWorker { void collect_sigspec_rhs(const RTLIL::SigSpec &sig, std::vector &cells) { for (auto chunk : sig.chunks()) { - if (!chunk.wire || !elided_wires.count(chunk.wire)) + if (!chunk.wire || !inlined_wires.count(chunk.wire)) continue; - const FlowGraph::Node &node = elided_wires[chunk.wire]; + const FlowGraph::Node &node = inlined_wires[chunk.wire]; switch (node.type) { case FlowGraph::Node::Type::CONNECT: collect_connect(node.connect, cells); @@ -875,19 +875,19 @@ struct CxxrtlWorker { } } - void dump_connect_elided(const RTLIL::SigSig &conn) + void dump_connect_expr(const RTLIL::SigSig &conn) { dump_sigspec_rhs(conn.second); } - bool is_connect_elided(const RTLIL::SigSig &conn) + bool is_connect_inlined(const RTLIL::SigSig &conn) { - return conn.first.is_wire() && elided_wires.count(conn.first.as_wire()); + return conn.first.is_wire() && inlined_wires.count(conn.first.as_wire()); } void collect_connect(const RTLIL::SigSig &conn, std::vector &cells) { - if (!is_connect_elided(conn)) + if (!is_connect_inlined(conn)) return; collect_sigspec_rhs(conn.second, cells); @@ -895,14 +895,14 @@ struct CxxrtlWorker { void dump_connect(const RTLIL::SigSig &conn) { - if (is_connect_elided(conn)) + if (is_connect_inlined(conn)) return; f << indent << "// connection\n"; f << indent; dump_sigspec_lhs(conn.first); f << " = "; - dump_connect_elided(conn); + dump_connect_expr(conn); f << ";\n"; } @@ -919,7 +919,7 @@ struct CxxrtlWorker { } } - void dump_cell_elided(const RTLIL::Cell *cell) + void dump_cell_expr(const RTLIL::Cell *cell) { // Unary cells if (is_unary_cell(cell->type)) { @@ -983,15 +983,15 @@ struct CxxrtlWorker { } } - bool is_cell_elided(const RTLIL::Cell *cell) + bool is_cell_inlined(const 
RTLIL::Cell *cell) { - return is_elidable_cell(cell->type) && cell->hasPort(ID::Y) && cell->getPort(ID::Y).is_wire() && - elided_wires.count(cell->getPort(ID::Y).as_wire()); + return is_inlinable_cell(cell->type) && cell->hasPort(ID::Y) && cell->getPort(ID::Y).is_wire() && + inlined_wires.count(cell->getPort(ID::Y).as_wire()); } void collect_cell_eval(const RTLIL::Cell *cell, std::vector &cells) { - if (!is_cell_elided(cell)) + if (!is_cell_inlined(cell)) return; cells.push_back(cell->name); @@ -1002,33 +1002,33 @@ struct CxxrtlWorker { void dump_cell_eval(const RTLIL::Cell *cell) { - if (is_cell_elided(cell)) + if (is_cell_inlined(cell)) return; if (cell->type == ID($meminit)) return; // Handled elsewhere. - std::vector elided_cells; - if (is_elidable_cell(cell->type)) { + std::vector inlined_cells; + if (is_inlinable_cell(cell->type)) { for (auto port : cell->connections()) if (port.first != ID::Y) - collect_sigspec_rhs(port.second, elided_cells); + collect_sigspec_rhs(port.second, inlined_cells); } - if (elided_cells.empty()) { + if (inlined_cells.empty()) { dump_attrs(cell); f << indent << "// cell " << cell->name.str() << "\n"; } else { f << indent << "// cells"; - for (auto elided_cell : elided_cells) - f << " " << elided_cell.str(); + for (auto inlined_cell : inlined_cells) + f << " " << inlined_cell.str(); f << "\n"; } // Elidable cells - if (is_elidable_cell(cell->type)) { + if (is_inlinable_cell(cell->type)) { f << indent; dump_sigspec_lhs(cell->getPort(ID::Y)); f << " = "; - dump_cell_elided(cell); + dump_cell_expr(cell); f << ";\n"; // Flip-flops } else if (is_ff_cell(cell->type)) { @@ -1458,16 +1458,16 @@ struct CxxrtlWorker { } } - void dump_wire(const RTLIL::Wire *wire, bool is_local_context) + void dump_wire(const RTLIL::Wire *wire, bool is_local) { - if (elided_wires.count(wire)) + if (inlined_wires.count(wire)) return; - if (localized_wires[wire] && is_local_context) { + if (localized_wires[wire] && is_local) { dump_attrs(wire); f << indent << 
"value<" << wire->width << "> " << mangle(wire) << ";\n"; } - if (!localized_wires[wire] && !is_local_context) { + if (!localized_wires[wire] && !is_local) { std::string width; if (wire->module->has_attribute(ID(cxxrtl_blackbox)) && wire->has_attribute(ID(cxxrtl_width))) { width = wire->get_string_attribute(ID(cxxrtl_width)); @@ -1597,7 +1597,7 @@ struct CxxrtlWorker { } } for (auto wire : module->wires()) - dump_wire(wire, /*is_local_context=*/true); + dump_wire(wire, /*is_local=*/true); for (auto node : schedule[module]) { switch (node.type) { case FlowGraph::Node::Type::CONNECT: @@ -1624,7 +1624,7 @@ struct CxxrtlWorker { inc_indent(); f << indent << "bool changed = false;\n"; for (auto wire : module->wires()) { - if (elided_wires.count(wire)) + if (inlined_wires.count(wire)) continue; if (unbuffered_wires[wire]) { if (edge_wires[wire]) @@ -1808,7 +1808,7 @@ struct CxxrtlWorker { inc_indent(); for (auto wire : module->wires()) { if (wire->port_id != 0) - dump_wire(wire, /*is_local_context=*/false); + dump_wire(wire, /*is_local=*/false); } f << "\n"; f << indent << "bool eval() override {\n"; @@ -1854,7 +1854,7 @@ struct CxxrtlWorker { f << indent << "struct " << mangle(module) << " : public module {\n"; inc_indent(); for (auto wire : module->wires()) - dump_wire(wire, /*is_local_context=*/false); + dump_wire(wire, /*is_local=*/false); f << "\n"; bool has_memories = false; for (auto memory : module->memories) { @@ -2234,16 +2234,16 @@ struct CxxrtlWorker { } for (auto wire : module->wires()) { - if (!flow.is_elidable(wire)) continue; + if (!flow.is_inlinable(wire)) continue; if (wire->port_id != 0) continue; if (wire->get_bool_attribute(ID::keep)) continue; - if (wire->name.begins_with("$") && !elide_internal) continue; - if (wire->name.begins_with("\\") && !elide_public) continue; + if (wire->name.begins_with("$") && !inline_internal) continue; + if (wire->name.begins_with("\\") && !inline_public) continue; if (edge_wires[wire]) continue; if 
(flow.wire_comb_defs[wire].size() > 1) log_cmd_error("Wire %s.%s has multiple drivers.\n", log_id(module), log_id(wire)); log_assert(flow.wire_comb_defs[wire].size() == 1); - elided_wires[wire] = **flow.wire_comb_defs[wire].begin(); + inlined_wires[wire] = **flow.wire_comb_defs[wire].begin(); } dict, hash_ptr_ops> node_defs; @@ -2280,9 +2280,9 @@ struct CxxrtlWorker { for (auto succ_node : flow.wire_uses[wire]) if (evaluated[succ_node]) { feedback_wires.insert(wire); - // Feedback wires may never be elided because feedback requires state, but the point of elision - // (and localization) is to eliminate state. - elided_wires.erase(wire); + // Feedback wires may never be inlined because feedback requires state, but the point of + // inlining (and localization) is to eliminate state. + inlined_wires.erase(wire); } } @@ -2344,7 +2344,7 @@ struct CxxrtlWorker { continue; const RTLIL::Wire *wire_it = wire; while (1) { - if (!(flow.wire_def_elidable.count(wire_it) && flow.wire_def_elidable[wire_it])) + if (!(flow.wire_def_inlinable.count(wire_it) && flow.wire_def_inlinable[wire_it])) break; // not an alias: complex def log_assert(flow.wire_comb_defs[wire_it].size() == 1); FlowGraph::Node *node = *flow.wire_comb_defs[wire_it].begin(); @@ -2660,7 +2660,7 @@ struct CxxrtlBackend : public Backend { log(" like -O1, and unbuffer internal wires if possible.\n"); log("\n"); log(" -O3\n"); - log(" like -O2, and elide internal wires if possible.\n"); + log(" like -O2, and inline internal wires if possible.\n"); log("\n"); log(" -O4\n"); log(" like -O3, and unbuffer public wires not marked (*keep*) if possible.\n"); @@ -2669,7 +2669,7 @@ struct CxxrtlBackend : public Backend { log(" like -O4, and localize public wires not marked (*keep*) if possible.\n"); log("\n"); log(" -O6\n"); - log(" like -O5, and elide public wires not marked (*keep*) if possible.\n"); + log(" like -O5, and inline public wires not marked (*keep*) if possible.\n"); log("\n"); log(" -Og\n"); log(" highest 
optimization level that provides debug information for all\n"); @@ -2757,7 +2757,7 @@ struct CxxrtlBackend : public Backend { switch (opt_level) { // the highest level here must match DEFAULT_OPT_LEVEL case 6: - worker.elide_public = true; + worker.inline_public = true; YS_FALLTHROUGH case 5: worker.localize_public = true; @@ -2766,7 +2766,7 @@ struct CxxrtlBackend : public Backend { worker.unbuffer_public = true; YS_FALLTHROUGH case 3: - worker.elide_internal = true; + worker.inline_internal = true; YS_FALLTHROUGH case 2: worker.localize_internal = true; From ece25a45d4b12f0436be238a13e622b58282036e Mon Sep 17 00:00:00 2001 From: whitequark Date: Sun, 13 Dec 2020 07:03:16 +0000 Subject: [PATCH 5/9] cxxrtl: implement debug information outlining. Aggressive wire localization and inlining is necessary for CXXRTL to achieve high performance. However, that comes with a cost: reduced debug information coverage. Previously, as a workaround, the `-Og` option could have been used to guarantee complete coverage, at a cost of a significant performance penalty. This commit introduces debug information outlining. The main eval() function is compiled with the user-specified optimization settings. In tandem, an auxiliary debug_eval() function, compiled from the same netlist, can be used to reconstruct the values of localized/inlined signals on demand. To the extent that it is possible, debug_eval() reuses the results of computations performed by eval(), only filling in the missing values. Benchmarking a representative design (Minerva SoC SRAM) shows that: * Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%. * Switching from `-g1` to `-g2`, both used with `-O6`, increases compile time by ~25%. * Although `-g2` increases the resident size of generated modules, this has no effect on runtime. 
Because the impact of `-g2` is minimal and the benefits of having unconditional 100% debug information coverage (and the performance improvement as well) are major, this commit removes `-Og` and changes the defaults to `-O6 -g2`. We'll have our cake and eat it too! --- backends/cxxrtl/cxxrtl.h | 41 +++++- backends/cxxrtl/cxxrtl_backend.cc | 232 +++++++++++++++++++++++------- backends/cxxrtl/cxxrtl_capi.cc | 4 + backends/cxxrtl/cxxrtl_capi.h | 40 +++++- backends/cxxrtl/cxxrtl_vcd.h | 32 ++++- 5 files changed, 278 insertions(+), 71 deletions(-) diff --git a/backends/cxxrtl/cxxrtl.h b/backends/cxxrtl/cxxrtl.h index d850fdba4..59393e415 100644 --- a/backends/cxxrtl/cxxrtl.h +++ b/backends/cxxrtl/cxxrtl.h @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -843,6 +844,9 @@ typedef std::map metadata_map; // Tag class to disambiguate values/wires and their aliases. struct debug_alias {}; +// Tag declaration to disambiguate values and debug outlines. +using debug_outline = ::_cxxrtl_outline; + // This structure is intended for consumption via foreign function interfaces, like Python's ctypes. // Because of this it uses a C-style layout that is easy to parse rather than more idiomatic C++. // @@ -851,10 +855,11 @@ struct debug_alias {}; struct debug_item : ::cxxrtl_object { // Object types. enum : uint32_t { - VALUE = CXXRTL_VALUE, - WIRE = CXXRTL_WIRE, - MEMORY = CXXRTL_MEMORY, - ALIAS = CXXRTL_ALIAS, + VALUE = CXXRTL_VALUE, + WIRE = CXXRTL_WIRE, + MEMORY = CXXRTL_MEMORY, + ALIAS = CXXRTL_ALIAS, + OUTLINE = CXXRTL_OUTLINE, }; // Object flags. 
@@ -881,6 +886,7 @@ struct debug_item : ::cxxrtl_object { zero_at = 0; curr = item.data; next = item.data; + outline = nullptr; } template @@ -895,6 +901,7 @@ struct debug_item : ::cxxrtl_object { zero_at = 0; curr = const_cast(item.data); next = nullptr; + outline = nullptr; } template @@ -910,6 +917,7 @@ struct debug_item : ::cxxrtl_object { zero_at = 0; curr = item.curr.data; next = item.next.data; + outline = nullptr; } template @@ -924,6 +932,7 @@ struct debug_item : ::cxxrtl_object { zero_at = zero_offset; curr = item.data.empty() ? nullptr : item.data[0].data; next = nullptr; + outline = nullptr; } template @@ -938,6 +947,7 @@ struct debug_item : ::cxxrtl_object { zero_at = 0; curr = const_cast(item.data); next = nullptr; + outline = nullptr; } template @@ -953,6 +963,22 @@ struct debug_item : ::cxxrtl_object { zero_at = 0; curr = const_cast(item.curr.data); next = nullptr; + outline = nullptr; + } + + template + debug_item(debug_outline &group, const value &item, size_t lsb_offset = 0) { + static_assert(sizeof(item) == value::chunks * sizeof(chunk_t), + "value is not compatible with C layout"); + type = OUTLINE; + flags = DRIVEN_COMB; + width = Bits; + lsb_at = lsb_offset; + depth = 1; + zero_at = 0; + curr = const_cast(item.data); + next = nullptr; + outline = &group; } }; static_assert(std::is_standard_layout::value, "debug_item is not compatible with C layout"); @@ -1029,11 +1055,16 @@ struct module { } // namespace cxxrtl -// Internal structure used to communicate with the implementation of the C interface. +// Internal structures used to communicate with the implementation of the C interface. + typedef struct _cxxrtl_toplevel { std::unique_ptr module; } *cxxrtl_toplevel; +typedef struct _cxxrtl_outline { + std::function eval; +} *cxxrtl_outline; + // Definitions of internal Yosys cells. Other than the functions in this namespace, CXXRTL is fully generic // and indepenent of Yosys implementation details. 
// diff --git a/backends/cxxrtl/cxxrtl_backend.cc b/backends/cxxrtl/cxxrtl_backend.cc index ca7f3a3cc..5e2f4f31a 100644 --- a/backends/cxxrtl/cxxrtl_backend.cc +++ b/backends/cxxrtl/cxxrtl_backend.cc @@ -539,6 +539,7 @@ struct CxxrtlWorker { bool inline_public = false; bool debug_info = false; + bool debug_eval = false; std::ostringstream f; std::string indent; @@ -553,8 +554,9 @@ struct CxxrtlWorker { pool unbuffered_wires; pool localized_wires; dict inlined_wires; - dict debug_alias_wires; dict debug_const_wires; + dict debug_alias_wires; + pool debug_outlined_wires; dict bit_has_state; dict> blackbox_specializations; dict eval_converges; @@ -786,22 +788,22 @@ struct CxxrtlWorker { dump_const(data, data.size()); } - bool dump_sigchunk(const RTLIL::SigChunk &chunk, bool is_lhs) + bool dump_sigchunk(const RTLIL::SigChunk &chunk, bool is_lhs, bool for_debug = false) { if (chunk.wire == NULL) { dump_const(chunk.data, chunk.width, chunk.offset); return false; } else { - if (inlined_wires.count(chunk.wire)) { + if (inlined_wires.count(chunk.wire) && (!for_debug || !debug_outlined_wires[chunk.wire])) { log_assert(!is_lhs); const FlowGraph::Node &node = inlined_wires[chunk.wire]; switch (node.type) { case FlowGraph::Node::Type::CONNECT: - dump_connect_expr(node.connect); + dump_connect_expr(node.connect, for_debug); break; case FlowGraph::Node::Type::CELL_EVAL: log_assert(is_inlinable_cell(node.cell->type)); - dump_cell_expr(node.cell); + dump_cell_expr(node.cell, for_debug); break; default: log_assert(false); @@ -821,36 +823,36 @@ struct CxxrtlWorker { } } - bool dump_sigspec(const RTLIL::SigSpec &sig, bool is_lhs) + bool dump_sigspec(const RTLIL::SigSpec &sig, bool is_lhs, bool for_debug = false) { if (sig.empty()) { f << "value<0>()"; return false; } else if (sig.is_chunk()) { - return dump_sigchunk(sig.as_chunk(), is_lhs); + return dump_sigchunk(sig.as_chunk(), is_lhs, for_debug); } else { - dump_sigchunk(*sig.chunks().rbegin(), is_lhs); + 
dump_sigchunk(*sig.chunks().rbegin(), is_lhs, for_debug); for (auto it = sig.chunks().rbegin() + 1; it != sig.chunks().rend(); ++it) { f << ".concat("; - dump_sigchunk(*it, is_lhs); + dump_sigchunk(*it, is_lhs, for_debug); f << ")"; } return true; } } - void dump_sigspec_lhs(const RTLIL::SigSpec &sig) + void dump_sigspec_lhs(const RTLIL::SigSpec &sig, bool for_debug = false) { - dump_sigspec(sig, /*is_lhs=*/true); + dump_sigspec(sig, /*is_lhs=*/true, for_debug); } - void dump_sigspec_rhs(const RTLIL::SigSpec &sig) + void dump_sigspec_rhs(const RTLIL::SigSpec &sig, bool for_debug = false) { // In the contexts where we want template argument deduction to occur for `template ... value`, // it is necessary to have the argument to already be a `value`, since template argument deduction and implicit // type conversion are mutually exclusive. In these contexts, we use dump_sigspec_rhs() to emit an explicit // type conversion, but only if the expression needs it. - bool is_complex = dump_sigspec(sig, /*is_lhs=*/false); + bool is_complex = dump_sigspec(sig, /*is_lhs=*/false, for_debug); if (is_complex) f << ".val()"; } @@ -875,9 +877,9 @@ struct CxxrtlWorker { } } - void dump_connect_expr(const RTLIL::SigSig &conn) + void dump_connect_expr(const RTLIL::SigSig &conn, bool for_debug = false) { - dump_sigspec_rhs(conn.second); + dump_sigspec_rhs(conn.second, for_debug); } bool is_connect_inlined(const RTLIL::SigSig &conn) @@ -885,6 +887,14 @@ struct CxxrtlWorker { return conn.first.is_wire() && inlined_wires.count(conn.first.as_wire()); } + bool is_connect_outlined(const RTLIL::SigSig &conn) + { + for (auto chunk : conn.first.chunks()) + if (debug_outlined_wires.count(chunk.wire)) + return true; + return false; + } + void collect_connect(const RTLIL::SigSig &conn, std::vector &cells) { if (!is_connect_inlined(conn)) @@ -893,16 +903,18 @@ struct CxxrtlWorker { collect_sigspec_rhs(conn.second, cells); } - void dump_connect(const RTLIL::SigSig &conn) + void dump_connect(const 
RTLIL::SigSig &conn, bool for_debug = false) { - if (is_connect_inlined(conn)) + if (!for_debug && is_connect_inlined(conn)) + return; + if (for_debug && !is_connect_outlined(conn)) return; f << indent << "// connection\n"; f << indent; - dump_sigspec_lhs(conn.first); + dump_sigspec_lhs(conn.first, for_debug); f << " = "; - dump_connect_expr(conn); + dump_connect_expr(conn, for_debug); f << ";\n"; } @@ -919,7 +931,7 @@ struct CxxrtlWorker { } } - void dump_cell_expr(const RTLIL::Cell *cell) + void dump_cell_expr(const RTLIL::Cell *cell, bool for_debug = false) { // Unary cells if (is_unary_cell(cell->type)) { @@ -927,7 +939,7 @@ struct CxxrtlWorker { if (is_extending_cell(cell->type)) f << '_' << (cell->getParam(ID::A_SIGNED).as_bool() ? 's' : 'u'); f << "<" << cell->getParam(ID::Y_WIDTH).as_int() << ">("; - dump_sigspec_rhs(cell->getPort(ID::A)); + dump_sigspec_rhs(cell->getPort(ID::A), for_debug); f << ")"; // Binary cells } else if (is_binary_cell(cell->type)) { @@ -936,18 +948,18 @@ struct CxxrtlWorker { f << '_' << (cell->getParam(ID::A_SIGNED).as_bool() ? 's' : 'u') << (cell->getParam(ID::B_SIGNED).as_bool() ? 's' : 'u'); f << "<" << cell->getParam(ID::Y_WIDTH).as_int() << ">("; - dump_sigspec_rhs(cell->getPort(ID::A)); + dump_sigspec_rhs(cell->getPort(ID::A), for_debug); f << ", "; - dump_sigspec_rhs(cell->getPort(ID::B)); + dump_sigspec_rhs(cell->getPort(ID::B), for_debug); f << ")"; // Muxes } else if (cell->type == ID($mux)) { f << "("; - dump_sigspec_rhs(cell->getPort(ID::S)); + dump_sigspec_rhs(cell->getPort(ID::S), for_debug); f << " ? 
"; - dump_sigspec_rhs(cell->getPort(ID::B)); + dump_sigspec_rhs(cell->getPort(ID::B), for_debug); f << " : "; - dump_sigspec_rhs(cell->getPort(ID::A)); + dump_sigspec_rhs(cell->getPort(ID::A), for_debug); f << ")"; // Parallel (one-hot) muxes } else if (cell->type == ID($pmux)) { @@ -955,24 +967,24 @@ struct CxxrtlWorker { int s_width = cell->getParam(ID::S_WIDTH).as_int(); for (int part = 0; part < s_width; part++) { f << "("; - dump_sigspec_rhs(cell->getPort(ID::S).extract(part)); + dump_sigspec_rhs(cell->getPort(ID::S).extract(part), for_debug); f << " ? "; - dump_sigspec_rhs(cell->getPort(ID::B).extract(part * width, width)); + dump_sigspec_rhs(cell->getPort(ID::B).extract(part * width, width), for_debug); f << " : "; } - dump_sigspec_rhs(cell->getPort(ID::A)); + dump_sigspec_rhs(cell->getPort(ID::A), for_debug); for (int part = 0; part < s_width; part++) { f << ")"; } // Concats } else if (cell->type == ID($concat)) { - dump_sigspec_rhs(cell->getPort(ID::B)); + dump_sigspec_rhs(cell->getPort(ID::B), for_debug); f << ".concat("; - dump_sigspec_rhs(cell->getPort(ID::A)); + dump_sigspec_rhs(cell->getPort(ID::A), for_debug); f << ").val()"; // Slices } else if (cell->type == ID($slice)) { - dump_sigspec_rhs(cell->getPort(ID::A)); + dump_sigspec_rhs(cell->getPort(ID::A), for_debug); f << ".slice<"; f << cell->getParam(ID::OFFSET).as_int() + cell->getParam(ID::Y_WIDTH).as_int() - 1; f << ","; @@ -989,6 +1001,17 @@ struct CxxrtlWorker { inlined_wires.count(cell->getPort(ID::Y).as_wire()); } + bool is_cell_outlined(const RTLIL::Cell *cell) + { + if (is_internal_cell(cell->type)) + for (auto conn : cell->connections()) + if (cell->output(conn.first)) + for (auto chunk : conn.second.chunks()) + if (debug_outlined_wires.count(chunk.wire)) + return true; + return false; + } + void collect_cell_eval(const RTLIL::Cell *cell, std::vector &cells) { if (!is_cell_inlined(cell)) @@ -1000,9 +1023,11 @@ struct CxxrtlWorker { collect_sigspec_rhs(port.second, cells); } - void 
dump_cell_eval(const RTLIL::Cell *cell) + void dump_cell_eval(const RTLIL::Cell *cell, bool for_debug = false) { - if (is_cell_inlined(cell)) + if (!for_debug && is_cell_inlined(cell)) + return; + if (for_debug && !is_cell_outlined(cell)) return; if (cell->type == ID($meminit)) return; // Handled elsewhere. @@ -1026,9 +1051,9 @@ struct CxxrtlWorker { // Elidable cells if (is_inlinable_cell(cell->type)) { f << indent; - dump_sigspec_lhs(cell->getPort(ID::Y)); + dump_sigspec_lhs(cell->getPort(ID::Y), for_debug); f << " = "; - dump_cell_expr(cell); + dump_cell_expr(cell, for_debug); f << ";\n"; // Flip-flops } else if (is_ff_cell(cell->type)) { @@ -1460,14 +1485,11 @@ struct CxxrtlWorker { void dump_wire(const RTLIL::Wire *wire, bool is_local) { - if (inlined_wires.count(wire)) - return; - - if (localized_wires[wire] && is_local) { + if (is_local && localized_wires[wire] && !inlined_wires.count(wire)) { dump_attrs(wire); f << indent << "value<" << wire->width << "> " << mangle(wire) << ";\n"; } - if (!localized_wires[wire] && !is_local) { + if (!is_local && !localized_wires[wire]) { std::string width; if (wire->module->has_attribute(ID(cxxrtl_blackbox)) && wire->has_attribute(ID(cxxrtl_width))) { width = wire->get_string_attribute(ID(cxxrtl_width)); @@ -1530,6 +1552,23 @@ struct CxxrtlWorker { } } + void dump_debug_wire(const RTLIL::Wire *wire, bool is_local) + { + if (!debug_outlined_wires[wire]) + return; + + bool is_outlined_member = wire->name.isPublic() && + !(debug_const_wires.count(wire) || debug_alias_wires.count(wire)); + if (is_local && !is_outlined_member) { + dump_attrs(wire); + f << indent << "value<" << wire->width << "> " << mangle(wire) << ";\n"; + } + if (!is_local && is_outlined_member) { + dump_attrs(wire); + f << indent << "/*outline*/ value<" << wire->width << "> " << mangle(wire) << ";\n"; + } + } + void dump_memory(RTLIL::Module *module, const RTLIL::Memory *memory) { vector init_cells; @@ -1619,6 +1658,27 @@ struct CxxrtlWorker { dec_indent(); 
} + void dump_debug_eval_method(RTLIL::Module *module) + { + inc_indent(); + for (auto wire : module->wires()) + dump_debug_wire(wire, /*is_local=*/true); + for (auto node : schedule[module]) { + switch (node.type) { + case FlowGraph::Node::Type::CONNECT: + dump_connect(node.connect, /*for_debug=*/true); + break; + case FlowGraph::Node::Type::CELL_EVAL: + dump_cell_eval(node.cell, /*for_debug=*/true); + break; + case FlowGraph::Node::Type::CELL_SYNC: + case FlowGraph::Node::Type::PROCESS: + break; + } + } + dec_indent(); + } + void dump_commit_method(RTLIL::Module *module) { inc_indent(); @@ -1656,6 +1716,7 @@ struct CxxrtlWorker { size_t count_public_wires = 0; size_t count_const_wires = 0; size_t count_alias_wires = 0; + size_t count_inline_wires = 0; size_t count_member_wires = 0; size_t count_skipped_wires = 0; size_t count_driven_sync = 0; @@ -1685,6 +1746,12 @@ struct CxxrtlWorker { f << ", debug_item(debug_alias(), " << mangle(debug_alias_wires[wire]) << ", "; f << wire->start_offset << "));\n"; count_alias_wires++; + } else if (debug_outlined_wires.count(wire)) { + // Inlined but rematerializable wire + f << indent << "items.add(path + " << escape_cxx_string(get_hdl_name(wire)); + f << ", debug_item(debug_eval_outline, " << mangle(wire) << ", "; + f << wire->start_offset << "));\n"; + count_inline_wires++; } else if (!localized_wires.count(wire)) { // Member wire std::vector flags; @@ -1738,6 +1805,7 @@ struct CxxrtlWorker { f << "));\n"; count_member_wires++; } else { + // Localized or inlined wire with no debug information count_skipped_wires++; } } @@ -1761,14 +1829,16 @@ struct CxxrtlWorker { log_debug("Debug information statistics for module `%s':\n", log_id(module)); log_debug(" Public wires: %zu, of which:\n", count_public_wires); - log_debug(" Const wires: %zu\n", count_const_wires); - log_debug(" Alias wires: %zu\n", count_alias_wires); log_debug(" Member wires: %zu, of which:\n", count_member_wires); log_debug(" Driven sync: %zu\n", 
count_driven_sync); log_debug(" Driven comb: %zu\n", count_driven_comb); - log_debug(" Undriven: %zu\n", count_undriven); log_debug(" Mixed driver: %zu\n", count_mixed_driver); - log_debug(" Other wires: %zu (no debug information)\n", count_skipped_wires); + log_debug(" Undriven: %zu\n", count_undriven); + log_debug(" Inline wires: %zu\n", count_inline_wires); + log_debug(" Alias wires: %zu\n", count_alias_wires); + log_debug(" Const wires: %zu\n", count_const_wires); + log_debug(" Other wires: %zu%s\n", count_skipped_wires, + count_skipped_wires > 0 ? " (debug information unavailable)" : ""); } void dump_metadata_map(const dict &metadata_map) @@ -1855,7 +1925,8 @@ struct CxxrtlWorker { inc_indent(); for (auto wire : module->wires()) dump_wire(wire, /*is_local=*/false); - f << "\n"; + for (auto wire : module->wires()) + dump_debug_wire(wire, /*is_local=*/false); bool has_memories = false; for (auto memory : module->memories) { dump_memory(module, memory.second); @@ -1927,8 +1998,20 @@ struct CxxrtlWorker { f << "\n"; f << indent << "bool eval() override;\n"; f << indent << "bool commit() override;\n"; - if (debug_info) + if (debug_info) { + if (debug_eval) { + f << "\n"; + f << indent << "void debug_eval();\n"; + for (auto wire : module->wires()) + if (debug_outlined_wires.count(wire)) { + f << indent << "debug_outline debug_eval_outline { std::bind(&" + << mangle(module) << "::debug_eval, this) };\n"; + break; + } + } + f << "\n"; f << indent << "void debug_info(debug_items &items, std::string path = \"\") override;\n"; + } dec_indent(); f << indent << "}; // struct " << mangle(module) << "\n"; f << "\n"; @@ -1948,6 +2031,12 @@ struct CxxrtlWorker { f << indent << "}\n"; f << "\n"; if (debug_info) { + if (debug_eval) { + f << indent << "void " << mangle(module) << "::debug_eval() {\n"; + dump_debug_eval_method(module); + f << indent << "}\n"; + f << "\n"; + } f << indent << "void " << mangle(module) << "::debug_info(debug_items &items, std::string path) {\n"; 
dump_debug_info_method(module); f << indent << "}\n"; @@ -2251,6 +2340,11 @@ struct CxxrtlWorker { for (auto node : wire_comb_def.second) node_defs[node].insert(wire_comb_def.first); + dict, hash_ptr_ops> node_uses; + for (auto wire_use : flow.wire_uses) + for (auto node : wire_use.second) + node_uses[node].insert(wire_use.first); + Scheduler scheduler; dict::Vertex*, hash_ptr_ops> node_map; for (auto node : flow.nodes) @@ -2368,6 +2462,30 @@ struct CxxrtlWorker { } } } + if (debug_info && debug_eval) { + // Find wires that can be be outlined, i.e. whose values can be always recovered from + // the values of other wires. (This is the inverse of inlining--any wire that can be + // inlined can also be outlined.) Although this may seem strictly less efficient, since + // such values are computed at least twice, second-order effects make outlining useful. + pool worklist, visited; + for (auto wire : module->wires()) { + if (!wire->name.isPublic()) + continue; // only outline public wires + worklist.insert(wire); + } + while (!worklist.empty()) { + const RTLIL::Wire *wire = worklist.pop(); + visited.insert(wire); + if (!localized_wires.count(wire) && !inlined_wires.count(wire)) + continue; // member wire, doesn't need outlining + if (wire->name.isPublic() || !inlined_wires.count(wire)) + debug_outlined_wires.insert(wire); // allow outlining of internal wires only + for (auto node : flow.wire_comb_defs[wire]) + for (auto node_use : node_uses[node]) + if (!visited.count(node_use)) + worklist.insert(node_use); + } + } } if (has_feedback_arcs || has_buffered_comb_wires) { // Although both non-feedback buffered combinatorial wires and apparent feedback wires may be eliminated @@ -2457,8 +2575,7 @@ struct CxxrtlWorker { struct CxxrtlBackend : public Backend { static const int DEFAULT_OPT_LEVEL = 6; - static const int OPT_LEVEL_DEBUG = 4; - static const int DEFAULT_DEBUG_LEVEL = 1; + static const int DEFAULT_DEBUG_LEVEL = 2; CxxrtlBackend() : Backend("cxxrtl", "convert design 
to C++ RTL simulation") { } void help() override @@ -2671,10 +2788,6 @@ struct CxxrtlBackend : public Backend { log(" -O6\n"); log(" like -O5, and inline public wires not marked (*keep*) if possible.\n"); log("\n"); - log(" -Og\n"); - log(" highest optimization level that provides debug information for all\n"); - log(" public wires. currently, alias for -O%d.\n", OPT_LEVEL_DEBUG); - log("\n"); log(" -g \n"); log(" set the debug level. the default is -g%d. higher debug levels provide\n", DEFAULT_DEBUG_LEVEL); log(" more visibility and generate more code, but do not pessimize evaluation.\n"); @@ -2686,6 +2799,10 @@ struct CxxrtlBackend : public Backend { log(" debug information for non-optimized public wires. this also makes it\n"); log(" possible to use the C API.\n"); log("\n"); + log(" -g2\n"); + log(" like -g1, and compute debug information on demand for all public wires\n"); + log(" that were optimized out.\n"); + log("\n"); } void execute(std::ostream *&f, std::string filename, std::vector args, RTLIL::Design *design) override @@ -2715,12 +2832,14 @@ struct CxxrtlBackend : public Backend { continue; } if (args[argidx] == "-Og") { - opt_level = OPT_LEVEL_DEBUG; + log_warning("The `-Og` option has been removed. Use `-g2` instead for complete " + "design coverage regardless of optimization level.\n"); continue; } if (args[argidx] == "-O" && argidx+1 < args.size() && args[argidx+1] == "g") { argidx++; - opt_level = OPT_LEVEL_DEBUG; + log_warning("The `-Og` option has been removed. 
Use `-g2` instead for complete " + "design coverage regardless of optimization level.\n"); continue; } if (args[argidx] == "-O" && argidx+1 < args.size()) { @@ -2781,6 +2900,9 @@ struct CxxrtlBackend : public Backend { } switch (debug_level) { // the highest level here must match DEFAULT_DEBUG_LEVEL + case 2: + worker.debug_eval = true; + YS_FALLTHROUGH case 1: worker.debug_info = true; YS_FALLTHROUGH diff --git a/backends/cxxrtl/cxxrtl_capi.cc b/backends/cxxrtl/cxxrtl_capi.cc index f92709b46..227173ba8 100644 --- a/backends/cxxrtl/cxxrtl_capi.cc +++ b/backends/cxxrtl/cxxrtl_capi.cc @@ -86,3 +86,7 @@ void cxxrtl_enum(cxxrtl_handle handle, void *data, for (auto &it : handle->objects.table) callback(data, it.first.c_str(), static_cast(&it.second[0]), it.second.size()); } + +void cxxrtl_outline_eval(cxxrtl_outline outline) { + outline->eval(); +} diff --git a/backends/cxxrtl/cxxrtl_capi.h b/backends/cxxrtl/cxxrtl_capi.h index d67c58f94..7d9c60ac5 100644 --- a/backends/cxxrtl/cxxrtl_capi.h +++ b/backends/cxxrtl/cxxrtl_capi.h @@ -128,6 +128,18 @@ enum cxxrtl_type { // pointer is always NULL. CXXRTL_ALIAS = 3, + // Outlines correspond to netlist nodes that were optimized in a way that makes them inaccessible + // outside of a module's `eval()` function. At the highest debug information level, every inlined + // node has a corresponding outline object. + // + // Outlines can be inspected via the `curr` pointer and can never be modified; the `next` pointer + // is always NULL. Unlike all other objects, the bits of an outline object are meaningful only + // after a call to `cxxrtl_outline_eval` and until any subsequent modification to the netlist. + // Observing this requirement is the responsibility of the caller; it is not enforced. + // + // Outlines always correspond to combinatorial netlist nodes that are not ports. + CXXRTL_OUTLINE = 4, + // More object types may be added in the future, but the existing ones will never change. 
}; @@ -171,8 +183,8 @@ enum cxxrtl_flag { // Node has bits that are driven by a combinatorial cell or another node. // - // This flag can be set on objects of type `CXXRTL_VALUE` and `CXXRTL_WIRE`. It may be combined - // with `CXXRTL_DRIVEN_SYNC` and `CXXRTL_UNDRIVEN`, as well as other flags. + // This flag can be set on objects of type `CXXRTL_VALUE`, `CXXRTL_WIRE`, and `CXXRTL_OUTLINE`. + // It may be combined with `CXXRTL_DRIVEN_SYNC` and `CXXRTL_UNDRIVEN`, as well as other flags. // // This flag is set on objects that have bits connected to the output of a combinatorial cell, // or directly to another node. For designs without combinatorial loops, writing to such bits @@ -193,8 +205,8 @@ enum cxxrtl_flag { // Description of a simulated object. // -// The `data` array can be accessed directly to inspect and, if applicable, modify the bits -// stored in the object. +// The `curr` and `next` arrays can be accessed directly to inspect and, if applicable, modify +// the bits stored in the object. struct cxxrtl_object { // Type of the object. // @@ -231,6 +243,12 @@ struct cxxrtl_object { uint32_t *curr; uint32_t *next; + // Opaque reference to an outline. Only meaningful for outline objects. + // + // See the documentation of `cxxrtl_outline` for details. When creating a `cxxrtl_object`, set + // this field to NULL. + struct _cxxrtl_outline *outline; + // More description fields may be added in the future, but the existing ones will never change. }; @@ -272,6 +290,20 @@ void cxxrtl_enum(cxxrtl_handle handle, void *data, void (*callback)(void *data, const char *name, struct cxxrtl_object *object, size_t parts)); +// Opaque reference to an outline. +// +// An outline is a group of outline objects that are evaluated simultaneously. The identity of +// an outline can be compared to determine whether any two objects belong to the same outline. +typedef struct _cxxrtl_outline *cxxrtl_outline; + +// Evaluate an outline. 
+// +// After evaluating an outline, the bits of every outline object contained in it are consistent +// with the current state of the netlist. In general, any further modification to the netlist +// causes every outline object to become stale, after which the corresponding outline must be +// re-evaluated, otherwise the bits read from that object are meaningless. +void cxxrtl_outline_eval(cxxrtl_outline outline); + #ifdef __cplusplus } #endif diff --git a/backends/cxxrtl/cxxrtl_vcd.h b/backends/cxxrtl/cxxrtl_vcd.h index dbeabbaf2..6ee98b428 100644 --- a/backends/cxxrtl/cxxrtl_vcd.h +++ b/backends/cxxrtl/cxxrtl_vcd.h @@ -28,10 +28,13 @@ class vcd_writer { size_t ident; size_t width; chunk_t *curr; - size_t prev_off; + size_t cache_offset; + debug_outline *outline; + bool *outline_warm; }; std::vector current_scope; + std::map outlines; std::vector variables; std::vector cache; std::map aliases; @@ -112,16 +115,22 @@ class vcd_writer { buffer += '\n'; } - const variable &register_variable(size_t width, chunk_t *curr, bool constant = false) { + void reset_outlines() { + for (auto &outline_it : outlines) + outline_it.second = /*warm=*/(outline_it.first == nullptr); + } + + variable &register_variable(size_t width, chunk_t *curr, bool constant = false, debug_outline *outline = nullptr) { if (aliases.count(curr)) { return variables[aliases[curr]]; } else { + auto outline_it = outlines.emplace(outline, /*warm=*/(outline == nullptr)).first; const size_t chunks = (width + (sizeof(chunk_t) * 8 - 1)) / (sizeof(chunk_t) * 8); aliases[curr] = variables.size(); if (constant) { - variables.emplace_back(variable { variables.size(), width, curr, (size_t)-1 }); + variables.emplace_back(variable { variables.size(), width, curr, (size_t)-1, outline_it->first, &outline_it->second }); } else { - variables.emplace_back(variable { variables.size(), width, curr, cache.size() }); + variables.emplace_back(variable { variables.size(), width, curr, cache.size(), outline_it->first,
&outline_it->second }); cache.insert(cache.end(), &curr[0], &curr[chunks]); } return variables.back(); @@ -129,13 +138,17 @@ class vcd_writer { } bool test_variable(const variable &var) { - if (var.prev_off == (size_t)-1) + if (var.cache_offset == (size_t)-1) return false; // constant + if (!*var.outline_warm) { + var.outline->eval(); + *var.outline_warm = true; + } const size_t chunks = (var.width + (sizeof(chunk_t) * 8 - 1)) / (sizeof(chunk_t) * 8); - if (std::equal(&var.curr[0], &var.curr[chunks], &cache[var.prev_off])) { + if (std::equal(&var.curr[0], &var.curr[chunks], &cache[var.cache_offset])) { return false; } else { - std::copy(&var.curr[0], &var.curr[chunks], &cache[var.prev_off]); + std::copy(&var.curr[0], &var.curr[chunks], &cache[var.cache_offset]); return true; } } @@ -197,6 +210,10 @@ public: emit_var(register_variable(item.width, item.curr), "wire", name, item.lsb_at, multipart); break; + case debug_item::OUTLINE: + emit_var(register_variable(item.width, item.curr, /*constant=*/false, item.outline), + "wire", name, item.lsb_at, multipart); + break; } } @@ -228,6 +245,7 @@ public: emit_scope({}); emit_enddefinitions(); } + reset_outlines(); emit_time(timestamp); for (auto var : variables) if (test_variable(var) || first_sample) { From dd6a761db04ca67205ad25d7f5dbb63daa8d9e9d Mon Sep 17 00:00:00 2001 From: whitequark Date: Sun, 13 Dec 2020 07:44:27 +0000 Subject: [PATCH 6/9] cxxrtl: add a "bare minimum" debug information level. Useful to reduce overhead when no debug capabilities are necessary except for access to design state. 
--- backends/cxxrtl/cxxrtl_backend.cc | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/backends/cxxrtl/cxxrtl_backend.cc b/backends/cxxrtl/cxxrtl_backend.cc index 5e2f4f31a..588cca12e 100644 --- a/backends/cxxrtl/cxxrtl_backend.cc +++ b/backends/cxxrtl/cxxrtl_backend.cc @@ -539,6 +539,7 @@ struct CxxrtlWorker { bool inline_public = false; bool debug_info = false; + bool debug_alias = false; bool debug_eval = false; std::ostringstream f; @@ -2425,7 +2426,7 @@ struct CxxrtlWorker { for (auto item : flow.bit_has_state) bit_has_state.insert(item); - if (debug_info) { + if (debug_info && debug_alias) { // Find wires that alias other wires or are tied to a constant; debug information can be enriched with these // at essentially zero additional cost. // @@ -2575,7 +2576,7 @@ struct CxxrtlWorker { struct CxxrtlBackend : public Backend { static const int DEFAULT_OPT_LEVEL = 6; - static const int DEFAULT_DEBUG_LEVEL = 2; + static const int DEFAULT_DEBUG_LEVEL = 3; CxxrtlBackend() : Backend("cxxrtl", "convert design to C++ RTL simulation") { } void help() override @@ -2793,14 +2794,18 @@ struct CxxrtlBackend : public Backend { log(" more visibility and generate more code, but do not pessimize evaluation.\n"); log("\n"); log(" -g0\n"); - log(" no debug information.\n"); + log(" no debug information. the C API is unavailable.\n"); log("\n"); log(" -g1\n"); - log(" debug information for non-optimized public wires. this also makes it\n"); - log(" possible to use the C API.\n"); + log(" debug information for member public wires only. this is the bare minimum\n"); + log(" necessary to access all design state. 
enables the C API.\n"); log("\n"); log(" -g2\n"); - log(" like -g1, and compute debug information on demand for all public wires\n"); + log(" like -g1, and include debug information for public wires that are tied\n"); + log(" to a constant or another public wire.\n"); + log("\n"); + log(" -g3\n"); + log(" like -g2, and compute debug information on demand for all public wires\n"); log(" that were optimized out.\n"); log("\n"); } @@ -2832,13 +2837,13 @@ struct CxxrtlBackend : public Backend { continue; } if (args[argidx] == "-Og") { - log_warning("The `-Og` option has been removed. Use `-g2` instead for complete " + log_warning("The `-Og` option has been removed. Use `-g3` instead for complete " "design coverage regardless of optimization level.\n"); continue; } if (args[argidx] == "-O" && argidx+1 < args.size() && args[argidx+1] == "g") { argidx++; - log_warning("The `-Og` option has been removed. Use `-g2` instead for complete " + log_warning("The `-Og` option has been removed. Use `-g3` instead for complete " "design coverage regardless of optimization level.\n"); continue; } @@ -2900,9 +2905,12 @@ struct CxxrtlBackend : public Backend { } switch (debug_level) { // the highest level here must match DEFAULT_DEBUG_LEVEL - case 2: + case 3: worker.debug_eval = true; YS_FALLTHROUGH + case 2: + worker.debug_alias = true; + YS_FALLTHROUGH case 1: worker.debug_info = true; YS_FALLTHROUGH From 4d40595d644cfe58425a3de023c712641c429010 Mon Sep 17 00:00:00 2001 From: whitequark Date: Sun, 13 Dec 2020 15:33:47 +0000 Subject: [PATCH 7/9] cxxrtl: make alias analysis outlining-aware. Before this commit, if a sequence of wires assigned in a chain would terminate on a cell, none of the wires would get marked as aliases, and typically all of the public wires would get outlined. The reason for this behavior is that alias analysis predates outlining and in fact runs before it. After this commit, alias analysis runs after outlining and considers outlined wires valid aliasees. 
More importantly, if the chained wires contain any valid aliasees, then all of the wires are aliased to the one that is topologically deepest. Aliased wires incur virtually no overhead for the VCD writer, unlike outlined wires that would otherwise take their place. On Minerva SoC SRAM, size of the full VCD dump is reduced by ~65%, and throughput is increased by ~55%. --- backends/cxxrtl/cxxrtl_backend.cc | 86 +++++++++++++++++-------------- 1 file changed, 48 insertions(+), 38 deletions(-) diff --git a/backends/cxxrtl/cxxrtl_backend.cc b/backends/cxxrtl/cxxrtl_backend.cc index 588cca12e..fa19a8dd6 100644 --- a/backends/cxxrtl/cxxrtl_backend.cc +++ b/backends/cxxrtl/cxxrtl_backend.cc @@ -2426,43 +2426,6 @@ struct CxxrtlWorker { for (auto item : flow.bit_has_state) bit_has_state.insert(item); - if (debug_info && debug_alias) { - // Find wires that alias other wires or are tied to a constant; debug information can be enriched with these - // at essentially zero additional cost. - // - // Note that the information collected here can't be used for optimizing the netlist: debug information queries - // are pure and run on a design in a stable state, which allows assumptions that do not otherwise hold. 
- for (auto wire : module->wires()) { - if (!wire->name.isPublic()) - continue; - if (!unbuffered_wires[wire]) - continue; - const RTLIL::Wire *wire_it = wire; - while (1) { - if (!(flow.wire_def_inlinable.count(wire_it) && flow.wire_def_inlinable[wire_it])) - break; // not an alias: complex def - log_assert(flow.wire_comb_defs[wire_it].size() == 1); - FlowGraph::Node *node = *flow.wire_comb_defs[wire_it].begin(); - if (node->type != FlowGraph::Node::Type::CONNECT) - break; // not an alias: def by cell - RTLIL::SigSpec rhs_sig = node->connect.second; - if (rhs_sig.is_wire()) { - RTLIL::Wire *rhs_wire = rhs_sig.as_wire(); - if (unbuffered_wires[rhs_wire]) { - wire_it = rhs_wire; // maybe an alias - } else { - debug_alias_wires[wire] = rhs_wire; // is an alias - break; - } - } else if (rhs_sig.is_fully_const()) { - debug_const_wires[wire] = rhs_sig.as_const(); // is a const - break; - } else { - break; // not an alias: complex rhs - } - } - } - } if (debug_info && debug_eval) { // Find wires that can be be outlined, i.e. whose values can be always recovered from // the values of other wires. (This is the inverse of inlining--any wire that can be @@ -2471,7 +2434,7 @@ struct CxxrtlWorker { pool worklist, visited; for (auto wire : module->wires()) { if (!wire->name.isPublic()) - continue; // only outline public wires + continue; worklist.insert(wire); } while (!worklist.empty()) { @@ -2487,6 +2450,53 @@ struct CxxrtlWorker { worklist.insert(node_use); } } + if (debug_info && debug_alias) { + // Find wires that alias other wires or are tied to a constant. Both of these cases are + // directly expressible in the debug information, improving coverage at zero cost. 
+ for (auto wire : module->wires()) { + if (!wire->name.isPublic()) + continue; + const RTLIL::Wire *cursor = wire; + RTLIL::SigSpec alias_of; + while (1) { + if (!(flow.wire_def_inlinable.count(cursor) && flow.wire_def_inlinable[cursor])) + break; // not an alias: complex def + log_assert(flow.wire_comb_defs[cursor].size() == 1); + FlowGraph::Node *node = *flow.wire_comb_defs[cursor].begin(); + if (node->type != FlowGraph::Node::Type::CONNECT) + break; // not an alias: def by cell + RTLIL::SigSpec rhs_sig = node->connect.second; + if (rhs_sig.is_fully_const()) { + alias_of = rhs_sig; // alias of const + break; + } else if (rhs_sig.is_wire()) { + RTLIL::Wire *rhs_wire = rhs_sig.as_wire(); // possible alias of wire + if (rhs_wire->port_input && !rhs_wire->port_output) { + alias_of = rhs_wire; // alias of input + break; + } else if (!localized_wires.count(rhs_wire) && !inlined_wires.count(rhs_wire)) { + alias_of = rhs_wire; // alias of member + break; + } else { + if (rhs_wire->name.isPublic() && debug_outlined_wires.count(rhs_wire)) + alias_of = rhs_wire; // alias of either outline or another alias + cursor = rhs_wire; // keep looking + } + } else { + break; // not an alias: complex rhs + } + } + if (alias_of.empty()) { + continue; + } else if (alias_of.is_fully_const()) { + debug_const_wires[wire] = alias_of.as_const(); + } else if (alias_of.is_wire()) { + debug_alias_wires[wire] = alias_of.as_wire(); + } else log_abort(); + if (inlined_wires.count(wire)) + debug_outlined_wires.erase(wire); + } + } } if (has_feedback_arcs || has_buffered_comb_wires) { // Although both non-feedback buffered combinatorial wires and apparent feedback wires may be eliminated From f75bc6c7aac92c7c1c8954ec7fe5325a94e6e491 Mon Sep 17 00:00:00 2001 From: whitequark Date: Sun, 13 Dec 2020 18:16:55 +0000 Subject: [PATCH 8/9] cxxrtl: disable optimization of debug_info().
Implementing outlining has greatly increased the amount of debug information in a typical build, and consequently exposed performance issues in C++ compilers, which are similar for both GCC and Clang; the compile time of Minerva SoC SRAM increased almost twofold. Although one would expect the slowdown to be caused by the increased use of templates in `debug_eval()`, it is actually almost entirely attributable to optimizations and codegen for `debug_info()`. Fortunately, it is neither possible nor desirable to optimize `debug_info()`: in most cases it is called exactly once, and its body is a linear sequence of calls with unique arguments. This commit turns off optimizations for `debug_info()` on GCC and Clang, improving -Os compile time of Minerva SoC SRAM by ~40% (!) --- backends/cxxrtl/cxxrtl.h | 17 ++++++++++++++--- backends/cxxrtl/cxxrtl_backend.cc | 1 + 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/backends/cxxrtl/cxxrtl.h b/backends/cxxrtl/cxxrtl.h index 59393e415..3c315c7df 100644 --- a/backends/cxxrtl/cxxrtl.h +++ b/backends/cxxrtl/cxxrtl.h @@ -41,18 +41,29 @@ #include +#ifndef __has_attribute +# define __has_attribute(x) 0 +#endif + // CXXRTL essentially uses the C++ compiler as a hygienic macro engine that feeds an instruction selector. // It generates a lot of specialized template functions with relatively large bodies that, when inlined // into the caller and (for those with loops) unrolled, often expose many new optimization opportunities. // Because of this, most of the CXXRTL runtime must be always inlined for best performance.
-#ifndef __has_attribute -# define __has_attribute(x) 0 -#endif #if __has_attribute(always_inline) #define CXXRTL_ALWAYS_INLINE inline __attribute__((__always_inline__)) #else #define CXXRTL_ALWAYS_INLINE inline #endif +// Conversely, some functions in the generated code are extremely large yet very cold, with both of these +// properties being extreme enough to confuse C++ compilers into spending pathological amounts of time +// on a futile (the code becomes worse) attempt to optimize the least important parts of code. +#if __has_attribute(optnone) +#define CXXRTL_EXTREMELY_COLD __attribute__((__optnone__)) +#elif __has_attribute(optimize) +#define CXXRTL_EXTREMELY_COLD __attribute__((__optimize__(0))) +#else +#define CXXRTL_EXTREMELY_COLD +#endif // CXXRTL uses assert() to check for C++ contract violations (which may result in e.g. undefined behavior // of the simulation code itself), and CXXRTL_ASSERT to check for RTL contract violations (which may at diff --git a/backends/cxxrtl/cxxrtl_backend.cc b/backends/cxxrtl/cxxrtl_backend.cc index fa19a8dd6..7bf44626a 100644 --- a/backends/cxxrtl/cxxrtl_backend.cc +++ b/backends/cxxrtl/cxxrtl_backend.cc @@ -2038,6 +2038,7 @@ struct CxxrtlWorker { f << indent << "}\n"; f << "\n"; } + f << indent << "CXXRTL_EXTREMELY_COLD\n"; f << indent << "void " << mangle(module) << "::debug_info(debug_items &items, std::string path) {\n"; dump_debug_info_method(module); f << indent << "}\n"; From d889a3df35e539b6dcfbee9c6a98461eca1a0b0e Mon Sep 17 00:00:00 2001 From: whitequark Date: Tue, 15 Dec 2020 03:46:06 +0000 Subject: [PATCH 9/9] cxxrtl: print names of cells inlined in connections. 
--- backends/cxxrtl/cxxrtl_backend.cc | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/backends/cxxrtl/cxxrtl_backend.cc b/backends/cxxrtl/cxxrtl_backend.cc index 7bf44626a..9875aba60 100644 --- a/backends/cxxrtl/cxxrtl_backend.cc +++ b/backends/cxxrtl/cxxrtl_backend.cc @@ -911,7 +911,16 @@ struct CxxrtlWorker { if (for_debug && !is_connect_outlined(conn)) return; - f << indent << "// connection\n"; + std::vector inlined_cells; + collect_sigspec_rhs(conn.second, inlined_cells); + if (for_debug || inlined_cells.empty()) { + f << indent << "// connection\n"; + } else { + f << indent << "// cells"; + for (auto inlined_cell : inlined_cells) + f << " " << inlined_cell.str(); + f << "\n"; + } f << indent; dump_sigspec_lhs(conn.first, for_debug); f << " = ";