2019-11-30 19:51:16 -06:00
|
|
|
/*
|
|
|
|
* yosys -- Yosys Open SYnthesis Suite
|
|
|
|
*
|
2020-04-03 11:07:43 -05:00
|
|
|
* Copyright (C) 2019-2020 whitequark <whitequark@whitequark.org>
|
2019-11-30 19:51:16 -06:00
|
|
|
*
|
|
|
|
* Permission to use, copy, modify, and/or distribute this software for any
|
|
|
|
* purpose with or without fee is hereby granted, provided that the above
|
|
|
|
* copyright notice and this permission notice appear in all copies.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
|
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
|
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
|
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
|
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
|
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "kernel/rtlil.h"
|
|
|
|
#include "kernel/register.h"
|
|
|
|
#include "kernel/sigtools.h"
|
2020-04-03 11:07:43 -05:00
|
|
|
#include "kernel/utils.h"
|
2019-11-30 19:51:16 -06:00
|
|
|
#include "kernel/celltypes.h"
|
2020-11-01 09:25:55 -06:00
|
|
|
#include "kernel/mem.h"
|
2019-11-30 19:51:16 -06:00
|
|
|
#include "kernel/log.h"
|
|
|
|
|
|
|
|
USING_YOSYS_NAMESPACE
|
|
|
|
PRIVATE_NAMESPACE_BEGIN
|
|
|
|
|
2019-12-10 14:09:24 -06:00
|
|
|
// [[CITE]]
|
|
|
|
// Peter Eades; Xuemin Lin; W. F. Smyth, "A Fast Effective Heuristic For The Feedback Arc Set Problem"
|
|
|
|
// Information Processing Letters, Vol. 47, pp 319-323, 1993
|
|
|
|
// https://pdfs.semanticscholar.org/c7ed/d9acce96ca357876540e19664eb9d976637f.pdf
|
|
|
|
|
|
|
|
// A topological sort (on a cell/wire graph) is always possible in a fully flattened RTLIL design without
|
|
|
|
// processes or logic loops where every wire has a single driver. Logic loops are illegal in RTLIL and wires
|
|
|
|
// with multiple drivers can be split by the `splitnets` pass; however, interdependencies between processes
|
|
|
|
// or module instances can create strongly connected components without introducing evaluation nondeterminism.
|
|
|
|
// We wish to support designs with such benign SCCs (as well as designs with multiple drivers per wire), so
|
|
|
|
// we sort the graph in a way that minimizes feedback arcs. If there are no feedback arcs in the sorted graph,
|
|
|
|
// then a more efficient evaluation method is possible, since eval() will always immediately converge.
|
|
|
|
template<class T>
|
|
|
|
struct Scheduler {
|
|
|
|
struct Vertex {
|
|
|
|
T *data;
|
|
|
|
Vertex *prev, *next;
|
|
|
|
pool<Vertex*, hash_ptr_ops> preds, succs;
|
|
|
|
|
|
|
|
Vertex() : data(NULL), prev(this), next(this) {}
|
|
|
|
Vertex(T *data) : data(data), prev(NULL), next(NULL) {}
|
|
|
|
|
|
|
|
bool empty() const
|
|
|
|
{
|
|
|
|
log_assert(data == NULL);
|
|
|
|
if (next == this) {
|
|
|
|
log_assert(prev == next);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
void link(Vertex *list)
|
|
|
|
{
|
|
|
|
log_assert(prev == NULL && next == NULL);
|
|
|
|
next = list;
|
|
|
|
prev = list->prev;
|
|
|
|
list->prev->next = this;
|
|
|
|
list->prev = this;
|
|
|
|
}
|
|
|
|
|
|
|
|
void unlink()
|
|
|
|
{
|
|
|
|
log_assert(prev->next == this && next->prev == this);
|
|
|
|
prev->next = next;
|
|
|
|
next->prev = prev;
|
|
|
|
next = prev = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
int delta() const
|
|
|
|
{
|
|
|
|
return succs.size() - preds.size();
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
std::vector<Vertex*> vertices;
|
|
|
|
Vertex *sources = new Vertex;
|
|
|
|
Vertex *sinks = new Vertex;
|
|
|
|
dict<int, Vertex*> bins;
|
|
|
|
|
|
|
|
~Scheduler()
|
|
|
|
{
|
|
|
|
delete sources;
|
|
|
|
delete sinks;
|
|
|
|
for (auto bin : bins)
|
|
|
|
delete bin.second;
|
|
|
|
for (auto vertex : vertices)
|
|
|
|
delete vertex;
|
|
|
|
}
|
|
|
|
|
|
|
|
Vertex *add(T *data)
|
|
|
|
{
|
|
|
|
Vertex *vertex = new Vertex(data);
|
|
|
|
vertices.push_back(vertex);
|
|
|
|
return vertex;
|
|
|
|
}
|
|
|
|
|
|
|
|
void relink(Vertex *vertex)
|
|
|
|
{
|
|
|
|
if (vertex->succs.empty())
|
|
|
|
vertex->link(sinks);
|
|
|
|
else if (vertex->preds.empty())
|
|
|
|
vertex->link(sources);
|
|
|
|
else {
|
|
|
|
int delta = vertex->delta();
|
|
|
|
if (!bins.count(delta))
|
|
|
|
bins[delta] = new Vertex;
|
|
|
|
vertex->link(bins[delta]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Vertex *remove(Vertex *vertex)
|
|
|
|
{
|
|
|
|
vertex->unlink();
|
|
|
|
for (auto pred : vertex->preds) {
|
|
|
|
if (pred == vertex)
|
|
|
|
continue;
|
|
|
|
log_assert(pred->succs[vertex]);
|
|
|
|
pred->unlink();
|
|
|
|
pred->succs.erase(vertex);
|
|
|
|
relink(pred);
|
|
|
|
}
|
|
|
|
for (auto succ : vertex->succs) {
|
|
|
|
if (succ == vertex)
|
|
|
|
continue;
|
|
|
|
log_assert(succ->preds[vertex]);
|
|
|
|
succ->unlink();
|
|
|
|
succ->preds.erase(vertex);
|
|
|
|
relink(succ);
|
|
|
|
}
|
|
|
|
vertex->preds.clear();
|
|
|
|
vertex->succs.clear();
|
|
|
|
return vertex;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<Vertex*> schedule()
|
|
|
|
{
|
|
|
|
std::vector<Vertex*> s1, s2r;
|
|
|
|
for (auto vertex : vertices)
|
|
|
|
relink(vertex);
|
|
|
|
bool bins_empty = false;
|
|
|
|
while (!(sinks->empty() && sources->empty() && bins_empty)) {
|
|
|
|
while (!sinks->empty())
|
|
|
|
s2r.push_back(remove(sinks->next));
|
|
|
|
while (!sources->empty())
|
|
|
|
s1.push_back(remove(sources->next));
|
|
|
|
// Choosing u in this implementation isn't O(1), but the paper handwaves which data structure they suggest
|
|
|
|
// using to get O(1) relinking *and* find-max-key ("it is clear"... no it isn't), so this code uses a very
|
|
|
|
// naive implementation of find-max-key.
|
|
|
|
bins_empty = true;
|
|
|
|
bins.template sort<std::greater<int>>();
|
|
|
|
for (auto bin : bins) {
|
|
|
|
if (!bin.second->empty()) {
|
|
|
|
bins_empty = false;
|
|
|
|
s1.push_back(remove(bin.second->next));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
s1.insert(s1.end(), s2r.rbegin(), s2r.rend());
|
|
|
|
return s1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2020-04-16 20:41:08 -05:00
|
|
|
// Tells whether `type` names one of the single-operand internal cells listed below.
bool is_unary_cell(RTLIL::IdString type)
{
	// Bitwise and arithmetic operators.
	if (type.in(ID($not), ID($pos), ID($neg)))
		return true;
	// Logical negation and the reduction operators.
	return type.in(
		ID($logic_not),
		ID($reduce_and), ID($reduce_or), ID($reduce_xor), ID($reduce_xnor), ID($reduce_bool));
}
|
|
|
|
|
2020-04-16 20:41:08 -05:00
|
|
|
// Tells whether `type` names one of the two-operand internal cells listed below.
bool is_binary_cell(RTLIL::IdString type)
{
	const bool is_bitwise_or_logic = type.in(
		ID($and), ID($or), ID($xor), ID($xnor), ID($logic_and), ID($logic_or));
	if (is_bitwise_or_logic)
		return true;

	const bool is_shift = type.in(
		ID($shl), ID($sshl), ID($shr), ID($sshr), ID($shift), ID($shiftx));
	if (is_shift)
		return true;

	const bool is_comparison = type.in(
		ID($eq), ID($ne), ID($eqx), ID($nex), ID($gt), ID($ge), ID($lt), ID($le));
	if (is_comparison)
		return true;

	// Arithmetic.
	return type.in(ID($add), ID($sub), ID($mul), ID($div), ID($mod));
}
|
|
|
|
|
2020-06-09 02:26:13 -05:00
|
|
|
// Returns false for the logical and reduction cells listed below, and true for every other cell type.
bool is_extending_cell(RTLIL::IdString type)
{
	if (type.in(ID($logic_not), ID($logic_and), ID($logic_or)))
		return false;
	if (type.in(ID($reduce_and), ID($reduce_or), ID($reduce_xor), ID($reduce_xnor), ID($reduce_bool)))
		return false;
	return true;
}
|
|
|
|
|
2020-12-12 18:34:32 -06:00
|
|
|
// A cell type is inlinable if it is a unary or binary operator cell, or one of $mux, $concat, $slice, $pmux.
bool is_inlinable_cell(RTLIL::IdString type)
{
	if (is_unary_cell(type))
		return true;
	if (is_binary_cell(type))
		return true;
	return type.in(ID($mux), ID($concat), ID($slice), ID($pmux));
}
|
|
|
|
|
2020-04-16 20:41:08 -05:00
|
|
|
// Tells whether `type` names one of the internal flip-flop or latch cells listed below.
bool is_ff_cell(RTLIL::IdString type)
{
	// $dff and its synchronous-reset / enable variants.
	if (type.in(ID($dff), ID($dffe), ID($sdff), ID($sdffe), ID($sdffce)))
		return true;
	// Asynchronous-reset and set/reset variants.
	if (type.in(ID($adff), ID($adffe), ID($dffsr), ID($dffsre)))
		return true;
	// Latches.
	return type.in(ID($dlatch), ID($adlatch), ID($dlatchsr), ID($sr));
}
|
|
|
|
|
2020-04-16 20:41:08 -05:00
|
|
|
// A cell type counts as internal when its name is not public and does not start with "$paramod".
bool is_internal_cell(RTLIL::IdString type)
{
	const bool is_user_defined = type.isPublic() || type.begins_with("$paramod");
	return !is_user_defined;
}
|
|
|
|
|
2020-04-21 16:48:17 -05:00
|
|
|
// Looks up the module that `cell` instantiates (which must exist in the design) and reports whether that
// module has the boolean `cxxrtl_blackbox` attribute set.
bool is_cxxrtl_blackbox_cell(const RTLIL::Cell *cell)
{
	RTLIL::Design *design = cell->module->design;
	RTLIL::Module *instantiated = design->module(cell->type);
	log_assert(instantiated != nullptr);
	return instantiated->get_bool_attribute(ID(cxxrtl_blackbox));
}
|
|
|
|
|
|
|
|
// Classification of a cell port, as computed by cxxrtl_port_type().
enum class CxxrtlPortType {
	// Neither attribute is set (or the cell is not a CXXRTL black box); also covers mixed comb/sync.
	UNKNOWN = 0,
	// The port wire carries the `cxxrtl_comb` attribute.
	COMB = 1,
	// The port wire carries the `cxxrtl_sync` attribute.
	SYNC = 2,
};
|
|
|
|
|
|
|
|
// Classifies port `port` of `cell` by the `cxxrtl_comb` / `cxxrtl_sync` attributes on the matching wire of
// the instantiated module. Cells whose module is absent from the design, or is not marked `cxxrtl_blackbox`,
// always yield UNKNOWN. A port carrying both attributes is reported as a command error.
CxxrtlPortType cxxrtl_port_type(const RTLIL::Cell *cell, RTLIL::IdString port)
{
	RTLIL::Module *blackbox = cell->module->design->module(cell->type);
	if (blackbox == nullptr)
		return CxxrtlPortType::UNKNOWN;
	if (!blackbox->get_bool_attribute(ID(cxxrtl_blackbox)))
		return CxxrtlPortType::UNKNOWN;

	RTLIL::Wire *port_wire = blackbox->wire(port);
	log_assert(port_wire != nullptr);

	const bool marked_comb = port_wire->get_bool_attribute(ID(cxxrtl_comb));
	const bool marked_sync = port_wire->get_bool_attribute(ID(cxxrtl_sync));

	if (marked_comb && marked_sync)
		log_cmd_error("Port `%s.%s' is marked as both `cxxrtl_comb` and `cxxrtl_sync`.\n",
		              log_id(blackbox), log_signal(port_wire));
	if (marked_comb)
		return CxxrtlPortType::COMB;
	if (marked_sync)
		return CxxrtlPortType::SYNC;
	return CxxrtlPortType::UNKNOWN;
}
|
|
|
|
|
|
|
|
// True if cxxrtl_port_type() classifies the given port of `cell` as COMB.
bool is_cxxrtl_comb_port(const RTLIL::Cell *cell, RTLIL::IdString port)
{
	const CxxrtlPortType kind = cxxrtl_port_type(cell, port);
	return kind == CxxrtlPortType::COMB;
}
|
|
|
|
|
|
|
|
// True if cxxrtl_port_type() classifies the given port of `cell` as SYNC.
bool is_cxxrtl_sync_port(const RTLIL::Cell *cell, RTLIL::IdString port)
{
	const CxxrtlPortType kind = cxxrtl_port_type(cell, port);
	return kind == CxxrtlPortType::SYNC;
}
|
|
|
|
|
2019-12-09 13:05:52 -06:00
|
|
|
struct FlowGraph {
|
|
|
|
struct Node {
|
|
|
|
enum class Type {
|
|
|
|
CONNECT,
|
2020-04-21 16:48:17 -05:00
|
|
|
CELL_SYNC,
|
|
|
|
CELL_EVAL,
|
2019-12-09 13:05:52 -06:00
|
|
|
PROCESS
|
|
|
|
};
|
|
|
|
|
|
|
|
Type type;
|
|
|
|
RTLIL::SigSig connect = {};
|
|
|
|
const RTLIL::Cell *cell = NULL;
|
|
|
|
const RTLIL::Process *process = NULL;
|
|
|
|
};
|
|
|
|
|
|
|
|
std::vector<Node*> nodes;
|
cxxrtl: localize wires with multiple comb drivers, too.
Before this commit, any wire that was not driven by an output port of
exactly one comb cell would not be localized, even if there were no
feedback arcs through that wire. This would cause the wire to become
buffered and require (often quite a few) extraneous delta cycles
during evaluation. To alleviate this problem, -O5 was running
`splitnets -driver`.
However, this solution was mistaken. Because `splitnets -driver`
followed by `opt_clean -purge` would produce more nets with multiple
drivers, it would have to be iterated to fixpoint. Moreover, even if
this was done, it would not be sufficient because `opt_clean -purge`
does not currently remove wires with the `\init` attribute (and it
is not desirable to remove such wires, since they correspond to
registers and may be useful for debugging).
The proper solution is to consider the condition in which a wire
may be localized. Specifically, if there are no feedback arcs through
this wire, and no part of the wire is driven by an output of a sync
cell, then the wire holds no state and is localizable.
After this commit, the original condition for not localizing a wire
is replaced by a check for any sync cell driving it. This makes it
unnecessary to run `splitnets -driver` in the majority of cases
to get a design with no buffered wires, and -O5 no longer includes
that pass. As a result, Minerva SRAM SoC no longer has any buffered
wires, and runs ~27% faster.
In addition, this commit prepares the flow graph for introduction
of sync outputs of black boxes.
Co-authored-by: Jean-François Nguyen <jf@lambdaconcept.com>
2020-04-21 08:33:42 -05:00
|
|
|
dict<const RTLIL::Wire*, pool<Node*, hash_ptr_ops>> wire_comb_defs, wire_sync_defs, wire_uses;
|
2020-12-12 18:34:32 -06:00
|
|
|
dict<const RTLIL::Wire*, bool> wire_def_inlinable, wire_use_inlinable;
|
2020-09-02 12:16:10 -05:00
|
|
|
dict<RTLIL::SigBit, bool> bit_has_state;
|
2019-12-09 13:05:52 -06:00
|
|
|
|
|
|
|
~FlowGraph()
|
|
|
|
{
|
|
|
|
for (auto node : nodes)
|
|
|
|
delete node;
|
|
|
|
}
|
|
|
|
|
2020-12-12 18:34:32 -06:00
|
|
|
void add_defs(Node *node, const RTLIL::SigSpec &sig, bool is_ff, bool inlinable)
|
2019-12-09 13:05:52 -06:00
|
|
|
{
|
|
|
|
for (auto chunk : sig.chunks())
|
cxxrtl: localize wires with multiple comb drivers, too.
Before this commit, any wire that was not driven by an output port of
exactly one comb cell would not be localized, even if there were no
feedback arcs through that wire. This would cause the wire to become
buffered and require (often quite a few) extraneous delta cycles
during evaluation. To alleviate this problem, -O5 was running
`splitnets -driver`.
However, this solution was mistaken. Because `splitnets -driver`
followed by `opt_clean -purge` would produce more nets with multiple
drivers, it would have to be iterated to fixpoint. Moreover, even if
this was done, it would not be sufficient because `opt_clean -purge`
does not currently remove wires with the `\init` attribute (and it
is not desirable to remove such wires, since they correspond to
registers and may be useful for debugging).
The proper solution is to consider the condition in which a wire
may be localized. Specifically, if there are no feedback arcs through
this wire, and no part of the wire is driven by an output of a sync
cell, then the wire holds no state and is localizable.
After this commit, the original condition for not localizing a wire
is replaced by a check for any sync cell driving it. This makes it
unnecessary to run `splitnets -driver` in the majority of cases
to get a design with no buffered wires, and -O5 no longer includes
that pass. As a result, Minerva SRAM SoC no longer has any buffered
wires, and runs ~27% faster.
In addition, this commit prepares the flow graph for introduction
of sync outputs of black boxes.
Co-authored-by: Jean-François Nguyen <jf@lambdaconcept.com>
2020-04-21 08:33:42 -05:00
|
|
|
if (chunk.wire) {
|
2020-09-02 11:03:35 -05:00
|
|
|
if (is_ff) {
|
|
|
|
// A sync def means that a wire holds design state because it is driven directly by
|
|
|
|
// a flip-flop output. Such a wire can never be unbuffered.
|
cxxrtl: localize wires with multiple comb drivers, too.
Before this commit, any wire that was not driven by an output port of
exactly one comb cell would not be localized, even if there were no
feedback arcs through that wire. This would cause the wire to become
buffered and require (often quite a few) extraneous delta cycles
during evaluation. To alleviate this problem, -O5 was running
`splitnets -driver`.
However, this solution was mistaken. Because `splitnets -driver`
followed by `opt_clean -purge` would produce more nets with multiple
drivers, it would have to be iterated to fixpoint. Moreover, even if
this was done, it would not be sufficient because `opt_clean -purge`
does not currently remove wires with the `\init` attribute (and it
is not desirable to remove such wires, since they correspond to
registers and may be useful for debugging).
The proper solution is to consider the condition in which a wire
may be localized. Specifically, if there are no feedback arcs through
this wire, and no part of the wire is driven by an output of a sync
cell, then the wire holds no state and is localizable.
After this commit, the original condition for not localizing a wire
is replaced by a check for any sync cell driving it. This makes it
unnecessary to run `splitnets -driver` in the majority of cases
to get a design with no buffered wires, and -O5 no longer includes
that pass. As a result, Minerva SRAM SoC no longer has any buffered
wires, and runs ~27% faster.
In addition, this commit prepares the flow graph for introduction
of sync outputs of black boxes.
Co-authored-by: Jean-François Nguyen <jf@lambdaconcept.com>
2020-04-21 08:33:42 -05:00
|
|
|
wire_sync_defs[chunk.wire].insert(node);
|
2020-09-02 11:03:35 -05:00
|
|
|
} else {
|
|
|
|
// A comb def means that a wire doesn't hold design state. It might still be connected,
|
|
|
|
// indirectly, to a flip-flop output.
|
cxxrtl: localize wires with multiple comb drivers, too.
Before this commit, any wire that was not driven by an output port of
exactly one comb cell would not be localized, even if there were no
feedback arcs through that wire. This would cause the wire to become
buffered and require (often quite a few) extraneous delta cycles
during evaluation. To alleviate this problem, -O5 was running
`splitnets -driver`.
However, this solution was mistaken. Because `splitnets -driver`
followed by `opt_clean -purge` would produce more nets with multiple
drivers, it would have to be iterated to fixpoint. Moreover, even if
this was done, it would not be sufficient because `opt_clean -purge`
does not currently remove wires with the `\init` attribute (and it
is not desirable to remove such wires, since they correspond to
registers and may be useful for debugging).
The proper solution is to consider the condition in which a wire
may be localized. Specifically, if there are no feedback arcs through
this wire, and no part of the wire is driven by an output of a sync
cell, then the wire holds no state and is localizable.
After this commit, the original condition for not localizing a wire
is replaced by a check for any sync cell driving it. This makes it
unnecessary to run `splitnets -driver` in the majority of cases
to get a design with no buffered wires, and -O5 no longer includes
that pass. As a result, Minerva SRAM SoC no longer has any buffered
wires, and runs ~27% faster.
In addition, this commit prepares the flow graph for introduction
of sync outputs of black boxes.
Co-authored-by: Jean-François Nguyen <jf@lambdaconcept.com>
2020-04-21 08:33:42 -05:00
|
|
|
wire_comb_defs[chunk.wire].insert(node);
|
2020-09-02 11:03:35 -05:00
|
|
|
}
|
cxxrtl: localize wires with multiple comb drivers, too.
Before this commit, any wire that was not driven by an output port of
exactly one comb cell would not be localized, even if there were no
feedback arcs through that wire. This would cause the wire to become
buffered and require (often quite a few) extraneous delta cycles
during evaluation. To alleviate this problem, -O5 was running
`splitnets -driver`.
However, this solution was mistaken. Because `splitnets -driver`
followed by `opt_clean -purge` would produce more nets with multiple
drivers, it would have to be iterated to fixpoint. Moreover, even if
this was done, it would not be sufficient because `opt_clean -purge`
does not currently remove wires with the `\init` attribute (and it
is not desirable to remove such wires, since they correspond to
registers and may be useful for debugging).
The proper solution is to consider the condition in which a wire
may be localized. Specifically, if there are no feedback arcs through
this wire, and no part of the wire is driven by an output of a sync
cell, then the wire holds no state and is localizable.
After this commit, the original condition for not localizing a wire
is replaced by a check for any sync cell driving it. This makes it
unnecessary to run `splitnets -driver` in the majority of cases
to get a design with no buffered wires, and -O5 no longer includes
that pass. As a result, Minerva SRAM SoC no longer has any buffered
wires, and runs ~27% faster.
In addition, this commit prepares the flow graph for introduction
of sync outputs of black boxes.
Co-authored-by: Jean-François Nguyen <jf@lambdaconcept.com>
2020-04-21 08:33:42 -05:00
|
|
|
}
|
2020-09-02 12:16:10 -05:00
|
|
|
for (auto bit : sig.bits())
|
|
|
|
bit_has_state[bit] |= is_ff;
|
2020-12-12 18:34:32 -06:00
|
|
|
// Only comb defs of an entire wire in the right order can be inlined.
|
2020-09-02 11:03:35 -05:00
|
|
|
if (!is_ff && sig.is_wire())
|
2020-12-12 18:34:32 -06:00
|
|
|
wire_def_inlinable[sig.as_wire()] = inlinable;
|
2019-12-09 13:05:52 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
void add_uses(Node *node, const RTLIL::SigSpec &sig)
|
|
|
|
{
|
|
|
|
for (auto chunk : sig.chunks())
|
|
|
|
if (chunk.wire) {
|
|
|
|
wire_uses[chunk.wire].insert(node);
|
2020-12-12 18:34:32 -06:00
|
|
|
// Only a single use of an entire wire in the right order can be inlined.
|
2019-12-09 13:05:52 -06:00
|
|
|
// (But the use can include other chunks.)
|
2020-12-12 18:34:32 -06:00
|
|
|
if (!wire_use_inlinable.count(chunk.wire))
|
|
|
|
wire_use_inlinable[chunk.wire] = true;
|
2019-12-09 13:05:52 -06:00
|
|
|
else
|
2020-12-12 18:34:32 -06:00
|
|
|
wire_use_inlinable[chunk.wire] = false;
|
2019-12-09 13:05:52 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-12 18:34:32 -06:00
|
|
|
bool is_inlinable(const RTLIL::Wire *wire) const
|
2019-12-09 13:05:52 -06:00
|
|
|
{
|
2020-12-12 18:34:32 -06:00
|
|
|
if (wire_def_inlinable.count(wire) && wire_use_inlinable.count(wire))
|
|
|
|
return wire_def_inlinable.at(wire) && wire_use_inlinable.at(wire);
|
2019-12-09 13:05:52 -06:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Connections
|
|
|
|
void add_connect_defs_uses(Node *node, const RTLIL::SigSig &conn)
|
|
|
|
{
|
2020-12-12 18:34:32 -06:00
|
|
|
add_defs(node, conn.first, /*is_ff=*/false, /*inlinable=*/true);
|
2019-12-09 13:05:52 -06:00
|
|
|
add_uses(node, conn.second);
|
|
|
|
}
|
|
|
|
|
2019-12-10 14:09:24 -06:00
|
|
|
Node *add_node(const RTLIL::SigSig &conn)
|
2019-12-09 13:05:52 -06:00
|
|
|
{
|
|
|
|
Node *node = new Node;
|
|
|
|
node->type = Node::Type::CONNECT;
|
|
|
|
node->connect = conn;
|
|
|
|
nodes.push_back(node);
|
|
|
|
add_connect_defs_uses(node, conn);
|
2019-12-10 14:09:24 -06:00
|
|
|
return node;
|
2019-12-09 13:05:52 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// Cells
|
2020-04-21 16:48:17 -05:00
|
|
|
void add_cell_sync_defs(Node *node, const RTLIL::Cell *cell)
|
|
|
|
{
|
|
|
|
// To understand why this node type is necessary and why it produces comb defs, consider a cell
|
|
|
|
// with input \i and sync output \o, used in a design such that \i is connected to \o. This does
|
|
|
|
// not result in a feedback arc because the output is synchronous. However, a naive implementation
|
|
|
|
// of code generation for cells that assigns to inputs, evaluates cells, assigns from outputs
|
|
|
|
// would not be able to immediately converge...
|
|
|
|
//
|
|
|
|
// wire<1> i_tmp;
|
|
|
|
// cell->p_i = i_tmp.curr;
|
|
|
|
// cell->eval();
|
|
|
|
// i_tmp.next = cell->p_o.curr;
|
|
|
|
//
|
|
|
|
// ... since the wire connecting the input and output ports would not be localizable. To solve
|
|
|
|
// this, the cell is split into two scheduling nodes; one exclusively for sync outputs, and
|
|
|
|
// another for inputs and all non-sync outputs. This way the generated code can be rearranged...
|
|
|
|
//
|
|
|
|
// value<1> i_tmp;
|
|
|
|
// i_tmp = cell->p_o.curr;
|
|
|
|
// cell->p_i = i_tmp;
|
|
|
|
// cell->eval();
|
|
|
|
//
|
|
|
|
// eliminating the unnecessary delta cycle. Conceptually, the CELL_SYNC node type is a series of
|
|
|
|
// connections of the form `connect \lhs \cell.\sync_output`; the right-hand side of these is not
|
2020-06-07 23:08:09 -05:00
|
|
|
// expressible as a wire in RTLIL. If it was expressible, then `\cell.\sync_output` would have
|
|
|
|
// a sync def, and this node would be an ordinary CONNECT node, with `\lhs` having a comb def.
|
|
|
|
// Because it isn't, a special node type is used, the right-hand side does not appear anywhere,
|
|
|
|
// and the left-hand side has a comb def.
|
2020-04-21 16:48:17 -05:00
|
|
|
for (auto conn : cell->connections())
|
|
|
|
if (cell->output(conn.first))
|
|
|
|
if (is_cxxrtl_sync_port(cell, conn.first)) {
|
2020-12-12 18:34:32 -06:00
|
|
|
// See note regarding inlinability below.
|
|
|
|
add_defs(node, conn.second, /*is_ff=*/false, /*inlinable=*/false);
|
2020-04-21 16:48:17 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void add_cell_eval_defs_uses(Node *node, const RTLIL::Cell *cell)
|
2019-12-09 13:05:52 -06:00
|
|
|
{
|
|
|
|
for (auto conn : cell->connections()) {
|
|
|
|
if (cell->output(conn.first)) {
|
2020-12-12 18:34:32 -06:00
|
|
|
if (is_inlinable_cell(cell->type))
|
|
|
|
add_defs(node, conn.second, /*is_ff=*/false, /*inlinable=*/true);
|
2020-09-02 11:03:35 -05:00
|
|
|
else if (is_ff_cell(cell->type) || (cell->type == ID($memrd) && cell->getParam(ID::CLK_ENABLE).as_bool()))
|
2020-12-12 18:34:32 -06:00
|
|
|
add_defs(node, conn.second, /*is_ff=*/true, /*inlinable=*/false);
|
2020-04-03 11:07:43 -05:00
|
|
|
else if (is_internal_cell(cell->type))
|
2020-12-12 18:34:32 -06:00
|
|
|
add_defs(node, conn.second, /*is_ff=*/false, /*inlinable=*/false);
|
2020-04-21 16:48:17 -05:00
|
|
|
else if (!is_cxxrtl_sync_port(cell, conn.first)) {
|
2020-12-12 18:34:32 -06:00
|
|
|
// Although at first it looks like outputs of user-defined cells may always be inlined, the reality is
|
|
|
|
// more complex. Fully sync outputs produce no defs and so don't participate in inlining. Fully comb
|
2020-04-21 16:48:17 -05:00
|
|
|
// outputs are assigned in a different way depending on whether the cell's eval() immediately converged.
|
2020-12-12 18:34:32 -06:00
|
|
|
// Unknown/mixed outputs could be inlined, but should be rare in practical designs and don't justify
|
|
|
|
// the infrastructure required to inline outputs of cells with many of them.
|
|
|
|
add_defs(node, conn.second, /*is_ff=*/false, /*inlinable=*/false);
|
2020-04-03 11:07:43 -05:00
|
|
|
}
|
2019-12-09 13:05:52 -06:00
|
|
|
}
|
|
|
|
if (cell->input(conn.first))
|
|
|
|
add_uses(node, conn.second);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-12-10 14:09:24 -06:00
|
|
|
Node *add_node(const RTLIL::Cell *cell)
|
2019-12-09 13:05:52 -06:00
|
|
|
{
|
2020-04-21 16:48:17 -05:00
|
|
|
log_assert(cell->known());
|
|
|
|
|
|
|
|
bool has_fully_sync_outputs = false;
|
|
|
|
for (auto conn : cell->connections())
|
|
|
|
if (cell->output(conn.first) && is_cxxrtl_sync_port(cell, conn.first)) {
|
|
|
|
has_fully_sync_outputs = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (has_fully_sync_outputs) {
|
|
|
|
Node *node = new Node;
|
|
|
|
node->type = Node::Type::CELL_SYNC;
|
|
|
|
node->cell = cell;
|
|
|
|
nodes.push_back(node);
|
|
|
|
add_cell_sync_defs(node, cell);
|
|
|
|
}
|
|
|
|
|
2019-12-09 13:05:52 -06:00
|
|
|
Node *node = new Node;
|
2020-04-21 16:48:17 -05:00
|
|
|
node->type = Node::Type::CELL_EVAL;
|
2019-12-09 13:05:52 -06:00
|
|
|
node->cell = cell;
|
|
|
|
nodes.push_back(node);
|
2020-04-21 16:48:17 -05:00
|
|
|
add_cell_eval_defs_uses(node, cell);
|
2019-12-10 14:09:24 -06:00
|
|
|
return node;
|
2019-12-09 13:05:52 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// Processes
|
|
|
|
void add_case_defs_uses(Node *node, const RTLIL::CaseRule *case_)
|
|
|
|
{
|
|
|
|
for (auto &action : case_->actions) {
|
2020-12-12 18:34:32 -06:00
|
|
|
add_defs(node, action.first, /*is_ff=*/false, /*inlinable=*/false);
|
2019-12-09 13:05:52 -06:00
|
|
|
add_uses(node, action.second);
|
|
|
|
}
|
|
|
|
for (auto sub_switch : case_->switches) {
|
|
|
|
add_uses(node, sub_switch->signal);
|
|
|
|
for (auto sub_case : sub_switch->cases) {
|
|
|
|
for (auto &compare : sub_case->compare)
|
|
|
|
add_uses(node, compare);
|
|
|
|
add_case_defs_uses(node, sub_case);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void add_process_defs_uses(Node *node, const RTLIL::Process *process)
|
|
|
|
{
|
|
|
|
add_case_defs_uses(node, &process->root_case);
|
|
|
|
for (auto sync : process->syncs)
|
|
|
|
for (auto action : sync->actions) {
|
|
|
|
if (sync->type == RTLIL::STp || sync->type == RTLIL::STn || sync->type == RTLIL::STe)
|
2020-12-12 18:34:32 -06:00
|
|
|
add_defs(node, action.first, /*is_ff=*/true, /*inlinable=*/false);
|
2019-12-09 13:05:52 -06:00
|
|
|
else
|
2020-12-12 18:34:32 -06:00
|
|
|
add_defs(node, action.first, /*is_ff=*/false, /*inlinable=*/false);
|
2019-12-09 13:05:52 -06:00
|
|
|
add_uses(node, action.second);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-12-10 14:09:24 -06:00
|
|
|
Node *add_node(const RTLIL::Process *process)
|
2019-12-09 13:05:52 -06:00
|
|
|
{
|
|
|
|
Node *node = new Node;
|
|
|
|
node->type = Node::Type::PROCESS;
|
|
|
|
node->process = process;
|
|
|
|
nodes.push_back(node);
|
|
|
|
add_process_defs_uses(node, process);
|
2019-12-10 14:09:24 -06:00
|
|
|
return node;
|
2019-12-09 13:05:52 -06:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2020-04-18 03:04:57 -05:00
|
|
|
// Splits `str` at every character that occurs in `sep` (each character of `sep` is a separator on its own)
// and returns the non-empty pieces in order. Runs of separators, and separators at either end of `str`,
// therefore never produce empty entries.
std::vector<std::string> split_by(const std::string &str, const std::string &sep)
{
	std::vector<std::string> pieces;
	for (size_t start = 0;;) {
		const size_t stop = str.find_first_of(sep, start);
		const bool at_end = (stop == std::string::npos);
		const std::string piece = at_end ? str.substr(start) : str.substr(start, stop - start);
		if (!piece.empty())
			pieces.push_back(piece);
		if (at_end)
			break;
		start = stop + 1;
	}
	return pieces;
}
|
|
|
|
|
2020-04-16 20:41:08 -05:00
|
|
|
// Renders `input` as C++ source text that evaluates to the same sequence of bytes.
//
// The result is normally a double-quoted string literal. Backslashes and double quotes are escaped with
// a backslash; every non-printable byte is written as a three-digit octal escape. Octal is used rather
// than `\x` because a hex escape has no length limit and would absorb any hex-digit characters that
// happen to follow it. If `input` contains a NUL byte, the literal is wrapped as
// `std::string {"...", N}` with N being the byte count, so that the string is not cut off at the NUL.
std::string escape_cxx_string(const std::string &input)
{
	std::string output = "\"";
	for (auto c : input) {
		// <cctype> functions require a value representable as unsigned char.
		const unsigned char byte = static_cast<unsigned char>(c);
		if (::isprint(byte)) {
			if (c == '\\' || c == '"')
				output.push_back('\\');
			output.push_back(c);
		} else {
			output.push_back('\\');
			output.push_back(static_cast<char>('0' + ((byte >> 6) & 0x7)));
			output.push_back(static_cast<char>('0' + ((byte >> 3) & 0x7)));
			output.push_back(static_cast<char>('0' + (byte & 0x7)));
		}
	}
	output.push_back('"');
	// Check the input, not the output: NUL bytes have already been replaced by an escape in `output`.
	if (input.find('\0') != std::string::npos) {
		output.insert(0, "std::string {");
		output.append(", " + std::to_string(input.size()) + "}");
	}
	return output;
}
|
|
|
|
|
2020-05-26 19:21:15 -05:00
|
|
|
template<class T>
|
|
|
|
std::string get_hdl_name(T *object)
|
|
|
|
{
|
|
|
|
if (object->has_attribute(ID::hdlname))
|
|
|
|
return object->get_string_attribute(ID::hdlname);
|
|
|
|
else
|
2020-06-08 14:50:09 -05:00
|
|
|
return object->name.str().substr(1);
|
2020-05-26 19:21:15 -05:00
|
|
|
}
|
|
|
|
|
2019-11-30 19:51:16 -06:00
|
|
|
struct CxxrtlWorker {
|
2020-04-14 07:07:58 -05:00
|
|
|
bool split_intf = false;
|
|
|
|
std::string intf_filename;
|
|
|
|
std::string design_ns = "cxxrtl_design";
|
|
|
|
std::ostream *impl_f = nullptr;
|
|
|
|
std::ostream *intf_f = nullptr;
|
|
|
|
|
2020-11-02 13:18:56 -06:00
|
|
|
bool run_hierarchy = false;
|
2020-06-09 15:18:07 -05:00
|
|
|
bool run_flatten = false;
|
|
|
|
bool run_proc = false;
|
|
|
|
|
2020-06-09 16:50:09 -05:00
|
|
|
bool unbuffer_internal = false;
|
|
|
|
bool unbuffer_public = false;
|
2019-12-10 14:09:24 -06:00
|
|
|
bool localize_internal = false;
|
|
|
|
bool localize_public = false;
|
2020-12-12 18:34:32 -06:00
|
|
|
bool inline_internal = false;
|
|
|
|
bool inline_public = false;
|
2019-12-09 13:05:52 -06:00
|
|
|
|
2020-05-26 19:21:15 -05:00
|
|
|
bool debug_info = false;
|
2020-12-13 01:44:27 -06:00
|
|
|
bool debug_alias = false;
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
bool debug_eval = false;
|
2020-05-26 19:21:15 -05:00
|
|
|
|
2020-04-14 07:07:58 -05:00
|
|
|
std::ostringstream f;
|
2019-11-30 19:51:16 -06:00
|
|
|
std::string indent;
|
|
|
|
int temporary = 0;
|
|
|
|
|
|
|
|
dict<const RTLIL::Module*, SigMap> sigmaps;
|
2020-04-21 13:46:36 -05:00
|
|
|
pool<const RTLIL::Wire*> edge_wires;
|
|
|
|
dict<RTLIL::SigBit, RTLIL::SyncType> edge_types;
|
2019-11-30 19:51:16 -06:00
|
|
|
pool<const RTLIL::Memory*> writable_memories;
|
2019-12-10 14:09:24 -06:00
|
|
|
dict<const RTLIL::Cell*, pool<const RTLIL::Cell*>> transparent_for;
|
|
|
|
dict<const RTLIL::Module*, std::vector<FlowGraph::Node>> schedule;
|
2020-06-09 16:50:09 -05:00
|
|
|
pool<const RTLIL::Wire*> unbuffered_wires;
|
2019-12-10 14:09:24 -06:00
|
|
|
pool<const RTLIL::Wire*> localized_wires;
|
2020-12-12 18:34:32 -06:00
|
|
|
dict<const RTLIL::Wire*, FlowGraph::Node> inlined_wires;
|
2020-06-08 12:29:08 -05:00
|
|
|
dict<const RTLIL::Wire*, RTLIL::Const> debug_const_wires;
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dict<const RTLIL::Wire*, const RTLIL::Wire*> debug_alias_wires;
|
|
|
|
pool<const RTLIL::Wire*> debug_outlined_wires;
|
2020-09-02 12:16:10 -05:00
|
|
|
dict<RTLIL::SigBit, bool> bit_has_state;
|
2020-04-18 03:04:57 -05:00
|
|
|
dict<const RTLIL::Module*, pool<std::string>> blackbox_specializations;
|
2020-04-21 10:51:09 -05:00
|
|
|
dict<const RTLIL::Module*, bool> eval_converges;
|
2019-11-30 19:51:16 -06:00
|
|
|
|
|
|
|
// Deepens the indentation used for subsequently emitted lines by one tab.
void inc_indent()
{
	indent.push_back('\t');
}
|
|
|
|
void dec_indent() {
|
|
|
|
indent.resize(indent.size() - 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
// RTLIL allows any characters in names other than whitespace. This presents an issue for generating C++ code
|
|
|
|
// because C++ identifiers may be only alphanumeric, cannot clash with C++ keywords, and cannot clash with cxxrtl
|
|
|
|
// identifiers. This issue can be solved with a name mangling scheme. We choose a name mangling scheme that results
|
|
|
|
// in readable identifiers, does not depend on an up-to-date list of C++ keywords, and is easy to apply. Its rules:
|
|
|
|
// 1. All generated identifiers start with `_`.
|
|
|
|
// 1a. Generated identifiers for public names (beginning with `\`) start with `p_`.
|
|
|
|
// 1b. Generated identifiers for internal names (beginning with `$`) start with `i_`.
|
|
|
|
// 2. An underscore is escaped with another underscore, i.e. `__`.
|
|
|
|
// 3. Any other non-alnum character is escaped with underscores around its lowercase hex code, e.g. `@` as `_40_`.
|
|
|
|
std::string mangle_name(const RTLIL::IdString &name)
|
|
|
|
{
|
|
|
|
std::string mangled;
|
|
|
|
bool first = true;
|
|
|
|
for (char c : name.str()) {
|
|
|
|
if (first) {
|
|
|
|
first = false;
|
|
|
|
if (c == '\\')
|
|
|
|
mangled += "p_";
|
|
|
|
else if (c == '$')
|
|
|
|
mangled += "i_";
|
|
|
|
else
|
|
|
|
log_assert(false);
|
|
|
|
} else {
|
|
|
|
if (isalnum(c)) {
|
|
|
|
mangled += c;
|
|
|
|
} else if (c == '_') {
|
|
|
|
mangled += "__";
|
|
|
|
} else {
|
|
|
|
char l = c & 0xf, h = (c >> 4) & 0xf;
|
|
|
|
mangled += '_';
|
|
|
|
mangled += (h < 10 ? '0' + h : 'a' + h - 10);
|
|
|
|
mangled += (l < 10 ? '0' + l : 'a' + l - 10);
|
|
|
|
mangled += '_';
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return mangled;
|
|
|
|
}
|
|
|
|
|
2020-04-16 20:41:08 -05:00
|
|
|
// Mangles a module name. Blackbox modules get an additional `bb_` prefix in front of the mangled name.
std::string mangle_module_name(const RTLIL::IdString &name, bool is_blackbox = false)
{
	// Class namespace.
	const std::string mangled = mangle_name(name);
	return is_blackbox ? "bb_" + mangled : mangled;
}
|
|
|
|
|
|
|
|
// Mangles a memory name: the mangled name prefixed with `memory_`.
std::string mangle_memory_name(const RTLIL::IdString &name)
{
	// Class member namespace.
	std::string mangled = "memory_";
	mangled += mangle_name(name);
	return mangled;
}
|
|
|
|
|
2020-04-03 11:07:43 -05:00
|
|
|
// Mangles a cell name: the mangled name prefixed with `cell_`.
std::string mangle_cell_name(const RTLIL::IdString &name)
{
	// Class member namespace.
	std::string mangled = "cell_";
	mangled += mangle_name(name);
	return mangled;
}
|
|
|
|
|
2019-11-30 19:51:16 -06:00
|
|
|
// Mangles a wire name. Wires get no extra prefix beyond the one mangle_name() produces.
std::string mangle_wire_name(const RTLIL::IdString &name)
{
	// Class member namespace.
	std::string mangled = mangle_name(name);
	return mangled;
}
|
|
|
|
|
|
|
|
// Mangles the name of `module`, treating it as a blackbox when its `cxxrtl_blackbox` attribute is set.
std::string mangle(const RTLIL::Module *module)
{
	const bool is_blackbox = module->get_bool_attribute(ID(cxxrtl_blackbox));
	return mangle_module_name(module->name, is_blackbox);
}
|
|
|
|
|
|
|
|
// Mangles the name of `memory` via mangle_memory_name().
std::string mangle(const RTLIL::Memory *memory)
{
	const RTLIL::IdString &memory_name = memory->name;
	return mangle_memory_name(memory_name);
}
|
|
|
|
|
2020-04-03 11:07:43 -05:00
|
|
|
// Mangles the name of `cell` via mangle_cell_name().
std::string mangle(const RTLIL::Cell *cell)
{
	const RTLIL::IdString &cell_name = cell->name;
	return mangle_cell_name(cell_name);
}
|
|
|
|
|
2019-11-30 19:51:16 -06:00
|
|
|
// Mangles the name of `wire` via mangle_wire_name().
std::string mangle(const RTLIL::Wire *wire)
{
	const RTLIL::IdString &wire_name = wire->name;
	return mangle_wire_name(wire_name);
}
|
|
|
|
|
|
|
|
std::string mangle(RTLIL::SigBit sigbit)
|
|
|
|
{
|
|
|
|
log_assert(sigbit.wire != NULL);
|
|
|
|
if (sigbit.wire->width == 1)
|
|
|
|
return mangle(sigbit.wire);
|
|
|
|
return mangle(sigbit.wire) + "_" + std::to_string(sigbit.offset);
|
|
|
|
}
|
|
|
|
|
2020-04-18 03:04:57 -05:00
|
|
|
// Returns the template parameter names listed in the `cxxrtl_template` attribute of `module`, split on
// spaces and tabs. Returns an empty list when the attribute is absent. Reports a command error if the
// attribute is not a string, or if any listed name does not start with an uppercase letter.
std::vector<std::string> template_param_names(const RTLIL::Module *module)
{
	if (!module->has_attribute(ID(cxxrtl_template)))
		return {};

	if (module->attributes.at(ID(cxxrtl_template)).flags != RTLIL::CONST_FLAG_STRING)
		log_cmd_error("Attribute `cxxrtl_template' of module `%s' is not a string.\n", log_id(module));

	std::vector<std::string> param_names = split_by(module->get_string_attribute(ID(cxxrtl_template)), " \t");
	for (const auto &param_name : param_names) {
		// Various lowercase prefixes (p_, i_, cell_, ...) are used for member variables, so require
		// parameters to start with an uppercase letter to avoid name conflicts. (This is the convention
		// in both Verilog and C++, anyway.)
		// The cast keeps isupper() well-defined for bytes with the high bit set.
		if (!isupper(static_cast<unsigned char>(param_name[0])))
			log_cmd_error("Attribute `cxxrtl_template' of module `%s' includes a parameter `%s', "
			              "which does not start with an uppercase letter.\n",
			              log_id(module), param_name.c_str());
	}
	return param_names;
}
|
|
|
|
|
|
|
|
// Builds the angle-bracketed template parameter list for `module`, e.g. `<A, B>`. With `is_decl` set,
// each name is preceded by `size_t ` so the list can be used in a template declaration. Returns an empty
// string when the module has no template parameters.
std::string template_params(const RTLIL::Module *module, bool is_decl)
{
	const std::vector<std::string> param_names = template_param_names(module);
	if (param_names.empty())
		return "";

	std::string params = "<";
	const char *separator = "";
	for (const auto &param_name : param_names) {
		params += separator;
		separator = ", ";
		if (is_decl)
			params += "size_t ";
		params += param_name;
	}
	params += ">";
	return params;
}
|
|
|
|
|
|
|
|
// Builds the angle-bracketed template argument list used to instantiate the module that `cell` refers to,
// e.g. `</*WIDTH=*/8>`. Only blackbox modules (attribute `cxxrtl_blackbox`) take template arguments; for
// any other module, or a blackbox without template parameters, an empty string is returned. Reports a
// command error if the cell lacks a required parameter or the parameter is not a plain, non-negative
// integer.
std::string template_args(const RTLIL::Cell *cell)
{
	RTLIL::Module *cell_module = cell->module->design->module(cell->type);
	log_assert(cell_module != nullptr);
	if (!cell_module->get_bool_attribute(ID(cxxrtl_blackbox)))
		return "";

	const std::vector<std::string> param_names = template_param_names(cell_module);
	if (param_names.empty())
		return "";

	std::string params = "<";
	const char *separator = "";
	for (const auto &param_name : param_names) {
		params += separator;
		separator = ", ";
		params += "/*" + param_name + "=*/";

		RTLIL::IdString id_param_name = '\\' + param_name;
		if (!cell->hasParam(id_param_name))
			log_cmd_error("Cell `%s.%s' does not have a parameter `%s', which is required by the templated module `%s'.\n",
			              log_id(cell->module), log_id(cell), param_name.c_str(), log_id(cell_module));

		// Look the parameter up once and reuse it for both validation and emission.
		const RTLIL::Const &param_value = cell->getParam(id_param_name);
		// Any flag other than "signed" (e.g. string or real) disqualifies the value.
		// NOTE(review): the message says "positive", but zero passes this check — confirm which is intended.
		if (((param_value.flags & ~RTLIL::CONST_FLAG_SIGNED) != 0) || param_value.as_int() < 0)
			log_cmd_error("Parameter `%s' of cell `%s.%s', which is required by the templated module `%s', "
			              "is not a positive integer.\n",
			              param_name.c_str(), log_id(cell->module), log_id(cell), log_id(cell_module));
		params += std::to_string(param_value.as_int());
	}
	params += ">";
	return params;
}
|
|
|
|
|
2019-11-30 19:51:16 -06:00
|
|
|
std::string fresh_temporary()
|
|
|
|
{
|
|
|
|
return stringf("tmp_%d", temporary++);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Emits every attribute of `object` as a `// name: value` comment line at the current indentation.
// String attributes are printed as text; all others are printed as integers, signed if the constant
// carries the signed flag.
void dump_attrs(const RTLIL::AttrObject *object)
{
	// Iterate by reference: copying each (IdString, Const) pair is needless work.
	for (const auto &attr : object->attributes) {
		f << indent << "// " << attr.first.str() << ": ";
		if (attr.second.flags & RTLIL::CONST_FLAG_STRING) {
			// A line break inside the value would end the `//` comment and leak the remainder into the
			// generated code, so continue the comment on each new line instead.
			const std::string text = attr.second.decode_string();
			for (size_t pos = 0; pos < text.size(); pos++) {
				const char c = text[pos];
				if (c != '\n' && c != '\r') {
					f << c;
					continue;
				}
				// Treat CR LF as a single line break.
				if (c == '\r' && pos + 1 < text.size() && text[pos + 1] == '\n')
					pos++;
				f << "\n" << indent << "//   ";
			}
		} else {
			f << attr.second.as_int(/*is_signed=*/attr.second.flags & RTLIL::CONST_FLAG_SIGNED);
		}
		f << "\n";
	}
}
|
|
|
|
|
|
|
|
// Emits a brace-enclosed, comma-separated list of 32-bit hexadecimal literals covering `width` bits of
// `data` starting at bit `offset`, least significant chunk first. With `fixed_width`, each literal is
// zero-padded to the number of hex digits needed for its chunk; otherwise the `%#x` format is used.
void dump_const_init(const RTLIL::Const &data, int width, int offset = 0, bool fixed_width = false)
{
	const int CHUNK_SIZE = 32;
	f << "{";
	for (; width > 0; offset += CHUNK_SIZE, width -= CHUNK_SIZE) {
		// The final chunk may be narrower than a full word.
		const int chunk_width = min(width, CHUNK_SIZE);
		const uint32_t chunk = data.extract(offset, chunk_width).as_int();
		if (fixed_width) {
			const int hex_digits = (3 + chunk_width) / 4;
			f << stringf("0x%.*xu", hex_digits, chunk);
		} else {
			f << stringf("%#xu", chunk);
		}
		// More bits remain after this chunk, so another literal follows.
		if (width > CHUNK_SIZE)
			f << ',';
	}
	f << "}";
}
|
|
|
|
|
|
|
|
// Emits the initializer list for all bits of `data`.
void dump_const_init(const RTLIL::Const &data)
{
	const auto width = data.size();
	dump_const_init(data, width);
}
|
|
|
|
|
|
|
|
// Emits a `value<width>{...}` expression for `width` bits of `data` starting at bit `offset`.
// `fixed_width` is forwarded to dump_const_init().
void dump_const(const RTLIL::Const &data, int width, int offset = 0, bool fixed_width = false)
{
	f << "value<";
	f << width;
	f << ">";
	dump_const_init(data, width, offset, fixed_width);
}
|
|
|
|
|
|
|
|
// Emits a `value<N>{...}` expression for all bits of `data`.
void dump_const(const RTLIL::Const &data)
{
	const auto width = data.size();
	dump_const(data, width);
}
|
|
|
|
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
bool dump_sigchunk(const RTLIL::SigChunk &chunk, bool is_lhs, bool for_debug = false)
|
2019-11-30 19:51:16 -06:00
|
|
|
{
|
|
|
|
if (chunk.wire == NULL) {
|
|
|
|
dump_const(chunk.data, chunk.width, chunk.offset);
|
|
|
|
return false;
|
|
|
|
} else {
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
if (inlined_wires.count(chunk.wire) && (!for_debug || !debug_outlined_wires[chunk.wire])) {
|
2020-06-09 16:50:09 -05:00
|
|
|
log_assert(!is_lhs);
|
2020-12-12 18:34:32 -06:00
|
|
|
const FlowGraph::Node &node = inlined_wires[chunk.wire];
|
2019-12-09 13:05:52 -06:00
|
|
|
switch (node.type) {
|
|
|
|
case FlowGraph::Node::Type::CONNECT:
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dump_connect_expr(node.connect, for_debug);
|
2019-12-09 13:05:52 -06:00
|
|
|
break;
|
2020-04-21 16:48:17 -05:00
|
|
|
case FlowGraph::Node::Type::CELL_EVAL:
|
2020-12-12 18:34:32 -06:00
|
|
|
log_assert(is_inlinable_cell(node.cell->type));
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dump_cell_expr(node.cell, for_debug);
|
2019-12-09 13:05:52 -06:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
log_assert(false);
|
|
|
|
}
|
2020-06-11 17:21:30 -05:00
|
|
|
} else if (unbuffered_wires[chunk.wire]) {
|
2019-12-10 14:09:24 -06:00
|
|
|
f << mangle(chunk.wire);
|
2019-12-09 13:05:52 -06:00
|
|
|
} else {
|
|
|
|
f << mangle(chunk.wire) << (is_lhs ? ".next" : ".curr");
|
|
|
|
}
|
2019-11-30 19:51:16 -06:00
|
|
|
if (chunk.width == chunk.wire->width && chunk.offset == 0)
|
|
|
|
return false;
|
|
|
|
else if (chunk.width == 1)
|
|
|
|
f << ".slice<" << chunk.offset << ">()";
|
|
|
|
else
|
|
|
|
f << ".slice<" << chunk.offset+chunk.width-1 << "," << chunk.offset << ">()";
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
bool dump_sigspec(const RTLIL::SigSpec &sig, bool is_lhs, bool for_debug = false)
|
2019-11-30 19:51:16 -06:00
|
|
|
{
|
|
|
|
if (sig.empty()) {
|
|
|
|
f << "value<0>()";
|
|
|
|
return false;
|
|
|
|
} else if (sig.is_chunk()) {
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
return dump_sigchunk(sig.as_chunk(), is_lhs, for_debug);
|
2019-11-30 19:51:16 -06:00
|
|
|
} else {
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dump_sigchunk(*sig.chunks().rbegin(), is_lhs, for_debug);
|
2019-11-30 19:51:16 -06:00
|
|
|
for (auto it = sig.chunks().rbegin() + 1; it != sig.chunks().rend(); ++it) {
|
|
|
|
f << ".concat(";
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dump_sigchunk(*it, is_lhs, for_debug);
|
2019-11-30 19:51:16 -06:00
|
|
|
f << ")";
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
// Emits `sig` as an assignment target, i.e. with `is_lhs` set. The result of dump_sigspec() is discarded.
void dump_sigspec_lhs(const RTLIL::SigSpec &sig, bool for_debug = false)
{
	const bool is_lhs = true;
	dump_sigspec(sig, is_lhs, for_debug);
}
|
|
|
|
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
// Emits `sig` as an operand, appending `.val()` when dump_sigspec() reports a complex expression.
void dump_sigspec_rhs(const RTLIL::SigSpec &sig, bool for_debug = false)
{
	// In the contexts where we want template argument deduction to occur for `template<size_t Bits> ... value<Bits>`,
	// it is necessary to have the argument to already be a `value<N>`, since template argument deduction and implicit
	// type conversion are mutually exclusive. In these contexts, we use dump_sigspec_rhs() to emit an explicit
	// type conversion, but only if the expression needs it.
	if (dump_sigspec(sig, /*is_lhs=*/false, for_debug))
		f << ".val()";
}
|
|
|
|
|
2019-12-09 13:05:52 -06:00
|
|
|
// Walks the chunks of `sig`; for every chunk that refers to a wire present in `inlined_wires`, dispatches
// on the flow graph node recorded for that wire and forwards `cells` to the matching collector.
void collect_sigspec_rhs(const RTLIL::SigSpec &sig, std::vector<RTLIL::IdString> &cells)
{
	for (const auto &piece : sig.chunks()) {
		// Chunks without a wire, and wires that were not inlined, are skipped.
		const bool is_inlined = piece.wire && inlined_wires.count(piece.wire);
		if (is_inlined) {
			const FlowGraph::Node &driver = inlined_wires[piece.wire];
			if (driver.type == FlowGraph::Node::Type::CONNECT) {
				collect_connect(driver.connect, cells);
			} else if (driver.type == FlowGraph::Node::Type::CELL_EVAL) {
				collect_cell_eval(driver.cell, cells);
			} else {
				// Any other node type recorded for an inlined wire is treated as an internal error.
				log_assert(false);
			}
		}
	}
}
|
|
|
|
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
// Emits the value expression of the connection `conn`: its second element, written out through
// dump_sigspec_rhs() with the same `for_debug` setting.
void dump_connect_expr(const RTLIL::SigSig &conn, bool for_debug = false)
{
	const RTLIL::SigSpec &source = conn.second;
	dump_sigspec_rhs(source, for_debug);
}
|
|
|
|
|
2020-12-12 18:34:32 -06:00
|
|
|
// Returns true when the first element of `conn` is exactly one whole wire and that wire is
// listed in `inlined_wires`.
bool is_connect_inlined(const RTLIL::SigSig &conn)
{
	const RTLIL::SigSpec &target = conn.first;
	if (!target.is_wire())
		return false;
	return inlined_wires.count(target.as_wire()) != 0;
}
|
|
|
|
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
// Returns true when at least one chunk of the first element of `conn` refers to a wire listed in
// `debug_outlined_wires`.
bool is_connect_outlined(const RTLIL::SigSig &conn)
{
	bool outlined = false;
	for (const auto &piece : conn.first.chunks()) {
		if (debug_outlined_wires.count(piece.wire)) {
			// One match is enough; the remaining chunks do not need to be inspected.
			outlined = true;
			break;
		}
	}
	return outlined;
}
|
|
|
|
|
2019-12-09 13:05:52 -06:00
|
|
|
// For a connection that is_connect_inlined() accepts, forwards its second element and `cells` to
// collect_sigspec_rhs(). Any other connection is ignored.
void collect_connect(const RTLIL::SigSig &conn, std::vector<RTLIL::IdString> &cells)
{
	if (is_connect_inlined(conn))
		collect_sigspec_rhs(conn.second, cells);
}
|
|
|
|
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
void dump_connect(const RTLIL::SigSig &conn, bool for_debug = false)
|
2019-12-09 13:05:52 -06:00
|
|
|
{
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
if (!for_debug && is_connect_inlined(conn))
|
|
|
|
return;
|
|
|
|
if (for_debug && !is_connect_outlined(conn))
|
2019-12-09 13:05:52 -06:00
|
|
|
return;
|
|
|
|
|
2019-12-10 14:09:24 -06:00
|
|
|
f << indent << "// connection\n";
|
2019-11-30 19:51:16 -06:00
|
|
|
f << indent;
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dump_sigspec_lhs(conn.first, for_debug);
|
2019-11-30 19:51:16 -06:00
|
|
|
f << " = ";
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dump_connect_expr(conn, for_debug);
|
2019-11-30 19:51:16 -06:00
|
|
|
f << ";\n";
|
|
|
|
}
|
|
|
|
|
2020-04-21 16:48:17 -05:00
|
|
|
// Writes a "// cell <name> syncs" comment line, then one assignment per connection of `cell` whose port
// is both an output and accepted by is_cxxrtl_sync_port(). Each assignment sets the connected signal to
// the `.curr` member of the corresponding mangled port on the mangled cell.
void dump_cell_sync(const RTLIL::Cell *cell)
{
	// The member access operator written into the output depends on whether the cell is a CXXRTL blackbox.
	const char *member_op = is_cxxrtl_blackbox_cell(cell) ? "->" : ".";

	f << indent << "// cell " << cell->name.str() << " syncs\n";
	for (const auto &port : cell->connections()) {
		if (!cell->output(port.first))
			continue;
		if (!is_cxxrtl_sync_port(cell, port.first))
			continue;

		f << indent;
		dump_sigspec_lhs(port.second);
		f << " = " << mangle(cell) << member_op << mangle_wire_name(port.first) << ".curr;\n";
	}
}
|
|
|
|
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
void dump_cell_expr(const RTLIL::Cell *cell, bool for_debug = false)
|
2019-11-30 19:51:16 -06:00
|
|
|
{
|
|
|
|
// Unary cells
|
2019-12-09 13:05:52 -06:00
|
|
|
if (is_unary_cell(cell->type)) {
|
2020-06-09 02:26:13 -05:00
|
|
|
f << cell->type.substr(1);
|
|
|
|
if (is_extending_cell(cell->type))
|
|
|
|
f << '_' << (cell->getParam(ID::A_SIGNED).as_bool() ? 's' : 'u');
|
|
|
|
f << "<" << cell->getParam(ID::Y_WIDTH).as_int() << ">(";
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::A), for_debug);
|
2019-12-09 13:05:52 -06:00
|
|
|
f << ")";
|
2019-11-30 19:51:16 -06:00
|
|
|
// Binary cells
|
2019-12-09 13:05:52 -06:00
|
|
|
} else if (is_binary_cell(cell->type)) {
|
2020-06-09 02:26:13 -05:00
|
|
|
f << cell->type.substr(1);
|
|
|
|
if (is_extending_cell(cell->type))
|
|
|
|
f << '_' << (cell->getParam(ID::A_SIGNED).as_bool() ? 's' : 'u') <<
|
|
|
|
(cell->getParam(ID::B_SIGNED).as_bool() ? 's' : 'u');
|
|
|
|
f << "<" << cell->getParam(ID::Y_WIDTH).as_int() << ">(";
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::A), for_debug);
|
2019-11-30 19:51:16 -06:00
|
|
|
f << ", ";
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::B), for_debug);
|
2019-12-09 13:05:52 -06:00
|
|
|
f << ")";
|
2019-11-30 19:51:16 -06:00
|
|
|
// Muxes
|
|
|
|
} else if (cell->type == ID($mux)) {
|
2019-12-09 13:05:52 -06:00
|
|
|
f << "(";
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::S), for_debug);
|
2019-11-30 19:51:16 -06:00
|
|
|
f << " ? ";
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::B), for_debug);
|
2019-11-30 19:51:16 -06:00
|
|
|
f << " : ";
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::A), for_debug);
|
2019-12-09 13:05:52 -06:00
|
|
|
f << ")";
|
2020-06-11 21:40:30 -05:00
|
|
|
// Parallel (one-hot) muxes
|
|
|
|
} else if (cell->type == ID($pmux)) {
|
|
|
|
int width = cell->getParam(ID::WIDTH).as_int();
|
|
|
|
int s_width = cell->getParam(ID::S_WIDTH).as_int();
|
|
|
|
for (int part = 0; part < s_width; part++) {
|
|
|
|
f << "(";
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::S).extract(part), for_debug);
|
2020-06-11 21:40:30 -05:00
|
|
|
f << " ? ";
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::B).extract(part * width, width), for_debug);
|
2020-06-11 21:40:30 -05:00
|
|
|
f << " : ";
|
|
|
|
}
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::A), for_debug);
|
2020-06-11 21:40:30 -05:00
|
|
|
for (int part = 0; part < s_width; part++) {
|
|
|
|
f << ")";
|
|
|
|
}
|
2020-04-05 02:46:42 -05:00
|
|
|
// Concats
|
|
|
|
} else if (cell->type == ID($concat)) {
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::B), for_debug);
|
2020-04-05 02:46:42 -05:00
|
|
|
f << ".concat(";
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::A), for_debug);
|
2020-04-05 02:46:42 -05:00
|
|
|
f << ").val()";
|
|
|
|
// Slices
|
|
|
|
} else if (cell->type == ID($slice)) {
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::A), for_debug);
|
2020-04-05 02:46:42 -05:00
|
|
|
f << ".slice<";
|
2020-04-15 12:39:14 -05:00
|
|
|
f << cell->getParam(ID::OFFSET).as_int() + cell->getParam(ID::Y_WIDTH).as_int() - 1;
|
2020-04-05 02:46:42 -05:00
|
|
|
f << ",";
|
2020-04-15 12:39:14 -05:00
|
|
|
f << cell->getParam(ID::OFFSET).as_int();
|
2020-04-05 02:46:42 -05:00
|
|
|
f << ">().val()";
|
2019-12-09 13:05:52 -06:00
|
|
|
} else {
|
|
|
|
log_assert(false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-12 18:34:32 -06:00
|
|
|
// Returns true when `cell` is of an inlinable type, has a `Y` port that is connected to exactly one
// whole wire, and that wire is a member of `inlined_wires`.
bool is_cell_inlined(const RTLIL::Cell *cell)
{
	if (!is_inlinable_cell(cell->type))
		return false;
	if (!cell->hasPort(ID::Y))
		return false;
	const auto &output = cell->getPort(ID::Y);
	if (!output.is_wire())
		return false;
	return inlined_wires.count(output.as_wire()) != 0;
}
|
|
|
|
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
// Returns true when `cell` is an internal cell and at least one chunk of at least one of its output
// ports refers to a wire that is a member of `debug_outlined_wires`.
bool is_cell_outlined(const RTLIL::Cell *cell)
{
	if (!is_internal_cell(cell->type))
		return false;
	for (const auto &conn : cell->connections()) {
		// Only output ports are examined.
		if (!cell->output(conn.first))
			continue;
		for (const auto &chunk : conn.second.chunks()) {
			if (debug_outlined_wires.count(chunk.wire))
				return true;
		}
	}
	return false;
}
|
|
|
|
|
2020-04-21 16:48:17 -05:00
|
|
|
// If `cell` is inlined (see is_cell_inlined()), appends its name to `cells` and then passes every port
// connection other than `Y` to collect_sigspec_rhs() with the same `cells` vector.
// Cells that are not inlined leave `cells` untouched.
void collect_cell_eval(const RTLIL::Cell *cell, std::vector<RTLIL::IdString> &cells)
{
	if (is_cell_inlined(cell)) {
		cells.push_back(cell->name);
		for (const auto &port : cell->connections()) {
			// `Y` is skipped; every other connection is visited.
			if (port.first == ID::Y)
				continue;
			collect_sigspec_rhs(port.second, cells);
		}
	}
}
|
|
|
|
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
void dump_cell_eval(const RTLIL::Cell *cell, bool for_debug = false)
|
2019-12-09 13:05:52 -06:00
|
|
|
{
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
if (!for_debug && is_cell_inlined(cell))
|
|
|
|
return;
|
|
|
|
if (for_debug && !is_cell_outlined(cell))
|
2019-12-09 13:05:52 -06:00
|
|
|
return;
|
|
|
|
if (cell->type == ID($meminit))
|
|
|
|
return; // Handled elsewhere.
|
|
|
|
|
2020-12-12 18:34:32 -06:00
|
|
|
std::vector<RTLIL::IdString> inlined_cells;
|
|
|
|
if (is_inlinable_cell(cell->type)) {
|
2019-12-09 13:05:52 -06:00
|
|
|
for (auto port : cell->connections())
|
2020-04-15 12:39:14 -05:00
|
|
|
if (port.first != ID::Y)
|
2020-12-12 18:34:32 -06:00
|
|
|
collect_sigspec_rhs(port.second, inlined_cells);
|
2019-12-09 13:05:52 -06:00
|
|
|
}
|
2020-12-12 18:34:32 -06:00
|
|
|
if (inlined_cells.empty()) {
|
2019-12-09 13:05:52 -06:00
|
|
|
dump_attrs(cell);
|
|
|
|
f << indent << "// cell " << cell->name.str() << "\n";
|
|
|
|
} else {
|
|
|
|
f << indent << "// cells";
|
2020-12-12 18:34:32 -06:00
|
|
|
for (auto inlined_cell : inlined_cells)
|
|
|
|
f << " " << inlined_cell.str();
|
2019-12-09 13:05:52 -06:00
|
|
|
f << "\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
// Elidable cells
|
2020-12-12 18:34:32 -06:00
|
|
|
if (is_inlinable_cell(cell->type)) {
|
2019-12-09 13:05:52 -06:00
|
|
|
f << indent;
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dump_sigspec_lhs(cell->getPort(ID::Y), for_debug);
|
2019-12-09 13:05:52 -06:00
|
|
|
f << " = ";
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dump_cell_expr(cell, for_debug);
|
2019-11-30 19:51:16 -06:00
|
|
|
f << ";\n";
|
|
|
|
// Flip-flops
|
2019-12-09 13:05:52 -06:00
|
|
|
} else if (is_ff_cell(cell->type)) {
|
2020-04-15 12:39:14 -05:00
|
|
|
if (cell->hasPort(ID::CLK) && cell->getPort(ID::CLK).is_wire()) {
|
2019-11-30 19:51:16 -06:00
|
|
|
// Edge-sensitive logic
|
2020-04-15 12:39:14 -05:00
|
|
|
RTLIL::SigBit clk_bit = cell->getPort(ID::CLK)[0];
|
2019-11-30 19:51:16 -06:00
|
|
|
clk_bit = sigmaps[clk_bit.wire->module](clk_bit);
|
2020-12-02 15:39:25 -06:00
|
|
|
if (clk_bit.wire) {
|
|
|
|
f << indent << "if (" << (cell->getParam(ID::CLK_POLARITY).as_bool() ? "posedge_" : "negedge_")
|
|
|
|
<< mangle(clk_bit) << ") {\n";
|
|
|
|
} else {
|
|
|
|
f << indent << "if (false) {\n";
|
|
|
|
}
|
2019-11-30 19:51:16 -06:00
|
|
|
inc_indent();
|
2020-06-23 19:15:08 -05:00
|
|
|
if (cell->hasPort(ID::EN)) {
|
2019-11-30 19:51:16 -06:00
|
|
|
f << indent << "if (";
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::EN));
|
|
|
|
f << " == value<1> {" << cell->getParam(ID::EN_POLARITY).as_bool() << "u}) {\n";
|
2019-11-30 19:51:16 -06:00
|
|
|
inc_indent();
|
|
|
|
}
|
|
|
|
f << indent;
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_lhs(cell->getPort(ID::Q));
|
2019-11-30 19:51:16 -06:00
|
|
|
f << " = ";
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::D));
|
2019-11-30 19:51:16 -06:00
|
|
|
f << ";\n";
|
2020-06-23 19:15:08 -05:00
|
|
|
if (cell->hasPort(ID::EN) && cell->type != ID($sdffce)) {
|
|
|
|
dec_indent();
|
|
|
|
f << indent << "}\n";
|
|
|
|
}
|
|
|
|
if (cell->hasPort(ID::SRST)) {
|
|
|
|
f << indent << "if (";
|
|
|
|
dump_sigspec_rhs(cell->getPort(ID::SRST));
|
|
|
|
f << " == value<1> {" << cell->getParam(ID::SRST_POLARITY).as_bool() << "u}) {\n";
|
|
|
|
inc_indent();
|
|
|
|
f << indent;
|
|
|
|
dump_sigspec_lhs(cell->getPort(ID::Q));
|
|
|
|
f << " = ";
|
|
|
|
dump_const(cell->getParam(ID::SRST_VALUE));
|
|
|
|
f << ";\n";
|
|
|
|
dec_indent();
|
|
|
|
f << indent << "}\n";
|
|
|
|
}
|
|
|
|
if (cell->hasPort(ID::EN) && cell->type == ID($sdffce)) {
|
2019-11-30 19:51:16 -06:00
|
|
|
dec_indent();
|
|
|
|
f << indent << "}\n";
|
|
|
|
}
|
|
|
|
dec_indent();
|
|
|
|
f << indent << "}\n";
|
2020-04-15 12:39:14 -05:00
|
|
|
} else if (cell->hasPort(ID::EN)) {
|
2020-04-05 04:27:55 -05:00
|
|
|
// Level-sensitive logic
|
|
|
|
f << indent << "if (";
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::EN));
|
|
|
|
f << " == value<1> {" << cell->getParam(ID::EN_POLARITY).as_bool() << "u}) {\n";
|
2020-04-05 04:27:55 -05:00
|
|
|
inc_indent();
|
|
|
|
f << indent;
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_lhs(cell->getPort(ID::Q));
|
2020-04-05 04:27:55 -05:00
|
|
|
f << " = ";
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::D));
|
2020-04-05 04:27:55 -05:00
|
|
|
f << ";\n";
|
|
|
|
dec_indent();
|
|
|
|
f << indent << "}\n";
|
2019-11-30 19:51:16 -06:00
|
|
|
}
|
2020-04-15 12:39:14 -05:00
|
|
|
if (cell->hasPort(ID::ARST)) {
|
2020-04-05 04:13:13 -05:00
|
|
|
// Asynchronous reset (entire coarse cell at once)
|
2019-11-30 19:51:16 -06:00
|
|
|
f << indent << "if (";
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::ARST));
|
|
|
|
f << " == value<1> {" << cell->getParam(ID::ARST_POLARITY).as_bool() << "u}) {\n";
|
2019-11-30 19:51:16 -06:00
|
|
|
inc_indent();
|
|
|
|
f << indent;
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_lhs(cell->getPort(ID::Q));
|
2019-11-30 19:51:16 -06:00
|
|
|
f << " = ";
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_const(cell->getParam(ID::ARST_VALUE));
|
2019-11-30 19:51:16 -06:00
|
|
|
f << ";\n";
|
|
|
|
dec_indent();
|
|
|
|
f << indent << "}\n";
|
2020-04-05 04:13:13 -05:00
|
|
|
}
|
2020-04-15 12:39:14 -05:00
|
|
|
if (cell->hasPort(ID::SET)) {
|
2020-04-05 04:13:13 -05:00
|
|
|
// Asynchronous set (for individual bits)
|
|
|
|
f << indent;
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_lhs(cell->getPort(ID::Q));
|
2020-04-05 04:13:13 -05:00
|
|
|
f << " = ";
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_lhs(cell->getPort(ID::Q));
|
2020-04-05 04:13:13 -05:00
|
|
|
f << ".update(";
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_const(RTLIL::Const(RTLIL::S1, cell->getParam(ID::WIDTH).as_int()));
|
2020-04-05 04:13:13 -05:00
|
|
|
f << ", ";
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::SET));
|
|
|
|
f << (cell->getParam(ID::SET_POLARITY).as_bool() ? "" : ".bit_not()") << ");\n";
|
2020-04-05 04:13:13 -05:00
|
|
|
}
|
2020-04-15 12:39:14 -05:00
|
|
|
if (cell->hasPort(ID::CLR)) {
|
2020-04-05 04:13:13 -05:00
|
|
|
// Asynchronous clear (for individual bits; priority over set)
|
|
|
|
f << indent;
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_lhs(cell->getPort(ID::Q));
|
2020-04-05 04:13:13 -05:00
|
|
|
f << " = ";
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_lhs(cell->getPort(ID::Q));
|
2020-04-05 04:13:13 -05:00
|
|
|
f << ".update(";
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_const(RTLIL::Const(RTLIL::S0, cell->getParam(ID::WIDTH).as_int()));
|
2020-04-05 04:13:13 -05:00
|
|
|
f << ", ";
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::CLR));
|
|
|
|
f << (cell->getParam(ID::CLR_POLARITY).as_bool() ? "" : ".bit_not()") << ");\n";
|
2019-11-30 19:51:16 -06:00
|
|
|
}
|
|
|
|
// Memory ports
|
|
|
|
} else if (cell->type.in(ID($memrd), ID($memwr))) {
|
2020-04-15 12:39:14 -05:00
|
|
|
if (cell->getParam(ID::CLK_ENABLE).as_bool()) {
|
|
|
|
RTLIL::SigBit clk_bit = cell->getPort(ID::CLK)[0];
|
2019-11-30 19:51:16 -06:00
|
|
|
clk_bit = sigmaps[clk_bit.wire->module](clk_bit);
|
2020-12-02 15:39:25 -06:00
|
|
|
if (clk_bit.wire) {
|
|
|
|
f << indent << "if (" << (cell->getParam(ID::CLK_POLARITY).as_bool() ? "posedge_" : "negedge_")
|
|
|
|
<< mangle(clk_bit) << ") {\n";
|
|
|
|
} else {
|
|
|
|
f << indent << "if (false) {\n";
|
|
|
|
}
|
2019-11-30 19:51:16 -06:00
|
|
|
inc_indent();
|
|
|
|
}
|
2020-04-15 12:39:14 -05:00
|
|
|
RTLIL::Memory *memory = cell->module->memories[cell->getParam(ID::MEMID).decode_string()];
|
2020-04-04 17:53:46 -05:00
|
|
|
std::string valid_index_temp = fresh_temporary();
|
2020-04-04 21:06:26 -05:00
|
|
|
f << indent << "auto " << valid_index_temp << " = memory_index(";
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::ADDR));
|
2020-04-04 17:53:46 -05:00
|
|
|
f << ", " << memory->start_offset << ", " << memory->size << ");\n";
|
2019-11-30 19:51:16 -06:00
|
|
|
if (cell->type == ID($memrd)) {
|
2020-04-15 12:39:14 -05:00
|
|
|
bool has_enable = cell->getParam(ID::CLK_ENABLE).as_bool() && !cell->getPort(ID::EN).is_fully_ones();
|
2020-04-14 14:39:13 -05:00
|
|
|
if (has_enable) {
|
2019-11-30 19:51:16 -06:00
|
|
|
f << indent << "if (";
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::EN));
|
2019-11-30 19:51:16 -06:00
|
|
|
f << ") {\n";
|
|
|
|
inc_indent();
|
|
|
|
}
|
2020-04-04 17:53:46 -05:00
|
|
|
// The generated code has two bounds checks; one in an assertion, and another that guards the read.
|
|
|
|
// This is done so that the code does not invoke undefined behavior under any conditions, but nevertheless
|
2020-12-12 14:24:53 -06:00
|
|
|
// loudly crashes if an illegal condition is encountered. The assert may be turned off with -DCXXRTL_NDEBUG
|
|
|
|
// not only for release builds, but also to make sure the simulator (which is presumably embedded in some
|
2020-04-04 17:53:46 -05:00
|
|
|
// larger program) will never crash the code that calls into it.
|
|
|
|
//
|
|
|
|
// If assertions are disabled, out of bounds reads are defined to return zero.
|
2020-12-02 13:41:00 -06:00
|
|
|
f << indent << "CXXRTL_ASSERT(" << valid_index_temp << ".valid && \"out of bounds read\");\n";
|
2020-04-04 21:06:26 -05:00
|
|
|
f << indent << "if(" << valid_index_temp << ".valid) {\n";
|
2020-04-04 17:53:46 -05:00
|
|
|
inc_indent();
|
|
|
|
if (writable_memories[memory]) {
|
|
|
|
std::string lhs_temp = fresh_temporary();
|
|
|
|
f << indent << "value<" << memory->width << "> " << lhs_temp << " = "
|
2020-04-04 21:06:26 -05:00
|
|
|
<< mangle(memory) << "[" << valid_index_temp << ".index];\n";
|
|
|
|
std::vector<const RTLIL::Cell*> memwr_cells(transparent_for[cell].begin(), transparent_for[cell].end());
|
2020-06-14 00:42:52 -05:00
|
|
|
if (!memwr_cells.empty()) {
|
|
|
|
std::string addr_temp = fresh_temporary();
|
|
|
|
f << indent << "const value<" << cell->getPort(ID::ADDR).size() << "> &" << addr_temp << " = ";
|
|
|
|
dump_sigspec_rhs(cell->getPort(ID::ADDR));
|
|
|
|
f << ";\n";
|
|
|
|
std::sort(memwr_cells.begin(), memwr_cells.end(),
|
|
|
|
[](const RTLIL::Cell *a, const RTLIL::Cell *b) {
|
|
|
|
return a->getParam(ID::PRIORITY).as_int() < b->getParam(ID::PRIORITY).as_int();
|
|
|
|
});
|
|
|
|
for (auto memwr_cell : memwr_cells) {
|
|
|
|
f << indent << "if (" << addr_temp << " == ";
|
|
|
|
dump_sigspec_rhs(memwr_cell->getPort(ID::ADDR));
|
|
|
|
f << ") {\n";
|
|
|
|
inc_indent();
|
|
|
|
f << indent << lhs_temp << " = " << lhs_temp;
|
|
|
|
f << ".update(";
|
|
|
|
dump_sigspec_rhs(memwr_cell->getPort(ID::DATA));
|
|
|
|
f << ", ";
|
|
|
|
dump_sigspec_rhs(memwr_cell->getPort(ID::EN));
|
|
|
|
f << ");\n";
|
|
|
|
dec_indent();
|
|
|
|
f << indent << "}\n";
|
|
|
|
}
|
2020-04-04 17:53:46 -05:00
|
|
|
}
|
|
|
|
f << indent;
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_lhs(cell->getPort(ID::DATA));
|
2020-04-04 17:53:46 -05:00
|
|
|
f << " = " << lhs_temp << ";\n";
|
|
|
|
} else {
|
|
|
|
f << indent;
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_lhs(cell->getPort(ID::DATA));
|
2020-04-04 21:06:26 -05:00
|
|
|
f << " = " << mangle(memory) << "[" << valid_index_temp << ".index];\n";
|
2019-12-10 14:09:24 -06:00
|
|
|
}
|
2020-04-04 17:53:46 -05:00
|
|
|
dec_indent();
|
|
|
|
f << indent << "} else {\n";
|
|
|
|
inc_indent();
|
2019-12-10 14:09:24 -06:00
|
|
|
f << indent;
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_lhs(cell->getPort(ID::DATA));
|
2020-04-04 17:53:46 -05:00
|
|
|
f << " = value<" << memory->width << "> {};\n";
|
|
|
|
dec_indent();
|
|
|
|
f << indent << "}\n";
|
2020-04-14 14:39:13 -05:00
|
|
|
if (has_enable) {
|
2019-11-30 19:51:16 -06:00
|
|
|
dec_indent();
|
|
|
|
f << indent << "}\n";
|
|
|
|
}
|
|
|
|
} else /*if (cell->type == ID($memwr))*/ {
|
|
|
|
log_assert(writable_memories[memory]);
|
2020-04-04 17:53:46 -05:00
|
|
|
// See above for rationale of having both the assert and the condition.
|
|
|
|
//
|
|
|
|
// If assertions are disabled, out of bounds writes are defined to do nothing.
|
2020-12-02 13:41:00 -06:00
|
|
|
f << indent << "CXXRTL_ASSERT(" << valid_index_temp << ".valid && \"out of bounds write\");\n";
|
2020-04-04 21:06:26 -05:00
|
|
|
f << indent << "if (" << valid_index_temp << ".valid) {\n";
|
2020-04-04 17:53:46 -05:00
|
|
|
inc_indent();
|
2020-04-04 21:06:26 -05:00
|
|
|
f << indent << mangle(memory) << ".update(" << valid_index_temp << ".index, ";
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::DATA));
|
2020-04-04 21:06:26 -05:00
|
|
|
f << ", ";
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_sigspec_rhs(cell->getPort(ID::EN));
|
|
|
|
f << ", " << cell->getParam(ID::PRIORITY).as_int() << ");\n";
|
2020-04-04 17:53:46 -05:00
|
|
|
dec_indent();
|
|
|
|
f << indent << "}\n";
|
2019-11-30 19:51:16 -06:00
|
|
|
}
|
2020-04-15 12:39:14 -05:00
|
|
|
if (cell->getParam(ID::CLK_ENABLE).as_bool()) {
|
2019-11-30 19:51:16 -06:00
|
|
|
dec_indent();
|
|
|
|
f << indent << "}\n";
|
|
|
|
}
|
2020-04-03 11:07:43 -05:00
|
|
|
// Internal cells
|
|
|
|
} else if (is_internal_cell(cell->type)) {
|
2019-11-30 19:51:16 -06:00
|
|
|
log_cmd_error("Unsupported internal cell `%s'.\n", cell->type.c_str());
|
2020-04-03 11:07:43 -05:00
|
|
|
// User cells
|
2019-11-30 19:51:16 -06:00
|
|
|
} else {
|
2020-04-03 11:07:43 -05:00
|
|
|
log_assert(cell->known());
|
2020-12-11 17:30:32 -06:00
|
|
|
bool buffered_inputs = false;
|
2020-04-16 20:41:08 -05:00
|
|
|
const char *access = is_cxxrtl_blackbox_cell(cell) ? "->" : ".";
|
2020-04-03 11:07:43 -05:00
|
|
|
for (auto conn : cell->connections())
|
2020-12-11 17:30:32 -06:00
|
|
|
if (cell->input(conn.first)) {
|
|
|
|
RTLIL::Module *cell_module = cell->module->design->module(cell->type);
|
|
|
|
log_assert(cell_module != nullptr && cell_module->wire(conn.first) && conn.second.is_wire());
|
|
|
|
RTLIL::Wire *cell_module_wire = cell_module->wire(conn.first);
|
|
|
|
f << indent << mangle(cell) << access << mangle_wire_name(conn.first);
|
|
|
|
if (!is_cxxrtl_blackbox_cell(cell) && !unbuffered_wires[cell_module_wire]) {
|
|
|
|
buffered_inputs = true;
|
|
|
|
f << ".next";
|
|
|
|
}
|
|
|
|
f << " = ";
|
2020-04-21 09:49:36 -05:00
|
|
|
dump_sigspec_rhs(conn.second);
|
|
|
|
f << ";\n";
|
2020-04-21 20:15:27 -05:00
|
|
|
if (getenv("CXXRTL_VOID_MY_WARRANTY")) {
|
|
|
|
// Until we have proper clock tree detection, this really awful hack that opportunistically
|
|
|
|
// propagates prev_* values for clocks can be used to estimate how much faster a design could
|
|
|
|
// be if only one clock edge was simulated by replacing:
|
|
|
|
// top.p_clk = value<1>{0u}; top.step();
|
|
|
|
// top.p_clk = value<1>{1u}; top.step();
|
|
|
|
// with:
|
|
|
|
// top.prev_p_clk = value<1>{0u}; top.p_clk = value<1>{1u}; top.step();
|
|
|
|
// Don't rely on this; it will be removed without warning.
|
2020-12-11 17:30:32 -06:00
|
|
|
if (edge_wires[conn.second.as_wire()] && edge_wires[cell_module_wire]) {
|
|
|
|
f << indent << mangle(cell) << access << "prev_" << mangle(cell_module_wire) << " = ";
|
|
|
|
f << "prev_" << mangle(conn.second.as_wire()) << ";\n";
|
2020-04-21 20:15:27 -05:00
|
|
|
}
|
|
|
|
}
|
2020-04-03 11:07:43 -05:00
|
|
|
}
|
2020-04-21 16:48:17 -05:00
|
|
|
auto assign_from_outputs = [&](bool cell_converged) {
|
|
|
|
for (auto conn : cell->connections()) {
|
|
|
|
if (cell->output(conn.first)) {
|
|
|
|
if (conn.second.empty())
|
|
|
|
continue; // ignore disconnected ports
|
|
|
|
if (is_cxxrtl_sync_port(cell, conn.first))
|
|
|
|
continue; // fully sync ports are handled in CELL_SYNC nodes
|
|
|
|
f << indent;
|
|
|
|
dump_sigspec_lhs(conn.second);
|
|
|
|
f << " = " << mangle(cell) << access << mangle_wire_name(conn.first);
|
|
|
|
// Similarly to how there is no purpose to buffering cell inputs, there is also no purpose to buffering
|
|
|
|
// combinatorial cell outputs in case the cell converges within one cycle. (To convince yourself that
|
|
|
|
// this optimization is valid, consider that, since the cell converged within one cycle, it would not
|
|
|
|
// have any buffered wires if they were not output ports. Imagine inlining the cell's eval() function,
|
|
|
|
// and consider the fate of the localized wires that used to be output ports.)
|
|
|
|
//
|
2020-12-11 17:30:32 -06:00
|
|
|
// It is not possible to know apriori whether the cell (which may be late bound) will converge immediately.
|
|
|
|
// Because of this, the choice between using .curr (appropriate for buffered outputs) and .next (appropriate
|
|
|
|
// for unbuffered outputs) is made at runtime.
|
2020-04-21 16:48:17 -05:00
|
|
|
if (cell_converged && is_cxxrtl_comb_port(cell, conn.first))
|
|
|
|
f << ".next;\n";
|
|
|
|
else
|
|
|
|
f << ".curr;\n";
|
|
|
|
}
|
2020-04-03 11:07:43 -05:00
|
|
|
}
|
2020-04-21 16:48:17 -05:00
|
|
|
};
|
2020-12-11 17:30:32 -06:00
|
|
|
if (buffered_inputs) {
|
|
|
|
// If we have any buffered inputs, there's no chance of converging immediately.
|
|
|
|
f << indent << mangle(cell) << access << "eval();\n";
|
2020-04-21 16:48:17 -05:00
|
|
|
f << indent << "converged = false;\n";
|
|
|
|
assign_from_outputs(/*cell_converged=*/false);
|
2020-12-11 17:30:32 -06:00
|
|
|
} else {
|
|
|
|
f << indent << "if (" << mangle(cell) << access << "eval()) {\n";
|
|
|
|
inc_indent();
|
|
|
|
assign_from_outputs(/*cell_converged=*/true);
|
|
|
|
dec_indent();
|
|
|
|
f << indent << "} else {\n";
|
|
|
|
inc_indent();
|
|
|
|
f << indent << "converged = false;\n";
|
|
|
|
assign_from_outputs(/*cell_converged=*/false);
|
|
|
|
dec_indent();
|
|
|
|
f << indent << "}\n";
|
|
|
|
}
|
2019-11-30 19:51:16 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-12-09 13:05:52 -06:00
|
|
|
// Emits one indented assignment statement of the form `<lhs> = <rhs>;`, where the left-hand side is
// rendered from `sigsig.first` and the right-hand side from `sigsig.second`.
void dump_assign(const RTLIL::SigSig &sigsig)
{
	const auto &target = sigsig.first;
	const auto &source = sigsig.second;

	f << indent;
	dump_sigspec_lhs(target);
	f << " = ";
	dump_sigspec_rhs(source);
	f << ";\n";
}
|
|
|
|
|
2019-11-30 19:51:16 -06:00
|
|
|
// Emits the body of a case rule: first every action (as an assignment), then every nested switch.
void dump_case_rule(const RTLIL::CaseRule *rule)
{
	for (const auto &assignment : rule->actions) {
		dump_assign(assignment);
	}
	for (const RTLIL::SwitchRule *nested_switch : rule->switches) {
		dump_switch_rule(nested_switch);
	}
}
|
|
|
|
|
|
|
|
// Emits a switch rule as an `if (...) { ... } else if (...) { ... } else { ... }` chain.
//
// The switch signal is bound once to a fresh temporary reference, and every case compares against
// that temporary. A case with an empty compare list becomes a plain `else` (or a bare block when it
// is the first case). Compare values that are fully defined are tested with `==`; constant compare
// values containing undefined bits are tested through a mask so that those bits are ignored.
// Compare values that are not constant are rejected with an assertion.
void dump_switch_rule(const RTLIL::SwitchRule *rule)
{
	// The switch attributes are printed before the switch condition is captured.
	dump_attrs(rule);
	std::string signal_temp = fresh_temporary();
	f << indent << "const value<" << rule->signal.size() << "> &" << signal_temp << " = ";
	dump_sigspec(rule->signal, /*is_lhs=*/false);
	f << ";\n";

	bool first_case = true;
	for (auto case_ : rule->cases) {
		// The case attributes (for nested cases) are printed before the if/else if/else statement.
		// This must be given the case, not the enclosing switch, whose attributes were printed above.
		dump_attrs(case_);
		f << indent;
		if (!first_case)
			f << "} else ";
		first_case = false;
		if (!case_->compare.empty()) {
			f << "if (";
			bool first_compare = true;
			for (auto &compare : case_->compare) {
				if (!first_compare)
					f << " || ";
				first_compare = false;
				if (compare.is_fully_def()) {
					f << signal_temp << " == ";
					dump_sigspec(compare, /*is_lhs=*/false);
				} else if (compare.is_fully_const()) {
					// Build a mask that selects only the defined bits, and a value that has zeroes
					// in every masked-out position, so that `(signal & mask) == value` ignores the
					// undefined bits of the compare value.
					RTLIL::Const compare_mask, compare_value;
					for (auto bit : compare.as_const()) {
						switch (bit) {
							case RTLIL::S0:
							case RTLIL::S1:
								compare_mask.bits.push_back(RTLIL::S1);
								compare_value.bits.push_back(bit);
								break;

							case RTLIL::Sx:
							case RTLIL::Sz:
							case RTLIL::Sa:
								compare_mask.bits.push_back(RTLIL::S0);
								compare_value.bits.push_back(RTLIL::S0);
								break;

							default:
								log_assert(false);
						}
					}
					f << "and_uu<" << compare.size() << ">(" << signal_temp << ", ";
					dump_const(compare_mask);
					f << ") == ";
					dump_const(compare_value);
				} else {
					log_assert(false);
				}
			}
			f << ") ";
		}
		f << "{\n";
		inc_indent();
		dump_case_rule(case_);
		dec_indent();
	}
	// Close the last block of the chain. When the switch has no cases, no block was ever opened,
	// and emitting a brace here would unbalance the generated code.
	if (!first_case)
		f << indent << "}\n";
}
|
|
|
|
|
|
|
|
// Emits the C++ code for a process: its attributes and a `// process <name>` comment, then the body of
// its root case, then one guarded block per sync rule.
//
// Each sync rule is turned into a set of event expressions: `posedge_<signal>` and/or
// `negedge_<signal>` for edge-triggered rules and `true` for `STa`. The rule's actions are emitted
// inside an `if` over the disjunction of those events. Sync types `ST0`, `ST1`, `STg` and `STi`
// are rejected with an assertion.
void dump_process(const RTLIL::Process *proc)
{
	dump_attrs(proc);
	f << indent << "// process " << proc->name.str() << "\n";
	// The case attributes (for root case) are always empty.
	log_assert(proc->root_case.attributes.empty());
	dump_case_rule(&proc->root_case);
	for (auto sync : proc->syncs) {
		RTLIL::SigBit sync_bit;
		if (!sync->signal.empty()) {
			sync_bit = sync->signal[0];
			// A constant bit has a null `wire`; the sigmap lookup needs the wire's module, so it is
			// skipped for constants. The edge-triggered cases below then reject the null wire with
			// an assertion, instead of this line dereferencing a null pointer.
			if (sync_bit.wire != nullptr)
				sync_bit = sigmaps[sync_bit.wire->module](sync_bit);
		}

		pool<std::string> events;
		switch (sync->type) {
			case RTLIL::STp:
				log_assert(sync_bit.wire != nullptr);
				events.insert("posedge_" + mangle(sync_bit));
				break;
			case RTLIL::STn:
				log_assert(sync_bit.wire != nullptr);
				events.insert("negedge_" + mangle(sync_bit));
				break;
			case RTLIL::STe:
				log_assert(sync_bit.wire != nullptr);
				events.insert("posedge_" + mangle(sync_bit));
				events.insert("negedge_" + mangle(sync_bit));
				break;

			case RTLIL::STa:
				events.insert("true");
				break;

			case RTLIL::ST0:
			case RTLIL::ST1:
			case RTLIL::STg:
			case RTLIL::STi:
				log_assert(false);
		}
		if (!events.empty()) {
			f << indent << "if (";
			bool first = true;
			for (auto &event : events) {
				if (!first)
					f << " || ";
				first = false;
				f << event;
			}
			f << ") {\n";
			inc_indent();
			for (const auto &action : sync->actions)
				dump_assign(action);
			dec_indent();
			f << indent << "}\n";
		}
	}
}
|
|
|
|
|
2020-12-12 18:34:32 -06:00
|
|
|
void dump_wire(const RTLIL::Wire *wire, bool is_local)
|
2019-11-30 19:51:16 -06:00
|
|
|
{
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
if (is_local && localized_wires[wire] && !inlined_wires.count(wire)) {
|
2020-06-11 17:21:30 -05:00
|
|
|
dump_attrs(wire);
|
|
|
|
f << indent << "value<" << wire->width << "> " << mangle(wire) << ";\n";
|
|
|
|
}
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
if (!is_local && !localized_wires[wire]) {
|
2020-04-18 03:04:57 -05:00
|
|
|
std::string width;
|
2020-04-24 13:35:53 -05:00
|
|
|
if (wire->module->has_attribute(ID(cxxrtl_blackbox)) && wire->has_attribute(ID(cxxrtl_width))) {
|
|
|
|
width = wire->get_string_attribute(ID(cxxrtl_width));
|
2020-04-18 03:04:57 -05:00
|
|
|
} else {
|
|
|
|
width = std::to_string(wire->width);
|
|
|
|
}
|
|
|
|
|
2019-12-10 14:09:24 -06:00
|
|
|
dump_attrs(wire);
|
2020-06-11 19:35:18 -05:00
|
|
|
f << indent;
|
|
|
|
if (wire->port_input && wire->port_output)
|
|
|
|
f << "/*inout*/ ";
|
|
|
|
else if (wire->port_input)
|
|
|
|
f << "/*input*/ ";
|
|
|
|
else if (wire->port_output)
|
|
|
|
f << "/*output*/ ";
|
|
|
|
f << (unbuffered_wires[wire] ? "value" : "wire") << "<" << width << "> " << mangle(wire);
|
2020-04-18 03:04:57 -05:00
|
|
|
if (wire->has_attribute(ID::init)) {
|
2019-12-10 14:09:24 -06:00
|
|
|
f << " ";
|
2020-04-15 12:39:14 -05:00
|
|
|
dump_const_init(wire->attributes.at(ID::init));
|
2019-12-10 14:09:24 -06:00
|
|
|
}
|
|
|
|
f << ";\n";
|
2020-04-21 13:46:36 -05:00
|
|
|
if (edge_wires[wire]) {
|
2020-06-11 17:21:30 -05:00
|
|
|
if (unbuffered_wires[wire]) {
|
2020-04-21 09:49:36 -05:00
|
|
|
f << indent << "value<" << width << "> prev_" << mangle(wire);
|
|
|
|
if (wire->has_attribute(ID::init)) {
|
|
|
|
f << " ";
|
|
|
|
dump_const_init(wire->attributes.at(ID::init));
|
|
|
|
}
|
|
|
|
f << ";\n";
|
|
|
|
}
|
2020-04-21 13:46:36 -05:00
|
|
|
for (auto edge_type : edge_types) {
|
|
|
|
if (edge_type.first.wire == wire) {
|
2020-04-21 09:49:36 -05:00
|
|
|
std::string prev, next;
|
2020-06-11 17:21:30 -05:00
|
|
|
if (unbuffered_wires[wire]) {
|
2020-04-21 13:46:36 -05:00
|
|
|
prev = "prev_" + mangle(edge_type.first.wire);
|
|
|
|
next = mangle(edge_type.first.wire);
|
2020-04-21 09:49:36 -05:00
|
|
|
} else {
|
2020-04-21 13:46:36 -05:00
|
|
|
prev = mangle(edge_type.first.wire) + ".curr";
|
|
|
|
next = mangle(edge_type.first.wire) + ".next";
|
2020-04-21 09:49:36 -05:00
|
|
|
}
|
2020-04-21 13:46:36 -05:00
|
|
|
prev += ".slice<" + std::to_string(edge_type.first.offset) + ">().val()";
|
|
|
|
next += ".slice<" + std::to_string(edge_type.first.offset) + ">().val()";
|
|
|
|
if (edge_type.second != RTLIL::STn) {
|
|
|
|
f << indent << "bool posedge_" << mangle(edge_type.first) << "() const {\n";
|
2020-04-21 08:59:42 -05:00
|
|
|
inc_indent();
|
2020-04-21 09:49:36 -05:00
|
|
|
f << indent << "return !" << prev << " && " << next << ";\n";
|
2020-04-21 08:59:42 -05:00
|
|
|
dec_indent();
|
|
|
|
f << indent << "}\n";
|
2020-04-21 09:49:36 -05:00
|
|
|
}
|
2020-04-21 13:46:36 -05:00
|
|
|
if (edge_type.second != RTLIL::STp) {
|
|
|
|
f << indent << "bool negedge_" << mangle(edge_type.first) << "() const {\n";
|
2020-04-21 08:59:42 -05:00
|
|
|
inc_indent();
|
2020-04-21 09:49:36 -05:00
|
|
|
f << indent << "return " << prev << " && !" << next << ";\n";
|
2020-04-21 08:59:42 -05:00
|
|
|
dec_indent();
|
|
|
|
f << indent << "}\n";
|
|
|
|
}
|
2019-12-10 14:09:24 -06:00
|
|
|
}
|
2019-11-30 19:51:16 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
// Emits the `value<N>` declaration used for an outlined debug wire; does nothing for wires that are not in
// `debug_outlined_wires`.
//
// A wire with a public name that is neither in `debug_const_wires` nor in `debug_alias_wires` is declared only
// when `is_local` is clear, and is tagged with an `/*outline*/` comment. Every other outlined wire is declared
// only when `is_local` is set.
void dump_debug_wire(const RTLIL::Wire *wire, bool is_local)
{
	if (!debug_outlined_wires[wire])
		return;

	const bool as_member = wire->name.isPublic() &&
		!debug_const_wires.count(wire) && !debug_alias_wires.count(wire);
	// Members are skipped in the local pass, and non-members are skipped in the member pass.
	if (is_local == as_member)
		return;

	dump_attrs(wire);
	f << indent;
	if (as_member)
		f << "/*outline*/ ";
	f << "value<" << wire->width << "> " << mangle(wire) << ";\n";
}
|
|
|
|
|
2019-11-30 19:51:16 -06:00
|
|
|
// Emits the `memory<WIDTH>` declaration for `memory`, including one `init<WORDS>` entry for each `$meminit` cell
// of `module` whose MEMID parameter names this memory.
//
// Initializer entries are ordered by descending PRIORITY, and by ascending ADDR among equal priorities.
void dump_memory(RTLIL::Module *module, const RTLIL::Memory *memory)
{
	vector<const RTLIL::Cell*> initializers;
	for (auto cell : module->cells()) {
		if (cell->type != ID($meminit))
			continue;
		if (cell->getParam(ID::MEMID).decode_string() != memory->name.str())
			continue;
		initializers.push_back(cell);
	}

	auto emitted_before = [](const RTLIL::Cell *lhs, const RTLIL::Cell *rhs) {
		int lhs_addr = lhs->getPort(ID::ADDR).as_int(), rhs_addr = rhs->getPort(ID::ADDR).as_int();
		int lhs_prio = lhs->getParam(ID::PRIORITY).as_int(), rhs_prio = rhs->getParam(ID::PRIORITY).as_int();
		if (lhs_prio != rhs_prio)
			return lhs_prio > rhs_prio;
		return lhs_addr < rhs_addr;
	};
	std::sort(initializers.begin(), initializers.end(), emitted_before);

	dump_attrs(memory);
	f << indent << "memory<" << memory->width << "> " << mangle(memory);
	f << " { " << memory->size << "u";
	if (initializers.empty()) {
		f << " };\n";
		return;
	}

	f << ",\n";
	inc_indent();
	for (auto init_cell : initializers) {
		dump_attrs(init_cell);
		RTLIL::Const contents = init_cell->getPort(ID::DATA).as_const();
		size_t word_width = init_cell->getParam(ID::WIDTH).as_int();
		size_t word_count = init_cell->getParam(ID::WORDS).as_int();
		f << indent << "memory<" << memory->width << ">::init<" << word_count << "> { ";
		f << stringf("%#x", init_cell->getPort(ID::ADDR).as_int()) << ", {";
		inc_indent();
		for (size_t index = 0; index < word_count; index++) {
			// Four words are printed per line.
			if (index % 4 != 0)
				f << " ";
			else
				f << "\n" << indent;
			dump_const(contents, word_width, index * word_width, /*fixed_width=*/true);
			f << ",";
		}
		dec_indent();
		f << "\n" << indent << "}},\n";
	}
	dec_indent();
	f << indent << "};\n";
}
|
|
|
|
|
2020-04-17 21:14:20 -05:00
|
|
|
// Emits the body of the `eval()` method of `module` (the caller prints the signature and the closing brace).
//
// The body always declares `converged`, initialized from `eval_converges`, and returns it. For modules that are not
// marked `cxxrtl_blackbox`, it also contains, in this order: a local snapshot of each edge detector, the local
// wire declarations, and the code for every node of the module's schedule.
void dump_eval_method(RTLIL::Module *module)
{
	inc_indent();
	f << indent << "bool converged = " << (eval_converges.at(module) ? "true" : "false") << ";\n";
	const bool is_blackbox = module->get_bool_attribute(ID(cxxrtl_blackbox));
	if (!is_blackbox) {
		for (auto wire : module->wires()) {
			if (!edge_wires[wire])
				continue;
			for (const auto &edge : edge_types) {
				if (edge.first.wire != wire)
					continue;
				// The result of each detector method is captured in a local of the same name.
				if (edge.second != RTLIL::STn)
					f << indent << "bool posedge_" << mangle(edge.first) << " = "
					  << "this->posedge_" << mangle(edge.first) << "();\n";
				if (edge.second != RTLIL::STp)
					f << indent << "bool negedge_" << mangle(edge.first) << " = "
					  << "this->negedge_" << mangle(edge.first) << "();\n";
			}
		}
		for (auto wire : module->wires())
			dump_wire(wire, /*is_local=*/true);
		for (const auto &node : schedule[module]) {
			switch (node.type) {
				case FlowGraph::Node::Type::CONNECT:
					dump_connect(node.connect);
					break;
				case FlowGraph::Node::Type::CELL_SYNC:
					dump_cell_sync(node.cell);
					break;
				case FlowGraph::Node::Type::CELL_EVAL:
					dump_cell_eval(node.cell);
					break;
				case FlowGraph::Node::Type::PROCESS:
					dump_process(node.process);
					break;
			}
		}
	}
	f << indent << "return converged;\n";
	dec_indent();
}
|
2020-04-03 11:07:43 -05:00
|
|
|
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
// Emits the body of the `debug_eval()` method of `module`: the local declarations of debug wires, followed by
// the scheduled connections and cell evaluations, both emitted in their `for_debug` form. Sync cells and processes
// from the schedule produce no output here.
void dump_debug_eval_method(RTLIL::Module *module)
{
	inc_indent();
	for (auto wire : module->wires())
		dump_debug_wire(wire, /*is_local=*/true);
	for (const auto &node : schedule[module]) {
		switch (node.type) {
			case FlowGraph::Node::Type::CELL_SYNC:
			case FlowGraph::Node::Type::PROCESS:
				// Nothing is emitted for these node types.
				break;
			case FlowGraph::Node::Type::CONNECT:
				dump_connect(node.connect, /*for_debug=*/true);
				break;
			case FlowGraph::Node::Type::CELL_EVAL:
				dump_cell_eval(node.cell, /*for_debug=*/true);
				break;
		}
	}
	dec_indent();
}
|
|
|
|
|
2020-04-17 21:14:20 -05:00
|
|
|
// Emits the body of the `commit()` method of `module` (the caller prints the signature and the closing brace).
//
// The body accumulates into `changed` the result of `.commit()` for every wire that is neither inlined nor
// unbuffered (in black boxes, ports only), and, outside of black boxes, for every writable memory and every
// non-internal cell. Unbuffered wires in `edge_wires` have their `prev_` copy refreshed instead.
void dump_commit_method(RTLIL::Module *module)
{
	const bool is_blackbox = module->get_bool_attribute(ID(cxxrtl_blackbox));

	inc_indent();
	f << indent << "bool changed = false;\n";
	for (auto wire : module->wires()) {
		if (inlined_wires.count(wire)) {
			// Nothing is emitted for inlined wires.
		} else if (unbuffered_wires[wire]) {
			if (edge_wires[wire])
				f << indent << "prev_" << mangle(wire) << " = " << mangle(wire) << ";\n";
		} else if (!is_blackbox || wire->port_id != 0) {
			f << indent << "changed |= " << mangle(wire) << ".commit();\n";
		}
	}
	if (!is_blackbox) {
		for (auto memory : module->memories) {
			if (writable_memories[memory.second])
				f << indent << "changed |= " << mangle(memory.second) << ".commit();\n";
		}
		for (auto cell : module->cells()) {
			if (is_internal_cell(cell->type))
				continue;
			// Black box cells are accessed with `->`, all other cells with `.`.
			const char *accessor = ".";
			if (is_cxxrtl_blackbox_cell(cell))
				accessor = "->";
			f << indent << "changed |= " << mangle(cell) << accessor << "commit();\n";
		}
	}
	f << indent << "return changed;\n";
	dec_indent();
}
|
|
|
|
|
2020-05-26 19:21:15 -05:00
|
|
|
void dump_debug_info_method(RTLIL::Module *module)
|
|
|
|
{
|
2020-06-09 16:50:09 -05:00
|
|
|
size_t count_public_wires = 0;
|
2020-06-08 12:29:08 -05:00
|
|
|
size_t count_const_wires = 0;
|
2020-06-08 11:22:30 -05:00
|
|
|
size_t count_alias_wires = 0;
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
size_t count_inline_wires = 0;
|
2020-06-08 12:29:08 -05:00
|
|
|
size_t count_member_wires = 0;
|
2020-06-08 11:22:30 -05:00
|
|
|
size_t count_skipped_wires = 0;
|
2020-09-02 12:16:10 -05:00
|
|
|
size_t count_driven_sync = 0;
|
|
|
|
size_t count_driven_comb = 0;
|
|
|
|
size_t count_undriven = 0;
|
|
|
|
size_t count_mixed_driver = 0;
|
2020-05-26 19:21:15 -05:00
|
|
|
inc_indent();
|
|
|
|
f << indent << "assert(path.empty() || path[path.size() - 1] == ' ');\n";
|
|
|
|
for (auto wire : module->wires()) {
|
2020-12-12 18:54:12 -06:00
|
|
|
if (!wire->name.isPublic())
|
2020-05-26 19:21:15 -05:00
|
|
|
continue;
|
2020-06-16 08:28:35 -05:00
|
|
|
if (module->get_bool_attribute(ID(cxxrtl_blackbox)) && (wire->port_id == 0))
|
|
|
|
continue;
|
2020-06-09 16:50:09 -05:00
|
|
|
count_public_wires++;
|
2020-06-08 12:29:08 -05:00
|
|
|
if (debug_const_wires.count(wire)) {
|
|
|
|
// Wire tied to a constant
|
|
|
|
f << indent << "static const value<" << wire->width << "> const_" << mangle(wire) << " = ";
|
|
|
|
dump_const(debug_const_wires[wire]);
|
|
|
|
f << ";\n";
|
2020-06-11 08:31:16 -05:00
|
|
|
f << indent << "items.add(path + " << escape_cxx_string(get_hdl_name(wire));
|
2020-06-11 07:42:37 -05:00
|
|
|
f << ", debug_item(const_" << mangle(wire) << ", ";
|
|
|
|
f << wire->start_offset << "));\n";
|
2020-06-08 12:29:08 -05:00
|
|
|
count_const_wires++;
|
|
|
|
} else if (debug_alias_wires.count(wire)) {
|
2020-06-08 11:22:30 -05:00
|
|
|
// Alias of a member wire
|
2020-06-11 08:31:16 -05:00
|
|
|
f << indent << "items.add(path + " << escape_cxx_string(get_hdl_name(wire));
|
2020-06-11 07:42:37 -05:00
|
|
|
f << ", debug_item(debug_alias(), " << mangle(debug_alias_wires[wire]) << ", ";
|
|
|
|
f << wire->start_offset << "));\n";
|
2020-06-08 11:22:30 -05:00
|
|
|
count_alias_wires++;
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
} else if (debug_outlined_wires.count(wire)) {
|
|
|
|
// Inlined but rematerializable wire
|
|
|
|
f << indent << "items.add(path + " << escape_cxx_string(get_hdl_name(wire));
|
|
|
|
f << ", debug_item(debug_eval_outline, " << mangle(wire) << ", ";
|
|
|
|
f << wire->start_offset << "));\n";
|
|
|
|
count_inline_wires++;
|
2020-06-08 12:29:08 -05:00
|
|
|
} else if (!localized_wires.count(wire)) {
|
2020-06-08 11:22:30 -05:00
|
|
|
// Member wire
|
2020-09-02 12:16:10 -05:00
|
|
|
std::vector<std::string> flags;
|
|
|
|
|
2020-09-02 10:18:44 -05:00
|
|
|
if (wire->port_input && wire->port_output)
|
2020-09-02 12:16:10 -05:00
|
|
|
flags.push_back("INOUT");
|
2020-09-02 10:18:44 -05:00
|
|
|
else if (wire->port_input)
|
2020-09-02 12:16:10 -05:00
|
|
|
flags.push_back("INPUT");
|
2020-09-02 10:18:44 -05:00
|
|
|
else if (wire->port_output)
|
2020-09-02 12:16:10 -05:00
|
|
|
flags.push_back("OUTPUT");
|
|
|
|
|
|
|
|
bool has_driven_sync = false;
|
|
|
|
bool has_driven_comb = false;
|
|
|
|
bool has_undriven = false;
|
|
|
|
SigSpec sig(wire);
|
|
|
|
for (auto bit : sig.bits())
|
|
|
|
if (!bit_has_state.count(bit))
|
|
|
|
has_undriven = true;
|
|
|
|
else if (bit_has_state[bit])
|
|
|
|
has_driven_sync = true;
|
|
|
|
else
|
|
|
|
has_driven_comb = true;
|
|
|
|
if (has_driven_sync)
|
|
|
|
flags.push_back("DRIVEN_SYNC");
|
|
|
|
if (has_driven_sync && !has_driven_comb && !has_undriven)
|
|
|
|
count_driven_sync++;
|
|
|
|
if (has_driven_comb)
|
|
|
|
flags.push_back("DRIVEN_COMB");
|
|
|
|
if (!has_driven_sync && has_driven_comb && !has_undriven)
|
|
|
|
count_driven_comb++;
|
|
|
|
if (has_undriven)
|
|
|
|
flags.push_back("UNDRIVEN");
|
|
|
|
if (!has_driven_sync && !has_driven_comb && has_undriven)
|
|
|
|
count_undriven++;
|
|
|
|
if (has_driven_sync + has_driven_comb + has_undriven > 1)
|
|
|
|
count_mixed_driver++;
|
|
|
|
|
|
|
|
f << indent << "items.add(path + " << escape_cxx_string(get_hdl_name(wire));
|
|
|
|
f << ", debug_item(" << mangle(wire) << ", ";
|
|
|
|
f << wire->start_offset;
|
|
|
|
bool first = true;
|
|
|
|
for (auto flag : flags) {
|
|
|
|
if (first) {
|
|
|
|
first = false;
|
|
|
|
f << ", ";
|
|
|
|
} else {
|
|
|
|
f << "|";
|
|
|
|
}
|
|
|
|
f << "debug_item::" << flag;
|
|
|
|
}
|
2020-09-02 10:18:44 -05:00
|
|
|
f << "));\n";
|
2020-06-08 11:22:30 -05:00
|
|
|
count_member_wires++;
|
2020-06-08 12:29:08 -05:00
|
|
|
} else {
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
// Localized or inlined wire with no debug information
|
2020-06-08 12:29:08 -05:00
|
|
|
count_skipped_wires++;
|
2020-06-08 11:22:30 -05:00
|
|
|
}
|
2020-05-26 19:21:15 -05:00
|
|
|
}
|
2020-06-16 08:28:35 -05:00
|
|
|
if (!module->get_bool_attribute(ID(cxxrtl_blackbox))) {
|
|
|
|
for (auto &memory_it : module->memories) {
|
2020-12-12 18:54:12 -06:00
|
|
|
if (!memory_it.first.isPublic())
|
2020-06-16 08:28:35 -05:00
|
|
|
continue;
|
|
|
|
f << indent << "items.add(path + " << escape_cxx_string(get_hdl_name(memory_it.second));
|
|
|
|
f << ", debug_item(" << mangle(memory_it.second) << ", ";
|
|
|
|
f << memory_it.second->start_offset << "));\n";
|
|
|
|
}
|
|
|
|
for (auto cell : module->cells()) {
|
|
|
|
if (is_internal_cell(cell->type))
|
|
|
|
continue;
|
|
|
|
const char *access = is_cxxrtl_blackbox_cell(cell) ? "->" : ".";
|
|
|
|
f << indent << mangle(cell) << access << "debug_info(items, ";
|
|
|
|
f << "path + " << escape_cxx_string(get_hdl_name(cell) + ' ') << ");\n";
|
|
|
|
}
|
2020-05-26 19:21:15 -05:00
|
|
|
}
|
|
|
|
dec_indent();
|
2020-06-08 11:22:30 -05:00
|
|
|
|
2020-06-09 16:50:09 -05:00
|
|
|
log_debug("Debug information statistics for module `%s':\n", log_id(module));
|
|
|
|
log_debug(" Public wires: %zu, of which:\n", count_public_wires);
|
2020-09-02 12:16:10 -05:00
|
|
|
log_debug(" Member wires: %zu, of which:\n", count_member_wires);
|
|
|
|
log_debug(" Driven sync: %zu\n", count_driven_sync);
|
|
|
|
log_debug(" Driven comb: %zu\n", count_driven_comb);
|
|
|
|
log_debug(" Mixed driver: %zu\n", count_mixed_driver);
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
log_debug(" Undriven: %zu\n", count_undriven);
|
|
|
|
log_debug(" Inline wires: %zu\n", count_inline_wires);
|
|
|
|
log_debug(" Alias wires: %zu\n", count_alias_wires);
|
|
|
|
log_debug(" Const wires: %zu\n", count_const_wires);
|
|
|
|
log_debug(" Other wires: %zu%s\n", count_skipped_wires,
|
|
|
|
count_skipped_wires > 0 ? " (debug information unavailable)" : "");
|
2020-05-26 19:21:15 -05:00
|
|
|
}
|
|
|
|
|
2020-04-19 11:22:02 -05:00
|
|
|
void dump_metadata_map(const dict<RTLIL::IdString, RTLIL::Const> &metadata_map)
|
|
|
|
{
|
|
|
|
if (metadata_map.empty()) {
|
|
|
|
f << "metadata_map()";
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
f << "metadata_map({\n";
|
|
|
|
inc_indent();
|
|
|
|
for (auto metadata_item : metadata_map) {
|
|
|
|
if (!metadata_item.first.begins_with("\\"))
|
|
|
|
continue;
|
|
|
|
f << indent << "{ " << escape_cxx_string(metadata_item.first.str().substr(1)) << ", ";
|
|
|
|
if (metadata_item.second.flags & RTLIL::CONST_FLAG_REAL) {
|
|
|
|
f << std::showpoint << std::stod(metadata_item.second.decode_string()) << std::noshowpoint;
|
|
|
|
} else if (metadata_item.second.flags & RTLIL::CONST_FLAG_STRING) {
|
|
|
|
f << escape_cxx_string(metadata_item.second.decode_string());
|
|
|
|
} else {
|
|
|
|
f << metadata_item.second.as_int(/*is_signed=*/metadata_item.second.flags & RTLIL::CONST_FLAG_SIGNED);
|
|
|
|
if (!(metadata_item.second.flags & RTLIL::CONST_FLAG_SIGNED))
|
|
|
|
f << "u";
|
|
|
|
}
|
|
|
|
f << " },\n";
|
|
|
|
}
|
|
|
|
dec_indent();
|
|
|
|
f << indent << "})";
|
|
|
|
}
|
|
|
|
|
2020-04-17 21:14:20 -05:00
|
|
|
void dump_module_intf(RTLIL::Module *module)
|
|
|
|
{
|
|
|
|
dump_attrs(module);
|
2020-04-24 13:35:53 -05:00
|
|
|
if (module->get_bool_attribute(ID(cxxrtl_blackbox))) {
|
|
|
|
if (module->has_attribute(ID(cxxrtl_template)))
|
2020-04-18 03:04:57 -05:00
|
|
|
f << indent << "template" << template_params(module, /*is_decl=*/true) << "\n";
|
|
|
|
f << indent << "struct " << mangle(module) << " : public module {\n";
|
2020-04-17 21:14:20 -05:00
|
|
|
inc_indent();
|
|
|
|
for (auto wire : module->wires()) {
|
|
|
|
if (wire->port_id != 0)
|
2020-12-12 18:34:32 -06:00
|
|
|
dump_wire(wire, /*is_local=*/false);
|
2020-04-17 21:14:20 -05:00
|
|
|
}
|
|
|
|
f << "\n";
|
2020-04-21 10:51:09 -05:00
|
|
|
f << indent << "bool eval() override {\n";
|
2020-04-17 21:14:20 -05:00
|
|
|
dump_eval_method(module);
|
|
|
|
f << indent << "}\n";
|
|
|
|
f << "\n";
|
|
|
|
f << indent << "bool commit() override {\n";
|
|
|
|
dump_commit_method(module);
|
|
|
|
f << indent << "}\n";
|
|
|
|
f << "\n";
|
2020-05-26 19:21:15 -05:00
|
|
|
if (debug_info) {
|
|
|
|
f << indent << "void debug_info(debug_items &items, std::string path = \"\") override {\n";
|
|
|
|
dump_debug_info_method(module);
|
|
|
|
f << indent << "}\n";
|
|
|
|
f << "\n";
|
|
|
|
}
|
2020-04-18 03:04:57 -05:00
|
|
|
f << indent << "static std::unique_ptr<" << mangle(module);
|
|
|
|
f << template_params(module, /*is_decl=*/false) << "> ";
|
2020-04-19 11:22:02 -05:00
|
|
|
f << "create(std::string name, metadata_map parameters, metadata_map attributes);\n";
|
2020-04-17 21:14:20 -05:00
|
|
|
dec_indent();
|
2020-04-18 03:04:57 -05:00
|
|
|
f << indent << "}; // struct " << mangle(module) << "\n";
|
2020-04-17 21:14:20 -05:00
|
|
|
f << "\n";
|
2020-04-18 03:04:57 -05:00
|
|
|
if (blackbox_specializations.count(module)) {
|
|
|
|
// If templated black boxes are used, the constructor of any module which includes the black box cell
|
|
|
|
// (which calls the declared but not defined in the generated code `create` function) may only be used
|
|
|
|
// if (a) the create function is defined in the same translation unit, or (b) the create function has
|
|
|
|
// a forward-declared explicit specialization.
|
|
|
|
//
|
|
|
|
// Option (b) makes it possible to have the generated code and the black box implementation in different
|
|
|
|
// translation units, which is convenient. Of course, its downside is that black boxes must predefine
|
|
|
|
// a specialization for every combination of parameters the generated code may use; but since the main
|
|
|
|
// purpose of templated black boxes is abstracting over datapath width, it is expected that there would
|
|
|
|
// be very few such combinations anyway.
|
|
|
|
for (auto specialization : blackbox_specializations[module]) {
|
|
|
|
f << indent << "template<>\n";
|
|
|
|
f << indent << "std::unique_ptr<" << mangle(module) << specialization << "> ";
|
|
|
|
f << mangle(module) << specialization << "::";
|
2020-04-19 11:22:02 -05:00
|
|
|
f << "create(std::string name, metadata_map parameters, metadata_map attributes);\n";
|
2020-04-18 03:04:57 -05:00
|
|
|
f << "\n";
|
|
|
|
}
|
|
|
|
}
|
2020-04-17 21:14:20 -05:00
|
|
|
} else {
|
2020-04-18 03:04:57 -05:00
|
|
|
f << indent << "struct " << mangle(module) << " : public module {\n";
|
2020-04-17 21:14:20 -05:00
|
|
|
inc_indent();
|
|
|
|
for (auto wire : module->wires())
|
2020-12-12 18:34:32 -06:00
|
|
|
dump_wire(wire, /*is_local=*/false);
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
for (auto wire : module->wires())
|
|
|
|
dump_debug_wire(wire, /*is_local=*/false);
|
2020-04-17 21:14:20 -05:00
|
|
|
bool has_memories = false;
|
|
|
|
for (auto memory : module->memories) {
|
|
|
|
dump_memory(module, memory.second);
|
|
|
|
has_memories = true;
|
|
|
|
}
|
|
|
|
if (has_memories)
|
|
|
|
f << "\n";
|
|
|
|
bool has_cells = false;
|
|
|
|
for (auto cell : module->cells()) {
|
|
|
|
if (is_internal_cell(cell->type))
|
|
|
|
continue;
|
|
|
|
dump_attrs(cell);
|
|
|
|
RTLIL::Module *cell_module = module->design->module(cell->type);
|
|
|
|
log_assert(cell_module != nullptr);
|
2020-04-24 13:35:53 -05:00
|
|
|
if (cell_module->get_bool_attribute(ID(cxxrtl_blackbox))) {
|
2020-04-18 03:04:57 -05:00
|
|
|
f << indent << "std::unique_ptr<" << mangle(cell_module) << template_args(cell) << "> ";
|
|
|
|
f << mangle(cell) << " = " << mangle(cell_module) << template_args(cell);
|
2020-05-26 19:21:15 -05:00
|
|
|
f << "::create(" << escape_cxx_string(get_hdl_name(cell)) << ", ";
|
2020-04-19 11:22:02 -05:00
|
|
|
dump_metadata_map(cell->parameters);
|
|
|
|
f << ", ";
|
|
|
|
dump_metadata_map(cell->attributes);
|
2020-04-17 21:14:20 -05:00
|
|
|
f << ");\n";
|
|
|
|
} else {
|
|
|
|
f << indent << mangle(cell_module) << " " << mangle(cell) << ";\n";
|
|
|
|
}
|
|
|
|
has_cells = true;
|
|
|
|
}
|
|
|
|
if (has_cells)
|
|
|
|
f << "\n";
|
2020-12-02 02:25:27 -06:00
|
|
|
f << indent << mangle(module) << "() {}\n";
|
|
|
|
if (has_cells) {
|
|
|
|
f << indent << mangle(module) << "(adopt, " << mangle(module) << " other) :\n";
|
|
|
|
bool first = true;
|
|
|
|
for (auto cell : module->cells()) {
|
|
|
|
if (is_internal_cell(cell->type))
|
|
|
|
continue;
|
|
|
|
if (first) {
|
|
|
|
first = false;
|
|
|
|
} else {
|
|
|
|
f << ",\n";
|
|
|
|
}
|
|
|
|
RTLIL::Module *cell_module = module->design->module(cell->type);
|
|
|
|
if (cell_module->get_bool_attribute(ID(cxxrtl_blackbox))) {
|
|
|
|
f << indent << " " << mangle(cell) << "(std::move(other." << mangle(cell) << "))";
|
|
|
|
} else {
|
|
|
|
f << indent << " " << mangle(cell) << "(adopt {}, std::move(other." << mangle(cell) << "))";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
f << " {\n";
|
|
|
|
inc_indent();
|
|
|
|
for (auto cell : module->cells()) {
|
|
|
|
if (is_internal_cell(cell->type))
|
|
|
|
continue;
|
|
|
|
RTLIL::Module *cell_module = module->design->module(cell->type);
|
|
|
|
if (cell_module->get_bool_attribute(ID(cxxrtl_blackbox)))
|
|
|
|
f << indent << mangle(cell) << "->reset();\n";
|
|
|
|
}
|
|
|
|
dec_indent();
|
|
|
|
f << indent << "}\n";
|
|
|
|
} else {
|
|
|
|
f << indent << mangle(module) << "(adopt, " << mangle(module) << " other) {}\n";
|
|
|
|
}
|
|
|
|
f << "\n";
|
|
|
|
f << indent << "void reset() override {\n";
|
|
|
|
inc_indent();
|
|
|
|
f << indent << "*this = " << mangle(module) << "(adopt {}, std::move(*this));\n";
|
|
|
|
dec_indent();
|
|
|
|
f << indent << "}\n";
|
|
|
|
f << "\n";
|
2020-04-21 10:51:09 -05:00
|
|
|
f << indent << "bool eval() override;\n";
|
2020-04-17 21:14:20 -05:00
|
|
|
f << indent << "bool commit() override;\n";
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
if (debug_info) {
|
|
|
|
if (debug_eval) {
|
|
|
|
f << "\n";
|
|
|
|
f << indent << "void debug_eval();\n";
|
|
|
|
for (auto wire : module->wires())
|
|
|
|
if (debug_outlined_wires.count(wire)) {
|
|
|
|
f << indent << "debug_outline debug_eval_outline { std::bind(&"
|
|
|
|
<< mangle(module) << "::debug_eval, this) };\n";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
f << "\n";
|
2020-05-26 19:21:15 -05:00
|
|
|
f << indent << "void debug_info(debug_items &items, std::string path = \"\") override;\n";
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
}
|
2020-04-17 21:14:20 -05:00
|
|
|
dec_indent();
|
2020-04-18 03:04:57 -05:00
|
|
|
f << indent << "}; // struct " << mangle(module) << "\n";
|
2020-04-17 21:14:20 -05:00
|
|
|
f << "\n";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Emits the out-of-line member function definitions generated for `module` to `f`: eval(), commit(),
// and, when `debug_info` is set, debug_eval() (only if `debug_eval` is also set) and debug_info().
//
// Nothing is emitted for modules carrying the `cxxrtl_blackbox` attribute.
void dump_module_impl(RTLIL::Module *module)
{
	if (module->get_bool_attribute(ID(cxxrtl_blackbox)))
		return;

	const auto struct_name = mangle(module);

	f << indent << "bool " << struct_name << "::eval() {\n";
	dump_eval_method(module);
	f << indent << "}\n" << "\n";

	f << indent << "bool " << struct_name << "::commit() {\n";
	dump_commit_method(module);
	f << indent << "}\n" << "\n";

	if (!debug_info)
		return;

	if (debug_eval) {
		f << indent << "void " << struct_name << "::debug_eval() {\n";
		dump_debug_eval_method(module);
		f << indent << "}\n" << "\n";
	}

	f << indent << "CXXRTL_EXTREMELY_COLD\n";
	f << indent << "void " << struct_name << "::debug_info(debug_items &items, std::string path) {\n";
	dump_debug_info_method(module);
	f << indent << "}\n" << "\n";
}
|
|
|
|
|
|
|
|
// Emits the generated C++ for every selected module of `design`.
//
// Modules are emitted in this order: selected modules carrying the `cxxrtl_blackbox` attribute first,
// followed by the result of a topological sort over the remaining selected non-blackbox modules, where
// an edge is recorded from each instantiated module to the module instantiating it.
//
// When `split_intf` is set, the module interfaces are written to `*intf_f` inside an include guard
// derived from `design_ns`, and the implementation written to `*impl_f` includes `intf_filename`.
// Otherwise interfaces and implementations are interleaved in `*impl_f`.
//
// If a module with the `top` attribute was found and `debug_info` is set, a C-linkage
// `<design_ns>_create()` function is declared (in the split interface) and defined (in the
// implementation).
void dump_design(RTLIL::Design *design)
{
	RTLIL::Module *top_module = nullptr;
	std::vector<RTLIL::Module*> modules;
	TopoSort<RTLIL::Module*> topo_design;
	for (auto module : design->modules()) {
		if (!design->selected_module(module))
			continue;
		if (module->get_bool_attribute(ID(cxxrtl_blackbox)))
			modules.push_back(module); // cxxrtl blackboxes first
		if (module->get_blackbox_attribute() || module->get_bool_attribute(ID(cxxrtl_blackbox)))
			continue;
		if (module->get_bool_attribute(ID::top))
			top_module = module;

		topo_design.node(module);
		for (auto cell : module->cells()) {
			if (is_internal_cell(cell->type) || is_cxxrtl_blackbox_cell(cell))
				continue;
			RTLIL::Module *cell_module = design->module(cell->type);
			log_assert(cell_module != nullptr);
			topo_design.edge(cell_module, module);
		}
	}
	bool no_loops = topo_design.sort();
	log_assert(no_loops);
	modules.insert(modules.end(), topo_design.sorted.begin(), topo_design.sorted.end());

	if (split_intf) {
		// The only thing more depraved than include guards, is mangling filenames to turn them into include guards.
		std::string include_guard = design_ns + "_header";
		// toupper() has undefined behavior for negative arguments other than EOF, so each character is
		// converted through `unsigned char` before the call.
		std::transform(include_guard.begin(), include_guard.end(), include_guard.begin(),
			[](unsigned char ch) { return static_cast<char>(::toupper(ch)); });

		f << "#ifndef " << include_guard << "\n";
		f << "#define " << include_guard << "\n";
		f << "\n";
		if (top_module != nullptr && debug_info) {
			f << "#include <backends/cxxrtl/cxxrtl_capi.h>\n";
			f << "\n";
			f << "#ifdef __cplusplus\n";
			f << "extern \"C\" {\n";
			f << "#endif\n";
			f << "\n";
			f << "cxxrtl_toplevel " << design_ns << "_create();\n";
			f << "\n";
			f << "#ifdef __cplusplus\n";
			f << "}\n";
			f << "#endif\n";
			f << "\n";
		} else {
			f << "// The CXXRTL C API is not available because the design is built without debug information.\n";
			f << "\n";
		}
		f << "#ifdef __cplusplus\n";
		f << "\n";
		f << "#include <backends/cxxrtl/cxxrtl.h>\n";
		f << "\n";
		f << "using namespace cxxrtl;\n";
		f << "\n";
		f << "namespace " << design_ns << " {\n";
		f << "\n";
		for (auto module : modules)
			dump_module_intf(module);
		f << "} // namespace " << design_ns << "\n";
		f << "\n";
		f << "#endif // __cplusplus\n";
		f << "\n";
		f << "#endif\n";
		// Flush the interface to its own stream and start the implementation from an empty buffer.
		*intf_f << f.str(); f.str("");

		f << "#include \"" << intf_filename << "\"\n";
	} else {
		f << "#include <backends/cxxrtl/cxxrtl.h>\n";
	}
	f << "\n";
	f << "#if defined(CXXRTL_INCLUDE_CAPI_IMPL) || \\\n";
	f << " defined(CXXRTL_INCLUDE_VCD_CAPI_IMPL)\n";
	f << "#include <backends/cxxrtl/cxxrtl_capi.cc>\n";
	f << "#endif\n";
	f << "\n";
	f << "#if defined(CXXRTL_INCLUDE_VCD_CAPI_IMPL)\n";
	f << "#include <backends/cxxrtl/cxxrtl_vcd_capi.cc>\n";
	f << "#endif\n";
	f << "\n";
	f << "using namespace cxxrtl_yosys;\n";
	f << "\n";
	f << "namespace " << design_ns << " {\n";
	f << "\n";
	for (auto module : modules) {
		// Without a separate interface file, each interface is emitted right before its implementation.
		if (!split_intf)
			dump_module_intf(module);
		dump_module_impl(module);
	}
	f << "} // namespace " << design_ns << "\n";
	f << "\n";
	if (top_module != nullptr && debug_info) {
		f << "extern \"C\"\n";
		f << "cxxrtl_toplevel " << design_ns << "_create() {\n";
		inc_indent();
		std::string top_type = design_ns + "::" + mangle(top_module);
		f << indent << "return new _cxxrtl_toplevel { ";
		f << "std::unique_ptr<" << top_type << ">(new " << top_type << ")";
		f << " };\n";
		dec_indent();
		f << "}\n";
	}

	*impl_f << f.str(); f.str("");
}
|
|
|
|
|
|
|
|
// Edge-type sync rules require us to emit edge detectors, which require coordination between
|
|
|
|
// eval and commit phases. To do this we need to collect them upfront.
|
|
|
|
//
|
|
|
|
// Note that the simulator commit phase operates at wire granularity but edge-type sync rules
|
|
|
|
// operate at wire bit granularity; it is possible to have code similar to:
|
|
|
|
// wire [3:0] clocks;
|
|
|
|
// always @(posedge clocks[0]) ...
|
|
|
|
// To handle this we track edge sensitivity both for wires and wire bits.
|
|
|
|
void register_edge_signal(SigMap &sigmap, RTLIL::SigSpec signal, RTLIL::SyncType type)
|
|
|
|
{
|
|
|
|
signal = sigmap(signal);
|
|
|
|
log_assert(signal.is_wire() && signal.is_bit());
|
|
|
|
log_assert(type == RTLIL::STp || type == RTLIL::STn || type == RTLIL::STe);
|
|
|
|
|
|
|
|
RTLIL::SigBit sigbit = signal[0];
|
2020-04-21 13:46:36 -05:00
|
|
|
if (!edge_types.count(sigbit))
|
|
|
|
edge_types[sigbit] = type;
|
|
|
|
else if (edge_types[sigbit] != type)
|
|
|
|
edge_types[sigbit] = RTLIL::STe;
|
|
|
|
edge_wires.insert(signal.as_wire());
|
2019-11-30 19:51:16 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
void analyze_design(RTLIL::Design *design)
|
|
|
|
{
|
2019-12-10 14:09:24 -06:00
|
|
|
bool has_feedback_arcs = false;
|
2020-06-09 16:50:09 -05:00
|
|
|
bool has_buffered_comb_wires = false;
|
2020-04-20 11:44:51 -05:00
|
|
|
|
2019-11-30 19:51:16 -06:00
|
|
|
for (auto module : design->modules()) {
|
2019-12-10 14:09:24 -06:00
|
|
|
if (!design->selected_module(module))
|
|
|
|
continue;
|
|
|
|
|
2019-11-30 19:51:16 -06:00
|
|
|
SigMap &sigmap = sigmaps[module];
|
|
|
|
sigmap.set(module);
|
|
|
|
|
2020-04-24 13:35:53 -05:00
|
|
|
if (module->get_bool_attribute(ID(cxxrtl_blackbox))) {
|
2020-04-16 20:41:08 -05:00
|
|
|
for (auto port : module->ports) {
|
|
|
|
RTLIL::Wire *wire = module->wire(port);
|
2020-06-11 17:21:30 -05:00
|
|
|
if (wire->port_input && !wire->port_output)
|
|
|
|
unbuffered_wires.insert(wire);
|
2020-04-24 13:35:53 -05:00
|
|
|
if (wire->has_attribute(ID(cxxrtl_edge))) {
|
|
|
|
RTLIL::Const edge_attr = wire->attributes[ID(cxxrtl_edge)];
|
2020-04-16 20:41:08 -05:00
|
|
|
if (!(edge_attr.flags & RTLIL::CONST_FLAG_STRING) || (int)edge_attr.decode_string().size() != GetSize(wire))
|
2020-04-24 13:35:53 -05:00
|
|
|
log_cmd_error("Attribute `cxxrtl_edge' of port `%s.%s' is not a string with one character per bit.\n",
|
2020-04-16 20:41:08 -05:00
|
|
|
log_id(module), log_signal(wire));
|
|
|
|
|
2020-04-24 13:35:53 -05:00
|
|
|
std::string edges = wire->get_string_attribute(ID(cxxrtl_edge));
|
2020-04-16 20:41:08 -05:00
|
|
|
for (int i = 0; i < GetSize(wire); i++) {
|
|
|
|
RTLIL::SigSpec wire_sig = wire;
|
|
|
|
switch (edges[i]) {
|
|
|
|
case '-': break;
|
|
|
|
case 'p': register_edge_signal(sigmap, wire_sig[i], RTLIL::STp); break;
|
|
|
|
case 'n': register_edge_signal(sigmap, wire_sig[i], RTLIL::STn); break;
|
|
|
|
case 'a': register_edge_signal(sigmap, wire_sig[i], RTLIL::STe); break;
|
|
|
|
default:
|
2020-04-24 13:35:53 -05:00
|
|
|
log_cmd_error("Attribute `cxxrtl_edge' of port `%s.%s' contains specifiers "
|
2020-04-16 20:41:08 -05:00
|
|
|
"other than '-', 'p', 'n', or 'a'.\n",
|
|
|
|
log_id(module), log_signal(wire));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-04-21 10:51:09 -05:00
|
|
|
|
|
|
|
// Black boxes converge by default, since their implementations are quite unlikely to require
|
|
|
|
// internal propagation of comb signals.
|
|
|
|
eval_converges[module] = true;
|
2020-04-16 20:41:08 -05:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
FlowGraph flow;
|
|
|
|
|
2019-12-09 13:05:52 -06:00
|
|
|
for (auto conn : module->connections())
|
|
|
|
flow.add_node(conn);
|
|
|
|
|
2019-12-10 14:09:24 -06:00
|
|
|
dict<const RTLIL::Cell*, FlowGraph::Node*> memrw_cell_nodes;
|
|
|
|
dict<std::pair<RTLIL::SigBit, const RTLIL::Memory*>,
|
|
|
|
pool<const RTLIL::Cell*>> memwr_per_domain;
|
2019-11-30 19:51:16 -06:00
|
|
|
for (auto cell : module->cells()) {
|
2020-04-16 20:41:08 -05:00
|
|
|
if (!cell->known())
|
|
|
|
log_cmd_error("Unknown cell `%s'.\n", log_id(cell->type));
|
|
|
|
|
|
|
|
RTLIL::Module *cell_module = design->module(cell->type);
|
2020-04-18 03:04:57 -05:00
|
|
|
if (cell_module &&
|
|
|
|
cell_module->get_blackbox_attribute() &&
|
2020-04-24 13:35:53 -05:00
|
|
|
!cell_module->get_bool_attribute(ID(cxxrtl_blackbox)))
|
2020-04-16 20:41:08 -05:00
|
|
|
log_cmd_error("External blackbox cell `%s' is not marked as a CXXRTL blackbox.\n", log_id(cell->type));
|
|
|
|
|
2020-04-18 03:04:57 -05:00
|
|
|
if (cell_module &&
|
2020-04-24 13:35:53 -05:00
|
|
|
cell_module->get_bool_attribute(ID(cxxrtl_blackbox)) &&
|
|
|
|
cell_module->get_bool_attribute(ID(cxxrtl_template)))
|
2020-04-18 03:04:57 -05:00
|
|
|
blackbox_specializations[cell_module].insert(template_args(cell));
|
|
|
|
|
2019-12-10 14:09:24 -06:00
|
|
|
FlowGraph::Node *node = flow.add_node(cell);
|
2019-12-09 13:05:52 -06:00
|
|
|
|
2019-11-30 19:51:16 -06:00
|
|
|
// Various DFF cells are treated like posedge/negedge processes, see above for details.
|
2020-06-23 19:15:08 -05:00
|
|
|
if (cell->type.in(ID($dff), ID($dffe), ID($adff), ID($adffe), ID($dffsr), ID($dffsre), ID($sdff), ID($sdffe), ID($sdffce))) {
|
2020-12-02 15:39:25 -06:00
|
|
|
if (sigmap(cell->getPort(ID::CLK)).is_wire())
|
2020-04-15 12:39:14 -05:00
|
|
|
register_edge_signal(sigmap, cell->getPort(ID::CLK),
|
|
|
|
cell->parameters[ID::CLK_POLARITY].as_bool() ? RTLIL::STp : RTLIL::STn);
|
2019-11-30 19:51:16 -06:00
|
|
|
}
|
|
|
|
// Similar for memory port cells.
|
|
|
|
if (cell->type.in(ID($memrd), ID($memwr))) {
|
2020-04-15 12:39:14 -05:00
|
|
|
if (cell->getParam(ID::CLK_ENABLE).as_bool()) {
|
2020-12-02 15:39:25 -06:00
|
|
|
if (sigmap(cell->getPort(ID::CLK)).is_wire())
|
2020-04-15 12:39:14 -05:00
|
|
|
register_edge_signal(sigmap, cell->getPort(ID::CLK),
|
|
|
|
cell->parameters[ID::CLK_POLARITY].as_bool() ? RTLIL::STp : RTLIL::STn);
|
2019-11-30 19:51:16 -06:00
|
|
|
}
|
2019-12-10 14:09:24 -06:00
|
|
|
memrw_cell_nodes[cell] = node;
|
2019-11-30 19:51:16 -06:00
|
|
|
}
|
|
|
|
// Optimize access to read-only memories.
|
|
|
|
if (cell->type == ID($memwr))
|
2020-04-15 12:39:14 -05:00
|
|
|
writable_memories.insert(module->memories[cell->getParam(ID::MEMID).decode_string()]);
|
2019-12-10 14:09:24 -06:00
|
|
|
// Collect groups of memory write ports in the same domain.
|
2020-04-15 12:39:14 -05:00
|
|
|
if (cell->type == ID($memwr) && cell->getParam(ID::CLK_ENABLE).as_bool() && cell->getPort(ID::CLK).is_wire()) {
|
|
|
|
RTLIL::SigBit clk_bit = sigmap(cell->getPort(ID::CLK))[0];
|
|
|
|
const RTLIL::Memory *memory = module->memories[cell->getParam(ID::MEMID).decode_string()];
|
2019-12-10 14:09:24 -06:00
|
|
|
memwr_per_domain[{clk_bit, memory}].insert(cell);
|
|
|
|
}
|
2019-11-30 19:51:16 -06:00
|
|
|
// Handling of packed memories is delegated to the `memory_unpack` pass, so we can rely on the presence
|
|
|
|
// of RTLIL memory objects and $memrd/$memwr/$meminit cells.
|
|
|
|
if (cell->type.in(ID($mem)))
|
|
|
|
log_assert(false);
|
|
|
|
}
|
2019-12-10 14:09:24 -06:00
|
|
|
for (auto cell : module->cells()) {
|
|
|
|
// Collect groups of memory write ports read by every transparent read port.
|
2020-04-15 12:39:14 -05:00
|
|
|
if (cell->type == ID($memrd) && cell->getParam(ID::CLK_ENABLE).as_bool() && cell->getPort(ID::CLK).is_wire() &&
|
|
|
|
cell->getParam(ID::TRANSPARENT).as_bool()) {
|
|
|
|
RTLIL::SigBit clk_bit = sigmap(cell->getPort(ID::CLK))[0];
|
|
|
|
const RTLIL::Memory *memory = module->memories[cell->getParam(ID::MEMID).decode_string()];
|
2019-12-10 14:09:24 -06:00
|
|
|
for (auto memwr_cell : memwr_per_domain[{clk_bit, memory}]) {
|
|
|
|
transparent_for[cell].insert(memwr_cell);
|
|
|
|
// Our implementation of transparent $memrd cells reads \EN, \ADDR and \DATA from every $memwr cell
|
|
|
|
// in the same domain, which isn't directly visible in the netlist. Add these uses explicitly.
|
2020-04-15 12:39:14 -05:00
|
|
|
flow.add_uses(memrw_cell_nodes[cell], memwr_cell->getPort(ID::EN));
|
|
|
|
flow.add_uses(memrw_cell_nodes[cell], memwr_cell->getPort(ID::ADDR));
|
|
|
|
flow.add_uses(memrw_cell_nodes[cell], memwr_cell->getPort(ID::DATA));
|
2019-12-10 14:09:24 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-11-30 19:51:16 -06:00
|
|
|
|
2019-12-09 13:05:52 -06:00
|
|
|
for (auto proc : module->processes) {
|
|
|
|
flow.add_node(proc.second);
|
|
|
|
|
2019-11-30 19:51:16 -06:00
|
|
|
for (auto sync : proc.second->syncs)
|
|
|
|
switch (sync->type) {
|
|
|
|
// Edge-type sync rules require pre-registration.
|
|
|
|
case RTLIL::STp:
|
|
|
|
case RTLIL::STn:
|
|
|
|
case RTLIL::STe:
|
|
|
|
register_edge_signal(sigmap, sync->signal, sync->type);
|
|
|
|
break;
|
|
|
|
|
|
|
|
// Level-type sync rules require no special handling.
|
|
|
|
case RTLIL::ST0:
|
|
|
|
case RTLIL::ST1:
|
|
|
|
case RTLIL::STa:
|
|
|
|
break;
|
|
|
|
|
2020-05-07 04:44:38 -05:00
|
|
|
case RTLIL::STg:
|
|
|
|
log_cmd_error("Global clock is not supported.\n");
|
|
|
|
|
2019-11-30 19:51:16 -06:00
|
|
|
// Handling of init-type sync rules is delegated to the `proc_init` pass, so we can use the wire
|
|
|
|
// attribute regardless of input.
|
|
|
|
case RTLIL::STi:
|
|
|
|
log_assert(false);
|
|
|
|
}
|
2019-12-09 13:05:52 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
for (auto wire : module->wires()) {
|
2020-12-12 18:34:32 -06:00
|
|
|
if (!flow.is_inlinable(wire)) continue;
|
2019-12-09 13:05:52 -06:00
|
|
|
if (wire->port_id != 0) continue;
|
2020-04-15 12:39:14 -05:00
|
|
|
if (wire->get_bool_attribute(ID::keep)) continue;
|
2020-12-12 18:34:32 -06:00
|
|
|
if (wire->name.begins_with("$") && !inline_internal) continue;
|
|
|
|
if (wire->name.begins_with("\\") && !inline_public) continue;
|
2020-04-21 13:46:36 -05:00
|
|
|
if (edge_wires[wire]) continue;
|
2020-11-01 06:49:20 -06:00
|
|
|
if (flow.wire_comb_defs[wire].size() > 1)
|
|
|
|
log_cmd_error("Wire %s.%s has multiple drivers.\n", log_id(module), log_id(wire));
|
cxxrtl: localize wires with multiple comb drivers, too.
Before this commit, any wire that was not driven by an output port of
exactly one comb cell would not be localized, even if there were no
feedback arcs through that wire. This would cause the wire to become
buffered and require (often quite a few) extraneous delta cycles
during evaluation. To alleviate this problem, -O5 was running
`splitnets -driver`.
However, this solution was mistaken. Because `splitnets -driver`
followed by `opt_clean -purge` would produce more nets with multiple
drivers, it would have to be iterated to fixpoint. Moreover, even if
this was done, it would not be sufficient because `opt_clean -purge`
does not currently remove wires with the `\init` attribute (and it
is not desirable to remove such wires, since they correspond to
registers and may be useful for debugging).
The proper solution is to consider the condition in which a wire
may be localized. Specifically, if there are no feedback arcs through
this wire, and no part of the wire is driven by an output of a sync
cell, then the wire holds no state and is localizable.
After this commit, the original condition for not localizing a wire
is replaced by a check for any sync cell driving it. This makes it
unnecessary to run `splitnets -driver` in the majority of cases
to get a design with no buffered wires, and -O5 no longer includes
that pass. As a result, Minerva SRAM SoC no longer has any buffered
wires, and runs ~27% faster.
In addition, this commit prepares the flow graph for introduction
of sync outputs of black boxes.
Co-authored-by: Jean-François Nguyen <jf@lambdaconcept.com>
2020-04-21 08:33:42 -05:00
|
|
|
log_assert(flow.wire_comb_defs[wire].size() == 1);
|
2020-12-12 18:34:32 -06:00
|
|
|
inlined_wires[wire] = **flow.wire_comb_defs[wire].begin();
|
2019-12-09 13:05:52 -06:00
|
|
|
}
|
2019-12-10 14:09:24 -06:00
|
|
|
|
|
|
|
dict<FlowGraph::Node*, pool<const RTLIL::Wire*>, hash_ptr_ops> node_defs;
|
cxxrtl: localize wires with multiple comb drivers, too.
Before this commit, any wire that was not driven by an output port of
exactly one comb cell would not be localized, even if there were no
feedback arcs through that wire. This would cause the wire to become
buffered and require (often quite a few) extraneous delta cycles
during evaluation. To alleviate this problem, -O5 was running
`splitnets -driver`.
However, this solution was mistaken. Because `splitnets -driver`
followed by `opt_clean -purge` would produce more nets with multiple
drivers, it would have to be iterated to fixpoint. Moreover, even if
this was done, it would not be sufficient because `opt_clean -purge`
does not currently remove wires with the `\init` attribute (and it
is not desirable to remove such wires, since they correspond to
registers and may be useful for debugging).
The proper solution is to consider the condition in which a wire
may be localized. Specifically, if there are no feedback arcs through
this wire, and no part of the wire is driven by an output of a sync
cell, then the wire holds no state and is localizable.
After this commit, the original condition for not localizing a wire
is replaced by a check for any sync cell driving it. This makes it
unnecessary to run `splitnets -driver` in the majority of cases
to get a design with no buffered wires, and -O5 no longer includes
that pass. As a result, Minerva SRAM SoC no longer has any buffered
wires, and runs ~27% faster.
In addition, this commit prepares the flow graph for introduction
of sync outputs of black boxes.
Co-authored-by: Jean-François Nguyen <jf@lambdaconcept.com>
2020-04-21 08:33:42 -05:00
|
|
|
for (auto wire_comb_def : flow.wire_comb_defs)
|
|
|
|
for (auto node : wire_comb_def.second)
|
|
|
|
node_defs[node].insert(wire_comb_def.first);
|
2019-12-10 14:09:24 -06:00
|
|
|
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
dict<FlowGraph::Node*, pool<const RTLIL::Wire*>, hash_ptr_ops> node_uses;
|
|
|
|
for (auto wire_use : flow.wire_uses)
|
|
|
|
for (auto node : wire_use.second)
|
|
|
|
node_uses[node].insert(wire_use.first);
|
|
|
|
|
2019-12-10 14:09:24 -06:00
|
|
|
Scheduler<FlowGraph::Node> scheduler;
|
|
|
|
dict<FlowGraph::Node*, Scheduler<FlowGraph::Node>::Vertex*, hash_ptr_ops> node_map;
|
|
|
|
for (auto node : flow.nodes)
|
|
|
|
node_map[node] = scheduler.add(node);
|
|
|
|
for (auto node_def : node_defs) {
|
|
|
|
auto vertex = node_map[node_def.first];
|
|
|
|
for (auto wire : node_def.second)
|
|
|
|
for (auto succ_node : flow.wire_uses[wire]) {
|
|
|
|
auto succ_vertex = node_map[succ_node];
|
|
|
|
vertex->succs.insert(succ_vertex);
|
|
|
|
succ_vertex->preds.insert(vertex);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
auto eval_order = scheduler.schedule();
|
|
|
|
pool<FlowGraph::Node*, hash_ptr_ops> evaluated;
|
|
|
|
pool<const RTLIL::Wire*> feedback_wires;
|
|
|
|
for (auto vertex : eval_order) {
|
|
|
|
auto node = vertex->data;
|
|
|
|
schedule[module].push_back(*node);
|
|
|
|
// Any wire that is an output of node vo and input of node vi where vo is scheduled later than vi
|
|
|
|
// is a feedback wire. Feedback wires indicate apparent logic loops in the design, which may be
|
|
|
|
// caused by a true logic loop, but usually are a benign result of dependency tracking that works
|
|
|
|
// on wire, not bit, level. Nevertheless, feedback wires cannot be localized.
|
|
|
|
evaluated.insert(node);
|
|
|
|
for (auto wire : node_defs[node])
|
|
|
|
for (auto succ_node : flow.wire_uses[wire])
|
|
|
|
if (evaluated[succ_node]) {
|
|
|
|
feedback_wires.insert(wire);
|
2020-12-12 18:34:32 -06:00
|
|
|
// Feedback wires may never be inlined because feedback requires state, but the point of
|
|
|
|
// inlining (and localization) is to eliminate state.
|
|
|
|
inlined_wires.erase(wire);
|
2019-12-10 14:09:24 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!feedback_wires.empty()) {
|
|
|
|
has_feedback_arcs = true;
|
2020-04-21 18:42:56 -05:00
|
|
|
log("Module `%s' contains feedback arcs through wires:\n", log_id(module));
|
2020-04-20 11:44:51 -05:00
|
|
|
for (auto wire : feedback_wires)
|
2020-04-21 18:42:56 -05:00
|
|
|
log(" %s\n", log_id(wire));
|
2019-12-10 14:09:24 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
for (auto wire : module->wires()) {
|
|
|
|
if (feedback_wires[wire]) continue;
|
2020-06-11 19:05:05 -05:00
|
|
|
if (wire->port_output && !module->get_bool_attribute(ID::top)) continue;
|
2020-06-09 16:50:09 -05:00
|
|
|
if (wire->name.begins_with("$") && !unbuffer_internal) continue;
|
|
|
|
if (wire->name.begins_with("\\") && !unbuffer_public) continue;
|
cxxrtl: localize wires with multiple comb drivers, too.
Before this commit, any wire that was not driven by an output port of
exactly one comb cell would not be localized, even if there were no
feedback arcs through that wire. This would cause the wire to become
buffered and require (often quite a few) extraneous delta cycles
during evaluation. To alleviate this problem, -O5 was running
`splitnets -driver`.
However, this solution was mistaken. Because `splitnets -driver`
followed by `opt_clean -purge` would produce more nets with multiple
drivers, it would have to be iterated to fixpoint. Moreover, even if
this was done, it would not be sufficient because `opt_clean -purge`
does not currently remove wires with the `\init` attribute (and it
is not desirable to remove such wires, since they correspond to
registers and may be useful for debugging).
The proper solution is to consider the condition in which a wire
may be localized. Specifically, if there are no feedback arcs through
this wire, and no part of the wire is driven by an output of a sync
cell, then the wire holds no state and is localizable.
After this commit, the original condition for not localizing a wire
is replaced by a check for any sync cell driving it. This makes it
unnecessary to run `splitnets -driver` in the majority of cases
to get a design with no buffered wires, and -O5 no longer includes
that pass. As a result, Minerva SRAM SoC no longer has any buffered
wires, and runs ~27% faster.
In addition, this commit prepares the flow graph for introduction
of sync outputs of black boxes.
Co-authored-by: Jean-François Nguyen <jf@lambdaconcept.com>
2020-04-21 08:33:42 -05:00
|
|
|
if (flow.wire_sync_defs.count(wire) > 0) continue;
|
2020-06-09 16:50:09 -05:00
|
|
|
unbuffered_wires.insert(wire);
|
2020-06-11 17:21:30 -05:00
|
|
|
if (edge_wires[wire]) continue;
|
|
|
|
if (wire->get_bool_attribute(ID::keep)) continue;
|
|
|
|
if (wire->port_input || wire->port_output) continue;
|
2020-06-09 16:50:09 -05:00
|
|
|
if (wire->name.begins_with("$") && !localize_internal) continue;
|
|
|
|
if (wire->name.begins_with("\\") && !localize_public) continue;
|
2019-12-10 14:09:24 -06:00
|
|
|
localized_wires.insert(wire);
|
|
|
|
}
|
2020-04-20 11:44:51 -05:00
|
|
|
|
|
|
|
// For maximum performance, the state of the simulation (which is the same as the set of its double buffered
|
|
|
|
// wires, since using a singly buffered wire for any kind of state introduces a race condition) should contain
|
|
|
|
// no wires attached to combinatorial outputs. Feedback wires, by definition, make that impossible. However,
|
|
|
|
// it is possible that a design with no feedback arcs would end up with doubly buffered wires in such cases
|
|
|
|
// as a wire with multiple drivers where one of them is combinatorial and the other is synchronous. Such designs
|
|
|
|
// also require more than one delta cycle to converge.
|
2020-06-09 16:50:09 -05:00
|
|
|
pool<const RTLIL::Wire*> buffered_comb_wires;
|
2020-04-20 11:44:51 -05:00
|
|
|
for (auto wire : module->wires()) {
|
2020-06-09 16:50:09 -05:00
|
|
|
if (flow.wire_comb_defs[wire].size() > 0 && !unbuffered_wires[wire] && !feedback_wires[wire])
|
|
|
|
buffered_comb_wires.insert(wire);
|
2020-04-20 11:44:51 -05:00
|
|
|
}
|
2020-06-09 16:50:09 -05:00
|
|
|
if (!buffered_comb_wires.empty()) {
|
|
|
|
has_buffered_comb_wires = true;
|
2020-04-21 18:42:56 -05:00
|
|
|
log("Module `%s' contains buffered combinatorial wires:\n", log_id(module));
|
2020-06-09 16:50:09 -05:00
|
|
|
for (auto wire : buffered_comb_wires)
|
2020-04-21 18:42:56 -05:00
|
|
|
log(" %s\n", log_id(wire));
|
2020-04-20 11:44:51 -05:00
|
|
|
}
|
2020-04-21 10:51:09 -05:00
|
|
|
|
2020-06-09 16:50:09 -05:00
|
|
|
eval_converges[module] = feedback_wires.empty() && buffered_comb_wires.empty();
|
2020-06-08 11:22:30 -05:00
|
|
|
|
2020-09-02 12:16:10 -05:00
|
|
|
for (auto item : flow.bit_has_state)
|
|
|
|
bit_has_state.insert(item);
|
|
|
|
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
if (debug_info && debug_eval) {
|
|
|
|
// Find wires that can be outlined, i.e. whose values can be always recovered from
|
|
|
|
// the values of other wires. (This is the inverse of inlining--any wire that can be
|
|
|
|
// inlined can also be outlined.) Although this may seem strictly less efficient, since
|
|
|
|
// such values are computed at least twice, second-order effects make outlining useful.
|
|
|
|
pool<const RTLIL::Wire*> worklist, visited;
|
|
|
|
for (auto wire : module->wires()) {
|
|
|
|
if (!wire->name.isPublic())
|
2020-12-13 09:33:47 -06:00
|
|
|
continue;
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
worklist.insert(wire);
|
|
|
|
}
|
|
|
|
while (!worklist.empty()) {
|
|
|
|
const RTLIL::Wire *wire = worklist.pop();
|
|
|
|
visited.insert(wire);
|
|
|
|
if (!localized_wires.count(wire) && !inlined_wires.count(wire))
|
|
|
|
continue; // member wire, doesn't need outlining
|
|
|
|
if (wire->name.isPublic() || !inlined_wires.count(wire))
|
|
|
|
debug_outlined_wires.insert(wire); // allow outlining of internal wires only
|
|
|
|
for (auto node : flow.wire_comb_defs[wire])
|
|
|
|
for (auto node_use : node_uses[node])
|
|
|
|
if (!visited.count(node_use))
|
|
|
|
worklist.insert(node_use);
|
|
|
|
}
|
|
|
|
}
|
2020-12-13 09:33:47 -06:00
|
|
|
if (debug_info && debug_alias) {
|
|
|
|
// Find wires that alias other wires or are tied to a constant. Both of these cases are
|
|
|
|
// directly expressible in the debug information, improving coverage at zero cost.
|
|
|
|
for (auto wire : module->wires()) {
|
|
|
|
if (!wire->name.isPublic())
|
|
|
|
continue;
|
|
|
|
const RTLIL::Wire *cursor = wire;
|
|
|
|
RTLIL::SigSpec alias_of;
|
|
|
|
while (1) {
|
|
|
|
if (!(flow.wire_def_inlinable.count(cursor) && flow.wire_def_inlinable[cursor]))
|
|
|
|
break; // not an alias: complex def
|
|
|
|
log_assert(flow.wire_comb_defs[cursor].size() == 1);
|
|
|
|
FlowGraph::Node *node = *flow.wire_comb_defs[cursor].begin();
|
|
|
|
if (node->type != FlowGraph::Node::Type::CONNECT)
|
|
|
|
break; // not an alias: def by cell
|
|
|
|
RTLIL::SigSpec rhs_sig = node->connect.second;
|
|
|
|
if (rhs_sig.is_fully_const()) {
|
|
|
|
alias_of = rhs_sig; // alias of const
|
|
|
|
break;
|
|
|
|
} else if (rhs_sig.is_wire()) {
|
|
|
|
RTLIL::Wire *rhs_wire = rhs_sig.as_wire(); // possible alias of wire
|
|
|
|
if (rhs_wire->port_input && !rhs_wire->port_output) {
|
|
|
|
alias_of = rhs_wire; // alias of input
|
|
|
|
break;
|
|
|
|
} else if (!localized_wires.count(rhs_wire) && !inlined_wires.count(rhs_wire)) {
|
|
|
|
alias_of = rhs_wire; // alias of member
|
|
|
|
break;
|
|
|
|
} else {
|
|
|
|
if (rhs_wire->name.isPublic() && debug_outlined_wires.count(rhs_wire))
|
|
|
|
alias_of = rhs_wire; // alias of either outline or another alias
|
|
|
|
cursor = rhs_wire; // keep looking
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
break; // not an alias: complex rhs
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (alias_of.empty()) {
|
|
|
|
continue;
|
|
|
|
} else if (alias_of.is_fully_const()) {
|
|
|
|
debug_const_wires[wire] = alias_of.as_const();
|
|
|
|
} else if (alias_of.is_wire()) {
|
|
|
|
debug_alias_wires[wire] = alias_of.as_wire();
|
|
|
|
} else log_abort();
|
|
|
|
if (inlined_wires.count(wire))
|
|
|
|
debug_outlined_wires.erase(wire);
|
|
|
|
}
|
|
|
|
}
|
2019-12-10 14:09:24 -06:00
|
|
|
}
|
2020-06-09 16:50:09 -05:00
|
|
|
if (has_feedback_arcs || has_buffered_comb_wires) {
|
2020-04-20 11:44:51 -05:00
|
|
|
// Although both non-feedback buffered combinatorial wires and apparent feedback wires may be eliminated
|
2020-05-26 01:00:40 -05:00
|
|
|
// by optimizing the design, if after `proc; flatten` there are any feedback wires remaining, it is very
|
2020-04-20 11:44:51 -05:00
|
|
|
// likely that these feedback wires are indicative of a true logic loop, so they get emphasized in the message.
|
|
|
|
const char *why_pessimistic = nullptr;
|
|
|
|
if (has_feedback_arcs)
|
|
|
|
why_pessimistic = "feedback wires";
|
2020-06-09 16:50:09 -05:00
|
|
|
else if (has_buffered_comb_wires)
|
2020-04-20 11:44:51 -05:00
|
|
|
why_pessimistic = "buffered combinatorial wires";
|
|
|
|
log_warning("Design contains %s, which require delta cycles during evaluation.\n", why_pessimistic);
|
2020-06-09 15:18:07 -05:00
|
|
|
if (!run_flatten)
|
|
|
|
log("Flattening may eliminate %s from the design.\n", why_pessimistic);
|
|
|
|
if (!run_proc)
|
|
|
|
log("Converting processes to netlists may eliminate %s from the design.\n", why_pessimistic);
|
2019-11-30 19:51:16 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-11-02 13:18:56 -06:00
|
|
|
// Inspects every module of `design` that this backend would process and reports three facts through
// the out-parameters (all of which are reset to false first):
//   has_top        - some module carries the `top` attribute;
//   has_sync_init  - some process has a sync rule of type RTLIL::STi;
//   has_packed_mem - some memory returned by Mem::get_all_memories() has its `packed` flag set.
// Blackbox modules lacking the `cxxrtl_blackbox` attribute and modules that are not selected are not
// examined. A module that is selected only in part is reported with log_cmd_error().
void check_design(RTLIL::Design *design, bool &has_top, bool &has_sync_init, bool &has_packed_mem)
{
	has_top = false;
	has_sync_init = false;
	has_packed_mem = false;

	for (auto module : design->modules()) {
		// Only blackboxes explicitly marked for CXXRTL are of interest to this backend.
		const bool foreign_blackbox =
			module->get_blackbox_attribute() && !module->has_attribute(ID(cxxrtl_blackbox));
		if (foreign_blackbox)
			continue;

		// A module must be selected either completely or not at all.
		if (!design->selected_whole_module(module) && design->selected_module(module)) {
			log_cmd_error("Can't handle partially selected module `%s'!\n", id2cstr(module->name));
		}
		if (!design->selected_module(module)) {
			continue;
		}

		if (module->get_bool_attribute(ID::top)) {
			has_top = true;
		}

		for (auto process_entry : module->processes) {
			for (auto sync_rule : process_entry.second->syncs) {
				if (sync_rule->type == RTLIL::STi) {
					has_sync_init = true;
				}
			}
		}

		// The Mem constructor also checks for well-formedness of $meminit cells, if any.
		for (auto &memory : Mem::get_all_memories(module)) {
			if (memory.packed) {
				has_packed_mem = true;
			}
		}
	}
}
|
|
|
|
|
|
|
|
// Brings `design` into the shape expected by analyze_design() and then runs that analysis.
// Depending on the run_hierarchy / run_flatten / run_proc settings and on what check_design() finds,
// this invokes the `hierarchy -auto-top`, `flatten`, `proc` (or just the `proc_prune`, `proc_clean`,
// `proc_init` trio) and `memory_unpack` passes. If any pass ran, the design is checked again. The
// function then asserts that a top module exists and that no init sync rules or packed memories remain.
void prepare_design(RTLIL::Design *design)
{
	bool design_changed = false;
	bool has_top, has_sync_init, has_packed_mem;

	// Runs a single Yosys pass on the design and records that the design was touched.
	auto run_pass = [&](const char *command) {
		Pass::call(design, command);
		design_changed = true;
	};

	log_push();
	check_design(design, has_top, has_sync_init, has_packed_mem);

	if (run_hierarchy && !has_top)
		run_pass("hierarchy -auto-top");

	if (run_flatten)
		run_pass("flatten");

	if (run_proc) {
		run_pass("proc");
	} else if (has_sync_init) {
		// Only proc_init is actually needed here; proc_prune and proc_clean are its prerequisites, so
		// they are run as well in case they have not been yet. (This allows `yosys foo.v -o foo.cc`
		// to work.)
		run_pass("proc_prune");
		run_pass("proc_clean");
		run_pass("proc_init");
	}

	if (has_packed_mem)
		run_pass("memory_unpack");

	// The flags gathered above are stale once any pass has modified the design, so gather them again.
	if (design_changed)
		check_design(design, has_top, has_sync_init, has_packed_mem);
	log_assert(has_top && !has_sync_init && !has_packed_mem);
	log_pop();

	if (design_changed)
		log_spacer();
	analyze_design(design);
}
|
|
|
|
};
|
|
|
|
|
|
|
|
struct CxxrtlBackend : public Backend {
|
2020-06-09 16:50:09 -05:00
|
|
|
static const int DEFAULT_OPT_LEVEL = 6;
|
2020-12-13 01:44:27 -06:00
|
|
|
static const int DEFAULT_DEBUG_LEVEL = 3;
|
2019-12-10 14:09:24 -06:00
|
|
|
|
2019-11-30 19:51:16 -06:00
|
|
|
// Constructs the backend, forwarding the name "cxxrtl" and a one-line summary string to the
// Backend base-class constructor. The constructor body itself does nothing.
CxxrtlBackend() : Backend("cxxrtl", "convert design to C++ RTL simulation") { }
|
2020-06-18 18:34:52 -05:00
|
|
|
void help() override
|
2019-11-30 19:51:16 -06:00
|
|
|
{
|
|
|
|
// |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
|
|
|
|
log("\n");
|
|
|
|
log(" write_cxxrtl [options] [filename]\n");
|
|
|
|
log("\n");
|
2020-04-16 20:41:08 -05:00
|
|
|
log("Write C++ code that simulates the design. The generated code requires a driver\n");
|
2020-04-16 11:30:43 -05:00
|
|
|
log("that instantiates the design, toggles its clock, and interacts with its ports.\n");
|
|
|
|
log("\n");
|
|
|
|
log("The following driver may be used as an example for a design with a single clock\n");
|
|
|
|
log("driving rising edge triggered flip-flops:\n");
|
2020-04-05 05:03:23 -05:00
|
|
|
log("\n");
|
|
|
|
log(" #include \"top.cc\"\n");
|
|
|
|
log("\n");
|
|
|
|
log(" int main() {\n");
|
|
|
|
log(" cxxrtl_design::p_top top;\n");
|
2020-04-16 11:30:43 -05:00
|
|
|
log(" top.step();\n");
|
2020-04-05 05:03:23 -05:00
|
|
|
log(" while (1) {\n");
|
2020-04-16 11:30:43 -05:00
|
|
|
log(" /* user logic */\n");
|
2020-06-26 03:30:44 -05:00
|
|
|
log(" top.p_clk.set(false);\n");
|
2020-04-05 05:03:23 -05:00
|
|
|
log(" top.step();\n");
|
2020-06-26 03:30:44 -05:00
|
|
|
log(" top.p_clk.set(true);\n");
|
2020-04-16 11:30:43 -05:00
|
|
|
log(" top.step();\n");
|
2020-04-05 05:03:23 -05:00
|
|
|
log(" }\n");
|
|
|
|
log(" }\n");
|
|
|
|
log("\n");
|
2020-04-16 11:30:43 -05:00
|
|
|
log("Note that CXXRTL simulations, just like the hardware they are simulating, are\n");
|
2020-04-16 20:41:08 -05:00
|
|
|
log("subject to race conditions. If, in the example above, the user logic would run\n");
|
2020-04-16 11:30:43 -05:00
|
|
|
log("simultaneously with the rising edge of the clock, the design would malfunction.\n");
|
|
|
|
log("\n");
|
2020-04-16 20:41:08 -05:00
|
|
|
log("This backend supports replacing parts of the design with black boxes implemented\n");
|
|
|
|
log("in C++. If a module marked as a CXXRTL black box, its implementation is ignored,\n");
|
|
|
|
log("and the generated code consists only of an interface and a factory function.\n");
|
|
|
|
log("The driver must implement the factory function that creates an implementation of\n");
|
|
|
|
log("the black box, taking into account the parameters it is instantiated with.\n");
|
|
|
|
log("\n");
|
|
|
|
log("For example, the following Verilog code defines a CXXRTL black box interface for\n");
|
|
|
|
log("a synchronous debug sink:\n");
|
|
|
|
log("\n");
|
2020-04-24 13:35:53 -05:00
|
|
|
log(" (* cxxrtl_blackbox *)\n");
|
2020-04-16 20:41:08 -05:00
|
|
|
log(" module debug(...);\n");
|
2020-04-24 13:35:53 -05:00
|
|
|
log(" (* cxxrtl_edge = \"p\" *) input clk;\n");
|
2020-04-16 20:41:08 -05:00
|
|
|
log(" input en;\n");
|
2020-04-21 09:49:36 -05:00
|
|
|
log(" input [7:0] i_data;\n");
|
2020-04-24 13:35:53 -05:00
|
|
|
log(" (* cxxrtl_sync *) output [7:0] o_data;\n");
|
2020-04-16 20:41:08 -05:00
|
|
|
log(" endmodule\n");
|
|
|
|
log("\n");
|
|
|
|
log("For this HDL interface, this backend will generate the following C++ interface:\n");
|
|
|
|
log("\n");
|
2020-04-19 11:22:02 -05:00
|
|
|
log(" struct bb_p_debug : public module {\n");
|
2020-04-21 09:49:36 -05:00
|
|
|
log(" value<1> p_clk;\n");
|
2020-04-21 08:59:42 -05:00
|
|
|
log(" bool posedge_p_clk() const { /* ... */ }\n");
|
2020-04-21 09:49:36 -05:00
|
|
|
log(" value<1> p_en;\n");
|
|
|
|
log(" value<8> p_i_data;\n");
|
|
|
|
log(" wire<8> p_o_data;\n");
|
2020-04-16 20:41:08 -05:00
|
|
|
log("\n");
|
2020-04-21 10:51:09 -05:00
|
|
|
log(" bool eval() override;\n");
|
2020-04-16 20:41:08 -05:00
|
|
|
log(" bool commit() override;\n");
|
|
|
|
log("\n");
|
2020-04-19 11:22:02 -05:00
|
|
|
log(" static std::unique_ptr<bb_p_debug>\n");
|
|
|
|
log(" create(std::string name, metadata_map parameters, metadata_map attributes);\n");
|
2020-04-16 20:41:08 -05:00
|
|
|
log(" };\n");
|
|
|
|
log("\n");
|
|
|
|
log("The `create' function must be implemented by the driver. For example, it could\n");
|
|
|
|
log("always provide an implementation logging the values to standard error stream:\n");
|
|
|
|
log("\n");
|
|
|
|
log(" namespace cxxrtl_design {\n");
|
|
|
|
log("\n");
|
2020-04-19 11:22:02 -05:00
|
|
|
log(" struct stderr_debug : public bb_p_debug {\n");
|
2020-04-21 10:51:09 -05:00
|
|
|
log(" bool eval() override {\n");
|
2020-04-21 09:49:36 -05:00
|
|
|
log(" if (posedge_p_clk() && p_en)\n");
|
|
|
|
log(" fprintf(stderr, \"debug: %%02x\\n\", p_i_data.data[0]);\n");
|
|
|
|
log(" p_o_data.next = p_i_data;\n");
|
2020-04-21 10:51:09 -05:00
|
|
|
log(" return bb_p_debug::eval();\n");
|
2020-04-16 20:41:08 -05:00
|
|
|
log(" }\n");
|
|
|
|
log(" };\n");
|
|
|
|
log("\n");
|
2020-04-19 11:22:02 -05:00
|
|
|
log(" std::unique_ptr<bb_p_debug>\n");
|
|
|
|
log(" bb_p_debug::create(std::string name, cxxrtl::metadata_map parameters,\n");
|
|
|
|
log(" cxxrtl::metadata_map attributes) {\n");
|
2020-04-16 20:41:08 -05:00
|
|
|
log(" return std::make_unique<stderr_debug>();\n");
|
|
|
|
log(" }\n");
|
|
|
|
log("\n");
|
|
|
|
log(" }\n");
|
|
|
|
log("\n");
|
2020-04-18 03:04:57 -05:00
|
|
|
log("For complex applications of black boxes, it is possible to parameterize their\n");
|
|
|
|
log("port widths. For example, the following Verilog code defines a CXXRTL black box\n");
|
|
|
|
log("interface for a configurable width debug sink:\n");
|
|
|
|
log("\n");
|
2020-04-24 13:35:53 -05:00
|
|
|
log(" (* cxxrtl_blackbox, cxxrtl_template = \"WIDTH\" *)\n");
|
2020-04-18 03:04:57 -05:00
|
|
|
log(" module debug(...);\n");
|
|
|
|
log(" parameter WIDTH = 8;\n");
|
2020-04-24 13:35:53 -05:00
|
|
|
log(" (* cxxrtl_edge = \"p\" *) input clk;\n");
|
2020-04-18 03:04:57 -05:00
|
|
|
log(" input en;\n");
|
2020-04-24 13:35:53 -05:00
|
|
|
log(" (* cxxrtl_width = \"WIDTH\" *) input [WIDTH - 1:0] i_data;\n");
|
|
|
|
log(" (* cxxrtl_width = \"WIDTH\" *) output [WIDTH - 1:0] o_data;\n");
|
2020-04-18 03:04:57 -05:00
|
|
|
log(" endmodule\n");
|
|
|
|
log("\n");
|
|
|
|
log("For this parametric HDL interface, this backend will generate the following C++\n");
|
|
|
|
log("interface (only the differences are shown):\n");
|
|
|
|
log("\n");
|
|
|
|
log(" template<size_t WIDTH>\n");
|
2020-04-19 11:22:02 -05:00
|
|
|
log(" struct bb_p_debug : public module {\n");
|
2020-04-18 03:04:57 -05:00
|
|
|
log(" // ...\n");
|
2020-04-21 09:49:36 -05:00
|
|
|
log(" value<WIDTH> p_i_data;\n");
|
|
|
|
log(" wire<WIDTH> p_o_data;\n");
|
2020-04-18 03:04:57 -05:00
|
|
|
log(" // ...\n");
|
2020-04-19 11:22:02 -05:00
|
|
|
log(" static std::unique_ptr<bb_p_debug<WIDTH>>\n");
|
|
|
|
log(" create(std::string name, metadata_map parameters, metadata_map attributes);\n");
|
2020-04-18 03:04:57 -05:00
|
|
|
log(" };\n");
|
|
|
|
log("\n");
|
|
|
|
log("The `create' function must be implemented by the driver, specialized for every\n");
|
|
|
|
log("possible combination of template parameters. (Specialization is necessary to\n");
|
|
|
|
log("enable separate compilation of generated code and black box implementations.)\n");
|
|
|
|
log("\n");
|
|
|
|
log(" template<size_t SIZE>\n");
|
2020-04-19 11:22:02 -05:00
|
|
|
log(" struct stderr_debug : public bb_p_debug<SIZE> {\n");
|
2020-04-18 03:04:57 -05:00
|
|
|
log(" // ...\n");
|
|
|
|
log(" };\n");
|
|
|
|
log("\n");
|
|
|
|
log(" template<>\n");
|
2020-04-19 11:22:02 -05:00
|
|
|
log(" std::unique_ptr<bb_p_debug<8>>\n");
|
|
|
|
log(" bb_p_debug<8>::create(std::string name, cxxrtl::metadata_map parameters,\n");
|
|
|
|
log(" cxxrtl::metadata_map attributes) {\n");
|
2020-04-18 03:04:57 -05:00
|
|
|
log(" return std::make_unique<stderr_debug<8>>();\n");
|
|
|
|
log(" }\n");
|
|
|
|
log("\n");
|
2020-04-16 20:41:08 -05:00
|
|
|
log("The following attributes are recognized by this backend:\n");
|
|
|
|
log("\n");
|
2020-04-24 13:35:53 -05:00
|
|
|
log(" cxxrtl_blackbox\n");
|
2020-04-16 20:41:08 -05:00
|
|
|
log(" only valid on modules. if specified, the module contents are ignored,\n");
|
|
|
|
log(" and the generated code includes only the module interface and a factory\n");
|
|
|
|
log(" function, which will be called to instantiate the module.\n");
|
|
|
|
log("\n");
|
2020-04-24 13:35:53 -05:00
|
|
|
log(" cxxrtl_edge\n");
|
2020-04-16 20:41:08 -05:00
|
|
|
log(" only valid on inputs of black boxes. must be one of \"p\", \"n\", \"a\".\n");
|
2020-04-21 08:59:42 -05:00
|
|
|
log(" if specified on signal `clk`, the generated code includes edge detectors\n");
|
|
|
|
log(" `posedge_p_clk()` (if \"p\"), `negedge_p_clk()` (if \"n\"), or both (if\n");
|
|
|
|
log(" \"a\"), simplifying implementation of clocked black boxes.\n");
|
2020-04-16 20:41:08 -05:00
|
|
|
log("\n");
|
2020-04-24 13:35:53 -05:00
|
|
|
log(" cxxrtl_template\n");
|
2020-04-18 03:04:57 -05:00
|
|
|
log(" only valid on black boxes. must contain a space separated sequence of\n");
|
|
|
|
log(" identifiers that have a corresponding black box parameters. for each\n");
|
|
|
|
log(" of them, the generated code includes a `size_t` template parameter.\n");
|
|
|
|
log("\n");
|
2020-04-24 13:35:53 -05:00
|
|
|
log(" cxxrtl_width\n");
|
2020-04-18 03:04:57 -05:00
|
|
|
log(" only valid on ports of black boxes. must be a constant expression, which\n");
|
|
|
|
log(" is directly inserted into generated code.\n");
|
|
|
|
log("\n");
|
2020-04-24 13:35:53 -05:00
|
|
|
log(" cxxrtl_comb, cxxrtl_sync\n");
|
2020-04-21 16:48:17 -05:00
|
|
|
log(" only valid on outputs of black boxes. if specified, indicates that every\n");
|
|
|
|
log(" bit of the output port is driven, correspondingly, by combinatorial or\n");
|
|
|
|
log(" synchronous logic. this knowledge is used for scheduling optimizations.\n");
|
|
|
|
log(" if neither is specified, the output will be pessimistically treated as\n");
|
|
|
|
log(" driven by both combinatorial and synchronous logic.\n");
|
|
|
|
log("\n");
|
2020-04-05 05:03:23 -05:00
|
|
|
log("The following options are supported by this backend:\n");
|
2019-11-30 19:51:16 -06:00
|
|
|
log("\n");
|
2020-04-14 07:07:58 -05:00
|
|
|
log(" -header\n");
|
|
|
|
log(" generate separate interface (.h) and implementation (.cc) files.\n");
|
|
|
|
log(" if specified, the backend must be called with a filename, and filename\n");
|
|
|
|
log(" of the interface is derived from filename of the implementation.\n");
|
|
|
|
log(" otherwise, interface and implementation are generated together.\n");
|
|
|
|
log("\n");
|
|
|
|
log(" -namespace <ns-name>\n");
|
|
|
|
log(" place the generated code into namespace <ns-name>. if not specified,\n");
|
|
|
|
log(" \"cxxrtl_design\" is used.\n");
|
|
|
|
log("\n");
|
2020-11-02 13:18:56 -06:00
|
|
|
log(" -nohierarchy\n");
|
|
|
|
log(" use design hierarchy as-is. in most designs, a top module should be\n");
|
|
|
|
log(" present as it is exposed through the C API and has unbuffered outputs\n");
|
|
|
|
log(" for improved performance; it will be determined automatically if absent.\n");
|
|
|
|
log("\n");
|
2020-06-09 15:18:07 -05:00
|
|
|
log(" -noflatten\n");
|
|
|
|
log(" don't flatten the design. fully flattened designs can evaluate within\n");
|
|
|
|
log(" one delta cycle if they have no combinatorial feedback.\n");
|
|
|
|
log(" note that the debug interface and waveform dumps use full hierarchical\n");
|
|
|
|
log(" names for all wires even in flattened designs.\n");
|
|
|
|
log("\n");
|
|
|
|
log(" -noproc\n");
|
|
|
|
log(" don't convert processes to netlists. in most designs, converting\n");
|
|
|
|
log(" processes significantly improves evaluation performance at the cost of\n");
|
|
|
|
log(" slight increase in compilation time.\n");
|
|
|
|
log("\n");
|
2019-12-09 13:05:52 -06:00
|
|
|
log(" -O <level>\n");
|
2019-12-10 14:09:24 -06:00
|
|
|
log(" set the optimization level. the default is -O%d. higher optimization\n", DEFAULT_OPT_LEVEL);
|
|
|
|
log(" levels dramatically decrease compile and run time, and highest level\n");
|
|
|
|
log(" possible for a design should be used.\n");
|
2019-12-09 13:05:52 -06:00
|
|
|
log("\n");
|
|
|
|
log(" -O0\n");
|
|
|
|
log(" no optimization.\n");
|
|
|
|
log("\n");
|
|
|
|
log(" -O1\n");
|
2020-06-09 15:55:40 -05:00
|
|
|
log(" localize internal wires if possible.\n");
|
2019-12-09 13:05:52 -06:00
|
|
|
log("\n");
|
|
|
|
log(" -O2\n");
|
2020-06-09 16:50:09 -05:00
|
|
|
log(" like -O1, and unbuffer internal wires if possible.\n");
|
2019-12-10 14:09:24 -06:00
|
|
|
log("\n");
|
|
|
|
log(" -O3\n");
|
2020-12-12 18:34:32 -06:00
|
|
|
log(" like -O2, and inline internal wires if possible.\n");
|
2019-12-10 14:09:24 -06:00
|
|
|
log("\n");
|
|
|
|
log(" -O4\n");
|
2020-06-09 16:50:09 -05:00
|
|
|
log(" like -O3, and unbuffer public wires not marked (*keep*) if possible.\n");
|
|
|
|
log("\n");
|
|
|
|
log(" -O5\n");
|
|
|
|
log(" like -O4, and localize public wires not marked (*keep*) if possible.\n");
|
|
|
|
log("\n");
|
|
|
|
log(" -O6\n");
|
2020-12-12 18:34:32 -06:00
|
|
|
log(" like -O5, and inline public wires not marked (*keep*) if possible.\n");
|
2020-06-09 16:50:09 -05:00
|
|
|
log("\n");
|
2020-05-26 19:21:15 -05:00
|
|
|
log(" -g <level>\n");
|
|
|
|
log(" set the debug level. the default is -g%d. higher debug levels provide\n", DEFAULT_DEBUG_LEVEL);
|
|
|
|
log(" more visibility and generate more code, but do not pessimize evaluation.\n");
|
|
|
|
log("\n");
|
|
|
|
log(" -g0\n");
|
2020-12-13 01:44:27 -06:00
|
|
|
log(" no debug information. the C API is unavailable.\n");
|
2020-05-26 19:21:15 -05:00
|
|
|
log("\n");
|
|
|
|
log(" -g1\n");
|
2020-12-13 01:44:27 -06:00
|
|
|
log(" debug information for member public wires only. this is the bare minimum\n");
|
|
|
|
log(" necessary to access all design state. enables the C API.\n");
|
2020-05-26 19:21:15 -05:00
|
|
|
log("\n");
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
log(" -g2\n");
|
2020-12-13 01:44:27 -06:00
|
|
|
log(" like -g1, and include debug information for public wires that are tied\n");
|
|
|
|
log(" to a constant or another public wire.\n");
|
|
|
|
log("\n");
|
|
|
|
log(" -g3\n");
|
|
|
|
log(" like -g2, and compute debug information on demand for all public wires\n");
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
log(" that were optimized out.\n");
|
|
|
|
log("\n");
|
2019-11-30 19:51:16 -06:00
|
|
|
}
|
2020-05-26 19:21:15 -05:00
|
|
|
|
2020-06-18 18:34:52 -05:00
|
|
|
void execute(std::ostream *&f, std::string filename, std::vector<std::string> args, RTLIL::Design *design) override
|
2019-11-30 19:51:16 -06:00
|
|
|
{
|
2020-11-02 13:18:56 -06:00
|
|
|
bool nohierarchy = false;
|
2020-06-09 15:18:07 -05:00
|
|
|
bool noflatten = false;
|
|
|
|
bool noproc = false;
|
2019-12-10 14:09:24 -06:00
|
|
|
int opt_level = DEFAULT_OPT_LEVEL;
|
2020-05-26 19:21:15 -05:00
|
|
|
int debug_level = DEFAULT_DEBUG_LEVEL;
|
2020-04-14 07:07:58 -05:00
|
|
|
CxxrtlWorker worker;
|
2019-12-09 13:05:52 -06:00
|
|
|
|
2019-11-30 19:51:16 -06:00
|
|
|
log_header(design, "Executing CXXRTL backend.\n");
|
|
|
|
|
|
|
|
size_t argidx;
|
|
|
|
for (argidx = 1; argidx < args.size(); argidx++)
|
|
|
|
{
|
2020-11-02 13:18:56 -06:00
|
|
|
if (args[argidx] == "-nohierarchy") {
|
|
|
|
nohierarchy = true;
|
|
|
|
continue;
|
|
|
|
}
|
2020-06-09 15:18:07 -05:00
|
|
|
if (args[argidx] == "-noflatten") {
|
|
|
|
noflatten = true;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (args[argidx] == "-noproc") {
|
|
|
|
noproc = true;
|
|
|
|
continue;
|
|
|
|
}
|
2020-06-09 16:50:09 -05:00
|
|
|
if (args[argidx] == "-Og") {
|
2020-12-13 01:44:27 -06:00
|
|
|
log_warning("The `-Og` option has been removed. Use `-g3` instead for complete "
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
"design coverage regardless of optimization level.\n");
|
2020-06-09 16:50:09 -05:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (args[argidx] == "-O" && argidx+1 < args.size() && args[argidx+1] == "g") {
|
|
|
|
argidx++;
|
2020-12-13 01:44:27 -06:00
|
|
|
log_warning("The `-Og` option has been removed. Use `-g3` instead for complete "
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
"design coverage regardless of optimization level.\n");
|
2020-06-09 16:50:09 -05:00
|
|
|
continue;
|
|
|
|
}
|
2019-12-09 13:05:52 -06:00
|
|
|
if (args[argidx] == "-O" && argidx+1 < args.size()) {
|
|
|
|
opt_level = std::stoi(args[++argidx]);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (args[argidx].substr(0, 2) == "-O" && args[argidx].size() == 3 && isdigit(args[argidx][2])) {
|
|
|
|
opt_level = std::stoi(args[argidx].substr(2));
|
|
|
|
continue;
|
|
|
|
}
|
2020-05-26 19:21:15 -05:00
|
|
|
if (args[argidx] == "-g" && argidx+1 < args.size()) {
|
|
|
|
debug_level = std::stoi(args[++argidx]);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (args[argidx].substr(0, 2) == "-g" && args[argidx].size() == 3 && isdigit(args[argidx][2])) {
|
|
|
|
debug_level = std::stoi(args[argidx].substr(2));
|
|
|
|
continue;
|
|
|
|
}
|
2020-04-14 07:07:58 -05:00
|
|
|
if (args[argidx] == "-header") {
|
|
|
|
worker.split_intf = true;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (args[argidx] == "-namespace" && argidx+1 < args.size()) {
|
|
|
|
worker.design_ns = args[++argidx];
|
|
|
|
continue;
|
|
|
|
}
|
2019-11-30 19:51:16 -06:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
extra_args(f, filename, args, argidx);
|
|
|
|
|
2020-11-02 13:18:56 -06:00
|
|
|
worker.run_hierarchy = !nohierarchy;
|
2020-06-09 15:18:07 -05:00
|
|
|
worker.run_flatten = !noflatten;
|
|
|
|
worker.run_proc = !noproc;
|
2019-12-09 13:05:52 -06:00
|
|
|
switch (opt_level) {
|
2020-05-26 01:00:40 -05:00
|
|
|
// the highest level here must match DEFAULT_OPT_LEVEL
|
2020-06-09 16:50:09 -05:00
|
|
|
case 6:
|
2020-12-12 18:34:32 -06:00
|
|
|
worker.inline_public = true;
|
2020-05-04 14:12:30 -05:00
|
|
|
YS_FALLTHROUGH
|
2020-06-09 16:50:09 -05:00
|
|
|
case 5:
|
2020-06-09 15:55:40 -05:00
|
|
|
worker.localize_public = true;
|
2020-05-04 14:12:30 -05:00
|
|
|
YS_FALLTHROUGH
|
2020-06-09 16:50:09 -05:00
|
|
|
case 4:
|
|
|
|
worker.unbuffer_public = true;
|
|
|
|
YS_FALLTHROUGH
|
|
|
|
case 3:
|
2020-12-12 18:34:32 -06:00
|
|
|
worker.inline_internal = true;
|
2020-05-04 14:12:30 -05:00
|
|
|
YS_FALLTHROUGH
|
2020-06-09 16:50:09 -05:00
|
|
|
case 2:
|
2020-06-09 15:55:40 -05:00
|
|
|
worker.localize_internal = true;
|
2020-05-04 14:12:30 -05:00
|
|
|
YS_FALLTHROUGH
|
2020-06-09 16:50:09 -05:00
|
|
|
case 1:
|
|
|
|
worker.unbuffer_internal = true;
|
|
|
|
YS_FALLTHROUGH
|
2019-12-09 13:05:52 -06:00
|
|
|
case 0:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
log_cmd_error("Invalid optimization level %d.\n", opt_level);
|
|
|
|
}
|
2020-05-26 19:21:15 -05:00
|
|
|
switch (debug_level) {
|
|
|
|
// the highest level here must match DEFAULT_DEBUG_LEVEL
|
2020-12-13 01:44:27 -06:00
|
|
|
case 3:
|
cxxrtl: implement debug information outlining.
Aggressive wire localization and inlining is necessary for CXXRTL to
achieve high performance. However, that comes with a cost: reduced
debug information coverage. Previously, as a workaround, the `-Og`
option could have been used to guarantee complete coverage, at a cost
of a significant performance penalty.
This commit introduces debug information outlining. The main eval()
function is compiled with the user-specified optimization settings.
In tandem, an auxiliary debug_eval() function, compiled from the same
netlist, can be used to reconstruct the values of localized/inlined
signals on demand. To the extent that it is possible, debug_eval()
reuses the results of computations performed by eval(), only filling
in the missing values.
Benchmarking a representative design (Minerva SoC SRAM) shows that:
* Switching from `-O4`/`-Og` to `-O6` reduces runtime by ~40%.
* Switching from `-g1` to `-g2`, both used with `-O6`, increases
compile time by ~25%.
* Although `-g2` increases the resident size of generated modules,
this has no effect on runtime.
Because the impact of `-g2` is minimal and the benefits of having
unconditional 100% debug information coverage (and the performance
improvement as well) are major, this commit removes `-Og` and changes
the defaults to `-O6 -g2`.
We'll have our cake and eat it too!
2020-12-13 01:03:16 -06:00
|
|
|
worker.debug_eval = true;
|
|
|
|
YS_FALLTHROUGH
|
2020-12-13 01:44:27 -06:00
|
|
|
case 2:
|
|
|
|
worker.debug_alias = true;
|
|
|
|
YS_FALLTHROUGH
|
2020-05-26 19:21:15 -05:00
|
|
|
case 1:
|
|
|
|
worker.debug_info = true;
|
|
|
|
YS_FALLTHROUGH
|
|
|
|
case 0:
|
|
|
|
break;
|
|
|
|
default:
|
2020-06-07 22:21:08 -05:00
|
|
|
log_cmd_error("Invalid debug information level %d.\n", debug_level);
|
2020-05-26 19:21:15 -05:00
|
|
|
}
|
|
|
|
|
2020-04-14 07:07:58 -05:00
|
|
|
std::ofstream intf_f;
|
|
|
|
if (worker.split_intf) {
|
|
|
|
if (filename == "<stdout>")
|
|
|
|
log_cmd_error("Option -header must be used with a filename.\n");
|
|
|
|
|
|
|
|
worker.intf_filename = filename.substr(0, filename.rfind('.')) + ".h";
|
|
|
|
intf_f.open(worker.intf_filename, std::ofstream::trunc);
|
|
|
|
if (intf_f.fail())
|
|
|
|
log_cmd_error("Can't open file `%s' for writing: %s\n",
|
|
|
|
worker.intf_filename.c_str(), strerror(errno));
|
|
|
|
|
|
|
|
worker.intf_f = &intf_f;
|
|
|
|
}
|
|
|
|
worker.impl_f = f;
|
|
|
|
|
2019-11-30 19:51:16 -06:00
|
|
|
worker.prepare_design(design);
|
|
|
|
worker.dump_design(design);
|
|
|
|
}
|
|
|
|
} CxxrtlBackend;
|
|
|
|
|
|
|
|
PRIVATE_NAMESPACE_END
|