Merge pull request #1979 from whitequark/cxxrtl-go-faster

cxxrtl: Gas gas gas! I'm gonna step on the gas! Tonight I'll fly!
Claire Wolf, 2020-04-22 16:50:45 +02:00 (committed by GitHub)
commit 95c74b319b
2 changed files with 396 additions and 184 deletions


@ -171,6 +171,11 @@ struct Scheduler {
}
};
bool is_input_wire(const RTLIL::Wire *wire)
{
return wire->port_input && !wire->port_output;
}
bool is_unary_cell(RTLIL::IdString type)
{
return type.in(
@ -210,11 +215,54 @@ bool is_internal_cell(RTLIL::IdString type)
return type[0] == '$' && !type.begins_with("$paramod\\");
}
bool is_cxxrtl_blackbox_cell(const RTLIL::Cell *cell)
{
RTLIL::Module *cell_module = cell->module->design->module(cell->type);
log_assert(cell_module != nullptr);
return cell_module->get_bool_attribute(ID(cxxrtl.blackbox));
}
enum class CxxrtlPortType {
UNKNOWN = 0, // or mixed comb/sync
COMB = 1,
SYNC = 2,
};
CxxrtlPortType cxxrtl_port_type(const RTLIL::Cell *cell, RTLIL::IdString port)
{
RTLIL::Module *cell_module = cell->module->design->module(cell->type);
if (cell_module == nullptr || !cell_module->get_bool_attribute(ID(cxxrtl.blackbox)))
return CxxrtlPortType::UNKNOWN;
RTLIL::Wire *cell_output_wire = cell_module->wire(port);
log_assert(cell_output_wire != nullptr);
bool is_comb = cell_output_wire->get_bool_attribute(ID(cxxrtl.comb));
bool is_sync = cell_output_wire->get_bool_attribute(ID(cxxrtl.sync));
if (is_comb && is_sync)
log_cmd_error("Port `%s.%s' is marked as both `cxxrtl.comb` and `cxxrtl.sync`.\n",
log_id(cell_module), log_signal(cell_output_wire));
else if (is_comb)
return CxxrtlPortType::COMB;
else if (is_sync)
return CxxrtlPortType::SYNC;
return CxxrtlPortType::UNKNOWN;
}
bool is_cxxrtl_comb_port(const RTLIL::Cell *cell, RTLIL::IdString port)
{
return cxxrtl_port_type(cell, port) == CxxrtlPortType::COMB;
}
bool is_cxxrtl_sync_port(const RTLIL::Cell *cell, RTLIL::IdString port)
{
return cxxrtl_port_type(cell, port) == CxxrtlPortType::SYNC;
}
struct FlowGraph {
struct Node {
enum class Type {
CONNECT,
CELL,
CELL_SYNC,
CELL_EVAL,
PROCESS
};
@ -225,7 +273,7 @@ struct FlowGraph {
};
std::vector<Node*> nodes;
dict<const RTLIL::Wire*, pool<Node*, hash_ptr_ops>> wire_defs, wire_uses;
dict<const RTLIL::Wire*, pool<Node*, hash_ptr_ops>> wire_comb_defs, wire_sync_defs, wire_uses;
dict<const RTLIL::Wire*, bool> wire_def_elidable, wire_use_elidable;
~FlowGraph()
@ -234,13 +282,17 @@ struct FlowGraph {
delete node;
}
void add_defs(Node *node, const RTLIL::SigSpec &sig, bool elidable)
void add_defs(Node *node, const RTLIL::SigSpec &sig, bool fully_sync, bool elidable)
{
for (auto chunk : sig.chunks())
if (chunk.wire)
wire_defs[chunk.wire].insert(node);
// Only defs of an entire wire in the right order can be elided.
if (sig.is_wire())
if (chunk.wire) {
if (fully_sync)
wire_sync_defs[chunk.wire].insert(node);
else
wire_comb_defs[chunk.wire].insert(node);
}
// Only comb defs of an entire wire in the right order can be elided.
if (!fully_sync && sig.is_wire())
wire_def_elidable[sig.as_wire()] = elidable;
}
@ -268,7 +320,7 @@ struct FlowGraph {
// Connections
void add_connect_defs_uses(Node *node, const RTLIL::SigSig &conn)
{
add_defs(node, conn.first, /*elidable=*/true);
add_defs(node, conn.first, /*fully_sync=*/false, /*elidable=*/true);
add_uses(node, conn.second);
}
@ -283,21 +335,59 @@ struct FlowGraph {
}
// Cells
void add_cell_defs_uses(Node *node, const RTLIL::Cell *cell)
void add_cell_sync_defs(Node *node, const RTLIL::Cell *cell)
{
// To understand why this node type is necessary and why it produces comb defs, consider a cell
// with input \i and sync output \o, used in a design such that \i is connected to \o. This does
// not result in a feedback arc because the output is synchronous. However, a naive implementation
// of code generation for cells that assigns to inputs, evaluates cells, assigns from outputs
// would not be able to immediately converge...
//
// wire<1> i_tmp;
// cell->p_i = i_tmp.curr;
// cell->eval();
// i_tmp.next = cell->p_o.curr;
//
// ... since the wire connecting the input and output ports would not be localizable. To solve
// this, the cell is split into two scheduling nodes; one exclusively for sync outputs, and
// another for inputs and all non-sync outputs. This way the generated code can be rearranged...
//
// value<1> i_tmp;
// i_tmp = cell->p_o.curr;
// cell->p_i = i_tmp;
// cell->eval();
//
// eliminating the unnecessary delta cycle. Conceptually, the CELL_SYNC node type is a series of
// connections of the form `connect \lhs \cell.\sync_output`; the right-hand side of these is not
// expressible as a wire in RTLIL. If it were expressible, then `\cell.\sync_output` would have a sync def,
// and this node would be an ordinary CONNECT node, with `\lhs` having a comb def. Because it isn't,
// a special node type is used, the right-hand side does not appear anywhere, and the left-hand
// side has a comb def.
for (auto conn : cell->connections())
if (cell->output(conn.first))
if (is_cxxrtl_sync_port(cell, conn.first)) {
// See note regarding elidability below.
add_defs(node, conn.second, /*fully_sync=*/false, /*elidable=*/false);
}
}
void add_cell_eval_defs_uses(Node *node, const RTLIL::Cell *cell)
{
log_assert(cell->known());
for (auto conn : cell->connections()) {
if (cell->output(conn.first)) {
if (is_sync_ff_cell(cell->type) || (cell->type == ID($memrd) && cell->getParam(ID::CLK_ENABLE).as_bool()))
/* non-combinatorial outputs do not introduce defs */;
else if (is_elidable_cell(cell->type))
add_defs(node, conn.second, /*elidable=*/true);
if (is_elidable_cell(cell->type))
add_defs(node, conn.second, /*fully_sync=*/false, /*elidable=*/true);
else if (is_sync_ff_cell(cell->type) || (cell->type == ID($memrd) && cell->getParam(ID::CLK_ENABLE).as_bool()))
add_defs(node, conn.second, /*fully_sync=*/true, /*elidable=*/false);
else if (is_internal_cell(cell->type))
add_defs(node, conn.second, /*elidable=*/false);
else {
// Unlike outputs of internal cells (which generate code that depends on the ability to set the output
// wire bits), outputs of user cells are normal wires, and the wires connected to them can be elided.
add_defs(node, conn.second, /*elidable=*/true);
add_defs(node, conn.second, /*fully_sync=*/false, /*elidable=*/false);
else if (!is_cxxrtl_sync_port(cell, conn.first)) {
// Although at first it looks like outputs of user-defined cells may always be elided, the reality is
// more complex. Fully sync outputs produce no defs and so don't participate in elision. Fully comb
// outputs are assigned in a different way depending on whether the cell's eval() immediately converged.
// Unknown/mixed outputs could be elided, but should be rare in practical designs and don't justify
// the infrastructure required to elide outputs of cells with many of them.
add_defs(node, conn.second, /*fully_sync=*/false, /*elidable=*/false);
}
}
if (cell->input(conn.first))
@ -307,11 +397,27 @@ struct FlowGraph {
Node *add_node(const RTLIL::Cell *cell)
{
log_assert(cell->known());
bool has_fully_sync_outputs = false;
for (auto conn : cell->connections())
if (cell->output(conn.first) && is_cxxrtl_sync_port(cell, conn.first)) {
has_fully_sync_outputs = true;
break;
}
if (has_fully_sync_outputs) {
Node *node = new Node;
node->type = Node::Type::CELL_SYNC;
node->cell = cell;
nodes.push_back(node);
add_cell_sync_defs(node, cell);
}
Node *node = new Node;
node->type = Node::Type::CELL;
node->type = Node::Type::CELL_EVAL;
node->cell = cell;
nodes.push_back(node);
add_cell_defs_uses(node, cell);
add_cell_eval_defs_uses(node, cell);
return node;
}
@ -319,7 +425,7 @@ struct FlowGraph {
void add_case_defs_uses(Node *node, const RTLIL::CaseRule *case_)
{
for (auto &action : case_->actions) {
add_defs(node, action.first, /*elidable=*/false);
add_defs(node, action.first, /*is_sync=*/false, /*elidable=*/false);
add_uses(node, action.second);
}
for (auto sub_switch : case_->switches) {
@ -338,9 +444,9 @@ struct FlowGraph {
for (auto sync : process->syncs)
for (auto action : sync->actions) {
if (sync->type == RTLIL::STp || sync->type == RTLIL::STn || sync->type == RTLIL::STe)
/* sync actions do not introduce feedback */;
add_defs(node, action.first, /*is_sync=*/true, /*elidable=*/false);
else
add_defs(node, action.first, /*elidable=*/false);
add_defs(node, action.first, /*is_sync=*/false, /*elidable=*/false);
add_uses(node, action.second);
}
}
@ -356,13 +462,6 @@ struct FlowGraph {
}
};
bool is_cxxrtl_blackbox_cell(const RTLIL::Cell *cell)
{
RTLIL::Module *cell_module = cell->module->design->module(cell->type);
log_assert(cell_module != nullptr);
return cell_module->get_bool_attribute(ID(cxxrtl.blackbox));
}
std::vector<std::string> split_by(const std::string &str, const std::string &sep)
{
std::vector<std::string> result;
@ -414,22 +513,24 @@ struct CxxrtlWorker {
bool elide_public = false;
bool localize_internal = false;
bool localize_public = false;
bool run_splitnets = false;
bool run_opt_clean_purge = false;
bool run_proc_flatten = false;
bool max_opt_level = false;
std::ostringstream f;
std::string indent;
int temporary = 0;
dict<const RTLIL::Module*, SigMap> sigmaps;
pool<const RTLIL::Wire*> sync_wires;
dict<RTLIL::SigBit, RTLIL::SyncType> sync_types;
pool<const RTLIL::Wire*> edge_wires;
dict<RTLIL::SigBit, RTLIL::SyncType> edge_types;
pool<const RTLIL::Memory*> writable_memories;
dict<const RTLIL::Cell*, pool<const RTLIL::Cell*>> transparent_for;
dict<const RTLIL::Cell*, dict<RTLIL::Wire*, RTLIL::IdString>> cell_wire_defs;
dict<const RTLIL::Wire*, FlowGraph::Node> elided_wires;
dict<const RTLIL::Module*, std::vector<FlowGraph::Node>> schedule;
pool<const RTLIL::Wire*> localized_wires;
dict<const RTLIL::Module*, pool<std::string>> blackbox_specializations;
dict<const RTLIL::Module*, bool> eval_converges;
void inc_indent() {
indent += "\t";
@ -669,18 +770,14 @@ struct CxxrtlWorker {
case FlowGraph::Node::Type::CONNECT:
dump_connect_elided(node.connect);
break;
case FlowGraph::Node::Type::CELL:
if (is_elidable_cell(node.cell->type)) {
dump_cell_elided(node.cell);
} else {
const char *access = is_cxxrtl_blackbox_cell(node.cell) ? "->" : ".";
f << mangle(node.cell) << access << mangle_wire_name(cell_wire_defs[node.cell][chunk.wire]) << ".curr";
}
case FlowGraph::Node::Type::CELL_EVAL:
log_assert(is_elidable_cell(node.cell->type));
dump_cell_elided(node.cell);
break;
default:
log_assert(false);
}
} else if (localized_wires[chunk.wire]) {
} else if (localized_wires[chunk.wire] || is_input_wire(chunk.wire)) {
f << mangle(chunk.wire);
} else {
f << mangle(chunk.wire) << (is_lhs ? ".next" : ".curr");
@ -740,8 +837,8 @@ struct CxxrtlWorker {
case FlowGraph::Node::Type::CONNECT:
collect_connect(node.connect, cells);
break;
case FlowGraph::Node::Type::CELL:
collect_cell(node.cell, cells);
case FlowGraph::Node::Type::CELL_EVAL:
collect_cell_eval(node.cell, cells);
break;
default:
log_assert(false);
@ -780,6 +877,19 @@ struct CxxrtlWorker {
f << ";\n";
}
void dump_cell_sync(const RTLIL::Cell *cell)
{
const char *access = is_cxxrtl_blackbox_cell(cell) ? "->" : ".";
f << indent << "// cell " << cell->name.str() << " syncs\n";
for (auto conn : cell->connections())
if (cell->output(conn.first))
if (is_cxxrtl_sync_port(cell, conn.first)) {
f << indent;
dump_sigspec_lhs(conn.second);
f << " = " << mangle(cell) << access << mangle_wire_name(conn.first) << ".curr;\n";
}
}
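
For orientation (not part of the diff), the C++ emitted by dump_cell_sync() for a hypothetical black box instance `debug` with a (* cxxrtl.sync *) output `o_data` driving a parent wire `\q` would look roughly like this (mangled names assumed; `p_q` becomes a plain value<> without `.next` if the wire is localized):

	// cell \debug syncs
	p_q.next = cell_p_debug->p_o_data.curr;
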
void dump_cell_elided(const RTLIL::Cell *cell)
{
// Unary cells
@ -833,7 +943,7 @@ struct CxxrtlWorker {
elided_wires.count(cell->getPort(ID::Y).as_wire());
}
void collect_cell(const RTLIL::Cell *cell, std::vector<RTLIL::IdString> &cells)
void collect_cell_eval(const RTLIL::Cell *cell, std::vector<RTLIL::IdString> &cells)
{
if (!is_cell_elided(cell))
return;
@ -844,7 +954,7 @@ struct CxxrtlWorker {
collect_sigspec_rhs(port.second, cells);
}
void dump_cell(const RTLIL::Cell *cell)
void dump_cell_eval(const RTLIL::Cell *cell)
{
if (is_cell_elided(cell))
return;
@ -1088,26 +1198,69 @@ struct CxxrtlWorker {
log_assert(cell->known());
const char *access = is_cxxrtl_blackbox_cell(cell) ? "->" : ".";
for (auto conn : cell->connections())
if (cell->input(conn.first)) {
if (cell->input(conn.first) && !cell->output(conn.first)) {
f << indent << mangle(cell) << access << mangle_wire_name(conn.first) << " = ";
dump_sigspec_rhs(conn.second);
f << ";\n";
if (getenv("CXXRTL_VOID_MY_WARRANTY")) {
// Until we have proper clock tree detection, this really awful hack that opportunistically
// propagates prev_* values for clocks can be used to estimate how much faster a design could
// be if only one clock edge was simulated by replacing:
// top.p_clk = value<1>{0u}; top.step();
// top.p_clk = value<1>{1u}; top.step();
// with:
// top.prev_p_clk = value<1>{0u}; top.p_clk = value<1>{1u}; top.step();
// Don't rely on this; it will be removed without warning.
RTLIL::Module *cell_module = cell->module->design->module(cell->type);
if (cell_module != nullptr && cell_module->wire(conn.first) && conn.second.is_wire()) {
RTLIL::Wire *cell_module_wire = cell_module->wire(conn.first);
if (edge_wires[conn.second.as_wire()] && edge_wires[cell_module_wire]) {
f << indent << mangle(cell) << access << "prev_" << mangle(cell_module_wire) << " = ";
f << "prev_" << mangle(conn.second.as_wire()) << ";\n";
}
}
}
} else if (cell->input(conn.first)) {
f << indent << mangle(cell) << access << mangle_wire_name(conn.first) << ".next = ";
dump_sigspec_rhs(conn.second);
f << ";\n";
}
f << indent << mangle(cell) << access << "eval();\n";
for (auto conn : cell->connections()) {
if (conn.second.is_wire()) {
RTLIL::Wire *wire = conn.second.as_wire();
if (elided_wires.count(wire) && cell_wire_defs[cell].count(wire))
continue;
auto assign_from_outputs = [&](bool cell_converged) {
for (auto conn : cell->connections()) {
if (cell->output(conn.first)) {
if (conn.second.empty())
continue; // ignore disconnected ports
if (is_cxxrtl_sync_port(cell, conn.first))
continue; // fully sync ports are handled in CELL_SYNC nodes
f << indent;
dump_sigspec_lhs(conn.second);
f << " = " << mangle(cell) << access << mangle_wire_name(conn.first);
// Similarly to how there is no purpose to buffering cell inputs, there is also no purpose to buffering
// combinatorial cell outputs in case the cell converges within one cycle. (To convince yourself that
// this optimization is valid, consider that, since the cell converged within one cycle, it would not
// have any buffered wires if they were not output ports. Imagine inlining the cell's eval() function,
// and consider the fate of the localized wires that used to be output ports.)
//
// Unlike cell inputs (which are never buffered), it is not possible to know a priori whether the cell
// (which may be late bound) will converge immediately. Because of this, the choice between using .curr
// (appropriate for buffered outputs) and .next (appropriate for unbuffered outputs) is made at runtime.
if (cell_converged && is_cxxrtl_comb_port(cell, conn.first))
f << ".next;\n";
else
f << ".curr;\n";
}
}
if (cell->output(conn.first)) {
if (conn.second.empty())
continue; // ignore disconnected ports
f << indent;
dump_sigspec_lhs(conn.second);
f << " = " << mangle(cell) << access << mangle_wire_name(conn.first) << ".curr;\n";
}
}
};
f << indent << "if (" << mangle(cell) << access << "eval()) {\n";
inc_indent();
assign_from_outputs(/*cell_converged=*/true);
dec_indent();
f << indent << "} else {\n";
inc_indent();
f << indent << "converged = false;\n";
assign_from_outputs(/*cell_converged=*/false);
dec_indent();
f << indent << "}\n";
}
}
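
As an illustration only (names hypothetical, source and destination wires assumed unbuffered), the code emitted by dump_cell_eval() for a black box instance `debug` with a (* cxxrtl.comb *) output `o_data` would look roughly as follows; the converged branch reads the unbuffered .next value, while the fallback reads .curr and clears `converged`:

	cell_p_debug->p_i_data = p_i_data;
	if (cell_p_debug->eval()) {
		p_data = cell_p_debug->p_o_data.next;
	} else {
		converged = false;
		p_data = cell_p_debug->p_o_data.curr;
	}
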
@ -1253,21 +1406,17 @@ struct CxxrtlWorker {
}
}
void dump_wire(const RTLIL::Wire *wire, bool is_local)
void dump_wire(const RTLIL::Wire *wire, bool is_local_context)
{
if (elided_wires.count(wire))
return;
if (localized_wires.count(wire) != is_local_context)
return;
if (is_local) {
if (!localized_wires.count(wire))
return;
if (is_local_context) {
dump_attrs(wire);
f << indent << "value<" << wire->width << "> " << mangle(wire) << ";\n";
} else {
if (localized_wires.count(wire))
return;
std::string width;
if (wire->module->has_attribute(ID(cxxrtl.blackbox)) && wire->has_attribute(ID(cxxrtl.width))) {
width = wire->get_string_attribute(ID(cxxrtl.width));
@ -1276,19 +1425,47 @@ struct CxxrtlWorker {
}
dump_attrs(wire);
f << indent << "wire<" << width << "> " << mangle(wire);
f << indent << (is_input_wire(wire) ? "value" : "wire") << "<" << width << "> " << mangle(wire);
if (wire->has_attribute(ID::init)) {
f << " ";
dump_const_init(wire->attributes.at(ID::init));
}
f << ";\n";
if (sync_wires[wire]) {
for (auto sync_type : sync_types) {
if (sync_type.first.wire == wire) {
if (sync_type.second != RTLIL::STn)
f << indent << "bool posedge_" << mangle(sync_type.first) << " = false;\n";
if (sync_type.second != RTLIL::STp)
f << indent << "bool negedge_" << mangle(sync_type.first) << " = false;\n";
if (edge_wires[wire]) {
if (is_input_wire(wire)) {
f << indent << "value<" << width << "> prev_" << mangle(wire);
if (wire->has_attribute(ID::init)) {
f << " ";
dump_const_init(wire->attributes.at(ID::init));
}
f << ";\n";
}
for (auto edge_type : edge_types) {
if (edge_type.first.wire == wire) {
std::string prev, next;
if (is_input_wire(wire)) {
prev = "prev_" + mangle(edge_type.first.wire);
next = mangle(edge_type.first.wire);
} else {
prev = mangle(edge_type.first.wire) + ".curr";
next = mangle(edge_type.first.wire) + ".next";
}
prev += ".slice<" + std::to_string(edge_type.first.offset) + ">().val()";
next += ".slice<" + std::to_string(edge_type.first.offset) + ">().val()";
if (edge_type.second != RTLIL::STn) {
f << indent << "bool posedge_" << mangle(edge_type.first) << "() const {\n";
inc_indent();
f << indent << "return !" << prev << " && " << next << ";\n";
dec_indent();
f << indent << "}\n";
}
if (edge_type.second != RTLIL::STp) {
f << indent << "bool negedge_" << mangle(edge_type.first) << "() const {\n";
inc_indent();
f << indent << "return " << prev << " && !" << next << ";\n";
dec_indent();
f << indent << "}\n";
}
}
}
}
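
For reference (not part of the diff), for an input clock `clk` used on a positive edge, dump_wire() now emits members along these lines (name mangling `p_clk` assumed):

	value<1> p_clk;
	value<1> prev_p_clk;
	bool posedge_p_clk() const {
		return !prev_p_clk.slice<0>().val() && p_clk.slice<0>().val();
	}
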
@ -1343,16 +1520,36 @@ struct CxxrtlWorker {
void dump_eval_method(RTLIL::Module *module)
{
inc_indent();
f << indent << "bool converged = " << (eval_converges.at(module) ? "true" : "false") << ";\n";
if (!module->get_bool_attribute(ID(cxxrtl.blackbox))) {
for (auto wire : module->wires()) {
if (edge_wires[wire]) {
for (auto edge_type : edge_types) {
if (edge_type.first.wire == wire) {
if (edge_type.second != RTLIL::STn) {
f << indent << "bool posedge_" << mangle(edge_type.first) << " = ";
f << "this->posedge_" << mangle(edge_type.first) << "();\n";
}
if (edge_type.second != RTLIL::STp) {
f << indent << "bool negedge_" << mangle(edge_type.first) << " = ";
f << "this->negedge_" << mangle(edge_type.first) << "();\n";
}
}
}
}
}
for (auto wire : module->wires())
dump_wire(wire, /*is_local=*/true);
dump_wire(wire, /*is_local_context=*/true);
for (auto node : schedule[module]) {
switch (node.type) {
case FlowGraph::Node::Type::CONNECT:
dump_connect(node.connect);
break;
case FlowGraph::Node::Type::CELL:
dump_cell(node.cell);
case FlowGraph::Node::Type::CELL_SYNC:
dump_cell_sync(node.cell);
break;
case FlowGraph::Node::Type::CELL_EVAL:
dump_cell_eval(node.cell);
break;
case FlowGraph::Node::Type::PROCESS:
dump_process(node.process);
@ -1360,14 +1557,7 @@ struct CxxrtlWorker {
}
}
}
for (auto sync_type : sync_types) {
if (sync_type.first.wire->module == module) {
if (sync_type.second != RTLIL::STn)
f << indent << "posedge_" << mangle(sync_type.first) << " = false;\n";
if (sync_type.second != RTLIL::STp)
f << indent << "negedge_" << mangle(sync_type.first) << " = false;\n";
}
}
f << indent << "return converged;\n";
dec_indent();
}
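
Putting the pieces together, the overall shape of a generated eval() method for an ordinary (non-blackbox) module is roughly the following sketch (details elided, names hypothetical):

	bool eval() override {
		bool converged = true;                      // false if the module needs delta cycles
		bool posedge_p_clk = this->posedge_p_clk(); // edge flags sampled once per call
		// ... localized wires, connections, CELL_SYNC/CELL_EVAL nodes, processes ...
		return converged;
	}
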
@ -1378,39 +1568,13 @@ struct CxxrtlWorker {
for (auto wire : module->wires()) {
if (elided_wires.count(wire) || localized_wires.count(wire))
continue;
if (sync_wires[wire]) {
std::string wire_prev = mangle(wire) + "_prev";
std::string wire_curr = mangle(wire) + ".curr";
std::string wire_edge = mangle(wire) + "_edge";
f << indent << "value<" << wire->width << "> " << wire_prev << " = " << wire_curr << ";\n";
f << indent << "if (" << mangle(wire) << ".commit()) {\n";
inc_indent();
f << indent << "value<" << wire->width << "> " << wire_edge << " = "
<< wire_prev << ".bit_xor(" << wire_curr << ");\n";
for (auto sync_type : sync_types) {
if (sync_type.first.wire != wire)
continue;
if (sync_type.second != RTLIL::STn) {
f << indent << "if (" << wire_edge << ".slice<" << sync_type.first.offset << ">().val() && "
<< wire_curr << ".slice<" << sync_type.first.offset << ">().val())\n";
inc_indent();
f << indent << "posedge_" << mangle(sync_type.first) << " = true;\n";
dec_indent();
}
if (sync_type.second != RTLIL::STp) {
f << indent << "if (" << wire_edge << ".slice<" << sync_type.first.offset << ">().val() && "
<< "!" << wire_curr << ".slice<" << sync_type.first.offset << ">().val())\n";
inc_indent();
f << indent << "negedge_" << mangle(sync_type.first) << " = true;\n";
dec_indent();
}
f << indent << "changed = true;\n";
}
dec_indent();
f << indent << "}\n";
} else if (!module->get_bool_attribute(ID(cxxrtl.blackbox)) || wire->port_id != 0) {
f << indent << "changed |= " << mangle(wire) << ".commit();\n";
if (is_input_wire(wire)) {
if (edge_wires[wire])
f << indent << "prev_" << mangle(wire) << " = " << mangle(wire) << ";\n";
continue;
}
if (!module->get_bool_attribute(ID(cxxrtl.blackbox)) || wire->port_id != 0)
f << indent << "changed |= " << mangle(wire) << ".commit();\n";
}
if (!module->get_bool_attribute(ID(cxxrtl.blackbox))) {
for (auto memory : module->memories) {
@ -1466,10 +1630,10 @@ struct CxxrtlWorker {
inc_indent();
for (auto wire : module->wires()) {
if (wire->port_id != 0)
dump_wire(wire, /*is_local=*/false);
dump_wire(wire, /*is_local_context=*/false);
}
f << "\n";
f << indent << "void eval() override {\n";
f << indent << "bool eval() override {\n";
dump_eval_method(module);
f << indent << "}\n";
f << "\n";
@ -1506,7 +1670,7 @@ struct CxxrtlWorker {
f << indent << "struct " << mangle(module) << " : public module {\n";
inc_indent();
for (auto wire : module->wires())
dump_wire(wire, /*is_local=*/false);
dump_wire(wire, /*is_local_context=*/false);
f << "\n";
bool has_memories = false;
for (auto memory : module->memories) {
@ -1537,7 +1701,7 @@ struct CxxrtlWorker {
}
if (has_cells)
f << "\n";
f << indent << "void eval() override;\n";
f << indent << "bool eval() override;\n";
f << indent << "bool commit() override;\n";
dec_indent();
f << indent << "}; // struct " << mangle(module) << "\n";
@ -1549,7 +1713,7 @@ struct CxxrtlWorker {
{
if (module->get_bool_attribute(ID(cxxrtl.blackbox)))
return;
f << indent << "void " << mangle(module) << "::eval() {\n";
f << indent << "bool " << mangle(module) << "::eval() {\n";
dump_eval_method(module);
f << indent << "}\n";
f << "\n";
@ -1638,16 +1802,18 @@ struct CxxrtlWorker {
log_assert(type == RTLIL::STp || type == RTLIL::STn || type == RTLIL::STe);
RTLIL::SigBit sigbit = signal[0];
if (!sync_types.count(sigbit))
sync_types[sigbit] = type;
else if (sync_types[sigbit] != type)
sync_types[sigbit] = RTLIL::STe;
sync_wires.insert(signal.as_wire());
if (!edge_types.count(sigbit))
edge_types[sigbit] = type;
else if (edge_types[sigbit] != type)
edge_types[sigbit] = RTLIL::STe;
edge_wires.insert(signal.as_wire());
}
void analyze_design(RTLIL::Design *design)
{
bool has_feedback_arcs = false;
bool has_buffered_wires = false;
for (auto module : design->modules()) {
if (!design->selected_module(module))
continue;
@ -1680,6 +1846,10 @@ struct CxxrtlWorker {
}
}
}
// Black boxes converge by default, since their implementations are quite unlikely to require
// internal propagation of comb signals.
eval_converges[module] = true;
continue;
}
@ -1788,23 +1958,15 @@ struct CxxrtlWorker {
if (wire->get_bool_attribute(ID::keep)) continue;
if (wire->name.begins_with("$") && !elide_internal) continue;
if (wire->name.begins_with("\\") && !elide_public) continue;
if (sync_wires[wire]) continue;
log_assert(flow.wire_defs[wire].size() == 1);
elided_wires[wire] = **flow.wire_defs[wire].begin();
if (edge_wires[wire]) continue;
log_assert(flow.wire_comb_defs[wire].size() == 1);
elided_wires[wire] = **flow.wire_comb_defs[wire].begin();
}
// Elided wires that are outputs of internal cells are always connected to a well known port (Y).
// For user cells, there could be multiple of them, and we need a way to look up the port name
// knowing only the wire.
for (auto cell : module->cells())
for (auto conn : cell->connections())
if (conn.second.is_wire() && elided_wires.count(conn.second.as_wire()))
cell_wire_defs[cell][conn.second.as_wire()] = conn.first;
dict<FlowGraph::Node*, pool<const RTLIL::Wire*>, hash_ptr_ops> node_defs;
for (auto wire_def : flow.wire_defs)
for (auto node : wire_def.second)
node_defs[node].insert(wire_def.first);
for (auto wire_comb_def : flow.wire_comb_defs)
for (auto node : wire_comb_def.second)
node_defs[node].insert(wire_comb_def.first);
Scheduler<FlowGraph::Node> scheduler;
dict<FlowGraph::Node*, Scheduler<FlowGraph::Node>::Vertex*, hash_ptr_ops> node_map;
@ -1843,10 +2005,9 @@ struct CxxrtlWorker {
if (!feedback_wires.empty()) {
has_feedback_arcs = true;
log("Module `%s' contains feedback arcs through wires:\n", module->name.c_str());
for (auto wire : feedback_wires) {
log(" %s\n", wire->name.c_str());
}
log("Module `%s' contains feedback arcs through wires:\n", log_id(module));
for (auto wire : feedback_wires)
log(" %s\n", log_id(wire));
}
for (auto wire : module->wires()) {
@ -1855,14 +2016,46 @@ struct CxxrtlWorker {
if (wire->get_bool_attribute(ID::keep)) continue;
if (wire->name.begins_with("$") && !localize_internal) continue;
if (wire->name.begins_with("\\") && !localize_public) continue;
if (sync_wires[wire]) continue;
// Outputs of FF/$memrd cells and LHS of sync actions do not end up in defs.
if (flow.wire_defs[wire].size() != 1) continue;
if (edge_wires[wire]) continue;
if (flow.wire_sync_defs.count(wire) > 0) continue;
localized_wires.insert(wire);
}
// For maximum performance, the state of the simulation (which is the same as the set of its double buffered
// wires, since using a singly buffered wire for any kind of state introduces a race condition) should contain
// no wires attached to combinatorial outputs. Feedback wires, by definition, make that impossible. However,
// it is possible that a design with no feedback arcs would end up with doubly buffered wires in such cases
// as a wire with multiple drivers where one of them is combinatorial and the other is synchronous. Such designs
// also require more than one delta cycle to converge.
pool<const RTLIL::Wire*> buffered_wires;
for (auto wire : module->wires()) {
if (flow.wire_comb_defs[wire].size() > 0 && !elided_wires.count(wire) && !localized_wires[wire]) {
if (!feedback_wires[wire])
buffered_wires.insert(wire);
}
}
if (!buffered_wires.empty()) {
has_buffered_wires = true;
log("Module `%s' contains buffered combinatorial wires:\n", log_id(module));
for (auto wire : buffered_wires)
log(" %s\n", log_id(wire));
}
eval_converges[module] = feedback_wires.empty() && buffered_wires.empty();
}
if (has_feedback_arcs) {
log("Feedback arcs require delta cycles during evaluation.\n");
if (has_feedback_arcs || has_buffered_wires) {
// Although both non-feedback buffered combinatorial wires and apparent feedback wires may be eliminated
// by optimizing the design, if after `opt_clean -purge` there are any feedback wires remaining, it is very
// likely that these feedback wires are indicative of a true logic loop, so they get emphasized in the message.
const char *why_pessimistic = nullptr;
if (has_feedback_arcs)
why_pessimistic = "feedback wires";
else if (has_buffered_wires)
why_pessimistic = "buffered combinatorial wires";
log("\n");
log_warning("Design contains %s, which require delta cycles during evaluation.\n", why_pessimistic);
if (!max_opt_level)
log("Increasing the optimization level may eliminate %s from the design.\n", why_pessimistic);
}
}
@ -1894,8 +2087,12 @@ struct CxxrtlWorker {
void prepare_design(RTLIL::Design *design)
{
bool has_sync_init, has_packed_mem;
log_push();
check_design(design, has_sync_init, has_packed_mem);
if (has_sync_init) {
if (run_proc_flatten) {
Pass::call(design, "proc");
Pass::call(design, "flatten");
} else if (has_sync_init) {
// We're only interested in proc_init, but it depends on proc_prune and proc_clean, so call those
// in case they weren't already. (This allows `yosys foo.v -o foo.cc` to work.)
Pass::call(design, "proc_prune");
@ -1908,18 +2105,15 @@ struct CxxrtlWorker {
if (has_sync_init || has_packed_mem)
check_design(design, has_sync_init, has_packed_mem);
log_assert(!(has_sync_init || has_packed_mem));
if (run_splitnets) {
Pass::call(design, "splitnets -driver");
if (run_opt_clean_purge)
Pass::call(design, "opt_clean -purge");
}
log("\n");
log_pop();
analyze_design(design);
}
};
struct CxxrtlBackend : public Backend {
static const int DEFAULT_OPT_LEVEL = 5;
static const int DEFAULT_OPT_LEVEL = 6;
CxxrtlBackend() : Backend("cxxrtl", "convert design to C++ RTL simulation") { }
void help() YS_OVERRIDE
@ -1941,9 +2135,9 @@ struct CxxrtlBackend : public Backend {
log(" top.step();\n");
log(" while (1) {\n");
log(" /* user logic */\n");
log(" top.p_clk.next = value<1> {0u};\n");
log(" top.p_clk = value<1> {0u};\n");
log(" top.step();\n");
log(" top.p_clk.next = value<1> {1u};\n");
log(" top.p_clk = value<1> {1u};\n");
log(" top.step();\n");
log(" }\n");
log(" }\n");
@ -1965,18 +2159,20 @@ struct CxxrtlBackend : public Backend {
log(" module debug(...);\n");
log(" (* cxxrtl.edge = \"p\" *) input clk;\n");
log(" input en;\n");
log(" input [7:0] data;\n");
log(" input [7:0] i_data;\n");
log(" (* cxxrtl.sync *) output [7:0] o_data;\n");
log(" endmodule\n");
log("\n");
log("For this HDL interface, this backend will generate the following C++ interface:\n");
log("\n");
log(" struct bb_p_debug : public module {\n");
log(" wire<1> p_clk;\n");
log(" bool posedge_p_clk = false;\n");
log(" wire<1> p_en;\n");
log(" wire<8> p_data;\n");
log(" value<1> p_clk;\n");
log(" bool posedge_p_clk() const { /* ... */ }\n");
log(" value<1> p_en;\n");
log(" value<8> p_i_data;\n");
log(" wire<8> p_o_data;\n");
log("\n");
log(" void eval() override;\n");
log(" bool eval() override;\n");
log(" bool commit() override;\n");
log("\n");
log(" static std::unique_ptr<bb_p_debug>\n");
@ -1989,10 +2185,11 @@ struct CxxrtlBackend : public Backend {
log(" namespace cxxrtl_design {\n");
log("\n");
log(" struct stderr_debug : public bb_p_debug {\n");
log(" void eval() override {\n");
log(" if (posedge_p_clk && p_en.curr)\n");
log(" fprintf(stderr, \"debug: %%02x\\n\", p_data.curr.data[0]);\n");
log(" bb_p_debug::eval();\n");
log(" bool eval() override {\n");
log(" if (posedge_p_clk() && p_en)\n");
log(" fprintf(stderr, \"debug: %%02x\\n\", p_i_data.data[0]);\n");
log(" p_o_data.next = p_i_data;\n");
log(" return bb_p_debug::eval();\n");
log(" }\n");
log(" };\n");
log("\n");
@ -2013,7 +2210,8 @@ struct CxxrtlBackend : public Backend {
log(" parameter WIDTH = 8;\n");
log(" (* cxxrtl.edge = \"p\" *) input clk;\n");
log(" input en;\n");
log(" (* cxxrtl.width = \"WIDTH\" *) input [WIDTH - 1:0] data;\n");
log(" (* cxxrtl.width = \"WIDTH\" *) input [WIDTH - 1:0] i_data;\n");
log(" (* cxxrtl.width = \"WIDTH\" *) output [WIDTH - 1:0] o_data;\n");
log(" endmodule\n");
log("\n");
log("For this parametric HDL interface, this backend will generate the following C++\n");
@ -2022,7 +2220,8 @@ struct CxxrtlBackend : public Backend {
log(" template<size_t WIDTH>\n");
log(" struct bb_p_debug : public module {\n");
log(" // ...\n");
log(" wire<WIDTH> p_data;\n");
log(" value<WIDTH> p_i_data;\n");
log(" wire<WIDTH> p_o_data;\n");
log(" // ...\n");
log(" static std::unique_ptr<bb_p_debug<WIDTH>>\n");
log(" create(std::string name, metadata_map parameters, metadata_map attributes);\n");
@ -2053,10 +2252,9 @@ struct CxxrtlBackend : public Backend {
log("\n");
log(" cxxrtl.edge\n");
log(" only valid on inputs of black boxes. must be one of \"p\", \"n\", \"a\".\n");
log(" if specified on signal `clk`, the generated code includes boolean fields\n");
log(" `posedge_p_clk` (if \"p\"), `negedge_p_clk` (if \"n\"), or both (if \"a\"),\n");
log(" as well as edge detection logic, simplifying implementation of clocked\n");
log(" black boxes.\n");
log(" if specified on signal `clk`, the generated code includes edge detectors\n");
log(" `posedge_p_clk()` (if \"p\"), `negedge_p_clk()` (if \"n\"), or both (if\n");
log(" \"a\"), simplifying implementation of clocked black boxes.\n");
log("\n");
log(" cxxrtl.template\n");
log(" only valid on black boxes. must contain a space separated sequence of\n");
@ -2067,6 +2265,13 @@ struct CxxrtlBackend : public Backend {
log(" only valid on ports of black boxes. must be a constant expression, which\n");
log(" is directly inserted into generated code.\n");
log("\n");
log(" cxxrtl.comb, cxxrtl.sync\n");
log(" only valid on outputs of black boxes. if specified, indicates that every\n");
log(" bit of the output port is driven, correspondingly, by combinatorial or\n");
log(" synchronous logic. this knowledge is used for scheduling optimizations.\n");
log(" if neither is specified, the output will be pessimistically treated as\n");
log(" driven by both combinatorial and synchronous logic.\n");
log("\n");
log("The following options are supported by this backend:\n");
log("\n");
log(" -header\n");
@ -2100,7 +2305,10 @@ struct CxxrtlBackend : public Backend {
log(" like -O3, and localize public wires not marked (*keep*) if possible.\n");
log("\n");
log(" -O5\n");
log(" like -O4, and run `splitnets -driver; opt_clean -purge` first.\n");
log(" like -O4, and run `opt_clean -purge` first.\n");
log("\n");
log(" -O6\n");
log(" like -O5, and run `proc; flatten` first.\n");
log("\n");
}
void execute(std::ostream *&f, std::string filename, std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
@ -2134,8 +2342,11 @@ struct CxxrtlBackend : public Backend {
extra_args(f, filename, args, argidx);
switch (opt_level) {
case 6:
worker.max_opt_level = true;
worker.run_proc_flatten = true;
case 5:
worker.run_splitnets = true;
worker.run_opt_clean_purge = true;
case 4:
worker.localize_public = true;
case 3:


@ -717,15 +717,16 @@ struct module {
module(const module &) = delete;
module &operator=(const module &) = delete;
virtual void eval() = 0;
virtual bool eval() = 0;
virtual bool commit() = 0;
size_t step() {
size_t deltas = 0;
bool converged = false;
do {
eval();
converged = eval();
deltas++;
} while (commit());
} while (commit() && !converged);
return deltas;
}
};