write_cxxrtl: improve writable memory handling.

This commit reduces space and time overhead for writable memories
to O(write port count) in both cases; implements handling for write
port priorities; and simplifies runtime representation of memories.
This commit is contained in:
whitequark 2020-04-05 02:06:26 +00:00
parent fb0270b752
commit 01e6850bd3
2 changed files with 88 additions and 66 deletions

View File

@ -827,7 +827,7 @@ struct CxxrtlWorker {
}
RTLIL::Memory *memory = cell->module->memories[cell->getParam(ID(MEMID)).decode_string()];
std::string valid_index_temp = fresh_temporary();
f << indent << "std::pair<bool, size_t> " << valid_index_temp << " = memory_index(";
f << indent << "auto " << valid_index_temp << " = memory_index(";
dump_sigspec_rhs(cell->getPort(ID(ADDR)));
f << ", " << memory->start_offset << ", " << memory->size << ");\n";
if (cell->type == ID($memrd)) {
@ -844,8 +844,8 @@ struct CxxrtlWorker {
// larger program) will never crash the code that calls into it.
//
// If assertions are disabled, out of bounds reads are defined to return zero.
f << indent << "assert(" << valid_index_temp << ".first && \"out of bounds read\");\n";
f << indent << "if(" << valid_index_temp << ".first) {\n";
f << indent << "assert(" << valid_index_temp << ".valid && \"out of bounds read\");\n";
f << indent << "if(" << valid_index_temp << ".valid) {\n";
inc_indent();
if (writable_memories[memory]) {
std::string addr_temp = fresh_temporary();
@ -854,17 +854,22 @@ struct CxxrtlWorker {
f << ";\n";
std::string lhs_temp = fresh_temporary();
f << indent << "value<" << memory->width << "> " << lhs_temp << " = "
<< mangle(memory) << "[" << valid_index_temp << ".second].curr;\n";
for (auto memwr_cell : transparent_for[cell]) {
<< mangle(memory) << "[" << valid_index_temp << ".index];\n";
std::vector<const RTLIL::Cell*> memwr_cells(transparent_for[cell].begin(), transparent_for[cell].end());
std::sort(memwr_cells.begin(), memwr_cells.end(),
[](const RTLIL::Cell *a, const RTLIL::Cell *b) {
return a->getParam(ID(PRIORITY)).as_int() < b->getParam(ID(PRIORITY)).as_int();
});
for (auto memwr_cell : memwr_cells) {
f << indent << "if (" << addr_temp << " == ";
dump_sigspec_rhs(memwr_cell->getPort(ID(ADDR)));
f << ") {\n";
inc_indent();
f << indent << lhs_temp << " = " << lhs_temp;
f << ".update(";
dump_sigspec_rhs(memwr_cell->getPort(ID(EN)));
f << ", ";
dump_sigspec_rhs(memwr_cell->getPort(ID(DATA)));
f << ", ";
dump_sigspec_rhs(memwr_cell->getPort(ID(EN)));
f << ");\n";
dec_indent();
f << indent << "}\n";
@ -875,7 +880,7 @@ struct CxxrtlWorker {
} else {
f << indent;
dump_sigspec_lhs(cell->getPort(ID(DATA)));
f << " = " << mangle(memory) << "[" << valid_index_temp << ".second];\n";
f << " = " << mangle(memory) << "[" << valid_index_temp << ".index];\n";
}
dec_indent();
f << indent << "} else {\n";
@ -890,22 +895,18 @@ struct CxxrtlWorker {
f << indent << "}\n";
}
} else /*if (cell->type == ID($memwr))*/ {
// FIXME: handle write port priority, here and above in transparent $memrd cells
log_assert(writable_memories[memory]);
// See above for rationale of having both the assert and the condition.
//
// If assertions are disabled, out of bounds writes are defined to do nothing.
f << indent << "assert(" << valid_index_temp << ".first && \"out of bounds write\");\n";
f << indent << "if (" << valid_index_temp << ".first) {\n";
f << indent << "assert(" << valid_index_temp << ".valid && \"out of bounds write\");\n";
f << indent << "if (" << valid_index_temp << ".valid) {\n";
inc_indent();
std::string lhs_temp = fresh_temporary();
f << indent << "wire<" << memory->width << "> &" << lhs_temp << " = ";
f << mangle(memory) << "[" << valid_index_temp << ".second];\n";
f << indent << lhs_temp << ".next = " << lhs_temp << ".curr.update(";
dump_sigspec_rhs(cell->getPort(ID(EN)));
f << ", ";
f << indent << mangle(memory) << ".update(" << valid_index_temp << ".index, ";
dump_sigspec_rhs(cell->getPort(ID(DATA)));
f << ");\n";
f << ", ";
dump_sigspec_rhs(cell->getPort(ID(EN)));
f << ", " << cell->getParam(ID(PRIORITY)).as_int() << ");\n";
dec_indent();
f << indent << "}\n";
}
@ -1122,8 +1123,8 @@ struct CxxrtlWorker {
});
dump_attrs(memory);
f << indent << "memory_" << (writable_memories[memory] ? "rw" : "ro")
<< "<" << memory->width << "> " << mangle(memory)
f << indent << (writable_memories[memory] ? "" : "const ")
<< "memory<" << memory->width << "> " << mangle(memory)
<< " { " << memory->size << "u";
if (init_cells.empty()) {
f << " };\n";
@ -1135,8 +1136,7 @@ struct CxxrtlWorker {
RTLIL::Const data = cell->getPort(ID(DATA)).as_const();
size_t width = cell->getParam(ID(WIDTH)).as_int();
size_t words = cell->getParam(ID(WORDS)).as_int();
f << indent << "memory_" << (writable_memories[memory] ? "rw" : "ro")
<< "<" << memory->width << ">::init<" << words << "> { "
f << indent << "memory<" << memory->width << ">::init<" << words << "> { "
<< stringf("%#x", cell->getPort(ID(ADDR)).as_int()) << ", {";
inc_indent();
for (size_t n = 0; n < words; n++) {
@ -1257,10 +1257,7 @@ struct CxxrtlWorker {
for (auto memory : module->memories) {
if (!writable_memories[memory.second])
continue;
f << indent << "for (size_t i = 0; i < " << memory.second->size << "u; i++)\n";
inc_indent();
f << indent << "changed |= " << mangle(memory.second) << "[i].commit();\n";
dec_indent();
f << indent << "changed |= " << mangle(memory.second) << ".commit();\n";
}
for (auto cell : module->cells()) {
if (is_internal_cell(cell->type))

View File

@ -1,7 +1,7 @@
/*
* yosys -- Yosys Open SYnthesis Suite
*
* Copyright (C) 2019 whitequark <whitequark@whitequark.org>
* Copyright (C) 2019-2020 whitequark <whitequark@whitequark.org>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted.
@ -28,6 +28,7 @@
#include <type_traits>
#include <tuple>
#include <vector>
#include <algorithm>
#include <sstream>
// The cxxrtl support library implements compile time specialized arbitrary width arithmetics, as well as provides
@ -73,9 +74,6 @@ struct value : public expr_base<value<Bits>> {
template<typename... Init>
explicit constexpr value(Init ...init) : data{init...} {}
// This allows using value<> as well as wire<> in memory initializers.
using init = value<Bits>;
value(const value<Bits> &) = default;
value(value<Bits> &&) = default;
value<Bits> &operator=(const value<Bits> &) = default;
@ -297,7 +295,7 @@ struct value : public expr_base<value<Bits>> {
return result;
}
value<Bits> update(const value<Bits> &mask, const value<Bits> &val) const {
value<Bits> update(const value<Bits> &val, const value<Bits> &mask) const {
return bit_and(mask.bit_not()).bit_or(val.bit_and(mask));
}
@ -559,19 +557,6 @@ struct wire {
wire(wire<Bits> &&) = default;
wire<Bits> &operator=(const wire<Bits> &) = delete;
// We want to avoid having operator=(wire<>) or operator=(value<>) that overwrites both curr and next,
// since this operation is almost always wrong. But we also need an operation like that for memory
// initialization. This is solved by adding a wrapper and making the use of operator= valid only when
// this wrapper is used.
struct init {
value<Bits> data;
};
wire<Bits> &operator=(const init &init) {
curr = next = init.data;
return *this;
}
bool commit() {
if (curr != next) {
curr = next;
@ -587,12 +572,10 @@ std::ostream &operator<<(std::ostream &os, const wire<Bits> &val) {
return os;
}
template<class Elem>
template<size_t Width>
struct memory {
using StoredElem = typename std::remove_const<Elem>::type;
std::vector<StoredElem> data;
std::vector<value<Width>> data;
static constexpr size_t width = StoredElem::bits;
size_t depth() const {
return data.size();
}
@ -600,8 +583,8 @@ struct memory {
memory() = delete;
explicit memory(size_t depth) : data(depth) {}
memory(const memory<Elem> &) = delete;
memory<Elem> &operator=(const memory<Elem> &) = delete;
memory(const memory<Width> &) = delete;
memory<Width> &operator=(const memory<Width> &) = delete;
// The only way to get the compiler to put the initializer in .rodata and do not copy it on stack is to stuff it
// into a plain array. You'd think an std::initializer_list would work here, but it doesn't, because you can't
@ -610,7 +593,7 @@ struct memory {
template<size_t Size>
struct init {
size_t offset;
typename Elem::init data[Size];
value<Width> data[Size];
};
template<size_t... InitSize>
@ -621,18 +604,56 @@ struct memory {
auto _ = {std::move(std::begin(init.data), std::end(init.data), data.begin() + init.offset)...};
}
Elem &operator [](size_t index) {
value<Width> &operator [](size_t index) {
assert(index < data.size());
return data[index];
}
const value<Width> &operator [](size_t index) const {
assert(index < data.size());
return data[index];
}
// A simple way to make a writable memory would be to use an array of wires instead of an array of values.
// However, there are two significant downsides to this approach: first, it has large overhead (2× space
// overhead, and O(depth) time overhead during commit); second, it does not simplify handling write port
// priorities. Although in principle write ports could be ordered or conditionally enabled in generated
// code based on their priorities and selected addresses, the feedback arc set problem is computationally
// expensive, and the heuristic based algorithms are not easily modified to guarantee (rather than prefer)
// a particular write port evaluation order.
//
// The approach used here instead is to queue writes into a buffer during the eval phase, then perform
// the writes during the commit phase in the priority order. This approach has low overhead, with both space
// and time proportional to the amount of write ports. Because virtually every memory in a practical design
// has at most two write ports, linear search is used on every write, being the fastest and simplest approach.
struct write {
size_t index;
value<Width> val;
value<Width> mask;
int priority;
};
std::vector<write> write_queue;
void update(size_t index, const value<Width> &val, const value<Width> &mask, int priority = 0) {
assert(index < data.size());
write_queue.emplace_back(write { index, val, mask, priority });
}
bool commit() {
bool changed = false;
std::sort(write_queue.begin(), write_queue.end(),
[](const write &a, const write &b) { return a.priority < b.priority; });
for (const write &entry : write_queue) {
value<Width> elem = data[entry.index];
elem = elem.update(entry.val, entry.mask);
changed |= (data[entry.index] != elem);
data[entry.index] = elem;
}
write_queue.clear();
return changed;
}
};
template<size_t Width>
using memory_rw = memory<wire<Width>>;
template<size_t Width>
using memory_ro = memory<const value<Width>>;
struct module {
module() {}
virtual ~module() {}
@ -1098,15 +1119,19 @@ value<BitsY> mod_ss(const value<BitsA> &a, const value<BitsB> &b) {
}
// Memory helper
template<size_t BitsAddr>
std::pair<bool, size_t> memory_index(const value<BitsAddr> &addr, size_t offset, size_t depth) {
static_assert(value<BitsAddr>::chunks <= 1, "memory address is too wide");
size_t offset_index = addr.data[0];
struct memory_index {
bool valid;
size_t index;
bool valid = (offset_index >= offset && offset_index < offset + depth);
size_t index = offset_index - offset;
return std::make_pair(valid, index);
}
template<size_t BitsAddr>
memory_index(const value<BitsAddr> &addr, size_t offset, size_t depth) {
static_assert(value<BitsAddr>::chunks <= 1, "memory address is too wide");
size_t offset_index = addr.data[0];
valid = (offset_index >= offset && offset_index < offset + depth);
index = offset_index - offset;
}
};
} // namespace cxxrtl_yosys