memory_bram: Some refactoring

This will make more sense when the new transparency masks land.

Fixes #2902.
This commit is contained in:
Marcelina Kościelnicka 2021-08-01 01:29:49 +02:00
parent 12db9b4273
commit 4451f7f5e9
1 changed files with 177 additions and 199 deletions

View File

@ -30,9 +30,6 @@ struct rules_t
int group, index, dupidx;
int wrmode, enable, transp, clocks, clkpol;
SigBit sig_clock;
SigSpec sig_addr, sig_data, sig_en;
bool effective_clkpol;
bool make_transp;
bool make_outreg;
int mapped_port;
@ -93,7 +90,6 @@ struct rules_t
pi.mapped_port = -1;
pi.make_transp = false;
pi.make_outreg = false;
pi.effective_clkpol = false;
portinfos.push_back(pi);
}
return portinfos;
@ -402,10 +398,8 @@ struct rules_t
}
};
bool replace_memory(Mem &orig_mem, const rules_t &rules, FfInitVals *initvals, const rules_t::bram_t &bram, const rules_t::match_t &match, dict<string, int> &match_properties, int mode)
bool replace_memory(Mem &mem, const rules_t &rules, FfInitVals *initvals, const rules_t::bram_t &bram, const rules_t::match_t &match, dict<string, int> &match_properties, int mode)
{
// We will modify ports — make a copy of the structure.
Mem mem(orig_mem);
Module *module = mem.module;
auto portinfos = bram.make_portinfos();
@ -441,35 +435,12 @@ bool replace_memory(Mem &orig_mem, const rules_t &rules, FfInitVals *initvals, c
log(" Mapping to bram type %s (variant %d):\n", log_id(bram.name), bram.variant);
// bram.dump_config();
bool cell_init = !mem.inits.empty();
vector<Const> initdata;
if (cell_init) {
Const initparam = mem.get_init_data();
initdata.reserve(mem.size);
for (int i=0; i < mem.size; i++)
initdata.push_back(initparam.extract(mem.width*i, mem.width, State::Sx));
}
std::vector<int> shuffle_map;
if (match.shuffle_enable && bram.dbits >= portinfos.at(match.shuffle_enable - 'A').enable*2 && portinfos.at(match.shuffle_enable - 'A').enable > 0 && !mem.wr_ports.empty())
{
int bucket_size = bram.dbits / portinfos.at(match.shuffle_enable - 'A').enable;
log(" Shuffle bit order to accommodate enable buckets of size %d..\n", bucket_size);
// extract unshuffled data/enable bits
std::vector<SigSpec> old_wr_en;
std::vector<SigSpec> old_wr_data;
std::vector<SigSpec> old_rd_data;
for (auto &port : mem.wr_ports) {
old_wr_en.push_back(port.en);
old_wr_data.push_back(port.data);
}
for (auto &port : mem.rd_ports)
old_rd_data.push_back(port.data);
// analyze enable structure
std::vector<SigSpec> en_order;
@ -484,52 +455,13 @@ bool replace_memory(Mem &orig_mem, const rules_t &rules, FfInitVals *initvals, c
bits_wr_en[sig].push_back(i);
}
// re-create memory ports
std::vector<SigSpec> new_wr_en(GetSize(old_wr_en));
std::vector<SigSpec> new_wr_data(GetSize(old_wr_data));
std::vector<SigSpec> new_rd_data(GetSize(old_rd_data));
std::vector<std::vector<State>> new_initdata;
std::vector<int> shuffle_map;
if (cell_init)
new_initdata.resize(mem.size);
for (auto &it : en_order)
{
auto &bits = bits_wr_en.at(it);
int buckets = (GetSize(bits) + bucket_size - 1) / bucket_size;
int fillbits = buckets*bucket_size - GetSize(bits);
SigBit fillbit;
for (auto bit : bits_wr_en.at(it))
shuffle_map.push_back(bit);
for (int i = 0; i < GetSize(bits); i++) {
for (int j = 0; j < GetSize(mem.wr_ports); j++) {
new_wr_en[j].append(old_wr_en[j][bits[i]]);
new_wr_data[j].append(old_wr_data[j][bits[i]]);
fillbit = old_wr_en[j][bits[i]];
}
for (int j = 0; j < GetSize(mem.rd_ports); j++)
new_rd_data[j].append(old_rd_data[j][bits[i]]);
if (cell_init) {
for (int j = 0; j < mem.size; j++)
new_initdata[j].push_back(initdata[j][bits[i]]);
}
shuffle_map.push_back(bits[i]);
}
for (int i = 0; i < fillbits; i++) {
for (int j = 0; j < GetSize(mem.wr_ports); j++) {
new_wr_en[j].append(fillbit);
new_wr_data[j].append(State::S0);
}
for (int j = 0; j < GetSize(mem.rd_ports); j++)
new_rd_data[j].append(State::Sx);
if (cell_init) {
for (int j = 0; j < mem.size; j++)
new_initdata[j].push_back(State::Sx);
}
while (GetSize(shuffle_map) % bucket_size)
shuffle_map.push_back(-1);
}
}
log(" Results of bit order shuffling:");
@ -538,26 +470,15 @@ bool replace_memory(Mem &orig_mem, const rules_t &rules, FfInitVals *initvals, c
log("\n");
// update mem_*, wr_*, and rd_* variables
mem.width = GetSize(new_wr_en.front());
for (int i = 0; i < GetSize(mem.wr_ports); i++) {
auto &port = mem.wr_ports[i];
port.en = new_wr_en[i];
port.data = new_wr_data[i];
}
for (int i = 0; i < GetSize(mem.rd_ports); i++) {
auto &port = mem.rd_ports[i];
port.data = new_rd_data[i];
}
if (cell_init) {
for (int i = 0; i < mem.size; i++)
initdata[i] = Const(new_initdata[i]);
}
} else {
for (int i = 0; i < mem.width; i++)
shuffle_map.push_back(i);
}
// Align width up to dbits.
while (GetSize(shuffle_map) % bram.dbits)
shuffle_map.push_back(-1);
// assign write ports
pair<SigBit, bool> wr_clkdom;
for (int cell_port_i = 0, bram_port_i = 0; cell_port_i < GetSize(mem.wr_ports); cell_port_i++)
@ -575,7 +496,7 @@ bool replace_memory(Mem &orig_mem, const rules_t &rules, FfInitVals *initvals, c
for (; bram_port_i < GetSize(portinfos); bram_port_i++)
{
auto &pi = portinfos[bram_port_i];
make_transp_enbits = pi.enable;
make_transp_enbits = pi.enable ? pi.enable : 1;
make_transp_clk = clkdom;
if (pi.wrmode != 1)
@ -602,16 +523,25 @@ bool replace_memory(Mem &orig_mem, const rules_t &rules, FfInitVals *initvals, c
}
}
SigSpec sig_en;
SigBit last_en_bit = State::S1;
for (int i = 0; i < mem.width; i++) {
if (pi.enable && i % (bram.dbits / pi.enable) == 0) {
last_en_bit = port.en[i];
sig_en.append(last_en_bit);
}
if (last_en_bit != port.en[i]) {
log(" Bram port %c%d has incompatible enable structure.\n", pi.group + 'A', pi.index + 1);
goto skip_bram_wport;
// We need to check enable compatibility of this port, as well as all
// ports that have priority over this one (because they will be involved
// in emulate_priority logic).
for (int i = 0; i < GetSize(mem.wr_ports); i++) {
auto &oport = mem.wr_ports[i];
if (i != cell_port_i && !oport.priority_mask[cell_port_i])
continue;
SigBit last_en_bit = State::S1;
for (int j = 0; j < GetSize(shuffle_map); j++) {
if (shuffle_map[j] == -1)
continue;
SigBit en_bit = oport.en[shuffle_map[j]];
if (pi.enable && j % (bram.dbits / pi.enable) == 0)
last_en_bit = en_bit;
if (last_en_bit != en_bit) {
log(" Bram port %c%d has incompatible enable structure.\n", pi.group + 'A', pi.index + 1);
goto skip_bram_wport;
}
}
}
@ -621,14 +551,8 @@ bool replace_memory(Mem &orig_mem, const rules_t &rules, FfInitVals *initvals, c
if (port.clk_enable) {
clock_domains[pi.clocks] = clkdom;
clock_polarities[pi.clkpol] = clkdom.second;
pi.sig_clock = clkdom.first;
pi.effective_clkpol = clkdom.second;
}
pi.sig_en = sig_en;
pi.sig_addr = port.addr;
pi.sig_data = port.data;
bram_port_i++;
goto mapped_wr_port;
}
@ -651,10 +575,6 @@ grow_read_ports:;
for (auto &pi : portinfos) {
if (pi.wrmode == 0) {
pi.mapped_port = -1;
pi.sig_clock = SigBit();
pi.sig_addr = SigSpec();
pi.sig_data = SigSpec();
pi.sig_en = SigSpec();
pi.make_outreg = false;
pi.make_transp = false;
}
@ -766,19 +686,13 @@ grow_read_ports:;
log(" Mapped to bram port %c%d.%d.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1);
pi.mapped_port = cell_port_i;
if (port.clk_enable && !pi.make_outreg) {
if (pi.clocks) {
clock_domains[pi.clocks] = clkdom;
clock_polarities[pi.clkpol] = clkdom.second;
if (!pi.make_transp)
read_transp[pi.transp] = transp;
pi.sig_clock = clkdom.first;
pi.sig_en = port.en;
pi.effective_clkpol = clkdom.second;
}
pi.sig_addr = port.addr;
pi.sig_data = port.data;
if (grow_read_ports_cursor < cell_port_i) {
grow_read_ports_cursor = cell_port_i;
try_growing_more_read_ports = true;
@ -798,17 +712,19 @@ grow_read_ports:;
// update properties and re-check conditions
int dcells = GetSize(shuffle_map) / bram.dbits;
int acells = (mem.size + (1 << bram.abits) - 1) / (1 << bram.abits);
if (mode <= 1)
{
match_properties["dups"] = dup_count;
match_properties["waste"] = match_properties["dups"] * match_properties["bwaste"];
int cells = ((mem.width + bram.dbits - 1) / bram.dbits) * ((mem.size + (1 << bram.abits) - 1) / (1 << bram.abits));
int cells = dcells * acells;
match_properties["efficiency"] = (100 * match_properties["bits"]) / (dup_count * cells * bram.dbits * (1 << bram.abits));
match_properties["dcells"] = ((mem.width + bram.dbits - 1) / bram.dbits);
match_properties["acells"] = ((mem.size + (1 << bram.abits) - 1) / (1 << bram.abits));
match_properties["cells"] = match_properties["dcells"] * match_properties["acells"] * match_properties["dups"];
match_properties["dcells"] = dcells;
match_properties["acells"] = acells;
match_properties["cells"] = cells * dup_count;
log(" Updated properties: dups=%d waste=%d efficiency=%d\n",
match_properties["dups"], match_properties["waste"], match_properties["efficiency"]);
@ -875,6 +791,70 @@ grow_read_ports:;
return true;
}
// At this point we are commited to replacing the RAM, and can mutate mem.
// We don't really support priorities, emulate them.
for (int i = 0; i < GetSize(mem.wr_ports); i++)
for (int j = 0; j < i; j++)
mem.emulate_priority(j, i);
// Now the big swizzle.
mem.width = GetSize(shuffle_map);
// Swizzle write ports.
for (auto &port : mem.wr_ports) {
SigSpec new_en, new_data;
SigBit en_bit = State::S1;
for (auto idx : shuffle_map) {
if (idx == -1) {
new_data.append(State::Sx);
} else {
new_data.append(port.data[idx]);
en_bit = port.en[idx];
}
new_en.append(en_bit);
}
port.en = new_en;
port.data = new_data;
}
// Swizzle read ports.
for (auto &port : mem.rd_ports) {
SigSpec new_data = module->addWire(NEW_ID, mem.width);
Const new_init_value = Const(State::Sx, mem.width);
Const new_arst_value = Const(State::Sx, mem.width);
Const new_srst_value = Const(State::Sx, mem.width);
for (int i = 0; i < mem.width; i++)
if (shuffle_map[i] != -1) {
module->connect(port.data[shuffle_map[i]], new_data[i]);
new_init_value[i] = port.init_value[shuffle_map[i]];
new_arst_value[i] = port.arst_value[shuffle_map[i]];
new_srst_value[i] = port.srst_value[shuffle_map[i]];
}
port.data = new_data;
port.init_value = new_init_value;
port.arst_value = new_arst_value;
port.srst_value = new_srst_value;
}
// Swizzle the init data.
bool cell_init = !mem.inits.empty();
vector<Const> initdata;
if (cell_init) {
Const initparam = mem.get_init_data();
initdata.reserve(mem.size);
for (int i = 0; i < mem.size; i++) {
std::vector<State> val;
for (auto idx : shuffle_map) {
if (idx == -1)
val.push_back(State::Sx);
else
val.push_back(initparam[mem.width * i + idx]);
}
initdata.push_back(Const(val));
}
}
// prepare variant parameters
dict<IdString, Const> variant_params;
@ -882,20 +862,15 @@ grow_read_ports:;
bram.find_variant_params(variant_params, other_bram);
// Apply make_outreg where necessary.
for (auto &pi : portinfos) {
if (pi.make_outreg) {
for (auto &pi : portinfos)
if (pi.make_outreg)
mem.extract_rdff(pi.mapped_port, initvals);
auto &port = mem.rd_ports[pi.mapped_port];
pi.sig_addr = port.addr;
pi.sig_data = port.data;
}
}
// actually replace that memory cell
dict<SigSpec, pair<SigSpec, SigSpec>> dout_cache;
for (int grid_d = 0; grid_d*bram.dbits < mem.width; grid_d++)
for (int grid_d = 0; grid_d < dcells; grid_d++)
{
SigSpec mktr_wraddr, mktr_wrdata, mktr_wrdata_q;
vector<SigSpec> mktr_wren;
@ -905,11 +880,11 @@ grow_read_ports:;
mktr_wrdata = module->addWire(NEW_ID, bram.dbits);
mktr_wrdata_q = module->addWire(NEW_ID, bram.dbits);
module->addDff(NEW_ID, make_transp_clk.first, mktr_wrdata, mktr_wrdata_q, make_transp_clk.second);
for (int grid_a = 0; grid_a*(1 << bram.abits) < mem.size; grid_a++)
for (int grid_a = 0; grid_a < acells; grid_a++)
mktr_wren.push_back(module->addWire(NEW_ID, make_transp_enbits));
}
for (int grid_a = 0; grid_a*(1 << bram.abits) < mem.size; grid_a++)
for (int grid_a = 0; grid_a < acells; grid_a++)
for (int dupidx = 0; dupidx < dup_count; dupidx++)
{
Cell *c = module->addCell(module->uniquify(stringf("%s.%d.%d.%d", mem.memid.c_str(), grid_d, grid_a, dupidx)), bram.name);
@ -919,18 +894,16 @@ grow_read_ports:;
c->setParam(vp.first, vp.second);
if (cell_init) {
int init_offset = grid_a*(1 << bram.abits);
int init_offset = grid_a*(1 << bram.abits) - mem.start_offset;
int init_shift = grid_d*bram.dbits;
int init_size = (1 << bram.abits);
Const initparam(State::Sx, init_size*bram.dbits);
for (int i = 0; i < init_size; i++) {
State padding = State::Sx;
for (int i = 0; i < init_size; i++)
for (int j = 0; j < bram.dbits; j++)
if (init_offset+i < GetSize(initdata) && init_shift+j < GetSize(initdata[init_offset+i]))
if (init_offset+i < GetSize(initdata) && init_offset+i >= 0)
initparam[i*bram.dbits+j] = initdata[init_offset+i][init_shift+j];
else
initparam[i*bram.dbits+j] = padding;
}
initparam[i*bram.dbits+j] = State::Sx;
c->setParam(ID::INIT, initparam);
}
@ -942,55 +915,75 @@ grow_read_ports:;
string prefix = stringf("%c%d", pi.group + 'A', pi.index + 1);
const char *pf = prefix.c_str();
if (pi.clocks && (!c->hasPort(stringf("\\CLK%d", (pi.clocks-1) % clocks_max + 1)) || pi.sig_clock.wire)) {
c->setPort(stringf("\\CLK%d", (pi.clocks-1) % clocks_max + 1), pi.sig_clock);
if (pi.clkpol > 1 && pi.sig_clock.wire)
c->setParam(stringf("\\CLKPOL%d", (pi.clkpol-1) % clkpol_max + 1), clock_polarities.at(pi.clkpol));
if (pi.transp > 1 && pi.sig_clock.wire)
c->setParam(stringf("\\TRANSP%d", (pi.transp-1) % transp_max + 1), read_transp.at(pi.transp));
}
if (pi.clocks && clock_domains.count(pi.clocks))
c->setPort(stringf("\\CLK%d", (pi.clocks-1) % clocks_max + 1), clock_domains.at(pi.clocks).first);
if (pi.clkpol > 1 && clock_polarities.count(pi.clkpol))
c->setParam(stringf("\\CLKPOL%d", (pi.clkpol-1) % clkpol_max + 1), clock_polarities.at(pi.clkpol));
if (pi.transp > 1 && read_transp.count(pi.transp))
c->setParam(stringf("\\TRANSP%d", (pi.transp-1) % transp_max + 1), read_transp.at(pi.transp));
SigSpec addr_ok;
if (GetSize(pi.sig_addr) > bram.abits) {
SigSpec extra_addr = pi.sig_addr.extract(bram.abits, GetSize(pi.sig_addr) - bram.abits);
SigSpec sig_addr;
if (pi.mapped_port >= 0) {
if (pi.wrmode == 1)
sig_addr = mem.wr_ports[pi.mapped_port].addr;
else
sig_addr = mem.rd_ports[pi.mapped_port].addr;
}
if (GetSize(sig_addr) > bram.abits) {
SigSpec extra_addr = sig_addr.extract(bram.abits, GetSize(sig_addr) - bram.abits);
SigSpec extra_addr_sel = SigSpec(grid_a, GetSize(extra_addr));
addr_ok = module->Eq(NEW_ID, extra_addr, extra_addr_sel);
}
if (pi.enable)
{
SigSpec sig_en = pi.sig_en;
if (pi.wrmode == 1) {
sig_en.extend_u0((grid_d+1) * pi.enable);
sig_en = sig_en.extract(grid_d * pi.enable, pi.enable);
}
if (!addr_ok.empty())
sig_en = module->Mux(NEW_ID, SigSpec(0, GetSize(sig_en)), sig_en, addr_ok);
c->setPort(stringf("\\%sEN", pf), sig_en);
if (pi.wrmode == 1 && enable_make_transp)
module->connect(mktr_wren[grid_a], sig_en);
}
SigSpec sig_addr = pi.sig_addr;
sig_addr.extend_u0(bram.abits);
c->setPort(stringf("\\%sADDR", pf), sig_addr);
if (pi.wrmode == 1 && enable_make_transp && grid_a == 0)
module->connect(mktr_wraddr, sig_addr);
SigSpec sig_data = pi.sig_data;
sig_data.extend_u0((grid_d+1) * bram.dbits);
sig_data = sig_data.extract(grid_d * bram.dbits, bram.dbits);
if (pi.wrmode == 1) {
if (pi.mapped_port == -1)
{
if (pi.enable)
c->setPort(stringf("\\%sEN", pf), Const(State::S0, pi.enable));
continue;
}
auto &port = mem.wr_ports[pi.mapped_port];
SigSpec sig_data = port.data.extract(grid_d * bram.dbits, bram.dbits);
c->setPort(stringf("\\%sDATA", pf), sig_data);
if (enable_make_transp && grid_a == 0)
if (pi.enable)
{
SigSpec sig_en;
int stride = bram.dbits / pi.enable;
for (int i = 0; i < pi.enable; i++)
sig_en.append(port.en[stride * i + grid_d * bram.dbits]);
if (!addr_ok.empty())
sig_en = module->Mux(NEW_ID, SigSpec(0, GetSize(sig_en)), sig_en, addr_ok);
c->setPort(stringf("\\%sEN", pf), sig_en);
if (enable_make_transp)
module->connect(mktr_wren[grid_a], sig_en);
}
else if (enable_make_transp)
module->connect(mktr_wren[grid_a], addr_ok);
if (enable_make_transp && grid_a == 0) {
module->connect(mktr_wraddr, sig_addr);
module->connect(mktr_wrdata, sig_data);
}
} else {
if (pi.mapped_port == -1)
{
if (pi.enable)
c->setPort(stringf("\\%sEN", pf), State::S0);
continue;
}
auto &port = mem.rd_ports[pi.mapped_port];
SigSpec sig_data = port.data.extract(grid_d * bram.dbits, bram.dbits);
SigSpec bram_dout = module->addWire(NEW_ID, bram.dbits);
c->setPort(stringf("\\%sDATA", pf), bram_dout);
if (pi.make_transp) {
@ -1010,22 +1003,21 @@ grow_read_ports:;
}
}
for (int i = bram.dbits-1; i >= 0; i--)
if (sig_data[i].wire == nullptr) {
sig_data.remove(i);
bram_dout.remove(i);
}
SigSpec addr_ok_q = addr_ok;
if (pi.clocks && !addr_ok.empty()) {
if (port.clk_enable && !addr_ok.empty()) {
addr_ok_q = module->addWire(NEW_ID);
if (!pi.sig_en.empty())
addr_ok = module->Mux(NEW_ID, addr_ok_q, addr_ok, pi.sig_en);
module->addDff(NEW_ID, pi.sig_clock, addr_ok, addr_ok_q, pi.effective_clkpol);
module->addDffe(NEW_ID, port.clk, port.en, addr_ok, addr_ok_q, port.clk_polarity);
}
dout_cache[sig_data].first.append(addr_ok_q);
dout_cache[sig_data].second.append(bram_dout);
if (pi.enable) {
SigSpec sig_en = port.en;
if (!addr_ok.empty())
sig_en = module->And(NEW_ID, sig_en, addr_ok);
c->setPort(stringf("\\%sEN", pf), sig_en);
}
}
}
}
@ -1070,20 +1062,6 @@ void handle_memory(Mem &mem, const rules_t &rules, FfInitVals *initvals)
log(" %s=%d", it.first.c_str(), it.second);
log("\n");
// This pass cannot deal with write port priority — we need to emulate it,
// if present. Since priority emulation will change the enable signals,
// which in turn may change enable grouping and mapping eligibility in
// pathological cases, we need to do this before checking mapping
// eligibility. This will create priority emulation logic for all
// memories in the design regardless of whether we end up mapping them
// or not, but since we never call Mem::emit(), the new priority masks
// and enables won't be commited to the design, and this logic will be
// unused (and removed by subsequent opt_clean) for unmapped memories.
for (int i = 0; i < GetSize(mem.wr_ports); i++)
for (int j = 0; j < i; j++)
mem.emulate_priority(j, i);
pool<pair<IdString, int>> failed_brams;
dict<pair<int, int>, tuple<int, int, int>> best_rule_cache;