mirror of https://github.com/YosysHQ/yosys.git
Merge remote-tracking branch 'origin/master' into xaig_dff
This commit is contained in:
commit
304e5f9ea4
|
@ -28,14 +28,13 @@
|
|||
#include <sys/wait.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/un.h>
|
||||
extern char **environ;
|
||||
#endif
|
||||
|
||||
#include "libs/json11/json11.hpp"
|
||||
#include "libs/sha1/sha1.h"
|
||||
#include "kernel/yosys.h"
|
||||
|
||||
extern char **environ;
|
||||
|
||||
YOSYS_NAMESPACE_BEGIN
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
@ -238,6 +237,11 @@ struct RpcModule : RTLIL::Module {
|
|||
|
||||
#if defined(_WIN32)
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <BaseTsd.h>
|
||||
typedef SSIZE_T ssize_t;
|
||||
#endif
|
||||
|
||||
struct HandleRpcServer : RpcServer {
|
||||
HANDLE hsend, hrecv;
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ struct EquivOptPass:public ScriptPass
|
|||
log(" equiv_opt [options] [command]\n");
|
||||
log("\n");
|
||||
log("This command uses temporal induction to check circuit equivalence before and\n");
|
||||
log("after an optimization pass.\n");
|
||||
log("after an optimization pass.\n");
|
||||
log("\n");
|
||||
log(" -run <from_label>:<to_label>\n");
|
||||
log(" only run the commands between the labels (see below). an empty\n");
|
||||
|
@ -50,6 +50,9 @@ struct EquivOptPass:public ScriptPass
|
|||
log(" -multiclock\n");
|
||||
log(" run clk2fflogic before equivalence checking.\n");
|
||||
log("\n");
|
||||
log(" -async2sync\n");
|
||||
log(" run async2sync before equivalence checking.\n");
|
||||
log("\n");
|
||||
log(" -undef\n");
|
||||
log(" enable modelling of undef states during equiv_induct.\n");
|
||||
log("\n");
|
||||
|
@ -59,7 +62,7 @@ struct EquivOptPass:public ScriptPass
|
|||
}
|
||||
|
||||
std::string command, techmap_opts;
|
||||
bool assert, undef, multiclock;
|
||||
bool assert, undef, multiclock, async2sync;
|
||||
|
||||
void clear_flags() YS_OVERRIDE
|
||||
{
|
||||
|
@ -68,6 +71,7 @@ struct EquivOptPass:public ScriptPass
|
|||
assert = false;
|
||||
undef = false;
|
||||
multiclock = false;
|
||||
async2sync = false;
|
||||
}
|
||||
|
||||
void execute(std::vector < std::string > args, RTLIL::Design * design) YS_OVERRIDE
|
||||
|
@ -101,6 +105,10 @@ struct EquivOptPass:public ScriptPass
|
|||
multiclock = true;
|
||||
continue;
|
||||
}
|
||||
if (args[argidx] == "-async2sync") {
|
||||
async2sync = true;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -120,6 +128,9 @@ struct EquivOptPass:public ScriptPass
|
|||
if (!design->full_selection())
|
||||
log_cmd_error("This command only operates on fully selected designs!\n");
|
||||
|
||||
if (async2sync && multiclock)
|
||||
log_cmd_error("The '-async2sync' and '-multiclock' options are mutually exclusive!\n");
|
||||
|
||||
log_header(design, "Executing EQUIV_OPT pass.\n");
|
||||
log_push();
|
||||
|
||||
|
@ -157,8 +168,8 @@ struct EquivOptPass:public ScriptPass
|
|||
if (check_label("prove")) {
|
||||
if (multiclock || help_mode)
|
||||
run("clk2fflogic", "(only with -multiclock)");
|
||||
if (!multiclock || help_mode)
|
||||
run("async2sync", "(only without -multiclock)");
|
||||
if (async2sync || help_mode)
|
||||
run("async2sync", " (only with -async2sync)");
|
||||
run("equiv_make gold gate equiv");
|
||||
if (help_mode)
|
||||
run("equiv_induct [-undef] equiv");
|
||||
|
|
|
@ -190,7 +190,7 @@ create matches for different sections of a cell. For example:
|
|||
select pmux->type == $pmux
|
||||
slice idx GetSize(port(pmux, \S))
|
||||
index <SigBit> port(pmux, \S)[idx] === port(eq, \Y)
|
||||
set pmux_slice idx
|
||||
set pmux_slice idx
|
||||
endmatch
|
||||
|
||||
The first argument to `slice` is the local variable name used to identify the
|
||||
|
|
|
@ -9,3 +9,7 @@ match lut
|
|||
index <SigSpec> port(lut, \I1) === port(carry, \I0)
|
||||
index <SigSpec> port(lut, \I2) === port(carry, \I1)
|
||||
endmatch
|
||||
|
||||
code
|
||||
accept;
|
||||
endcode
|
||||
|
|
|
@ -8,21 +8,23 @@ match dff
|
|||
select GetSize(port(dff, \D)) > 1
|
||||
endmatch
|
||||
|
||||
code sigD
|
||||
sigD = port(dff, \D);
|
||||
endcode
|
||||
|
||||
match rstmux
|
||||
select rstmux->type == $mux
|
||||
select GetSize(port(rstmux, \Y)) > 1
|
||||
index <SigSpec> port(rstmux, \Y) === port(dff, \D)
|
||||
index <SigSpec> port(rstmux, \Y) === sigD
|
||||
choice <IdString> BA {\B, \A}
|
||||
select port(rstmux, BA).is_fully_const()
|
||||
set rstmuxBA BA
|
||||
optional
|
||||
semioptional
|
||||
endmatch
|
||||
|
||||
code sigD
|
||||
if (rstmux)
|
||||
sigD = port(rstmux, rstmuxBA == \B ? \A : \B);
|
||||
else
|
||||
sigD = port(dff, \D);
|
||||
endcode
|
||||
|
||||
match cemux
|
||||
|
@ -32,66 +34,97 @@ match cemux
|
|||
choice <IdString> AB {\A, \B}
|
||||
index <SigSpec> port(cemux, AB) === port(dff, \Q)
|
||||
set cemuxAB AB
|
||||
semioptional
|
||||
endmatch
|
||||
|
||||
code
|
||||
SigSpec D = port(cemux, cemuxAB == \A ? \B : \A);
|
||||
SigSpec Q = port(dff, \Q);
|
||||
if (!cemux && !rstmux)
|
||||
reject;
|
||||
endcode
|
||||
|
||||
code
|
||||
Const rst;
|
||||
if (rstmux)
|
||||
rst = port(rstmux, rstmuxBA).as_const();
|
||||
int width = GetSize(D);
|
||||
|
||||
SigSpec &ceA = cemux->connections_.at(\A);
|
||||
SigSpec &ceB = cemux->connections_.at(\B);
|
||||
SigSpec &ceY = cemux->connections_.at(\Y);
|
||||
SigSpec &dffD = dff->connections_.at(\D);
|
||||
SigSpec &dffQ = dff->connections_.at(\Q);
|
||||
|
||||
if (D[width-1] == D[width-2]) {
|
||||
did_something = true;
|
||||
|
||||
SigBit sign = D[width-1];
|
||||
bool is_signed = sign.wire;
|
||||
int i;
|
||||
for (i = width-1; i >= 2; i--) {
|
||||
if (!is_signed) {
|
||||
module->connect(Q[i], sign);
|
||||
if (D[i-1] != sign || (rst.size() && rst[i-1] != rst[width-1]))
|
||||
break;
|
||||
}
|
||||
else {
|
||||
module->connect(Q[i], Q[i-1]);
|
||||
if (D[i-2] != sign || (rst.size() && rst[i-1] != rst[width-1]))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ceA.remove(i, width-i);
|
||||
ceB.remove(i, width-i);
|
||||
ceY.remove(i, width-i);
|
||||
cemux->fixup_parameters();
|
||||
dffD.remove(i, width-i);
|
||||
dffQ.remove(i, width-i);
|
||||
dff->fixup_parameters();
|
||||
|
||||
log("dffcemux pattern in %s: dff=%s, cemux=%s; removed top %d bits.\n", log_id(module), log_id(dff), log_id(cemux), width-i);
|
||||
accept;
|
||||
SigSpec D;
|
||||
if (cemux) {
|
||||
D = port(cemux, cemuxAB == \A ? \B : \A);
|
||||
if (rstmux)
|
||||
rst = port(rstmux, rstmuxBA).as_const();
|
||||
else
|
||||
rst = Const(State::Sx, GetSize(D));
|
||||
}
|
||||
else {
|
||||
log_assert(rstmux);
|
||||
D = port(rstmux, rstmuxBA == \B ? \A : \B);
|
||||
rst = port(rstmux, rstmuxBA).as_const();
|
||||
}
|
||||
SigSpec Q = port(dff, \Q);
|
||||
int width = GetSize(D);
|
||||
|
||||
SigSpec &dffD = dff->connections_.at(\D);
|
||||
SigSpec &dffQ = dff->connections_.at(\Q);
|
||||
Const init;
|
||||
for (const auto &b : Q) {
|
||||
auto it = b.wire->attributes.find(\init);
|
||||
init.bits.push_back(it == b.wire->attributes.end() ? State::Sx : it->second[b.offset]);
|
||||
}
|
||||
|
||||
auto cmpx = [=](State lhs, State rhs) {
|
||||
if (lhs == State::Sx || rhs == State::Sx)
|
||||
return true;
|
||||
return lhs == rhs;
|
||||
};
|
||||
|
||||
int i = width-1;
|
||||
while (i > 1) {
|
||||
log_dump(i, D[i], D[i-1], rst[i], rst[i-1], init[i], init[i-1]);
|
||||
if (D[i] != D[i-1])
|
||||
break;
|
||||
if (!cmpx(rst[i], rst[i-1]))
|
||||
break;
|
||||
if (!cmpx(init[i], init[i-1]))
|
||||
break;
|
||||
if (!cmpx(rst[i], init[i]))
|
||||
break;
|
||||
module->connect(Q[i], Q[i-1]);
|
||||
i--;
|
||||
}
|
||||
if (i < width-1) {
|
||||
did_something = true;
|
||||
if (cemux) {
|
||||
SigSpec &ceA = cemux->connections_.at(\A);
|
||||
SigSpec &ceB = cemux->connections_.at(\B);
|
||||
SigSpec &ceY = cemux->connections_.at(\Y);
|
||||
ceA.remove(i, width-1-i);
|
||||
ceB.remove(i, width-1-i);
|
||||
ceY.remove(i, width-1-i);
|
||||
cemux->fixup_parameters();
|
||||
}
|
||||
if (rstmux) {
|
||||
SigSpec &rstA = rstmux->connections_.at(\A);
|
||||
SigSpec &rstB = rstmux->connections_.at(\B);
|
||||
SigSpec &rstY = rstmux->connections_.at(\Y);
|
||||
rstA.remove(i, width-1-i);
|
||||
rstB.remove(i, width-1-i);
|
||||
rstY.remove(i, width-1-i);
|
||||
rstmux->fixup_parameters();
|
||||
}
|
||||
dffD.remove(i, width-1-i);
|
||||
dffQ.remove(i, width-1-i);
|
||||
dff->fixup_parameters();
|
||||
|
||||
log("dffcemux pattern in %s: dff=%s, cemux=%s, rstmux=%s; removed top %d bits.\n", log_id(module), log_id(dff), log_id(cemux, "n/a"), log_id(rstmux, "n/a"), width-1-i);
|
||||
width = i+1;
|
||||
}
|
||||
if (cemux) {
|
||||
SigSpec &ceA = cemux->connections_.at(\A);
|
||||
SigSpec &ceB = cemux->connections_.at(\B);
|
||||
SigSpec &ceY = cemux->connections_.at(\Y);
|
||||
|
||||
int count = 0;
|
||||
for (int i = width-1; i >= 0; i--) {
|
||||
if (D[i].wire)
|
||||
continue;
|
||||
Wire *w = Q[i].wire;
|
||||
auto it = w->attributes.find(\init);
|
||||
State init;
|
||||
if (it != w->attributes.end())
|
||||
init = it->second[Q[i].offset];
|
||||
else
|
||||
init = State::Sx;
|
||||
|
||||
if (init == State::Sx || init == D[i].data) {
|
||||
if (cmpx(rst[i], D[i].data) && cmpx(init[i], D[i].data)) {
|
||||
count++;
|
||||
module->connect(Q[i], D[i]);
|
||||
ceA.remove(i);
|
||||
|
@ -105,9 +138,10 @@ code
|
|||
did_something = true;
|
||||
cemux->fixup_parameters();
|
||||
dff->fixup_parameters();
|
||||
log("dffcemux pattern in %s: dff=%s, cemux=%s; removed %d constant bits.\n", log_id(module), log_id(dff), log_id(cemux), count);
|
||||
log("dffcemux pattern in %s: dff=%s, cemux=%s, rstmux=%s; removed %d constant bits.\n", log_id(module), log_id(dff), log_id(cemux), log_id(rstmux, "n/a"), count);
|
||||
}
|
||||
|
||||
accept;
|
||||
}
|
||||
|
||||
if (did_something)
|
||||
accept;
|
||||
endcode
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
|
||||
#include "kernel/yosys.h"
|
||||
#include "kernel/sigtools.h"
|
||||
#include <deque>
|
||||
|
||||
USING_YOSYS_NAMESPACE
|
||||
PRIVATE_NAMESPACE_BEGIN
|
||||
|
@ -608,8 +609,13 @@ struct XilinxDspPass : public Pass {
|
|||
extra_args(args, argidx, design);
|
||||
|
||||
for (auto module : design->selected_modules()) {
|
||||
// Experimental feature: pack $add/$sub cells with
|
||||
// (* use_dsp48="simd" *) into DSP48E1's using its
|
||||
// SIMD feature
|
||||
xilinx_simd_pack(module, module->selected_cells());
|
||||
|
||||
// Match for all features ([ABDMP][12]?REG, pre-adder,
|
||||
// post-adder, pattern detector, etc.) except for CREG
|
||||
{
|
||||
xilinx_dsp_pm pm(module, module->selected_cells());
|
||||
pm.run_xilinx_dsp_pack(xilinx_dsp_pack);
|
||||
|
@ -618,14 +624,17 @@ struct XilinxDspPass : public Pass {
|
|||
// is no guarantee that the cell ordering corresponds
|
||||
// to the "expected" case (i.e. the order in which
|
||||
// they appear in the source) thus the possibility
|
||||
// existed that a register got packed as CREG into a
|
||||
// existed that a register got packed as a CREG into a
|
||||
// downstream DSP that should have otherwise been a
|
||||
// PREG of an upstream DSP that had not been pattern
|
||||
// matched yet
|
||||
// PREG of an upstream DSP that had not been visited
|
||||
// yet
|
||||
{
|
||||
xilinx_dsp_CREG_pm pm(module, module->selected_cells());
|
||||
pm.run_xilinx_dsp_packC(xilinx_dsp_packC);
|
||||
}
|
||||
// Lastly, identify and utilise PCOUT -> PCIN,
|
||||
// ACOUT -> ACIN, and BCOUT -> BCIN dedicated cascade
|
||||
// chains
|
||||
{
|
||||
xilinx_dsp_cascade_pm pm(module, module->selected_cells());
|
||||
pm.run_xilinx_dsp_cascade();
|
||||
|
|
|
@ -1,3 +1,57 @@
|
|||
// This file describes the main pattern matcher setup (of three total) that
|
||||
// forms the `xilinx_dsp` pass described in xilinx_dsp.cc
|
||||
// At a high level, it works as follows:
|
||||
// ( 1) Starting from a DSP48E1 cell
|
||||
// ( 2) Match the driver of the 'A' input to a possible $dff cell (ADREG)
|
||||
// (attached to at most two $mux cells that implement clock-enable or
|
||||
// reset functionality, using a subpattern discussed below)
|
||||
// If ADREG matched, treat 'A' input as input of ADREG
|
||||
// ( 3) Match the driver of the 'A' and 'D' inputs for a possible $add cell
|
||||
// (pre-adder)
|
||||
// ( 4) If pre-adder was present, find match 'A' input for A2REG
|
||||
// If pre-adder was not present, move ADREG to A2REG
|
||||
// If A2REG, then match 'A' input for A1REG
|
||||
// ( 5) Match 'B' input for B2REG
|
||||
// If B2REG, then match 'B' input for B1REG
|
||||
// ( 6) Match 'D' input for DREG
|
||||
// ( 7) Match 'P' output that exclusively drives an MREG
|
||||
// ( 8) Match 'P' output that exclusively drives one of two inputs to an $add
|
||||
// cell (post-adder).
|
||||
// The other input to the adder is assumed to come in from the 'C' input
|
||||
// (note: 'P' -> 'C' connections that exist for accumulators are
|
||||
// recognised in xilinx_dsp.cc).
|
||||
// ( 9) Match 'P' output that exclusively drives a PREG
|
||||
// (10) If post-adder and PREG both present, match for a $mux cell driving
|
||||
// the 'C' input, where one of the $mux's inputs is the PREG output.
|
||||
// This indicates an accumulator situation, and one where a $mux exists
|
||||
// to override the accumulated value:
|
||||
// +--------------------------------+
|
||||
// | ____ |
|
||||
// +--| \ |
|
||||
// |$mux|-+ |
|
||||
// 'C' ---|____/ | |
|
||||
// | /-------\ +----+ |
|
||||
// +----+ +-| post- |___|PREG|---+ 'P'
|
||||
// |MREG|------ | adder | +----+
|
||||
// +----+ \-------/
|
||||
// (11) If PREG present, match for a greater-than-or-equal $ge cell attached
|
||||
// to the 'P' output where it is compared to a constant that is a
|
||||
// power-of-2: e.g. `assign overflow = (PREG >= 2**40);`
|
||||
// In this scenario, the pattern detector functionality of a DSP48E1 can
|
||||
// be used to implement this function
|
||||
// Notes:
|
||||
// - The intention of this pattern matcher is for it to be compatible with
|
||||
// DSP48E1 cells inferred from multiply operations by Yosys, as well as for
|
||||
// user instantiations that may already contain the cells being packed...
|
||||
// (though the latter is currently untested)
|
||||
// - Since the $dff-with-optional-clock-enable-or-reset-mux pattern is used
|
||||
// for each *REG match, it has been factored out into two subpatterns:
|
||||
// in_dffe and out_dffe located at the bottom of this file.
|
||||
// - Matching for pattern detector features is currently incomplete. For
|
||||
// example, matching for underflow as well as overflow detection is
|
||||
// possible, as would auto-reset, enabling saturated arithmetic, detecting
|
||||
// custom patterns, etc.
|
||||
|
||||
pattern xilinx_dsp_pack
|
||||
|
||||
state <SigBit> clock
|
||||
|
@ -5,12 +59,11 @@ state <SigSpec> sigA sigB sigC sigD sigM sigP
|
|||
state <IdString> postAddAB postAddMuxAB
|
||||
state <bool> ffA1cepol ffA2cepol ffADcepol ffB1cepol ffB2cepol ffDcepol ffMcepol ffPcepol
|
||||
state <bool> ffArstpol ffADrstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol
|
||||
|
||||
state <Cell*> ffAD ffADcemux ffADrstmux ffA1 ffA1cemux ffA1rstmux ffA2 ffA2cemux ffA2rstmux
|
||||
state <Cell*> ffB1 ffB1cemux ffB1rstmux ffB2 ffB2cemux ffB2rstmux
|
||||
state <Cell*> ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux
|
||||
|
||||
// subpattern
|
||||
// Variables used for subpatterns
|
||||
state <SigSpec> argQ argD
|
||||
state <bool> ffcepol ffrstpol
|
||||
state <int> ffoffset
|
||||
|
@ -19,6 +72,7 @@ udata <SigBit> dffclock
|
|||
udata <Cell*> dff dffcemux dffrstmux
|
||||
udata <bool> dffcepol dffrstpol
|
||||
|
||||
// (1) Starting from a DSP48E1 cell
|
||||
match dsp
|
||||
select dsp->type.in(\DSP48E1)
|
||||
endmatch
|
||||
|
@ -50,17 +104,21 @@ code sigA sigB sigC sigD sigM clock
|
|||
sigM.append(P[i]);
|
||||
}
|
||||
log_assert(nusers(P.extract_end(i)) <= 1);
|
||||
// This sigM could have no users if a downstream sink (e.g. $add) is
|
||||
// narrower than $mul result, for example
|
||||
if (sigM.empty())
|
||||
reject;
|
||||
}
|
||||
else
|
||||
sigM = P;
|
||||
// This sigM could have no users if downstream $add
|
||||
// is narrower than $mul result, for example
|
||||
if (sigM.empty())
|
||||
reject;
|
||||
|
||||
clock = port(dsp, \CLK, SigBit());
|
||||
endcode
|
||||
|
||||
// (2) Match the driver of the 'A' input to a possible $dff cell (ADREG)
|
||||
// (attached to at most two $mux cells that implement clock-enable or
|
||||
// reset functionality, using a subpattern discussed above)
|
||||
// If matched, treat 'A' input as input of ADREG
|
||||
code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock
|
||||
if (param(dsp, \ADREG).as_int() == 0) {
|
||||
argQ = sigA;
|
||||
|
@ -81,6 +139,8 @@ code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock
|
|||
}
|
||||
endcode
|
||||
|
||||
// (3) Match the driver of the 'A' and 'D' inputs for a possible $add cell
|
||||
// (pre-adder)
|
||||
match preAdd
|
||||
if sigD.empty() || sigD.is_fully_zero()
|
||||
// Ensure that preAdder not already used
|
||||
|
@ -106,11 +166,12 @@ code sigA sigD
|
|||
if (preAdd) {
|
||||
sigA = port(preAdd, \A);
|
||||
sigD = port(preAdd, \B);
|
||||
if (GetSize(sigA) < GetSize(sigD))
|
||||
std::swap(sigA, sigD);
|
||||
}
|
||||
endcode
|
||||
|
||||
// (4) If pre-adder was present, find match 'A' input for A2REG
|
||||
// If pre-adder was not present, move ADREG to A2REG
|
||||
// Then match 'A' input for A1REG
|
||||
code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock ffA2 ffA2cemux ffA2rstmux ffA2cepol ffArstpol ffA1 ffA1cemux ffA1rstmux ffA1cepol
|
||||
// Only search for ffA2 if there was a pre-adder
|
||||
// (otherwise ffA2 would have been matched as ffAD)
|
||||
|
@ -173,6 +234,8 @@ ffA1_end: ;
|
|||
}
|
||||
endcode
|
||||
|
||||
// (5) Match 'B' input for B2REG
|
||||
// If B2REG, then match 'B' input for B1REG
|
||||
code argQ ffB2 ffB2cemux ffB2rstmux ffB2cepol ffBrstpol sigB clock ffB1 ffB1cemux ffB1rstmux ffB1cepol
|
||||
if (param(dsp, \BREG).as_int() == 0) {
|
||||
argQ = sigB;
|
||||
|
@ -222,6 +285,7 @@ ffB1_end: ;
|
|||
}
|
||||
endcode
|
||||
|
||||
// (6) Match 'D' input for DREG
|
||||
code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock
|
||||
if (param(dsp, \DREG).as_int() == 0) {
|
||||
argQ = sigD;
|
||||
|
@ -242,6 +306,7 @@ code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock
|
|||
}
|
||||
endcode
|
||||
|
||||
// (7) Match 'P' output that exclusively drives an MREG
|
||||
code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock
|
||||
if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) {
|
||||
argD = sigM;
|
||||
|
@ -263,6 +328,11 @@ code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock
|
|||
sigP = sigM;
|
||||
endcode
|
||||
|
||||
// (8) Match 'P' output that exclusively drives one of two inputs to an $add
|
||||
// cell (post-adder).
|
||||
// The other input to the adder is assumed to come in from the 'C' input
|
||||
// (note: 'P' -> 'C' connections that exist for accumulators are
|
||||
// recognised in xilinx_dsp.cc).
|
||||
match postAdd
|
||||
// Ensure that Z mux is not already used
|
||||
if port(dsp, \OPMODE, SigSpec()).extract(4,3).is_fully_zero()
|
||||
|
@ -291,6 +361,7 @@ code sigC sigP
|
|||
}
|
||||
endcode
|
||||
|
||||
// (9) Match 'P' output that exclusively drives a PREG
|
||||
code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock
|
||||
if (param(dsp, \PREG).as_int() == 0) {
|
||||
int users = 2;
|
||||
|
@ -316,6 +387,19 @@ code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock
|
|||
}
|
||||
endcode
|
||||
|
||||
// (10) If post-adder and PREG both present, match for a $mux cell driving
|
||||
// the 'C' input, where one of the $mux's inputs is the PREG output.
|
||||
// This indicates an accumulator situation, and one where a $mux exists
|
||||
// to override the accumulated value:
|
||||
// +--------------------------------+
|
||||
// | ____ |
|
||||
// +--| \ |
|
||||
// |$mux|-+ |
|
||||
// 'C' ---|____/ | |
|
||||
// | /-------\ +----+ |
|
||||
// +----+ +-| post- |___|PREG|---+ 'P'
|
||||
// |MREG|------ | adder | +----+
|
||||
// +----+ \-------/
|
||||
match postAddMux
|
||||
if postAdd
|
||||
if ffP
|
||||
|
@ -333,6 +417,11 @@ code sigC
|
|||
sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A);
|
||||
endcode
|
||||
|
||||
// (11) If PREG present, match for a greater-than-or-equal $ge cell attached to
|
||||
// the 'P' output where it is compared to a constant that is a power-of-2:
|
||||
// e.g. `assign overflow = (PREG >= 2**40);`
|
||||
// In this scenario, the pattern detector functionality of a DSP48E1 can
|
||||
// be used to implement this function
|
||||
match overflow
|
||||
if ffP
|
||||
if param(dsp, \USE_PATTERN_DETECT, Const("NO_PATDET")).decode_string() == "NO_PATDET"
|
||||
|
@ -351,22 +440,45 @@ endcode
|
|||
|
||||
// #######################
|
||||
|
||||
// Subpattern for matching against input registers, based on knowledge of the
|
||||
// 'Q' input. Typically, identifying registers with clock-enable and reset
|
||||
// capability would be a task handled by other Yosys passes such as
|
||||
// dff2dffe, but since DSP inference happens much before this, these patterns
|
||||
// have to be manually identified.
|
||||
// At a high level:
|
||||
// (1) Starting from a $dff cell that (partially or fully) drives the given
|
||||
// 'Q' argument
|
||||
// (2) Match for a $mux cell implementing synchronous reset semantics ---
|
||||
// one that exclusively drives the 'D' input of the $dff, with one of its
|
||||
// $mux inputs being fully zero
|
||||
// (3) Match for a $mux cell implementing clock enable semantics --- one that
|
||||
// exclusively drives the 'D' input of the $dff (or the other input of
|
||||
// the reset $mux) and where one of this $mux's inputs is connected to
|
||||
// the 'Q' output of the $dff
|
||||
subpattern in_dffe
|
||||
arg argD argQ clock
|
||||
|
||||
code
|
||||
dff = nullptr;
|
||||
for (auto c : argQ.chunks()) {
|
||||
for (const auto &c : argQ.chunks()) {
|
||||
// Abandon matches when 'Q' is a constant
|
||||
if (!c.wire)
|
||||
reject;
|
||||
// Abandon matches when 'Q' has the keep attribute set
|
||||
if (c.wire->get_bool_attribute(\keep))
|
||||
reject;
|
||||
Const init = c.wire->attributes.at(\init, State::Sx);
|
||||
if (!init.is_fully_undef() && !init.is_fully_zero())
|
||||
reject;
|
||||
// Abandon matches when 'Q' has a non-zero init attribute set
|
||||
// (not supported by DSP48E1)
|
||||
Const init = c.wire->attributes.at(\init, Const());
|
||||
if (!init.empty())
|
||||
for (auto b : init.extract(c.offset, c.width))
|
||||
if (b != State::Sx && b != State::S0)
|
||||
reject;
|
||||
}
|
||||
endcode
|
||||
|
||||
// (1) Starting from a $dff cell that (partially or fully) drives the given
|
||||
// 'Q' argument
|
||||
match ff
|
||||
select ff->type.in($dff)
|
||||
// DSP48E1 does not support clock inversion
|
||||
|
@ -379,14 +491,12 @@ match ff
|
|||
filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
|
||||
filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
|
||||
|
||||
filter clock == SigBit() || port(ff, \CLK) == clock
|
||||
|
||||
set ffoffset offset
|
||||
endmatch
|
||||
|
||||
code argQ argD
|
||||
{
|
||||
if (clock != SigBit() && port(ff, \CLK) != clock)
|
||||
reject;
|
||||
|
||||
SigSpec Q = port(ff, \Q);
|
||||
dff = ff;
|
||||
dffclock = port(ff, \CLK);
|
||||
|
@ -398,9 +508,11 @@ code argQ argD
|
|||
// has two (ff, ffrstmux) users
|
||||
if (nusers(dffD) > 2)
|
||||
argD = SigSpec();
|
||||
}
|
||||
endcode
|
||||
|
||||
// (2) Match for a $mux cell implementing synchronous reset semantics ---
|
||||
// one that exclusively drives the 'D' input of the $dff, with one of the $mux
|
||||
// inputs being fully zero
|
||||
match ffrstmux
|
||||
if !argD.empty()
|
||||
select ffrstmux->type.in($mux)
|
||||
|
@ -432,6 +544,10 @@ code argD
|
|||
dffrstmux = nullptr;
|
||||
endcode
|
||||
|
||||
// (3) Match for a $mux cell implementing clock enable semantics --- one that
|
||||
// exclusively drives the 'D' input of the $dff (or the other input of
|
||||
// the reset $mux) and where one of this $mux's inputs is connected to
|
||||
// the 'Q' output of the $dff
|
||||
match ffcemux
|
||||
if !argD.empty()
|
||||
select ffcemux->type.in($mux)
|
||||
|
@ -456,16 +572,32 @@ endcode
|
|||
|
||||
// #######################
|
||||
|
||||
// Subpattern for matching against output registers, based on knowledge of the
|
||||
// 'D' input.
|
||||
// At a high level:
|
||||
// (1) Starting from an optional $mux cell that implements clock enable
|
||||
// semantics --- one where the given 'D' argument (partially or fully)
|
||||
// drives one of its two inputs
|
||||
// (2) Starting from, or continuing onto, another optional $mux cell that
|
||||
// implements synchronous reset semantics --- one where the given 'D'
|
||||
// argument (or the clock enable $mux output) drives one of its two inputs
|
||||
// and where the other input is fully zero
|
||||
// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the
|
||||
// output of the previous clock enable or reset $mux cells)
|
||||
subpattern out_dffe
|
||||
arg argD argQ clock
|
||||
|
||||
code
|
||||
dff = nullptr;
|
||||
for (auto c : argD.chunks())
|
||||
// Abandon matches when 'D' has the keep attribute set
|
||||
if (c.wire->get_bool_attribute(\keep))
|
||||
reject;
|
||||
endcode
|
||||
|
||||
// (1) Starting from an optional $mux cell that implements clock enable
|
||||
// semantics --- one where the given 'D' argument (partially or fully)
|
||||
// drives one of its two inputs
|
||||
match ffcemux
|
||||
select ffcemux->type.in($mux)
|
||||
// ffcemux output must have two users: ffcemux and ff.D
|
||||
|
@ -504,6 +636,10 @@ code argD argQ
|
|||
}
|
||||
endcode
|
||||
|
||||
// (2) Starting from, or continuing onto, another optional $mux cell that
|
||||
// implements synchronous reset semantics --- one where the given 'D'
|
||||
// argument (or the clock enable $mux output) drives one of its two inputs
|
||||
// and where the other input is fully zero
|
||||
match ffrstmux
|
||||
select ffrstmux->type.in($mux)
|
||||
// ffrstmux output must have two users: ffrstmux and ff.D
|
||||
|
@ -542,6 +678,8 @@ code argD argQ
|
|||
}
|
||||
endcode
|
||||
|
||||
// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the
|
||||
// output of the previous clock enable or reset $mux cells)
|
||||
match ff
|
||||
select ff->type.in($dff)
|
||||
// DSP48E1 does not support clock inversion
|
||||
|
@ -558,32 +696,30 @@ match ff
|
|||
// Check that FF.Q is connected to CE-mux
|
||||
filter !ffcemux || port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
|
||||
|
||||
filter clock == SigBit() || port(ff, \CLK) == clock
|
||||
|
||||
set ffoffset offset
|
||||
endmatch
|
||||
|
||||
code argQ
|
||||
if (ff) {
|
||||
if (clock != SigBit() && port(ff, \CLK) != clock)
|
||||
reject;
|
||||
|
||||
SigSpec D = port(ff, \D);
|
||||
SigSpec Q = port(ff, \Q);
|
||||
if (!ffcemux) {
|
||||
argQ = argD;
|
||||
argQ.replace(D, Q);
|
||||
}
|
||||
|
||||
for (auto c : argQ.chunks()) {
|
||||
Const init = c.wire->attributes.at(\init, State::Sx);
|
||||
if (!init.is_fully_undef() && !init.is_fully_zero())
|
||||
reject;
|
||||
}
|
||||
|
||||
dff = ff;
|
||||
dffQ = argQ;
|
||||
dffclock = port(ff, \CLK);
|
||||
SigSpec D = port(ff, \D);
|
||||
SigSpec Q = port(ff, \Q);
|
||||
if (!ffcemux) {
|
||||
argQ = argD;
|
||||
argQ.replace(D, Q);
|
||||
}
|
||||
// No enable/reset mux possible without flop
|
||||
else if (dffcemux || dffrstmux)
|
||||
reject;
|
||||
|
||||
// Abandon matches when 'Q' has a non-zero init attribute set
|
||||
// (not supported by DSP48E1)
|
||||
for (auto c : argQ.chunks()) {
|
||||
Const init = c.wire->attributes.at(\init, Const());
|
||||
if (!init.empty())
|
||||
for (auto b : init.extract(c.offset, c.width))
|
||||
if (b != State::Sx && b != State::S0)
|
||||
reject;
|
||||
}
|
||||
|
||||
dff = ff;
|
||||
dffQ = argQ;
|
||||
dffclock = port(ff, \CLK);
|
||||
endcode
|
||||
|
|
|
@ -1,3 +1,26 @@
|
|||
// This file describes the second of three pattern matcher setups that
|
||||
// form the `xilinx_dsp` pass described in xilinx_dsp.cc
|
||||
// At a high level, it works as follows:
|
||||
// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already,
|
||||
// and (b) uses the 'C' port
|
||||
// (2) Match the driver of the 'C' input to a possible $dff cell (CREG)
|
||||
// (attached to at most two $mux cells that implement clock-enable or
|
||||
// reset functionality, using a subpattern discussed below)
|
||||
// Notes:
|
||||
// - Running CREG packing after xilinx_dsp_pack is necessary since there is no
|
||||
// guarantee that the cell ordering corresponds to the "expected" case (i.e.
|
||||
// the order in which they appear in the source) thus the possibility existed
|
||||
// that a register got packed as a CREG into a downstream DSP that should
|
||||
// have otherwise been a PREG of an upstream DSP that had not been visited
|
||||
// yet
|
||||
// - The reason this is separated out from the xilinx_dsp.pmg file is
|
||||
// for efficiency --- each *.pmg file creates a class of the same basename,
|
||||
// which when constructed, creates a custom database tailored to the
|
||||
// pattern(s) contained within. Since the pattern in this file must be
|
||||
// executed after the pattern contained in xilinx_dsp.pmg, it is necessary
|
||||
// to reconstruct this database. Separating the two patterns into
|
||||
// independent files causes two smaller, more specific, databases.
|
||||
|
||||
pattern xilinx_dsp_packC
|
||||
|
||||
udata <std::function<SigSpec(const SigSpec&)>> unextend
|
||||
|
@ -6,7 +29,7 @@ state <SigSpec> sigC sigP
|
|||
state <bool> ffCcepol ffCrstpol
|
||||
state <Cell*> ffC ffCcemux ffCrstmux
|
||||
|
||||
// subpattern
|
||||
// Variables used for subpatterns
|
||||
state <SigSpec> argQ argD
|
||||
state <bool> ffcepol ffrstpol
|
||||
state <int> ffoffset
|
||||
|
@ -15,13 +38,15 @@ udata <SigBit> dffclock
|
|||
udata <Cell*> dff dffcemux dffrstmux
|
||||
udata <bool> dffcepol dffrstpol
|
||||
|
||||
// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already,
|
||||
// and (b) uses the 'C' port
|
||||
match dsp
|
||||
select dsp->type.in(\DSP48E1)
|
||||
select param(dsp, \CREG, 1).as_int() == 0
|
||||
select nusers(port(dsp, \C, SigSpec())) > 1
|
||||
endmatch
|
||||
|
||||
code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC sigP clock
|
||||
code sigC sigP clock
|
||||
unextend = [](const SigSpec &sig) {
|
||||
int i;
|
||||
for (i = GetSize(sig)-1; i > 0; i--)
|
||||
|
@ -48,11 +73,13 @@ code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC sigP clock
|
|||
else
|
||||
sigP = P;
|
||||
|
||||
if (sigC == sigP)
|
||||
reject;
|
||||
|
||||
clock = port(dsp, \CLK, SigBit());
|
||||
endcode
|
||||
|
||||
// (2) Match the driver of the 'C' input to a possible $dff cell (CREG)
|
||||
// (attached to at most two $mux cells that implement clock-enable or
|
||||
// reset functionality, using the in_dffe subpattern)
|
||||
code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock
|
||||
argQ = sigC;
|
||||
subpattern(in_dffe);
|
||||
if (dff) {
|
||||
|
@ -77,22 +104,44 @@ endcode
|
|||
|
||||
// #######################
|
||||
|
||||
// Subpattern for matching against input registers, based on knowledge of the
|
||||
// 'Q' input. Typically, identifying registers with clock-enable and reset
|
||||
// capability would be a task handled by other Yosys passes such as
|
||||
// dff2dffe, but since DSP inference happens much before this, these patterns
|
||||
// have to be manually identified.
|
||||
// At a high level:
|
||||
// (1) Starting from a $dff cell that (partially or fully) drives the given
|
||||
// 'Q' argument
|
||||
// (2) Match for a $mux cell implementing synchronous reset semantics ---
|
||||
// one that exclusively drives the 'D' input of the $dff, with one of its
|
||||
// $mux inputs being fully zero
|
||||
// (3) Match for a $mux cell implementing clock enable semantics --- one that
|
||||
// exclusively drives the 'D' input of the $dff (or the other input of
|
||||
// the reset $mux) and where one of this $mux's inputs is connected to
|
||||
// the 'Q' output of the $dff
|
||||
subpattern in_dffe
|
||||
arg argD argQ clock
|
||||
|
||||
code
|
||||
dff = nullptr;
|
||||
for (auto c : argQ.chunks()) {
|
||||
for (const auto &c : argQ.chunks()) {
|
||||
// Abandon matches when 'Q' is a constant
|
||||
if (!c.wire)
|
||||
reject;
|
||||
// Abandon matches when 'Q' has the keep attribute set
|
||||
if (c.wire->get_bool_attribute(\keep))
|
||||
reject;
|
||||
Const init = c.wire->attributes.at(\init, State::Sx);
|
||||
if (!init.is_fully_undef() && !init.is_fully_zero())
|
||||
reject;
|
||||
// Abandon matches when 'Q' has a non-zero init attribute set
|
||||
// (not supported by DSP48E1)
|
||||
Const init = c.wire->attributes.at(\init, Const());
|
||||
for (auto b : init.extract(c.offset, c.width))
|
||||
if (b != State::Sx && b != State::S0)
|
||||
reject;
|
||||
}
|
||||
endcode
|
||||
|
||||
// (1) Starting from a $dff cell that (partially or fully) drives the given
|
||||
// 'Q' argument
|
||||
match ff
|
||||
select ff->type.in($dff)
|
||||
// DSP48E1 does not support clock inversion
|
||||
|
@ -105,14 +154,12 @@ match ff
|
|||
filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
|
||||
filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
|
||||
|
||||
filter clock == SigBit() || port(ff, \CLK) == clock
|
||||
|
||||
set ffoffset offset
|
||||
endmatch
|
||||
|
||||
code argQ argD
|
||||
{
|
||||
if (clock != SigBit() && port(ff, \CLK) != clock)
|
||||
reject;
|
||||
|
||||
SigSpec Q = port(ff, \Q);
|
||||
dff = ff;
|
||||
dffclock = port(ff, \CLK);
|
||||
|
@ -124,9 +171,11 @@ code argQ argD
|
|||
// has two (ff, ffrstmux) users
|
||||
if (nusers(dffD) > 2)
|
||||
argD = SigSpec();
|
||||
}
|
||||
endcode
|
||||
|
||||
// (2) Match for a $mux cell implementing synchronous reset semantics ---
|
||||
// exclusively drives the 'D' input of the $dff, with one of the $mux
|
||||
// inputs being fully zero
|
||||
match ffrstmux
|
||||
if !argD.empty()
|
||||
select ffrstmux->type.in($mux)
|
||||
|
@ -158,6 +207,10 @@ code argD
|
|||
dffrstmux = nullptr;
|
||||
endcode
|
||||
|
||||
// (3) Match for a $mux cell implementing clock enable semantics --- one that
|
||||
// exclusively drives the 'D' input of the $dff (or the other input of
|
||||
// the reset $mux) and where one of this $mux's inputs is connected to
|
||||
// the 'Q' output of the $dff
|
||||
match ffcemux
|
||||
if !argD.empty()
|
||||
select ffcemux->type.in($mux)
|
||||
|
|
|
@ -1,3 +1,46 @@
|
|||
// This file describes the third of three pattern matcher setups that
|
||||
// forms the `xilinx_dsp` pass described in xilinx_dsp.cc
|
||||
// At a high level, it works as follows:
|
||||
// (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer
|
||||
// (controlled by OPMODE[6:4]) set to zero and (b) doesn't already
|
||||
// use the 'PCOUT' port
|
||||
// (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled,
|
||||
// (b) has its Z multiplexer output set to the 'C' port, which is
|
||||
// driven by the 'P' output of the previous DSP cell, and (c) has its
|
||||
// 'PCIN' port unused
|
||||
// (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the
|
||||
// previous DSP cell right-shifted by 17 bits
|
||||
// (3) For this subsequent DSP48E1 match (i.e. PCOUT -> PCIN cascade exists)
|
||||
// if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this
|
||||
// DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already
|
||||
// have an ACOUT -> ACIN cascade, (d) the previous DSP does not already
|
||||
// use its ACOUT port, then examine if an ACOUT -> ACIN cascade
|
||||
// opportunity exists by matching for a $dff-with-optional-clock-enable-
|
||||
// or-reset and checking that the 'D' input of this register is the same
|
||||
// as the 'A' input of the previous DSP
|
||||
// (4) Same as (3) but for BCOUT -> BCIN cascade
|
||||
// (5) Recursively go to (2.1) until no more matches possible, keeping track
|
||||
// of the longest possible chain found
|
||||
// (6) The longest chain is then divided into chunks of no more than
|
||||
// MAX_DSP_CASCADE in length (to prevent long cascades that exceed the
|
||||
// height of a DSP column) with each DSP in each chunk being rewritten
|
||||
// to use [ABP]COUT -> [ABP]CIN cascading as appropriate
|
||||
// Notes:
|
||||
// - Currently, [AB]COUT -> [AB]CIN cascades (3 or 4) are only considered
|
||||
// if a PCOUT -> PCIN cascade (2.1 or 2.2) is first identified; this need
|
||||
// not be the case --- [AB] cascades can exist independently of a P cascade
|
||||
// (though all three cascades must come from the same DSP). This situation
|
||||
// is not handled currently.
|
||||
// - In addition, [AB]COUT -> [AB]CIN cascades (3 or 4) are currently
|
||||
// conservative in that they examine the situation where (a) the previous
|
||||
// DSP has [AB]2REG or [AB]1REG enabled, (b) that the downstream DSP has no
|
||||
// registers enabled, and (c) that there exists only one additional register
|
||||
// between the upstream and downstream DSPs. This can certainly be relaxed
|
||||
// to identify situations ranging from (i) neither DSP uses any registers,
|
||||
// to (ii) upstream DSP has 2 registers, downstream DSP has 2 registers, and
|
||||
// there exists a further 2 registers between them. This remains a TODO
|
||||
// item.
|
||||
|
||||
pattern xilinx_dsp_cascade
|
||||
|
||||
udata <std::function<SigSpec(const SigSpec&)>> unextend
|
||||
|
@ -6,7 +49,7 @@ state <Cell*> next
|
|||
state <SigSpec> clock
|
||||
state <int> AREG BREG
|
||||
|
||||
// subpattern
|
||||
// Variables used for subpatterns
|
||||
state <SigSpec> argQ argD
|
||||
state <bool> ffcepol ffrstpol
|
||||
state <int> ffoffset
|
||||
|
@ -19,12 +62,19 @@ code
|
|||
#define MAX_DSP_CASCADE 20
|
||||
endcode
|
||||
|
||||
// (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer
|
||||
// (controlled by OPMODE[6:4]) set to zero and (b) doesn't already
|
||||
// use the 'PCOUT' port
|
||||
match first
|
||||
select first->type.in(\DSP48E1)
|
||||
select port(first, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("000")
|
||||
select nusers(port(first, \PCOUT, SigSpec())) <= 1
|
||||
endmatch
|
||||
|
||||
// (6) The longest chain is then divided into chunks of no more than
|
||||
// MAX_DSP_CASCADE in length (to prevent long cascades that exceed the
|
||||
// height of a DSP column) with each DSP in each chunk being rewritten
|
||||
// to use [ABP]COUT -> [ABP]CIN cascading as appropriate
|
||||
code
|
||||
longest_chain.clear();
|
||||
chain.emplace_back(first, -1, -1, -1);
|
||||
|
@ -106,6 +156,10 @@ subpattern tail
|
|||
arg first
|
||||
arg next
|
||||
|
||||
// (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled,
|
||||
// (b) has its Z multiplexer output set to the 'C' port, which is
|
||||
// driven by the 'P' output of the previous DSP cell, and (c) has its
|
||||
// 'PCIN' port unused
|
||||
match nextP
|
||||
select nextP->type.in(\DSP48E1)
|
||||
select !param(nextP, \CREG, State::S1).as_bool()
|
||||
|
@ -116,6 +170,8 @@ match nextP
|
|||
semioptional
|
||||
endmatch
|
||||
|
||||
// (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the
|
||||
// previous DSP cell right-shifted by 17 bits
|
||||
match nextP_shift17
|
||||
if !nextP
|
||||
select nextP_shift17->type.in(\DSP48E1)
|
||||
|
@ -145,6 +201,14 @@ code next
|
|||
}
|
||||
endcode
|
||||
|
||||
// (3) For this subsequent DSP48E1 match (i.e. PCOUT -> PCIN cascade exists)
|
||||
// if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this
|
||||
// DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already
|
||||
// have an ACOUT -> ACIN cascade, (d) the previous DSP does not already
|
||||
// use its ACOUT port, then examine if an ACOUT -> ACIN cascade
|
||||
// opportunity exists by matching for a $dff-with-optional-clock-enable-
|
||||
// or-reset and checking that the 'D' input of this register is the same
|
||||
// as the 'A' input of the previous DSP
|
||||
code argQ clock AREG
|
||||
AREG = -1;
|
||||
if (next) {
|
||||
|
@ -152,7 +216,6 @@ code argQ clock AREG
|
|||
if (param(prev, \AREG, 2).as_int() > 0 &&
|
||||
param(next, \AREG, 2).as_int() > 0 &&
|
||||
param(next, \A_INPUT, Const("DIRECT")).decode_string() == "DIRECT" &&
|
||||
port(next, \ACIN, SigSpec()).is_fully_zero() &&
|
||||
nusers(port(prev, \ACOUT, SigSpec())) <= 1) {
|
||||
argQ = unextend(port(next, \A));
|
||||
clock = port(prev, \CLK);
|
||||
|
@ -174,6 +237,7 @@ reject_AREG: ;
|
|||
}
|
||||
endcode
|
||||
|
||||
// (4) Same as (3) but for BCOUT -> BCIN cascade
|
||||
code argQ clock BREG
|
||||
BREG = -1;
|
||||
if (next) {
|
||||
|
@ -203,13 +267,14 @@ reject_BREG: ;
|
|||
}
|
||||
endcode
|
||||
|
||||
// (5) Recursively go to (2.1) until no more matches possible, recording the
|
||||
// longest possible chain
|
||||
code
|
||||
if (next) {
|
||||
chain.emplace_back(next, nextP_shift17 ? 17 : nextP ? 0 : -1, AREG, BREG);
|
||||
|
||||
SigSpec sigC = unextend(port(next, \C));
|
||||
|
||||
// TODO: Cannot use 'reject' since semioptional
|
||||
if (nextP_shift17) {
|
||||
if (GetSize(sigC)+17 <= GetSize(port(std::get<0>(chain.back()), \P)) &&
|
||||
port(std::get<0>(chain.back()), \P).extract(17, GetSize(sigC)) != sigC)
|
||||
|
@ -232,22 +297,44 @@ endcode
|
|||
|
||||
// #######################
|
||||
|
||||
// Subpattern for matching against input registers, based on knowledge of the
|
||||
// 'Q' input. Typically, identifying registers with clock-enable and reset
|
||||
// capability would be a task handled by other Yosys passes such as
|
||||
// dff2dffe, but since DSP inference happens much before this, these patterns
|
||||
// have to be manually identified.
|
||||
// At a high level:
|
||||
// (1) Starting from a $dff cell that (partially or fully) drives the given
|
||||
// 'Q' argument
|
||||
// (2) Match for a $mux cell implementing synchronous reset semantics ---
|
||||
// one that exclusively drives the 'D' input of the $dff, with one of its
|
||||
// $mux inputs being fully zero
|
||||
// (3) Match for a $mux cell implementing clock enable semantics --- one that
|
||||
// exclusively drives the 'D' input of the $dff (or the other input of
|
||||
// the reset $mux) and where one of this $mux's inputs is connected to
|
||||
// the 'Q' output of the $dff
|
||||
subpattern in_dffe
|
||||
arg argD argQ clock
|
||||
|
||||
code
|
||||
dff = nullptr;
|
||||
for (auto c : argQ.chunks()) {
|
||||
for (const auto &c : argQ.chunks()) {
|
||||
// Abandon matches when 'Q' is a constant
|
||||
if (!c.wire)
|
||||
reject;
|
||||
// Abandon matches when 'Q' has the keep attribute set
|
||||
if (c.wire->get_bool_attribute(\keep))
|
||||
reject;
|
||||
Const init = c.wire->attributes.at(\init, State::Sx);
|
||||
if (!init.is_fully_undef() && !init.is_fully_zero())
|
||||
reject;
|
||||
// Abandon matches when 'Q' has a non-zero init attribute set
|
||||
// (not supported by DSP48E1)
|
||||
Const init = c.wire->attributes.at(\init, Const());
|
||||
for (auto b : init.extract(c.offset, c.width))
|
||||
if (b != State::Sx && b != State::S0)
|
||||
reject;
|
||||
}
|
||||
endcode
|
||||
|
||||
// (1) Starting from a $dff cell that (partially or fully) drives the given
|
||||
// 'Q' argument
|
||||
match ff
|
||||
select ff->type.in($dff)
|
||||
// DSP48E1 does not support clock inversion
|
||||
|
@ -260,14 +347,12 @@ match ff
|
|||
filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
|
||||
filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
|
||||
|
||||
filter clock == SigBit() || port(ff, \CLK) == clock
|
||||
|
||||
set ffoffset offset
|
||||
endmatch
|
||||
|
||||
code argQ argD
|
||||
{
|
||||
if (clock != SigBit() && port(ff, \CLK) != clock)
|
||||
reject;
|
||||
|
||||
SigSpec Q = port(ff, \Q);
|
||||
dff = ff;
|
||||
dffclock = port(ff, \CLK);
|
||||
|
@ -279,9 +364,11 @@ code argQ argD
|
|||
// has two (ff, ffrstmux) users
|
||||
if (nusers(dffD) > 2)
|
||||
argD = SigSpec();
|
||||
}
|
||||
endcode
|
||||
|
||||
// (2) Match for a $mux cell implementing synchronous reset semantics ---
|
||||
// exclusively drives the 'D' input of the $dff, with one of the $mux
|
||||
// inputs being fully zero
|
||||
match ffrstmux
|
||||
if !argD.empty()
|
||||
select ffrstmux->type.in($mux)
|
||||
|
@ -313,6 +400,10 @@ code argD
|
|||
dffrstmux = nullptr;
|
||||
endcode
|
||||
|
||||
// (3) Match for a $mux cell implementing clock enable semantics --- one that
|
||||
// exclusively drives the 'D' input of the $dff (or the other input of
|
||||
// the reset $mux) and where one of this $mux's inputs is connected to
|
||||
// the 'Q' output of the $dff
|
||||
match ffcemux
|
||||
if !argD.empty()
|
||||
select ffcemux->type.in($mux)
|
||||
|
|
|
@ -340,13 +340,17 @@ struct SynthXilinxPass : public ScriptPass
|
|||
run("techmap -map +/cmp2lut.v -D LUT_WIDTH=6");
|
||||
}
|
||||
|
||||
if (check_label("map_dsp"), "(skip if '-nodsp')") {
|
||||
if (check_label("map_dsp", "(skip if '-nodsp')")) {
|
||||
if (!nodsp || help_mode) {
|
||||
// NB: Xilinx multipliers are signed only
|
||||
run("techmap -map +/mul2dsp.v -map +/xilinx/dsp_map.v -D DSP_A_MAXWIDTH=25 -D DSP_A_MAXWIDTH_PARTIAL=18 -D DSP_B_MAXWIDTH=18 "
|
||||
"-D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 " // Blocks Nx1 multipliers
|
||||
"-D DSP_Y_MINWIDTH=9 " // UG901 suggests small multiplies are those 4x4 and smaller
|
||||
"-D DSP_SIGNEDONLY=1 -D DSP_NAME=$__MUL25X18");
|
||||
run("techmap -map +/mul2dsp.v -map +/xilinx/dsp_map.v -D DSP_A_MAXWIDTH=25 "
|
||||
"-D DSP_A_MAXWIDTH_PARTIAL=18 -D DSP_B_MAXWIDTH=18 " // Partial multipliers are intentionally
|
||||
// limited to 18x18 in order to take
|
||||
// advantage of the (PCOUT << 17) -> PCIN
|
||||
// dedicated cascade chain capability
|
||||
"-D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 " // Blocks Nx1 multipliers
|
||||
"-D DSP_Y_MINWIDTH=9 " // UG901 suggests small multiplies are those 4x4 and smaller
|
||||
"-D DSP_SIGNEDONLY=1 -D DSP_NAME=$__MUL25X18");
|
||||
run("select a:mul2dsp");
|
||||
run("setattr -unset mul2dsp");
|
||||
run("opt_expr -fine");
|
||||
|
|
|
@ -1,14 +1,11 @@
|
|||
read_verilog latches.v
|
||||
design -save read
|
||||
|
||||
proc
|
||||
async2sync # converts latches to a 'sync' variant clocked by a 'super'-clock
|
||||
flatten
|
||||
synth_ice40
|
||||
equiv_opt -assert -map +/ice40/cells_sim.v synth_ice40 # equivalency check
|
||||
design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
|
||||
# Can't run any sort of equivalence check because latches are blown to LUTs
|
||||
#equiv_opt -async2sync -assert -map +/ice40/cells_sim.v synth_ice40 # equivalency check
|
||||
|
||||
design -load read
|
||||
#design -load preopt
|
||||
synth_ice40
|
||||
cd top
|
||||
select -assert-count 4 t:SB_LUT4
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
read_verilog <<EOT
|
||||
module top(input A, B, CI, output O, CO);
|
||||
SB_CARRY carry (
|
||||
.I0(A),
|
||||
.I1(B),
|
||||
.CI(CI),
|
||||
.CO(CO)
|
||||
);
|
||||
SB_LUT4 #(
|
||||
.LUT_INIT(16'b 0110_1001_1001_0110)
|
||||
) adder (
|
||||
.I0(1'b0),
|
||||
.I1(A),
|
||||
.I2(B),
|
||||
.I3(1'b0),
|
||||
.O(O)
|
||||
);
|
||||
endmodule
|
||||
EOT
|
||||
|
||||
ice40_wrapcarry
|
||||
select -assert-count 1 t:$__ICE40_CARRY_WRAPPER
|
|
@ -131,8 +131,8 @@ EOT
|
|||
proc
|
||||
equiv_opt -assert peepopt
|
||||
design -load postopt
|
||||
select -assert-count 1 t:$dff r:WIDTH=5 %i
|
||||
select -assert-count 1 t:$mux r:WIDTH=5 %i
|
||||
select -assert-count 1 t:$dff r:WIDTH=4 %i
|
||||
select -assert-count 1 t:$mux r:WIDTH=4 %i
|
||||
select -assert-count 0 t:$dff t:$mux %% t:* %D
|
||||
|
||||
####################
|
||||
|
@ -173,3 +173,41 @@ select -assert-count 1 t:$dff r:WIDTH=2 %i
|
|||
select -assert-count 2 t:$mux
|
||||
select -assert-count 2 t:$mux r:WIDTH=2 %i
|
||||
select -assert-count 0 t:$logic_not t:$dff t:$mux %% t:* %D
|
||||
|
||||
####################
|
||||
|
||||
design -reset
|
||||
read_verilog <<EOT
|
||||
module peepopt_dffmuxext_signed_rst_init(input clk, ce, rstn, input signed [1:0] i, output reg signed [3:0] o);
|
||||
initial o <= 4'b0010;
|
||||
always @(posedge clk) begin
|
||||
if (ce) o <= i;
|
||||
if (!rstn) o <= 4'b1111;
|
||||
end
|
||||
endmodule
|
||||
EOT
|
||||
|
||||
proc
|
||||
# NB: equiv_opt uses equiv_induct which covers
|
||||
# only the induction half of temporal induction
|
||||
# --- missing the base-case half
|
||||
# This makes it akin to `sat -tempinduct-inductonly`
|
||||
# instead of `sat -tempinduct-baseonly` or
|
||||
# `sat -tempinduct` which is necessary for this
|
||||
# testcase
|
||||
#equiv_opt -assert peepopt
|
||||
|
||||
design -save gold
|
||||
peepopt
|
||||
wreduce
|
||||
design -stash gate
|
||||
design -import gold -as gold
|
||||
design -import gate -as gate
|
||||
miter -equiv -flatten -make_assert -make_outputs gold gate miter
|
||||
sat -tempinduct -verify -prove-asserts -show-ports miter
|
||||
|
||||
design -load gate
|
||||
select -assert-count 1 t:$dff r:WIDTH=4 %i
|
||||
select -assert-count 2 t:$mux
|
||||
select -assert-count 2 t:$mux r:WIDTH=4 %i
|
||||
select -assert-count 0 t:$logic_not t:$dff t:$mux %% t:* %D
|
||||
|
|
|
@ -2,9 +2,7 @@ read_verilog latches.v
|
|||
|
||||
proc
|
||||
flatten
|
||||
equiv_opt -assert -run :prove -map +/xilinx/cells_sim.v synth_xilinx # equivalency check
|
||||
async2sync
|
||||
equiv_opt -assert -run prove: -map +/xilinx/cells_sim.v synth_xilinx # equivalency check
|
||||
equiv_opt -async2sync -assert -map +/xilinx/cells_sim.v synth_xilinx # equivalency check
|
||||
|
||||
design -load preopt
|
||||
synth_xilinx
|
||||
|
|
Loading…
Reference in New Issue