yosys/passes/pmgen/xilinx_dsp.pmg

491 lines
14 KiB
Plaintext

// This file describes the main pattern matcher setup (of three total) that
// forms the `xilinx_dsp` pass described in xilinx_dsp.cc
// At a high level, it works as follows:
// ( 1) Starting from a DSP48E1 cell
// ( 2) Match the driver of the 'A' input to a possible $sdffe cell (ADREG)
// If ADREG matched, treat 'A' input as input of ADREG
// ( 3) Match the driver of the 'A' and 'D' inputs for a possible $add cell
// (pre-adder)
// ( 4) If pre-adder was present, find match 'A' input for A2REG
// If pre-adder was not present, move ADREG to A2REG
// If A2REG, then match 'A' input for A1REG
// ( 5) Match 'B' input for B2REG
// If B2REG, then match 'B' input for B1REG
// ( 6) Match 'D' input for DREG
// ( 7) Match 'P' output that exclusively drives an MREG
// ( 8) Match 'P' output that exclusively drives one of two inputs to an $add
// cell (post-adder).
// The other input to the adder is assumed to come in from the 'C' input
// (note: 'P' -> 'C' connections that exist for accumulators are
// recognised in xilinx_dsp.cc).
// ( 9) Match 'P' output that exclusively drives a PREG
// (10) If post-adder and PREG both present, match for a $mux cell driving
// the 'C' input, where one of the $mux's inputs is the PREG output.
// This indicates an accumulator situation, and one where a $mux exists
// to override the accumulated value:
// +--------------------------------+
// | ____ |
// +--| \ |
// |$mux|-+ |
// 'C' ---|____/ | |
// | /-------\ +----+ |
// +----+ +-| post- |___|PREG|---+ 'P'
// |MREG|------ | adder | +----+
// +----+ \-------/
// (11) If PREG present, match for a greater-than-or-equal $ge cell attached
// to the 'P' output where it is compared to a constant that is a
// power-of-2: e.g. `assign overflow = (PREG >= 2**40);`
// In this scenario, the pattern detector functionality of a DSP48E1 can
// to implement this function
// Notes:
// - The intention of this pattern matcher is for it to be compatible with
// DSP48E1 cells inferred from multiply operations by Yosys, as well as for
// user instantiations that may already contain the cells being packed...
// (though the latter is currently untested)
// - Since the $sdffe pattern is used
// for each *REG match, it has been factored out into two subpatterns:
// in_dffe and out_dffe located at the bottom of this file.
// - Matching for pattern detector features is currently incomplete. For
// example, matching for underflow as well as overflow detection is
// possible, as would auto-reset, enabling saturated arithmetic, detecting
// custom patterns, etc.
pattern xilinx_dsp_pack
state <SigBit> clock
state <SigSpec> sigA sigB sigC sigD sigM sigP
state <IdString> postAddAB postAddMuxAB
state <Cell*> ffAD ffA1 ffA2
state <Cell*> ffB1 ffB2
state <Cell*> ffD ffM ffP
// Variables used for subpatterns
state <SigSpec> argQ argD
udata <SigSpec> dffD dffQ
udata <SigBit> dffclock
udata <Cell*> dff
// (1) Starting from a DSP48E1 cell
match dsp
select dsp->type.in(\DSP48E1)
endmatch
code sigA sigB sigC sigD sigM clock
auto unextend = [](const SigSpec &sig) {
int i;
for (i = GetSize(sig)-1; i > 0; i--)
if (sig[i] != sig[i-1])
break;
// Do not remove non-const sign bit
if (sig[i].wire)
++i;
return sig.extract(0, i);
};
sigA = unextend(port(dsp, \A));
sigB = unextend(port(dsp, \B));
sigC = port(dsp, \C, SigSpec());
sigD = port(dsp, \D, SigSpec());
SigSpec P = port(dsp, \P);
if (param(dsp, \USE_MULT).decode_string() == "MULTIPLY") {
// Only care about those bits that are used
int i;
for (i = GetSize(P)-1; i >= 0; i--)
if (nusers(P[i]) > 1)
break;
i++;
log_assert(nusers(P.extract_end(i)) <= 1);
// This sigM could have no users if downstream sinks (e.g. $add) is
// narrower than $mul result, for example
if (i == 0)
reject;
sigM = P.extract(0, i);
}
else
sigM = P;
clock = port(dsp, \CLK, SigBit());
endcode
// (2) Match the driver of the 'A' input to a possible $sdffe cell (ADREG)
// If matched, treat 'A' input as input of ADREG
code argQ ffAD sigA clock
if (param(dsp, \ADREG).as_int() == 0) {
argQ = sigA;
subpattern(in_dffe);
if (dff) {
ffAD = dff;
clock = dffclock;
sigA = dffD;
}
}
endcode
// (3) Match the driver of the 'A' and 'D' inputs for a possible $add cell
// (pre-adder)
match preAdd
if sigD.empty() || sigD.is_fully_zero()
// Ensure that preAdder not already used
if param(dsp, \USE_DPORT).decode_string() == "FALSE"
if port(dsp, \INMODE, Const(0, 5)).is_fully_zero()
select preAdd->type.in($add)
// Output has to be 25 bits or less
select GetSize(port(preAdd, \Y)) <= 25
select nusers(port(preAdd, \Y)) == 2
choice <IdString> AB {\A, \B}
// A port has to be 30 bits or less
select GetSize(port(preAdd, AB)) <= 30
define <IdString> BA (AB == \A ? \B : \A)
// D port has to be 25 bits or less
select GetSize(port(preAdd, BA)) <= 25
index <SigSpec> port(preAdd, \Y) === sigA
optional
endmatch
code sigA sigD
if (preAdd) {
sigA = port(preAdd, \A);
sigD = port(preAdd, \B);
}
endcode
// (4) If pre-adder was present, find match 'A' input for A2REG
// If pre-adder was not present, move ADREG to A2REG
// Then match 'A' input for A1REG
code argQ ffAD sigA clock ffA2 ffA1
// Only search for ffA2 if there was a pre-adder
// (otherwise ffA2 would have been matched as ffAD)
if (preAdd) {
if (param(dsp, \AREG).as_int() == 0) {
argQ = sigA;
subpattern(in_dffe);
if (dff) {
ffA2 = dff;
clock = dffclock;
sigA = dffD;
}
}
}
// And if there wasn't a pre-adder,
// move AD register to A
else if (ffAD) {
log_assert(!ffA2);
std::swap(ffA2, ffAD);
}
// Now attempt to match A1
if (ffA2) {
argQ = sigA;
subpattern(in_dffe);
if (dff) {
if (dff->type != ffA2->type)
goto ffA1_end;
if (dff->type.in($sdff, $sdffe, $sdffce)) {
if (param(dff, \SRST_POLARITY) != param(ffA2, \SRST_POLARITY))
goto ffA1_end;
if (port(dff, \SRST) != port(ffA2, \SRST))
goto ffA1_end;
}
if (dff->type.in($dffe, $sdffe, $sdffce)) {
if (param(dff, \EN_POLARITY) != param(ffA2, \EN_POLARITY))
goto ffA1_end;
if (port(dff, \EN) != port(ffA2, \EN))
goto ffA1_end;
}
ffA1 = dff;
clock = dffclock;
sigA = dffD;
ffA1_end: ;
}
}
endcode
// (5) Match 'B' input for B2REG
// If B2REG, then match 'B' input for B1REG
code argQ ffB2 sigB clock ffB1
if (param(dsp, \BREG).as_int() == 0) {
argQ = sigB;
subpattern(in_dffe);
if (dff) {
ffB2 = dff;
clock = dffclock;
sigB = dffD;
// Now attempt to match B1
if (ffB2) {
argQ = sigB;
subpattern(in_dffe);
if (dff) {
if (dff->type != ffB2->type)
goto ffB1_end;
if (dff->type.in($sdff, $sdffe, $sdffce)) {
if (param(dff, \SRST_POLARITY) != param(ffB2, \SRST_POLARITY))
goto ffB1_end;
if (port(dff, \SRST) != port(ffB2, \SRST))
goto ffB1_end;
}
if (dff->type.in($dffe, $sdffe, $sdffce)) {
if (param(dff, \EN_POLARITY) != param(ffB2, \EN_POLARITY))
goto ffB1_end;
if (port(dff, \EN) != port(ffB2, \EN))
goto ffB1_end;
}
ffB1 = dff;
clock = dffclock;
sigB = dffD;
ffB1_end: ;
}
}
}
}
endcode
// (6) Match 'D' input for DREG
code argQ ffD sigD clock
if (param(dsp, \DREG).as_int() == 0) {
argQ = sigD;
subpattern(in_dffe);
if (dff) {
ffD = dff;
clock = dffclock;
sigD = dffD;
}
}
endcode
// (7) Match 'P' output that exclusively drives an MREG
code argD ffM sigM sigP clock
if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) {
argD = sigM;
subpattern(out_dffe);
if (dff) {
ffM = dff;
clock = dffclock;
sigM = dffQ;
}
}
sigP = sigM;
endcode
// (8) Match 'P' output that exclusively drives one of two inputs to an $add
// cell (post-adder).
// The other input to the adder is assumed to come in from the 'C' input
// (note: 'P' -> 'C' connections that exist for accumulators are
// recognised in xilinx_dsp.cc).
match postAdd
// Ensure that Z mux is not already used
if port(dsp, \OPMODE, SigSpec(0, 7)).extract(4,3).is_fully_zero()
select postAdd->type.in($add)
select GetSize(port(postAdd, \Y)) <= 48
choice <IdString> AB {\A, \B}
select nusers(port(postAdd, AB)) == 2
index <SigBit> port(postAdd, AB)[0] === sigP[0]
filter GetSize(port(postAdd, AB)) >= GetSize(sigP)
filter port(postAdd, AB).extract(0, GetSize(sigP)) == sigP
// Check that remainder of AB is a sign- or zero-extension
filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(sigP[GetSize(sigP)-1], GetSize(port(postAdd, AB))-GetSize(sigP)) || port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(State::S0, GetSize(port(postAdd, AB))-GetSize(sigP))
set postAddAB AB
optional
endmatch
code sigC sigP
if (postAdd) {
sigC = port(postAdd, postAddAB == \A ? \B : \A);
sigP = port(postAdd, \Y);
}
endcode
// (9) Match 'P' output that exclusively drives a PREG
code argD ffP sigP clock
if (param(dsp, \PREG).as_int() == 0) {
if (nusers(sigP) == 2) {
argD = sigP;
subpattern(out_dffe);
if (dff) {
ffP = dff;
clock = dffclock;
sigP = dffQ;
}
}
}
endcode
// (10) If post-adder and PREG both present, match for a $mux cell driving
// the 'C' input, where one of the $mux's inputs is the PREG output.
// This indicates an accumulator situation, and one where a $mux exists
// to override the accumulated value:
// +--------------------------------+
// | ____ |
// +--| \ |
// |$mux|-+ |
// 'C' ---|____/ | |
// | /-------\ +----+ |
// +----+ +-| post- |___|PREG|---+ 'P'
// |MREG|------ | adder | +----+
// +----+ \-------/
match postAddMux
if postAdd
if ffP
select postAddMux->type.in($mux)
select nusers(port(postAddMux, \Y)) == 2
choice <IdString> AB {\A, \B}
index <SigSpec> port(postAddMux, AB) === sigP
index <SigSpec> port(postAddMux, \Y) === sigC
set postAddMuxAB AB
optional
endmatch
code sigC
if (postAddMux)
sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A);
endcode
// (11) If PREG present, match for a greater-than-or-equal $ge cell attached to
// the 'P' output where it is compared to a constant that is a power-of-2:
// e.g. `assign overflow = (PREG >= 2**40);`
// In this scenario, the pattern detector functionality of a DSP48E1 can
// to implement this function
match overflow
if ffP
if param(dsp, \USE_PATTERN_DETECT).decode_string() == "NO_PATDET"
select overflow->type.in($ge)
select GetSize(port(overflow, \Y)) <= 48
select port(overflow, \B).is_fully_const()
define <Const> B port(overflow, \B).as_const()
select std::count(B.begin(), B.end(), State::S1) == 1
index <SigSpec> port(overflow, \A) === sigP
optional
endmatch
code
accept;
endcode
// #######################
// Subpattern for matching against input registers, based on knowledge of the
// 'Q' input.
subpattern in_dffe
arg argQ clock
code
dff = nullptr;
if (argQ.empty())
reject;
for (const auto &c : argQ.chunks()) {
// Abandon matches when 'Q' is a constant
if (!c.wire)
reject;
// Abandon matches when 'Q' has the keep attribute set
if (c.wire->get_bool_attribute(\keep))
reject;
// Abandon matches when 'Q' has a non-zero init attribute set
// (not supported by DSP48E1)
Const init = c.wire->attributes.at(\init, Const());
if (!init.empty())
for (auto b : init.extract(c.offset, c.width))
if (b != State::Sx && b != State::S0)
reject;
}
endcode
match ff
select ff->type.in($dff, $dffe, $sdff, $sdffe)
// DSP48E1 does not support clock inversion
select param(ff, \CLK_POLARITY).as_bool()
// Check that reset value, if present, is fully 0.
filter ff->type.in($dff, $dffe) || param(ff, \SRST_VALUE).is_fully_zero()
slice offset GetSize(port(ff, \D))
index <SigBit> port(ff, \Q)[offset] === argQ[0]
// Check that the rest of argQ is present
filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
filter clock == SigBit() || port(ff, \CLK)[0] == clock
endmatch
code argQ
SigSpec Q = port(ff, \Q);
dff = ff;
dffclock = port(ff, \CLK);
dffD = argQ;
SigSpec D = port(ff, \D);
argQ = Q;
dffD.replace(argQ, D);
endcode
// #######################
// Subpattern for matching against output registers, based on knowledge of the
// 'D' input.
// At a high level:
// (1) Starting from an optional $mux cell that implements clock enable
// semantics --- one where the given 'D' argument (partially or fully)
// drives one of its two inputs
// (2) Starting from, or continuing onto, another optional $mux cell that
// implements synchronous reset semantics --- one where the given 'D'
// argument (or the clock enable $mux output) drives one of its two inputs
// and where the other input is fully zero
// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the
// output of the previous clock enable or reset $mux cells)
subpattern out_dffe
arg argD argQ clock
code
dff = nullptr;
for (auto c : argD.chunks())
// Abandon matches when 'D' has the keep attribute set
if (c.wire->get_bool_attribute(\keep))
reject;
endcode
match ff
select ff->type.in($dff, $dffe, $sdff, $sdffe)
// DSP48E1 does not support clock inversion
select param(ff, \CLK_POLARITY).as_bool()
slice offset GetSize(port(ff, \D))
index <SigBit> port(ff, \D)[offset] === argD[0]
// Check that the rest of argD is present
filter GetSize(port(ff, \D)) >= offset + GetSize(argD)
filter port(ff, \D).extract(offset, GetSize(argD)) == argD
filter clock == SigBit() || port(ff, \CLK)[0] == clock
endmatch
code argQ
SigSpec D = port(ff, \D);
SigSpec Q = port(ff, \Q);
argQ = argD;
argQ.replace(D, Q);
// Abandon matches when 'Q' has a non-zero init attribute set
// (not supported by DSP48E1)
for (auto c : argQ.chunks()) {
Const init = c.wire->attributes.at(\init, Const());
if (!init.empty())
for (auto b : init.extract(c.offset, c.width))
if (b != State::Sx && b != State::S0)
reject;
}
dff = ff;
dffQ = argQ;
dffclock = port(ff, \CLK);
endcode