Merge pull request #1438 from YosysHQ/eddie/xilinx_dsp_comments

Add notes and comments for xilinx_dsp
This commit is contained in:
Eddie Hung 2019-10-08 10:53:30 -07:00 committed by GitHub
commit 472b5d33a6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 365 additions and 73 deletions

View File

@ -609,8 +609,13 @@ struct XilinxDspPass : public Pass {
extra_args(args, argidx, design); extra_args(args, argidx, design);
for (auto module : design->selected_modules()) { for (auto module : design->selected_modules()) {
// Experimental feature: pack $add/$sub cells with
// (* use_dsp48="simd" *) into DSP48E1's using its
// SIMD feature
xilinx_simd_pack(module, module->selected_cells()); xilinx_simd_pack(module, module->selected_cells());
// Match for all features ([ABDMP][12]?REG, pre-adder,
// post-adder, pattern detector, etc.) except for CREG
{ {
xilinx_dsp_pm pm(module, module->selected_cells()); xilinx_dsp_pm pm(module, module->selected_cells());
pm.run_xilinx_dsp_pack(xilinx_dsp_pack); pm.run_xilinx_dsp_pack(xilinx_dsp_pack);
@ -619,14 +624,17 @@ struct XilinxDspPass : public Pass {
// is no guarantee that the cell ordering corresponds // is no guarantee that the cell ordering corresponds
// to the "expected" case (i.e. the order in which // to the "expected" case (i.e. the order in which
// they appear in the source) thus the possiblity // they appear in the source) thus the possiblity
// existed that a register got packed as CREG into a // existed that a register got packed as a CREG into a
// downstream DSP that should have otherwise been a // downstream DSP that should have otherwise been a
// PREG of an upstream DSP that had not been pattern // PREG of an upstream DSP that had not been visited
// matched yet // yet
{ {
xilinx_dsp_CREG_pm pm(module, module->selected_cells()); xilinx_dsp_CREG_pm pm(module, module->selected_cells());
pm.run_xilinx_dsp_packC(xilinx_dsp_packC); pm.run_xilinx_dsp_packC(xilinx_dsp_packC);
} }
// Lastly, identify and utilise PCOUT -> PCIN,
// ACOUT -> ACIN, and BCOUT-> BCIN dedicated cascade
// chains
{ {
xilinx_dsp_cascade_pm pm(module, module->selected_cells()); xilinx_dsp_cascade_pm pm(module, module->selected_cells());
pm.run_xilinx_dsp_cascade(); pm.run_xilinx_dsp_cascade();

View File

@ -1,3 +1,57 @@
// This file describes the main pattern matcher setup (of three total) that
// forms the `xilinx_dsp` pass described in xilinx_dsp.cc
// At a high level, it works as follows:
// ( 1) Starting from a DSP48E1 cell
// ( 2) Match the driver of the 'A' input to a possible $dff cell (ADREG)
// (attached to at most two $mux cells that implement clock-enable or
// reset functionality, using a subpattern discussed below)
// If ADREG matched, treat 'A' input as input of ADREG
// ( 3) Match the driver of the 'A' and 'D' inputs for a possible $add cell
// (pre-adder)
// ( 4) If pre-adder was present, find match 'A' input for A2REG
// If pre-adder was not present, move ADREG to A2REG
// If A2REG, then match 'A' input for A1REG
// ( 5) Match 'B' input for B2REG
// If B2REG, then match 'B' input for B1REG
// ( 6) Match 'D' input for DREG
// ( 7) Match 'P' output that exclusively drives an MREG
// ( 8) Match 'P' output that exclusively drives one of two inputs to an $add
// cell (post-adder).
// The other input to the adder is assumed to come in from the 'C' input
// (note: 'P' -> 'C' connections that exist for accumulators are
// recognised in xilinx_dsp.cc).
// ( 9) Match 'P' output that exclusively drives a PREG
// (10) If post-adder and PREG both present, match for a $mux cell driving
// the 'C' input, where one of the $mux's inputs is the PREG output.
// This indicates an accumulator situation, and one where a $mux exists
// to override the accumulated value:
// +--------------------------------+
// | ____ |
// +--| \ |
// |$mux|-+ |
// 'C' ---|____/ | |
// | /-------\ +----+ |
// +----+ +-| post- |___|PREG|---+ 'P'
// |MREG|------ | adder | +----+
// +----+ \-------/
// (11) If PREG present, match for a greater-than-or-equal $ge cell attached
// to the 'P' output where it is compared to a constant that is a
// power-of-2: e.g. `assign overflow = (PREG >= 2**40);`
// In this scenario, the pattern detector functionality of a DSP48E1 can
// to implement this function
// Notes:
// - The intention of this pattern matcher is for it to be compatible with
// DSP48E1 cells inferred from multiply operations by Yosys, as well as for
// user instantiations that may already contain the cells being packed...
// (though the latter is currently untested)
// - Since the $dff-with-optional-clock-enable-or-reset-mux pattern is used
// for each *REG match, it has been factored out into two subpatterns:
// in_dffe and out_dffe located at the bottom of this file.
// - Matching for pattern detector features is currently incomplete. For
// example, matching for underflow as well as overflow detection is
// possible, as would auto-reset, enabling saturated arithmetic, detecting
// custom patterns, etc.
pattern xilinx_dsp_pack pattern xilinx_dsp_pack
state <SigBit> clock state <SigBit> clock
@ -5,12 +59,11 @@ state <SigSpec> sigA sigB sigC sigD sigM sigP
state <IdString> postAddAB postAddMuxAB state <IdString> postAddAB postAddMuxAB
state <bool> ffA1cepol ffA2cepol ffADcepol ffB1cepol ffB2cepol ffDcepol ffMcepol ffPcepol state <bool> ffA1cepol ffA2cepol ffADcepol ffB1cepol ffB2cepol ffDcepol ffMcepol ffPcepol
state <bool> ffArstpol ffADrstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol state <bool> ffArstpol ffADrstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol
state <Cell*> ffAD ffADcemux ffADrstmux ffA1 ffA1cemux ffA1rstmux ffA2 ffA2cemux ffA2rstmux state <Cell*> ffAD ffADcemux ffADrstmux ffA1 ffA1cemux ffA1rstmux ffA2 ffA2cemux ffA2rstmux
state <Cell*> ffB1 ffB1cemux ffB1rstmux ffB2 ffB2cemux ffB2rstmux state <Cell*> ffB1 ffB1cemux ffB1rstmux ffB2 ffB2cemux ffB2rstmux
state <Cell*> ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux state <Cell*> ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux
// subpattern // Variables used for subpatterns
state <SigSpec> argQ argD state <SigSpec> argQ argD
state <bool> ffcepol ffrstpol state <bool> ffcepol ffrstpol
state <int> ffoffset state <int> ffoffset
@ -19,6 +72,7 @@ udata <SigBit> dffclock
udata <Cell*> dff dffcemux dffrstmux udata <Cell*> dff dffcemux dffrstmux
udata <bool> dffcepol dffrstpol udata <bool> dffcepol dffrstpol
// (1) Starting from a DSP48E1 cell
match dsp match dsp
select dsp->type.in(\DSP48E1) select dsp->type.in(\DSP48E1)
endmatch endmatch
@ -50,17 +104,21 @@ code sigA sigB sigC sigD sigM clock
sigM.append(P[i]); sigM.append(P[i]);
} }
log_assert(nusers(P.extract_end(i)) <= 1); log_assert(nusers(P.extract_end(i)) <= 1);
// This sigM could have no users if downstream sinks (e.g. $add) is
// narrower than $mul result, for example
if (sigM.empty())
reject;
} }
else else
sigM = P; sigM = P;
// This sigM could have no users if downstream $add
// is narrower than $mul result, for example
if (sigM.empty())
reject;
clock = port(dsp, \CLK, SigBit()); clock = port(dsp, \CLK, SigBit());
endcode endcode
// (2) Match the driver of the 'A' input to a possible $dff cell (ADREG)
// (attached to at most two $mux cells that implement clock-enable or
// reset functionality, using a subpattern discussed above)
// If matched, treat 'A' input as input of ADREG
code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock
if (param(dsp, \ADREG).as_int() == 0) { if (param(dsp, \ADREG).as_int() == 0) {
argQ = sigA; argQ = sigA;
@ -81,6 +139,8 @@ code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock
} }
endcode endcode
// (3) Match the driver of the 'A' and 'D' inputs for a possible $add cell
// (pre-adder)
match preAdd match preAdd
if sigD.empty() || sigD.is_fully_zero() if sigD.empty() || sigD.is_fully_zero()
// Ensure that preAdder not already used // Ensure that preAdder not already used
@ -106,11 +166,12 @@ code sigA sigD
if (preAdd) { if (preAdd) {
sigA = port(preAdd, \A); sigA = port(preAdd, \A);
sigD = port(preAdd, \B); sigD = port(preAdd, \B);
if (GetSize(sigA) < GetSize(sigD))
std::swap(sigA, sigD);
} }
endcode endcode
// (4) If pre-adder was present, find match 'A' input for A2REG
// If pre-adder was not present, move ADREG to A2REG
// Then match 'A' input for A1REG
code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock ffA2 ffA2cemux ffA2rstmux ffA2cepol ffArstpol ffA1 ffA1cemux ffA1rstmux ffA1cepol code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock ffA2 ffA2cemux ffA2rstmux ffA2cepol ffArstpol ffA1 ffA1cemux ffA1rstmux ffA1cepol
// Only search for ffA2 if there was a pre-adder // Only search for ffA2 if there was a pre-adder
// (otherwise ffA2 would have been matched as ffAD) // (otherwise ffA2 would have been matched as ffAD)
@ -173,6 +234,8 @@ ffA1_end: ;
} }
endcode endcode
// (5) Match 'B' input for B2REG
// If B2REG, then match 'B' input for B1REG
code argQ ffB2 ffB2cemux ffB2rstmux ffB2cepol ffBrstpol sigB clock ffB1 ffB1cemux ffB1rstmux ffB1cepol code argQ ffB2 ffB2cemux ffB2rstmux ffB2cepol ffBrstpol sigB clock ffB1 ffB1cemux ffB1rstmux ffB1cepol
if (param(dsp, \BREG).as_int() == 0) { if (param(dsp, \BREG).as_int() == 0) {
argQ = sigB; argQ = sigB;
@ -222,6 +285,7 @@ ffB1_end: ;
} }
endcode endcode
// (6) Match 'D' input for DREG
code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock
if (param(dsp, \DREG).as_int() == 0) { if (param(dsp, \DREG).as_int() == 0) {
argQ = sigD; argQ = sigD;
@ -242,6 +306,7 @@ code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock
} }
endcode endcode
// (7) Match 'P' output that exclusively drives an MREG
code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock
if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) { if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) {
argD = sigM; argD = sigM;
@ -263,6 +328,11 @@ code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock
sigP = sigM; sigP = sigM;
endcode endcode
// (8) Match 'P' output that exclusively drives one of two inputs to an $add
// cell (post-adder).
// The other input to the adder is assumed to come in from the 'C' input
// (note: 'P' -> 'C' connections that exist for accumulators are
// recognised in xilinx_dsp.cc).
match postAdd match postAdd
// Ensure that Z mux is not already used // Ensure that Z mux is not already used
if port(dsp, \OPMODE, SigSpec()).extract(4,3).is_fully_zero() if port(dsp, \OPMODE, SigSpec()).extract(4,3).is_fully_zero()
@ -291,6 +361,7 @@ code sigC sigP
} }
endcode endcode
// (9) Match 'P' output that exclusively drives a PREG
code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock
if (param(dsp, \PREG).as_int() == 0) { if (param(dsp, \PREG).as_int() == 0) {
int users = 2; int users = 2;
@ -316,6 +387,19 @@ code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock
} }
endcode endcode
// (10) If post-adder and PREG both present, match for a $mux cell driving
// the 'C' input, where one of the $mux's inputs is the PREG output.
// This indicates an accumulator situation, and one where a $mux exists
// to override the accumulated value:
// +--------------------------------+
// | ____ |
// +--| \ |
// |$mux|-+ |
// 'C' ---|____/ | |
// | /-------\ +----+ |
// +----+ +-| post- |___|PREG|---+ 'P'
// |MREG|------ | adder | +----+
// +----+ \-------/
match postAddMux match postAddMux
if postAdd if postAdd
if ffP if ffP
@ -333,6 +417,11 @@ code sigC
sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A); sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A);
endcode endcode
// (11) If PREG present, match for a greater-than-or-equal $ge cell attached to
// the 'P' output where it is compared to a constant that is a power-of-2:
// e.g. `assign overflow = (PREG >= 2**40);`
// In this scenario, the pattern detector functionality of a DSP48E1 can
// to implement this function
match overflow match overflow
if ffP if ffP
if param(dsp, \USE_PATTERN_DETECT, Const("NO_PATDET")).decode_string() == "NO_PATDET" if param(dsp, \USE_PATTERN_DETECT, Const("NO_PATDET")).decode_string() == "NO_PATDET"
@ -351,22 +440,45 @@ endcode
// ####################### // #######################
// Subpattern for matching against input registers, based on knowledge of the
// 'Q' input. Typically, identifying registers with clock-enable and reset
// capability would be a task would be handled by other Yosys passes such as
// dff2dffe, but since DSP inference happens much before this, these patterns
// have to be manually identified.
// At a high level:
// (1) Starting from a $dff cell that (partially or fully) drives the given
// 'Q' argument
// (2) Match for a $mux cell implementing synchronous reset semantics ---
// one that exclusively drives the 'D' input of the $dff, with one of its
// $mux inputs being fully zero
// (3) Match for a $mux cell implement clock enable semantics --- one that
// exclusively drives the 'D' input of the $dff (or the other input of
// the reset $mux) and where one of this $mux's inputs is connected to
// the 'Q' output of the $dff
subpattern in_dffe subpattern in_dffe
arg argD argQ clock arg argD argQ clock
code code
dff = nullptr; dff = nullptr;
for (auto c : argQ.chunks()) { for (const auto &c : argQ.chunks()) {
// Abandon matches when 'Q' is a constant
if (!c.wire) if (!c.wire)
reject; reject;
// Abandon matches when 'Q' has the keep attribute set
if (c.wire->get_bool_attribute(\keep)) if (c.wire->get_bool_attribute(\keep))
reject; reject;
Const init = c.wire->attributes.at(\init, State::Sx); // Abandon matches when 'Q' has a non-zero init attribute set
if (!init.is_fully_undef() && !init.is_fully_zero()) // (not supported by DSP48E1)
reject; Const init = c.wire->attributes.at(\init, Const());
if (!init.empty())
for (auto b : init.extract(c.offset, c.width))
if (b != State::Sx && b != State::S0)
reject;
} }
endcode endcode
// (1) Starting from a $dff cell that (partially or fully) drives the given
// 'Q' argument
match ff match ff
select ff->type.in($dff) select ff->type.in($dff)
// DSP48E1 does not support clock inversion // DSP48E1 does not support clock inversion
@ -379,14 +491,12 @@ match ff
filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ) filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
filter clock == SigBit() || port(ff, \CLK) == clock
set ffoffset offset set ffoffset offset
endmatch endmatch
code argQ argD code argQ argD
{
if (clock != SigBit() && port(ff, \CLK) != clock)
reject;
SigSpec Q = port(ff, \Q); SigSpec Q = port(ff, \Q);
dff = ff; dff = ff;
dffclock = port(ff, \CLK); dffclock = port(ff, \CLK);
@ -398,9 +508,11 @@ code argQ argD
// has two (ff, ffrstmux) users // has two (ff, ffrstmux) users
if (nusers(dffD) > 2) if (nusers(dffD) > 2)
argD = SigSpec(); argD = SigSpec();
}
endcode endcode
// (2) Match for a $mux cell implementing synchronous reset semantics ---
// exclusively drives the 'D' input of the $dff, with one of the $mux
// inputs being fully zero
match ffrstmux match ffrstmux
if !argD.empty() if !argD.empty()
select ffrstmux->type.in($mux) select ffrstmux->type.in($mux)
@ -432,6 +544,10 @@ code argD
dffrstmux = nullptr; dffrstmux = nullptr;
endcode endcode
// (3) Match for a $mux cell implement clock enable semantics --- one that
// exclusively drives the 'D' input of the $dff (or the other input of
// the reset $mux) and where one of this $mux's inputs is connected to
// the 'Q' output of the $dff
match ffcemux match ffcemux
if !argD.empty() if !argD.empty()
select ffcemux->type.in($mux) select ffcemux->type.in($mux)
@ -456,16 +572,32 @@ endcode
// ####################### // #######################
// Subpattern for matching against output registers, based on knowledge of the
// 'D' input.
// At a high level:
// (1) Starting from an optional $mux cell that implements clock enable
// semantics --- one where the given 'D' argument (partially or fully)
// drives one of its two inputs
// (2) Starting from, or continuing onto, another optional $mux cell that
// implements synchronous reset semantics --- one where the given 'D'
// argument (or the clock enable $mux output) drives one of its two inputs
// and where the other input is fully zero
// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the
// output of the previous clock enable or reset $mux cells)
subpattern out_dffe subpattern out_dffe
arg argD argQ clock arg argD argQ clock
code code
dff = nullptr; dff = nullptr;
for (auto c : argD.chunks()) for (auto c : argD.chunks())
// Abandon matches when 'D' has the keep attribute set
if (c.wire->get_bool_attribute(\keep)) if (c.wire->get_bool_attribute(\keep))
reject; reject;
endcode endcode
// (1) Starting from an optional $mux cell that implements clock enable
// semantics --- one where the given 'D' argument (partially or fully)
// drives one of its two inputs
match ffcemux match ffcemux
select ffcemux->type.in($mux) select ffcemux->type.in($mux)
// ffcemux output must have two users: ffcemux and ff.D // ffcemux output must have two users: ffcemux and ff.D
@ -504,6 +636,10 @@ code argD argQ
} }
endcode endcode
// (2) Starting from, or continuing onto, another optional $mux cell that
// implements synchronous reset semantics --- one where the given 'D'
// argument (or the clock enable $mux output) drives one of its two inputs
// and where the other input is fully zero
match ffrstmux match ffrstmux
select ffrstmux->type.in($mux) select ffrstmux->type.in($mux)
// ffrstmux output must have two users: ffrstmux and ff.D // ffrstmux output must have two users: ffrstmux and ff.D
@ -542,6 +678,8 @@ code argD argQ
} }
endcode endcode
// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the
// output of the previous clock enable or reset $mux cells)
match ff match ff
select ff->type.in($dff) select ff->type.in($dff)
// DSP48E1 does not support clock inversion // DSP48E1 does not support clock inversion
@ -558,32 +696,30 @@ match ff
// Check that FF.Q is connected to CE-mux // Check that FF.Q is connected to CE-mux
filter !ffcemux || port(ff, \Q).extract(offset, GetSize(argQ)) == argQ filter !ffcemux || port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
filter clock == SigBit() || port(ff, \CLK) == clock
set ffoffset offset set ffoffset offset
endmatch endmatch
code argQ code argQ
if (ff) { SigSpec D = port(ff, \D);
if (clock != SigBit() && port(ff, \CLK) != clock) SigSpec Q = port(ff, \Q);
reject; if (!ffcemux) {
argQ = argD;
SigSpec D = port(ff, \D); argQ.replace(D, Q);
SigSpec Q = port(ff, \Q);
if (!ffcemux) {
argQ = argD;
argQ.replace(D, Q);
}
for (auto c : argQ.chunks()) {
Const init = c.wire->attributes.at(\init, State::Sx);
if (!init.is_fully_undef() && !init.is_fully_zero())
reject;
}
dff = ff;
dffQ = argQ;
dffclock = port(ff, \CLK);
} }
// No enable/reset mux possible without flop
else if (dffcemux || dffrstmux) // Abandon matches when 'Q' has a non-zero init attribute set
reject; // (not supported by DSP48E1)
for (auto c : argQ.chunks()) {
Const init = c.wire->attributes.at(\init, Const());
if (!init.empty())
for (auto b : init.extract(c.offset, c.width))
if (b != State::Sx && b != State::S0)
reject;
}
dff = ff;
dffQ = argQ;
dffclock = port(ff, \CLK);
endcode endcode

View File

@ -1,3 +1,26 @@
// This file describes the second of three pattern matcher setups that
// forms the `xilinx_dsp` pass described in xilinx_dsp.cc
// At a high level, it works as follows:
// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already,
// and (b) uses the 'C' port
// (2) Match the driver of the 'C' input to a possible $dff cell (CREG)
// (attached to at most two $mux cells that implement clock-enable or
// reset functionality, using a subpattern discussed below)
// Notes:
// - Running CREG packing after xilinx_dsp_pack is necessary since there is no
// guarantee that the cell ordering corresponds to the "expected" case (i.e.
// the order in which they appear in the source) thus the possiblity existed
// that a register got packed as a CREG into a downstream DSP that should
// have otherwise been a PREG of an upstream DSP that had not been visited
// yet
// - The reason this is separated out from the xilinx_dsp.pmg file is
// for efficiency --- each *.pmg file creates a class of the same basename,
// which when constructed, creates a custom database tailored to the
// pattern(s) contained within. Since the pattern in this file must be
// executed after the pattern contained in xilinx_dsp.pmg, it is necessary
// to reconstruct this database. Separating the two patterns into
// independent files causes two smaller, more specific, databases.
pattern xilinx_dsp_packC pattern xilinx_dsp_packC
udata <std::function<SigSpec(const SigSpec&)>> unextend udata <std::function<SigSpec(const SigSpec&)>> unextend
@ -6,7 +29,7 @@ state <SigSpec> sigC sigP
state <bool> ffCcepol ffCrstpol state <bool> ffCcepol ffCrstpol
state <Cell*> ffC ffCcemux ffCrstmux state <Cell*> ffC ffCcemux ffCrstmux
// subpattern // Variables used for subpatterns
state <SigSpec> argQ argD state <SigSpec> argQ argD
state <bool> ffcepol ffrstpol state <bool> ffcepol ffrstpol
state <int> ffoffset state <int> ffoffset
@ -15,13 +38,15 @@ udata <SigBit> dffclock
udata <Cell*> dff dffcemux dffrstmux udata <Cell*> dff dffcemux dffrstmux
udata <bool> dffcepol dffrstpol udata <bool> dffcepol dffrstpol
// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already,
// and (b) uses the 'C' port
match dsp match dsp
select dsp->type.in(\DSP48E1) select dsp->type.in(\DSP48E1)
select param(dsp, \CREG, 1).as_int() == 0 select param(dsp, \CREG, 1).as_int() == 0
select nusers(port(dsp, \C, SigSpec())) > 1 select nusers(port(dsp, \C, SigSpec())) > 1
endmatch endmatch
code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC sigP clock code sigC sigP clock
unextend = [](const SigSpec &sig) { unextend = [](const SigSpec &sig) {
int i; int i;
for (i = GetSize(sig)-1; i > 0; i--) for (i = GetSize(sig)-1; i > 0; i--)
@ -48,11 +73,13 @@ code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC sigP clock
else else
sigP = P; sigP = P;
if (sigC == sigP)
reject;
clock = port(dsp, \CLK, SigBit()); clock = port(dsp, \CLK, SigBit());
endcode
// (2) Match the driver of the 'C' input to a possible $dff cell (CREG)
// (attached to at most two $mux cells that implement clock-enable or
// reset functionality, using the in_dffe subpattern)
code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock
argQ = sigC; argQ = sigC;
subpattern(in_dffe); subpattern(in_dffe);
if (dff) { if (dff) {
@ -77,22 +104,44 @@ endcode
// ####################### // #######################
// Subpattern for matching against input registers, based on knowledge of the
// 'Q' input. Typically, identifying registers with clock-enable and reset
// capability would be a task would be handled by other Yosys passes such as
// dff2dffe, but since DSP inference happens much before this, these patterns
// have to be manually identified.
// At a high level:
// (1) Starting from a $dff cell that (partially or fully) drives the given
// 'Q' argument
// (2) Match for a $mux cell implementing synchronous reset semantics ---
// one that exclusively drives the 'D' input of the $dff, with one of its
// $mux inputs being fully zero
// (3) Match for a $mux cell implement clock enable semantics --- one that
// exclusively drives the 'D' input of the $dff (or the other input of
// the reset $mux) and where one of this $mux's inputs is connected to
// the 'Q' output of the $dff
subpattern in_dffe subpattern in_dffe
arg argD argQ clock arg argD argQ clock
code code
dff = nullptr; dff = nullptr;
for (auto c : argQ.chunks()) { for (const auto &c : argQ.chunks()) {
// Abandon matches when 'Q' is a constant
if (!c.wire) if (!c.wire)
reject; reject;
// Abandon matches when 'Q' has the keep attribute set
if (c.wire->get_bool_attribute(\keep)) if (c.wire->get_bool_attribute(\keep))
reject; reject;
Const init = c.wire->attributes.at(\init, State::Sx); // Abandon matches when 'Q' has a non-zero init attribute set
if (!init.is_fully_undef() && !init.is_fully_zero()) // (not supported by DSP48E1)
reject; Const init = c.wire->attributes.at(\init, Const());
for (auto b : init.extract(c.offset, c.width))
if (b != State::Sx && b != State::S0)
reject;
} }
endcode endcode
// (1) Starting from a $dff cell that (partially or fully) drives the given
// 'Q' argument
match ff match ff
select ff->type.in($dff) select ff->type.in($dff)
// DSP48E1 does not support clock inversion // DSP48E1 does not support clock inversion
@ -105,14 +154,12 @@ match ff
filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ) filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
filter clock == SigBit() || port(ff, \CLK) == clock
set ffoffset offset set ffoffset offset
endmatch endmatch
code argQ argD code argQ argD
{
if (clock != SigBit() && port(ff, \CLK) != clock)
reject;
SigSpec Q = port(ff, \Q); SigSpec Q = port(ff, \Q);
dff = ff; dff = ff;
dffclock = port(ff, \CLK); dffclock = port(ff, \CLK);
@ -124,9 +171,11 @@ code argQ argD
// has two (ff, ffrstmux) users // has two (ff, ffrstmux) users
if (nusers(dffD) > 2) if (nusers(dffD) > 2)
argD = SigSpec(); argD = SigSpec();
}
endcode endcode
// (2) Match for a $mux cell implementing synchronous reset semantics ---
// exclusively drives the 'D' input of the $dff, with one of the $mux
// inputs being fully zero
match ffrstmux match ffrstmux
if !argD.empty() if !argD.empty()
select ffrstmux->type.in($mux) select ffrstmux->type.in($mux)
@ -158,6 +207,10 @@ code argD
dffrstmux = nullptr; dffrstmux = nullptr;
endcode endcode
// (3) Match for a $mux cell implement clock enable semantics --- one that
// exclusively drives the 'D' input of the $dff (or the other input of
// the reset $mux) and where one of this $mux's inputs is connected to
// the 'Q' output of the $dff
match ffcemux match ffcemux
if !argD.empty() if !argD.empty()
select ffcemux->type.in($mux) select ffcemux->type.in($mux)

View File

@ -1,3 +1,46 @@
// This file describes the third of three pattern matcher setups that
// forms the `xilinx_dsp` pass described in xilinx_dsp.cc
// At a high level, it works as follows:
// (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer
// (controlled by OPMODE[6:4]) set to zero and (b) doesn't already
// use the 'PCOUT' port
// (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled,
// (b) has its Z multiplexer output set to the 'C' port, which is
// driven by the 'P' output of the previous DSP cell, and (c) has its
// 'PCIN' port unused
// (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the
// previous DSP cell right-shifted by 17 bits
// (3) For this subequent DSP48E1 match (i.e. PCOUT -> PCIN cascade exists)
// if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this
// DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already
// have an ACOUT -> ACIN cascade, (d) the previous DSP does not already
// use its ACOUT port, then examine if an ACOUT -> ACIN cascade
// opportunity exists by matching for a $dff-with-optional-clock-enable-
// or-reset and checking that the 'D' input of this register is the same
// as the 'A' input of the previous DSP
// (4) Same as (3) but for BCOUT -> BCIN cascade
// (5) Recursively go to (2.1) until no more matches possible, keeping track
// of the longest possible chain found
// (6) The longest chain is then divided into chunks of no more than
// MAX_DSP_CASCADE in length (to prevent long cascades that exceed the
// height of a DSP column) with each DSP in each chunk being rewritten
// to use [ABP]COUT -> [ABP]CIN cascading as appropriate
// Notes:
// - Currently, [AB]COUT -> [AB]COUT cascades (3 or 4) are only considered
// if a PCOUT -> PCIN cascade is (2.1 or 2.2) first identified; this need
// not be the case --- [AB] cascades can exist independently of a P cascade
// (though all three cascades must come from the same DSP). This situation
// is not handled currently.
// - In addition, [AB]COUT -> [AB]COUT cascades (3 or 4) are currently
// conservative in that they examine the situation where (a) the previous
// DSP has [AB]2REG or [AB]1REG enabled, (b) that the downstream DSP has no
// registers enabled, and (c) that there exists only one additional register
// between the upstream and downstream DSPs. This can certainly be relaxed
// to identify situations ranging from (i) neither DSP uses any registers,
// to (ii) upstream DSP has 2 registers, downstream DSP has 2 registers, and
// there exists a further 2 registers between them. This remains a TODO
// item.
pattern xilinx_dsp_cascade pattern xilinx_dsp_cascade
udata <std::function<SigSpec(const SigSpec&)>> unextend udata <std::function<SigSpec(const SigSpec&)>> unextend
@ -6,7 +49,7 @@ state <Cell*> next
state <SigSpec> clock state <SigSpec> clock
state <int> AREG BREG state <int> AREG BREG
// subpattern // Variables used for subpatterns
state <SigSpec> argQ argD state <SigSpec> argQ argD
state <bool> ffcepol ffrstpol state <bool> ffcepol ffrstpol
state <int> ffoffset state <int> ffoffset
@ -19,12 +62,19 @@ code
#define MAX_DSP_CASCADE 20 #define MAX_DSP_CASCADE 20
endcode endcode
// (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer
// (controlled by OPMODE[6:4]) set to zero and (b) doesn't already
// use the 'PCOUT' port
match first match first
select first->type.in(\DSP48E1) select first->type.in(\DSP48E1)
select port(first, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("000") select port(first, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("000")
select nusers(port(first, \PCOUT, SigSpec())) <= 1 select nusers(port(first, \PCOUT, SigSpec())) <= 1
endmatch endmatch
// (6) The longest chain is then divided into chunks of no more than
// MAX_DSP_CASCADE in length (to prevent long cascades that exceed the
// height of a DSP column) with each DSP in each chunk being rewritten
// to use [ABP]COUT -> [ABP]CIN cascading as appropriate
code code
longest_chain.clear(); longest_chain.clear();
chain.emplace_back(first, -1, -1, -1); chain.emplace_back(first, -1, -1, -1);
@ -106,6 +156,10 @@ subpattern tail
arg first arg first
arg next arg next
// (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled,
// (b) has its Z multiplexer output set to the 'C' port, which is
// driven by the 'P' output of the previous DSP cell, and (c) has its
// 'PCIN' port unused
match nextP match nextP
select nextP->type.in(\DSP48E1) select nextP->type.in(\DSP48E1)
select !param(nextP, \CREG, State::S1).as_bool() select !param(nextP, \CREG, State::S1).as_bool()
@ -116,6 +170,8 @@ match nextP
semioptional semioptional
endmatch endmatch
// (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the
// previous DSP cell right-shifted by 17 bits
match nextP_shift17 match nextP_shift17
if !nextP if !nextP
select nextP_shift17->type.in(\DSP48E1) select nextP_shift17->type.in(\DSP48E1)
@ -145,6 +201,14 @@ code next
} }
endcode endcode
// (3) For this subequent DSP48E1 match (i.e. PCOUT -> PCIN cascade exists)
// if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this
// DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already
// have an ACOUT -> ACIN cascade, (d) the previous DSP does not already
// use its ACOUT port, then examine if an ACOUT -> ACIN cascade
// opportunity exists by matching for a $dff-with-optional-clock-enable-
// or-reset and checking that the 'D' input of this register is the same
// as the 'A' input of the previous DSP
code argQ clock AREG code argQ clock AREG
AREG = -1; AREG = -1;
if (next) { if (next) {
@ -152,7 +216,6 @@ code argQ clock AREG
if (param(prev, \AREG, 2).as_int() > 0 && if (param(prev, \AREG, 2).as_int() > 0 &&
param(next, \AREG, 2).as_int() > 0 && param(next, \AREG, 2).as_int() > 0 &&
param(next, \A_INPUT, Const("DIRECT")).decode_string() == "DIRECT" && param(next, \A_INPUT, Const("DIRECT")).decode_string() == "DIRECT" &&
port(next, \ACIN, SigSpec()).is_fully_zero() &&
nusers(port(prev, \ACOUT, SigSpec())) <= 1) { nusers(port(prev, \ACOUT, SigSpec())) <= 1) {
argQ = unextend(port(next, \A)); argQ = unextend(port(next, \A));
clock = port(prev, \CLK); clock = port(prev, \CLK);
@ -174,6 +237,7 @@ reject_AREG: ;
} }
endcode endcode
// (4) Same as (3) but for BCOUT -> BCIN cascade
code argQ clock BREG code argQ clock BREG
BREG = -1; BREG = -1;
if (next) { if (next) {
@ -203,13 +267,14 @@ reject_BREG: ;
} }
endcode endcode
// (5) Recursively go to (2.1) until no more matches possible, recording the
// longest possible chain
code code
if (next) { if (next) {
chain.emplace_back(next, nextP_shift17 ? 17 : nextP ? 0 : -1, AREG, BREG); chain.emplace_back(next, nextP_shift17 ? 17 : nextP ? 0 : -1, AREG, BREG);
SigSpec sigC = unextend(port(next, \C)); SigSpec sigC = unextend(port(next, \C));
// TODO: Cannot use 'reject' since semioptional
if (nextP_shift17) { if (nextP_shift17) {
if (GetSize(sigC)+17 <= GetSize(port(std::get<0>(chain.back()), \P)) && if (GetSize(sigC)+17 <= GetSize(port(std::get<0>(chain.back()), \P)) &&
port(std::get<0>(chain.back()), \P).extract(17, GetSize(sigC)) != sigC) port(std::get<0>(chain.back()), \P).extract(17, GetSize(sigC)) != sigC)
@ -232,22 +297,44 @@ endcode
// ####################### // #######################
// Subpattern for matching against input registers, based on knowledge of the
// 'Q' input. Typically, identifying registers with clock-enable and reset
// capability would be a task would be handled by other Yosys passes such as
// dff2dffe, but since DSP inference happens much before this, these patterns
// have to be manually identified.
// At a high level:
// (1) Starting from a $dff cell that (partially or fully) drives the given
// 'Q' argument
// (2) Match for a $mux cell implementing synchronous reset semantics ---
// one that exclusively drives the 'D' input of the $dff, with one of its
// $mux inputs being fully zero
// (3) Match for a $mux cell implement clock enable semantics --- one that
// exclusively drives the 'D' input of the $dff (or the other input of
// the reset $mux) and where one of this $mux's inputs is connected to
// the 'Q' output of the $dff
subpattern in_dffe subpattern in_dffe
arg argD argQ clock arg argD argQ clock
code code
dff = nullptr; dff = nullptr;
for (auto c : argQ.chunks()) { for (const auto &c : argQ.chunks()) {
// Abandon matches when 'Q' is a constant
if (!c.wire) if (!c.wire)
reject; reject;
// Abandon matches when 'Q' has the keep attribute set
if (c.wire->get_bool_attribute(\keep)) if (c.wire->get_bool_attribute(\keep))
reject; reject;
Const init = c.wire->attributes.at(\init, State::Sx); // Abandon matches when 'Q' has a non-zero init attribute set
if (!init.is_fully_undef() && !init.is_fully_zero()) // (not supported by DSP48E1)
reject; Const init = c.wire->attributes.at(\init, Const());
for (auto b : init.extract(c.offset, c.width))
if (b != State::Sx && b != State::S0)
reject;
} }
endcode endcode
// (1) Starting from a $dff cell that (partially or fully) drives the given
// 'Q' argument
match ff match ff
select ff->type.in($dff) select ff->type.in($dff)
// DSP48E1 does not support clock inversion // DSP48E1 does not support clock inversion
@ -260,14 +347,12 @@ match ff
filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ) filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
filter clock == SigBit() || port(ff, \CLK) == clock
set ffoffset offset set ffoffset offset
endmatch endmatch
code argQ argD code argQ argD
{
if (clock != SigBit() && port(ff, \CLK) != clock)
reject;
SigSpec Q = port(ff, \Q); SigSpec Q = port(ff, \Q);
dff = ff; dff = ff;
dffclock = port(ff, \CLK); dffclock = port(ff, \CLK);
@ -279,9 +364,11 @@ code argQ argD
// has two (ff, ffrstmux) users // has two (ff, ffrstmux) users
if (nusers(dffD) > 2) if (nusers(dffD) > 2)
argD = SigSpec(); argD = SigSpec();
}
endcode endcode
// (2) Match for a $mux cell implementing synchronous reset semantics ---
// exclusively drives the 'D' input of the $dff, with one of the $mux
// inputs being fully zero
match ffrstmux match ffrstmux
if !argD.empty() if !argD.empty()
select ffrstmux->type.in($mux) select ffrstmux->type.in($mux)
@ -313,6 +400,10 @@ code argD
dffrstmux = nullptr; dffrstmux = nullptr;
endcode endcode
// (3) Match for a $mux cell implement clock enable semantics --- one that
// exclusively drives the 'D' input of the $dff (or the other input of
// the reset $mux) and where one of this $mux's inputs is connected to
// the 'Q' output of the $dff
match ffcemux match ffcemux
if !argD.empty() if !argD.empty()
select ffcemux->type.in($mux) select ffcemux->type.in($mux)

View File

@ -342,10 +342,14 @@ struct SynthXilinxPass : public ScriptPass
if (check_label("map_dsp", "(skip if '-nodsp')")) { if (check_label("map_dsp", "(skip if '-nodsp')")) {
if (!nodsp || help_mode) { if (!nodsp || help_mode) {
// NB: Xilinx multipliers are signed only // NB: Xilinx multipliers are signed only
run("techmap -map +/mul2dsp.v -map +/xilinx/dsp_map.v -D DSP_A_MAXWIDTH=25 -D DSP_A_MAXWIDTH_PARTIAL=18 -D DSP_B_MAXWIDTH=18 " run("techmap -map +/mul2dsp.v -map +/xilinx/dsp_map.v -D DSP_A_MAXWIDTH=25 "
"-D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 " // Blocks Nx1 multipliers "-D DSP_A_MAXWIDTH_PARTIAL=18 -D DSP_B_MAXWIDTH=18 " // Partial multipliers are intentionally
"-D DSP_Y_MINWIDTH=9 " // UG901 suggests small multiplies are those 4x4 and smaller // limited to 18x18 in order to take
"-D DSP_SIGNEDONLY=1 -D DSP_NAME=$__MUL25X18"); // advantage of the (PCOUT << 17) -> PCIN
// dedicated cascade chain capability
"-D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 " // Blocks Nx1 multipliers
"-D DSP_Y_MINWIDTH=9 " // UG901 suggests small multiplies are those 4x4 and smaller
"-D DSP_SIGNEDONLY=1 -D DSP_NAME=$__MUL25X18");
run("select a:mul2dsp"); run("select a:mul2dsp");
run("setattr -unset mul2dsp"); run("setattr -unset mul2dsp");
run("opt_expr -fine"); run("opt_expr -fine");