// This file describes the main pattern matcher setup (of three total) that // forms the `xilinx_dsp` pass described in xilinx_dsp.cc // At a high level, it works as follows: // ( 1) Starting from a DSP48E1 cell // ( 2) Match the driver of the 'A' input to a possible $dff cell (ADREG) // (attached to at most two $mux cells that implement clock-enable or // reset functionality, using a subpattern discussed below) // If ADREG matched, treat 'A' input as input of ADREG // ( 3) Match the driver of the 'A' and 'D' inputs for a possible $add cell // (pre-adder) // ( 4) If pre-adder was present, find match 'A' input for A2REG // If pre-adder was not present, move ADREG to A2REG // If A2REG, then match 'A' input for A1REG // ( 5) Match 'B' input for B2REG // If B2REG, then match 'B' input for B1REG // ( 6) Match 'D' input for DREG // ( 7) Match 'P' output that exclusively drives an MREG // ( 8) Match 'P' output that exclusively drives one of two inputs to an $add // cell (post-adder). // The other input to the adder is assumed to come in from the 'C' input // (note: 'P' -> 'C' connections that exist for accumulators are // recognised in xilinx_dsp.cc). // ( 9) Match 'P' output that exclusively drives a PREG // (10) If post-adder and PREG both present, match for a $mux cell driving // the 'C' input, where one of the $mux's inputs is the PREG output. // This indicates an accumulator situation, and one where a $mux exists // to override the accumulated value: // +--------------------------------+ // | ____ | // +--| \ | // |$mux|-+ | // 'C' ---|____/ | | // | /-------\ +----+ | // +----+ +-| post- |___|PREG|---+ 'P' // |MREG|------ | adder | +----+ // +----+ \-------/ // (11) If PREG present, match for a greater-than-or-equal $ge cell attached // to the 'P' output where it is compared to a constant that is a // power-of-2: e.g. `assign overflow = (PREG >= 2**40);` // In this scenario, the pattern detector functionality of a DSP48E1 can // to implement this function // Notes: // - The intention of this pattern matcher is for it to be compatible with // DSP48E1 cells inferred from multiply operations by Yosys, as well as for // user instantiations that may already contain the cells being packed... // (though the latter is currently untested) // - Since the $dff-with-optional-clock-enable-or-reset-mux pattern is used // for each *REG match, it has been factored out into two subpatterns: // in_dffe and out_dffe located at the bottom of this file. // - Matching for pattern detector features is currently incomplete. For // example, matching for underflow as well as overflow detection is // possible, as would auto-reset, enabling saturated arithmetic, detecting // custom patterns, etc. pattern xilinx_dsp_pack state <SigBit> clock state <SigSpec> sigA sigB sigC sigD sigM sigP state <IdString> postAddAB postAddMuxAB state <bool> ffA1cepol ffA2cepol ffADcepol ffB1cepol ffB2cepol ffDcepol ffMcepol ffPcepol state <bool> ffArstpol ffADrstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol state <Cell*> ffAD ffADcemux ffADrstmux ffA1 ffA1cemux ffA1rstmux ffA2 ffA2cemux ffA2rstmux state <Cell*> ffB1 ffB1cemux ffB1rstmux ffB2 ffB2cemux ffB2rstmux state <Cell*> ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux // Variables used for subpatterns state <SigSpec> argQ argD state <bool> ffcepol ffrstpol state <int> ffoffset udata <SigSpec> dffD dffQ udata <SigBit> dffclock udata <Cell*> dff dffcemux dffrstmux udata <bool> dffcepol dffrstpol // (1) Starting from a DSP48E1 cell match dsp select dsp->type.in(\DSP48E1) endmatch code sigA sigB sigC sigD sigM clock auto unextend = [](const SigSpec &sig) { int i; for (i = GetSize(sig)-1; i > 0; i--) if (sig[i] != sig[i-1]) break; // Do not remove non-const sign bit if (sig[i].wire) ++i; return sig.extract(0, i); }; sigA = unextend(port(dsp, \A)); sigB = unextend(port(dsp, \B)); sigC = port(dsp, \C, SigSpec()); sigD = port(dsp, \D, SigSpec()); SigSpec P = port(dsp, \P); if (param(dsp, \USE_MULT).decode_string() == "MULTIPLY") { // Only care about those bits that are used int i; for (i = GetSize(P)-1; i >= 0; i--) if (nusers(P[i]) > 1) break; i++; log_assert(nusers(P.extract_end(i)) <= 1); // This sigM could have no users if downstream sinks (e.g. $add) is // narrower than $mul result, for example if (i == 0) reject; sigM = P.extract(0, i); } else sigM = P; clock = port(dsp, \CLK, SigBit()); endcode // (2) Match the driver of the 'A' input to a possible $dff cell (ADREG) // (attached to at most two $mux cells that implement clock-enable or // reset functionality, using a subpattern discussed above) // If matched, treat 'A' input as input of ADREG code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock if (param(dsp, \ADREG).as_int() == 0) { argQ = sigA; subpattern(in_dffe); if (dff) { ffAD = dff; clock = dffclock; if (dffrstmux) { ffADrstmux = dffrstmux; ffADrstpol = dffrstpol; } if (dffcemux) { ffADcemux = dffcemux; ffADcepol = dffcepol; } sigA = dffD; } } endcode // (3) Match the driver of the 'A' and 'D' inputs for a possible $add cell // (pre-adder) match preAdd if sigD.empty() || sigD.is_fully_zero() // Ensure that preAdder not already used if param(dsp, \USE_DPORT).decode_string() == "FALSE" if port(dsp, \INMODE, Const(0, 5)).is_fully_zero() select preAdd->type.in($add) // Output has to be 25 bits or less select GetSize(port(preAdd, \Y)) <= 25 select nusers(port(preAdd, \Y)) == 2 choice <IdString> AB {\A, \B} // A port has to be 30 bits or less select GetSize(port(preAdd, AB)) <= 30 define <IdString> BA (AB == \A ? \B : \A) // D port has to be 25 bits or less select GetSize(port(preAdd, BA)) <= 25 index <SigSpec> port(preAdd, \Y) === sigA optional endmatch code sigA sigD if (preAdd) { sigA = port(preAdd, \A); sigD = port(preAdd, \B); } endcode // (4) If pre-adder was present, find match 'A' input for A2REG // If pre-adder was not present, move ADREG to A2REG // Then match 'A' input for A1REG code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock ffA2 ffA2cemux ffA2rstmux ffA2cepol ffArstpol ffA1 ffA1cemux ffA1rstmux ffA1cepol // Only search for ffA2 if there was a pre-adder // (otherwise ffA2 would have been matched as ffAD) if (preAdd) { if (param(dsp, \AREG).as_int() == 0) { argQ = sigA; subpattern(in_dffe); if (dff) { ffA2 = dff; clock = dffclock; if (dffrstmux) { ffA2rstmux = dffrstmux; ffArstpol = dffrstpol; } if (dffcemux) { ffA2cepol = dffcepol; ffA2cemux = dffcemux; } sigA = dffD; } } } // And if there wasn't a pre-adder, // move AD register to A else if (ffAD) { log_assert(!ffA2 && !ffA2cemux && !ffA2rstmux); std::swap(ffA2, ffAD); std::swap(ffA2cemux, ffADcemux); std::swap(ffA2rstmux, ffADrstmux); ffA2cepol = ffADcepol; ffArstpol = ffADrstpol; } // Now attempt to match A1 if (ffA2) { argQ = sigA; subpattern(in_dffe); if (dff) { if ((ffA2rstmux != nullptr) ^ (dffrstmux != nullptr)) goto ffA1_end; if (dffrstmux) { if (ffArstpol != dffrstpol) goto ffA1_end; if (port(ffA2rstmux, \S) != port(dffrstmux, \S)) goto ffA1_end; ffA1rstmux = dffrstmux; } ffA1 = dff; clock = dffclock; if (dffcemux) { ffA1cemux = dffcemux; ffA1cepol = dffcepol; } sigA = dffD; ffA1_end: ; } } endcode // (5) Match 'B' input for B2REG // If B2REG, then match 'B' input for B1REG code argQ ffB2 ffB2cemux ffB2rstmux ffB2cepol ffBrstpol sigB clock ffB1 ffB1cemux ffB1rstmux ffB1cepol if (param(dsp, \BREG).as_int() == 0) { argQ = sigB; subpattern(in_dffe); if (dff) { ffB2 = dff; clock = dffclock; if (dffrstmux) { ffB2rstmux = dffrstmux; ffBrstpol = dffrstpol; } if (dffcemux) { ffB2cemux = dffcemux; ffB2cepol = dffcepol; } sigB = dffD; // Now attempt to match B1 if (ffB2) { argQ = sigB; subpattern(in_dffe); if (dff) { if ((ffB2rstmux != nullptr) ^ (dffrstmux != nullptr)) goto ffB1_end; if (dffrstmux) { if (ffBrstpol != dffrstpol) goto ffB1_end; if (port(ffB2rstmux, \S) != port(dffrstmux, \S)) goto ffB1_end; ffB1rstmux = dffrstmux; } ffB1 = dff; clock = dffclock; if (dffcemux) { ffB1cemux = dffcemux; ffB1cepol = dffcepol; } sigB = dffD; ffB1_end: ; } } } } endcode // (6) Match 'D' input for DREG code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock if (param(dsp, \DREG).as_int() == 0) { argQ = sigD; subpattern(in_dffe); if (dff) { ffD = dff; clock = dffclock; if (dffrstmux) { ffDrstmux = dffrstmux; ffDrstpol = dffrstpol; } if (dffcemux) { ffDcemux = dffcemux; ffDcepol = dffcepol; } sigD = dffD; } } endcode // (7) Match 'P' output that exclusively drives an MREG code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) { argD = sigM; subpattern(out_dffe); if (dff) { ffM = dff; clock = dffclock; if (dffrstmux) { ffMrstmux = dffrstmux; ffMrstpol = dffrstpol; } if (dffcemux) { ffMcemux = dffcemux; ffMcepol = dffcepol; } sigM = dffQ; } } sigP = sigM; endcode // (8) Match 'P' output that exclusively drives one of two inputs to an $add // cell (post-adder). // The other input to the adder is assumed to come in from the 'C' input // (note: 'P' -> 'C' connections that exist for accumulators are // recognised in xilinx_dsp.cc). match postAdd // Ensure that Z mux is not already used if port(dsp, \OPMODE, SigSpec(0, 7)).extract(4,3).is_fully_zero() select postAdd->type.in($add) select GetSize(port(postAdd, \Y)) <= 48 choice <IdString> AB {\A, \B} select nusers(port(postAdd, AB)) <= 3 filter ffMcemux || nusers(port(postAdd, AB)) == 2 filter !ffMcemux || nusers(port(postAdd, AB)) == 3 index <SigBit> port(postAdd, AB)[0] === sigP[0] filter GetSize(port(postAdd, AB)) >= GetSize(sigP) filter port(postAdd, AB).extract(0, GetSize(sigP)) == sigP // Check that remainder of AB is a sign- or zero-extension filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(sigP[GetSize(sigP)-1], GetSize(port(postAdd, AB))-GetSize(sigP)) || port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(State::S0, GetSize(port(postAdd, AB))-GetSize(sigP)) set postAddAB AB optional endmatch code sigC sigP if (postAdd) { sigC = port(postAdd, postAddAB == \A ? \B : \A); sigP = port(postAdd, \Y); } endcode // (9) Match 'P' output that exclusively drives a PREG code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock if (param(dsp, \PREG).as_int() == 0) { int users = 2; // If ffMcemux and no postAdd new-value net must have three users: ffMcemux, ffM and ffPcemux if (ffMcemux && !postAdd) users++; if (nusers(sigP) == users) { argD = sigP; subpattern(out_dffe); if (dff) { ffP = dff; clock = dffclock; if (dffrstmux) { ffPrstmux = dffrstmux; ffPrstpol = dffrstpol; } if (dffcemux) { ffPcemux = dffcemux; ffPcepol = dffcepol; } sigP = dffQ; } } } endcode // (10) If post-adder and PREG both present, match for a $mux cell driving // the 'C' input, where one of the $mux's inputs is the PREG output. // This indicates an accumulator situation, and one where a $mux exists // to override the accumulated value: // +--------------------------------+ // | ____ | // +--| \ | // |$mux|-+ | // 'C' ---|____/ | | // | /-------\ +----+ | // +----+ +-| post- |___|PREG|---+ 'P' // |MREG|------ | adder | +----+ // +----+ \-------/ match postAddMux if postAdd if ffP select postAddMux->type.in($mux) select nusers(port(postAddMux, \Y)) == 2 choice <IdString> AB {\A, \B} index <SigSpec> port(postAddMux, AB) === sigP index <SigSpec> port(postAddMux, \Y) === sigC set postAddMuxAB AB optional endmatch code sigC if (postAddMux) sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A); endcode // (11) If PREG present, match for a greater-than-or-equal $ge cell attached to // the 'P' output where it is compared to a constant that is a power-of-2: // e.g. `assign overflow = (PREG >= 2**40);` // In this scenario, the pattern detector functionality of a DSP48E1 can // to implement this function match overflow if ffP if param(dsp, \USE_PATTERN_DETECT).decode_string() == "NO_PATDET" select overflow->type.in($ge) select GetSize(port(overflow, \Y)) <= 48 select port(overflow, \B).is_fully_const() define <Const> B port(overflow, \B).as_const() select std::count(B.bits.begin(), B.bits.end(), State::S1) == 1 index <SigSpec> port(overflow, \A) === sigP optional endmatch code accept; endcode // ####################### // Subpattern for matching against input registers, based on knowledge of the // 'Q' input. Typically, identifying registers with clock-enable and reset // capability would be a task would be handled by other Yosys passes such as // dff2dffe, but since DSP inference happens much before this, these patterns // have to be manually identified. // At a high level: // (1) Starting from a $dff cell that (partially or fully) drives the given // 'Q' argument // (2) Match for a $mux cell implementing synchronous reset semantics --- // one that exclusively drives the 'D' input of the $dff, with one of its // $mux inputs being fully zero // (3) Match for a $mux cell implement clock enable semantics --- one that // exclusively drives the 'D' input of the $dff (or the other input of // the reset $mux) and where one of this $mux's inputs is connected to // the 'Q' output of the $dff subpattern in_dffe arg argD argQ clock code dff = nullptr; if (argQ.empty()) reject; for (const auto &c : argQ.chunks()) { // Abandon matches when 'Q' is a constant if (!c.wire) reject; // Abandon matches when 'Q' has the keep attribute set if (c.wire->get_bool_attribute(\keep)) reject; // Abandon matches when 'Q' has a non-zero init attribute set // (not supported by DSP48E1) Const init = c.wire->attributes.at(\init, Const()); if (!init.empty()) for (auto b : init.extract(c.offset, c.width)) if (b != State::Sx && b != State::S0) reject; } endcode // (1) Starting from a $dff cell that (partially or fully) drives the given // 'Q' argument match ff select ff->type.in($dff) // DSP48E1 does not support clock inversion select param(ff, \CLK_POLARITY).as_bool() slice offset GetSize(port(ff, \D)) index <SigBit> port(ff, \Q)[offset] === argQ[0] // Check that the rest of argQ is present filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ) filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ filter clock == SigBit() || port(ff, \CLK) == clock set ffoffset offset endmatch code argQ argD SigSpec Q = port(ff, \Q); dff = ff; dffclock = port(ff, \CLK); dffD = argQ; argD = port(ff, \D); argQ = Q; dffD.replace(argQ, argD); // Only search for ffrstmux if dffD only // has two (ff, ffrstmux) users if (nusers(dffD) > 2) argD = SigSpec(); endcode // (2) Match for a $mux cell implementing synchronous reset semantics --- // exclusively drives the 'D' input of the $dff, with one of the $mux // inputs being fully zero match ffrstmux if !argD.empty() select ffrstmux->type.in($mux) index <SigSpec> port(ffrstmux, \Y) === argD choice <IdString> BA {\B, \A} // DSP48E1 only supports reset to zero select port(ffrstmux, BA).is_fully_zero() define <bool> pol (BA == \B) set ffrstpol pol semioptional endmatch code argD if (ffrstmux) { dffrstmux = ffrstmux; dffrstpol = ffrstpol; argD = port(ffrstmux, ffrstpol ? \A : \B); dffD.replace(port(ffrstmux, \Y), argD); // Only search for ffcemux if argQ has at // least 3 users (ff, <upstream>, ffrstmux) and // dffD only has two (ff, ffrstmux) if (!(nusers(argQ) >= 3 && nusers(dffD) == 2)) argD = SigSpec(); } else dffrstmux = nullptr; endcode // (3) Match for a $mux cell implement clock enable semantics --- one that // exclusively drives the 'D' input of the $dff (or the other input of // the reset $mux) and where one of this $mux's inputs is connected to // the 'Q' output of the $dff match ffcemux if !argD.empty() select ffcemux->type.in($mux) index <SigSpec> port(ffcemux, \Y) === argD choice <IdString> AB {\A, \B} index <SigSpec> port(ffcemux, AB) === argQ define <bool> pol (AB == \A) set ffcepol pol semioptional endmatch code argD if (ffcemux) { dffcemux = ffcemux; dffcepol = ffcepol; argD = port(ffcemux, ffcepol ? \B : \A); dffD.replace(port(ffcemux, \Y), argD); } else dffcemux = nullptr; endcode // ####################### // Subpattern for matching against output registers, based on knowledge of the // 'D' input. // At a high level: // (1) Starting from an optional $mux cell that implements clock enable // semantics --- one where the given 'D' argument (partially or fully) // drives one of its two inputs // (2) Starting from, or continuing onto, another optional $mux cell that // implements synchronous reset semantics --- one where the given 'D' // argument (or the clock enable $mux output) drives one of its two inputs // and where the other input is fully zero // (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the // output of the previous clock enable or reset $mux cells) subpattern out_dffe arg argD argQ clock code dff = nullptr; for (auto c : argD.chunks()) // Abandon matches when 'D' has the keep attribute set if (c.wire->get_bool_attribute(\keep)) reject; endcode // (1) Starting from an optional $mux cell that implements clock enable // semantics --- one where the given 'D' argument (partially or fully) // drives one of its two inputs match ffcemux select ffcemux->type.in($mux) // ffcemux output must have two users: ffcemux and ff.D select nusers(port(ffcemux, \Y)) == 2 choice <IdString> AB {\A, \B} // keep-last-value net must have at least three users: ffcemux, ff, downstream sink(s) select nusers(port(ffcemux, AB)) >= 3 slice offset GetSize(port(ffcemux, \Y)) define <IdString> BA (AB == \A ? \B : \A) index <SigBit> port(ffcemux, BA)[offset] === argD[0] // Check that the rest of argD is present filter GetSize(port(ffcemux, BA)) >= offset + GetSize(argD) filter port(ffcemux, BA).extract(offset, GetSize(argD)) == argD set ffoffset offset define <bool> pol (AB == \A) set ffcepol pol semioptional endmatch code argD argQ dffcemux = ffcemux; if (ffcemux) { SigSpec BA = port(ffcemux, ffcepol ? \B : \A); SigSpec Y = port(ffcemux, \Y); argQ = argD; argD.replace(BA, Y); argQ.replace(BA, port(ffcemux, ffcepol ? \A : \B)); dffcemux = ffcemux; dffcepol = ffcepol; } endcode // (2) Starting from, or continuing onto, another optional $mux cell that // implements synchronous reset semantics --- one where the given 'D' // argument (or the clock enable $mux output) drives one of its two inputs // and where the other input is fully zero match ffrstmux select ffrstmux->type.in($mux) // ffrstmux output must have two users: ffrstmux and ff.D select nusers(port(ffrstmux, \Y)) == 2 choice <IdString> BA {\B, \A} // DSP48E1 only supports reset to zero select port(ffrstmux, BA).is_fully_zero() slice offset GetSize(port(ffrstmux, \Y)) define <IdString> AB (BA == \B ? \A : \B) index <SigBit> port(ffrstmux, AB)[offset] === argD[0] // Check that offset is consistent filter !ffcemux || ffoffset == offset // Check that the rest of argD is present filter GetSize(port(ffrstmux, AB)) >= offset + GetSize(argD) filter port(ffrstmux, AB).extract(offset, GetSize(argD)) == argD set ffoffset offset define <bool> pol (AB == \A) set ffrstpol pol semioptional endmatch code argD argQ dffrstmux = ffrstmux; if (ffrstmux) { SigSpec AB = port(ffrstmux, ffrstpol ? \A : \B); SigSpec Y = port(ffrstmux, \Y); argD.replace(AB, Y); dffrstmux = ffrstmux; dffrstpol = ffrstpol; } endcode // (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the // output of the previous clock enable or reset $mux cells) match ff select ff->type.in($dff) // DSP48E1 does not support clock inversion select param(ff, \CLK_POLARITY).as_bool() slice offset GetSize(port(ff, \D)) index <SigBit> port(ff, \D)[offset] === argD[0] // Check that offset is consistent filter (!ffcemux && !ffrstmux) || ffoffset == offset // Check that the rest of argD is present filter GetSize(port(ff, \D)) >= offset + GetSize(argD) filter port(ff, \D).extract(offset, GetSize(argD)) == argD // Check that FF.Q is connected to CE-mux filter !ffcemux || port(ff, \Q).extract(offset, GetSize(argQ)) == argQ filter clock == SigBit() || port(ff, \CLK) == clock set ffoffset offset endmatch code argQ SigSpec D = port(ff, \D); SigSpec Q = port(ff, \Q); if (!ffcemux) { argQ = argD; argQ.replace(D, Q); } // Abandon matches when 'Q' has a non-zero init attribute set // (not supported by DSP48E1) for (auto c : argQ.chunks()) { Const init = c.wire->attributes.at(\init, Const()); if (!init.empty()) for (auto b : init.extract(c.offset, c.width)) if (b != State::Sx && b != State::S0) reject; } dff = ff; dffQ = argQ; dffclock = port(ff, \CLK); endcode