// This file describes the main pattern matcher setup (of three total) that // forms the `xilinx_dsp` pass described in xilinx_dsp.cc // At a high level, it works as follows: // ( 1) Starting from a DSP48E1 cell // ( 2) Match the driver of the 'A' input to a possible $dff cell (ADREG) // (attached to at most two $mux cells that implement clock-enable or // reset functionality, using a subpattern discussed below) // If ADREG matched, treat 'A' input as input of ADREG // ( 3) Match the driver of the 'A' and 'D' inputs for a possible $add cell // (pre-adder) // ( 4) If pre-adder was present, find match 'A' input for A2REG // If pre-adder was not present, move ADREG to A2REG // If A2REG, then match 'A' input for A1REG // ( 5) Match 'B' input for B2REG // If B2REG, then match 'B' input for B1REG // ( 6) Match 'D' input for DREG // ( 7) Match 'P' output that exclusively drives an MREG // ( 8) Match 'P' output that exclusively drives one of two inputs to an $add // cell (post-adder). // The other input to the adder is assumed to come in from the 'C' input // (note: 'P' -> 'C' connections that exist for accumulators are // recognised in xilinx_dsp.cc). // ( 9) Match 'P' output that exclusively drives a PREG // (10) If post-adder and PREG both present, match for a $mux cell driving // the 'C' input, where one of the $mux's inputs is the PREG output. // This indicates an accumulator situation, and one where a $mux exists // to override the accumulated value: // +--------------------------------+ // | ____ | // +--| \ | // |$mux|-+ | // 'C' ---|____/ | | // | /-------\ +----+ | // +----+ +-| post- |___|PREG|---+ 'P' // |MREG|------ | adder | +----+ // +----+ \-------/ // (11) If PREG present, match for a greater-than-or-equal $ge cell attached // to the 'P' output where it is compared to a constant that is a // power-of-2: e.g. `assign overflow = (PREG >= 2**40);` // In this scenario, the pattern detector functionality of a DSP48E1 can // to implement this function // Notes: // - The intention of this pattern matcher is for it to be compatible with // DSP48E1 cells inferred from multiply operations by Yosys, as well as for // user instantiations that may already contain the cells being packed... // (though the latter is currently untested) // - Since the $dff-with-optional-clock-enable-or-reset-mux pattern is used // for each *REG match, it has been factored out into two subpatterns: // in_dffe and out_dffe located at the bottom of this file. // - Matching for pattern detector features is currently incomplete. For // example, matching for underflow as well as overflow detection is // possible, as would auto-reset, enabling saturated arithmetic, detecting // custom patterns, etc. pattern xilinx_dsp_pack state clock state sigA sigB sigC sigD sigM sigP state postAddAB postAddMuxAB state ffA1cepol ffA2cepol ffADcepol ffB1cepol ffB2cepol ffDcepol ffMcepol ffPcepol state ffArstpol ffADrstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol state ffAD ffADcemux ffADrstmux ffA1 ffA1cemux ffA1rstmux ffA2 ffA2cemux ffA2rstmux state ffB1 ffB1cemux ffB1rstmux ffB2 ffB2cemux ffB2rstmux state ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux // Variables used for subpatterns state argQ argD state ffcepol ffrstpol state ffoffset udata dffD dffQ udata dffclock udata dff dffcemux dffrstmux udata dffcepol dffrstpol // (1) Starting from a DSP48E1 cell match dsp select dsp->type.in(\DSP48E1) endmatch code sigA sigB sigC sigD sigM clock auto unextend = [](const SigSpec &sig) { int i; for (i = GetSize(sig)-1; i > 0; i--) if (sig[i] != sig[i-1]) break; // Do not remove non-const sign bit if (sig[i].wire) ++i; return sig.extract(0, i); }; sigA = unextend(port(dsp, \A)); sigB = unextend(port(dsp, \B)); sigC = port(dsp, \C, SigSpec()); sigD = port(dsp, \D, SigSpec()); SigSpec P = port(dsp, \P); if (param(dsp, \USE_MULT, Const("MULTIPLY")).decode_string() == "MULTIPLY") { // Only care about those bits that are used int i; for (i = GetSize(P)-1; i >= 0; i--) if (nusers(P[i]) > 1) break; i++; log_assert(nusers(P.extract_end(i)) <= 1); // This sigM could have no users if downstream sinks (e.g. $add) is // narrower than $mul result, for example if (i == 0) reject; sigM = P.extract(0, i); } else sigM = P; clock = port(dsp, \CLK, SigBit()); endcode // (2) Match the driver of the 'A' input to a possible $dff cell (ADREG) // (attached to at most two $mux cells that implement clock-enable or // reset functionality, using a subpattern discussed above) // If matched, treat 'A' input as input of ADREG code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock if (param(dsp, \ADREG).as_int() == 0) { argQ = sigA; subpattern(in_dffe); if (dff) { ffAD = dff; clock = dffclock; if (dffrstmux) { ffADrstmux = dffrstmux; ffADrstpol = dffrstpol; } if (dffcemux) { ffADcemux = dffcemux; ffADcepol = dffcepol; } sigA = dffD; } } endcode // (3) Match the driver of the 'A' and 'D' inputs for a possible $add cell // (pre-adder) match preAdd if sigD.empty() || sigD.is_fully_zero() // Ensure that preAdder not already used if param(dsp, \USE_DPORT, Const("FALSE")).decode_string() == "FALSE" if port(dsp, \INMODE, Const(0, 5)).is_fully_zero() select preAdd->type.in($add) // Output has to be 25 bits or less select GetSize(port(preAdd, \Y)) <= 25 select nusers(port(preAdd, \Y)) == 2 choice AB {\A, \B} // A port has to be 30 bits or less select GetSize(port(preAdd, AB)) <= 30 define BA (AB == \A ? \B : \A) // D port has to be 25 bits or less select GetSize(port(preAdd, BA)) <= 25 index port(preAdd, \Y) === sigA optional endmatch code sigA sigD if (preAdd) { sigA = port(preAdd, \A); sigD = port(preAdd, \B); } endcode // (4) If pre-adder was present, find match 'A' input for A2REG // If pre-adder was not present, move ADREG to A2REG // Then match 'A' input for A1REG code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock ffA2 ffA2cemux ffA2rstmux ffA2cepol ffArstpol ffA1 ffA1cemux ffA1rstmux ffA1cepol // Only search for ffA2 if there was a pre-adder // (otherwise ffA2 would have been matched as ffAD) if (preAdd) { if (param(dsp, \AREG).as_int() == 0) { argQ = sigA; subpattern(in_dffe); if (dff) { ffA2 = dff; clock = dffclock; if (dffrstmux) { ffA2rstmux = dffrstmux; ffArstpol = dffrstpol; } if (dffcemux) { ffA2cepol = dffcepol; ffA2cemux = dffcemux; } sigA = dffD; } } } // And if there wasn't a pre-adder, // move AD register to A else if (ffAD) { log_assert(!ffA2 && !ffA2cemux && !ffA2rstmux); std::swap(ffA2, ffAD); std::swap(ffA2cemux, ffADcemux); std::swap(ffA2rstmux, ffADrstmux); ffA2cepol = ffADcepol; ffArstpol = ffADrstpol; } // Now attempt to match A1 if (ffA2) { argQ = sigA; subpattern(in_dffe); if (dff) { if ((ffA2rstmux != nullptr) ^ (dffrstmux != nullptr)) goto ffA1_end; if (dffrstmux) { if (ffArstpol != dffrstpol) goto ffA1_end; if (port(ffA2rstmux, \S) != port(dffrstmux, \S)) goto ffA1_end; ffA1rstmux = dffrstmux; } ffA1 = dff; clock = dffclock; if (dffcemux) { ffA1cemux = dffcemux; ffA1cepol = dffcepol; } sigA = dffD; ffA1_end: ; } } endcode // (5) Match 'B' input for B2REG // If B2REG, then match 'B' input for B1REG code argQ ffB2 ffB2cemux ffB2rstmux ffB2cepol ffBrstpol sigB clock ffB1 ffB1cemux ffB1rstmux ffB1cepol if (param(dsp, \BREG).as_int() == 0) { argQ = sigB; subpattern(in_dffe); if (dff) { ffB2 = dff; clock = dffclock; if (dffrstmux) { ffB2rstmux = dffrstmux; ffBrstpol = dffrstpol; } if (dffcemux) { ffB2cemux = dffcemux; ffB2cepol = dffcepol; } sigB = dffD; // Now attempt to match B1 if (ffB2) { argQ = sigB; subpattern(in_dffe); if (dff) { if ((ffB2rstmux != nullptr) ^ (dffrstmux != nullptr)) goto ffB1_end; if (dffrstmux) { if (ffBrstpol != dffrstpol) goto ffB1_end; if (port(ffB2rstmux, \S) != port(dffrstmux, \S)) goto ffB1_end; ffB1rstmux = dffrstmux; } ffB1 = dff; clock = dffclock; if (dffcemux) { ffB1cemux = dffcemux; ffB1cepol = dffcepol; } sigB = dffD; ffB1_end: ; } } } } endcode // (6) Match 'D' input for DREG code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock if (param(dsp, \DREG).as_int() == 0) { argQ = sigD; subpattern(in_dffe); if (dff) { ffD = dff; clock = dffclock; if (dffrstmux) { ffDrstmux = dffrstmux; ffDrstpol = dffrstpol; } if (dffcemux) { ffDcemux = dffcemux; ffDcepol = dffcepol; } sigD = dffD; } } endcode // (7) Match 'P' output that exclusively drives an MREG code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) { argD = sigM; subpattern(out_dffe); if (dff) { ffM = dff; clock = dffclock; if (dffrstmux) { ffMrstmux = dffrstmux; ffMrstpol = dffrstpol; } if (dffcemux) { ffMcemux = dffcemux; ffMcepol = dffcepol; } sigM = dffQ; } } sigP = sigM; endcode // (8) Match 'P' output that exclusively drives one of two inputs to an $add // cell (post-adder). // The other input to the adder is assumed to come in from the 'C' input // (note: 'P' -> 'C' connections that exist for accumulators are // recognised in xilinx_dsp.cc). match postAdd // Ensure that Z mux is not already used if port(dsp, \OPMODE, SigSpec()).extract(4,3).is_fully_zero() select postAdd->type.in($add) select GetSize(port(postAdd, \Y)) <= 48 choice AB {\A, \B} select nusers(port(postAdd, AB)) <= 3 filter ffMcemux || nusers(port(postAdd, AB)) == 2 filter !ffMcemux || nusers(port(postAdd, AB)) == 3 index port(postAdd, AB)[0] === sigP[0] filter GetSize(port(postAdd, AB)) >= GetSize(sigP) filter port(postAdd, AB).extract(0, GetSize(sigP)) == sigP // Check that remainder of AB is a sign-extension define AB_SIGNED (param(postAdd, AB == \A ? \A_SIGNED : \B_SIGNED).as_bool()) filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(AB_SIGNED ? sigP[GetSize(sigP)-1] : State::S0, GetSize(port(postAdd, AB))-GetSize(sigP)) set postAddAB AB optional endmatch code sigC sigP if (postAdd) { sigC = port(postAdd, postAddAB == \A ? \B : \A); sigP = port(postAdd, \Y); } endcode // (9) Match 'P' output that exclusively drives a PREG code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock if (param(dsp, \PREG).as_int() == 0) { int users = 2; // If ffMcemux and no postAdd new-value net must have three users: ffMcemux, ffM and ffPcemux if (ffMcemux && !postAdd) users++; if (nusers(sigP) == users) { argD = sigP; subpattern(out_dffe); if (dff) { ffP = dff; clock = dffclock; if (dffrstmux) { ffPrstmux = dffrstmux; ffPrstpol = dffrstpol; } if (dffcemux) { ffPcemux = dffcemux; ffPcepol = dffcepol; } sigP = dffQ; } } } endcode // (10) If post-adder and PREG both present, match for a $mux cell driving // the 'C' input, where one of the $mux's inputs is the PREG output. // This indicates an accumulator situation, and one where a $mux exists // to override the accumulated value: // +--------------------------------+ // | ____ | // +--| \ | // |$mux|-+ | // 'C' ---|____/ | | // | /-------\ +----+ | // +----+ +-| post- |___|PREG|---+ 'P' // |MREG|------ | adder | +----+ // +----+ \-------/ match postAddMux if postAdd if ffP select postAddMux->type.in($mux) select nusers(port(postAddMux, \Y)) == 2 choice AB {\A, \B} index port(postAddMux, AB) === sigP index port(postAddMux, \Y) === sigC set postAddMuxAB AB optional endmatch code sigC if (postAddMux) sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A); endcode // (11) If PREG present, match for a greater-than-or-equal $ge cell attached to // the 'P' output where it is compared to a constant that is a power-of-2: // e.g. `assign overflow = (PREG >= 2**40);` // In this scenario, the pattern detector functionality of a DSP48E1 can // to implement this function match overflow if ffP if param(dsp, \USE_PATTERN_DETECT, Const("NO_PATDET")).decode_string() == "NO_PATDET" select overflow->type.in($ge) select GetSize(port(overflow, \Y)) <= 48 select port(overflow, \B).is_fully_const() define B port(overflow, \B).as_const() select std::count(B.bits.begin(), B.bits.end(), State::S1) == 1 index port(overflow, \A) === sigP optional endmatch code accept; endcode // ####################### // Subpattern for matching against input registers, based on knowledge of the // 'Q' input. Typically, identifying registers with clock-enable and reset // capability would be a task would be handled by other Yosys passes such as // dff2dffe, but since DSP inference happens much before this, these patterns // have to be manually identified. // At a high level: // (1) Starting from a $dff cell that (partially or fully) drives the given // 'Q' argument // (2) Match for a $mux cell implementing synchronous reset semantics --- // one that exclusively drives the 'D' input of the $dff, with one of its // $mux inputs being fully zero // (3) Match for a $mux cell implement clock enable semantics --- one that // exclusively drives the 'D' input of the $dff (or the other input of // the reset $mux) and where one of this $mux's inputs is connected to // the 'Q' output of the $dff subpattern in_dffe arg argD argQ clock code dff = nullptr; if (GetSize(argQ) == 0) reject; for (const auto &c : argQ.chunks()) { // Abandon matches when 'Q' is a constant if (!c.wire) reject; // Abandon matches when 'Q' has the keep attribute set if (c.wire->get_bool_attribute(\keep)) reject; // Abandon matches when 'Q' has a non-zero init attribute set // (not supported by DSP48E1) Const init = c.wire->attributes.at(\init, Const()); if (!init.empty()) for (auto b : init.extract(c.offset, c.width)) if (b != State::Sx && b != State::S0) reject; } endcode // (1) Starting from a $dff cell that (partially or fully) drives the given // 'Q' argument match ff select ff->type.in($dff) // DSP48E1 does not support clock inversion select param(ff, \CLK_POLARITY).as_bool() slice offset GetSize(port(ff, \D)) index port(ff, \Q)[offset] === argQ[0] // Check that the rest of argQ is present filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ) filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ filter clock == SigBit() || port(ff, \CLK) == clock set ffoffset offset endmatch code argQ argD SigSpec Q = port(ff, \Q); dff = ff; dffclock = port(ff, \CLK); dffD = argQ; argD = port(ff, \D); argQ = Q; dffD.replace(argQ, argD); // Only search for ffrstmux if dffD only // has two (ff, ffrstmux) users if (nusers(dffD) > 2) argD = SigSpec(); endcode // (2) Match for a $mux cell implementing synchronous reset semantics --- // exclusively drives the 'D' input of the $dff, with one of the $mux // inputs being fully zero match ffrstmux if !argD.empty() select ffrstmux->type.in($mux) index port(ffrstmux, \Y) === argD choice BA {\B, \A} // DSP48E1 only supports reset to zero select port(ffrstmux, BA).is_fully_zero() define pol (BA == \B) set ffrstpol pol semioptional endmatch code argD if (ffrstmux) { dffrstmux = ffrstmux; dffrstpol = ffrstpol; argD = port(ffrstmux, ffrstpol ? \A : \B); dffD.replace(port(ffrstmux, \Y), argD); // Only search for ffcemux if argQ has at // least 3 users (ff, , ffrstmux) and // dffD only has two (ff, ffrstmux) if (!(nusers(argQ) >= 3 && nusers(dffD) == 2)) argD = SigSpec(); } else dffrstmux = nullptr; endcode // (3) Match for a $mux cell implement clock enable semantics --- one that // exclusively drives the 'D' input of the $dff (or the other input of // the reset $mux) and where one of this $mux's inputs is connected to // the 'Q' output of the $dff match ffcemux if !argD.empty() select ffcemux->type.in($mux) index port(ffcemux, \Y) === argD choice AB {\A, \B} index port(ffcemux, AB) === argQ define pol (AB == \A) set ffcepol pol semioptional endmatch code argD if (ffcemux) { dffcemux = ffcemux; dffcepol = ffcepol; argD = port(ffcemux, ffcepol ? \B : \A); dffD.replace(port(ffcemux, \Y), argD); } else dffcemux = nullptr; endcode // ####################### // Subpattern for matching against output registers, based on knowledge of the // 'D' input. // At a high level: // (1) Starting from an optional $mux cell that implements clock enable // semantics --- one where the given 'D' argument (partially or fully) // drives one of its two inputs // (2) Starting from, or continuing onto, another optional $mux cell that // implements synchronous reset semantics --- one where the given 'D' // argument (or the clock enable $mux output) drives one of its two inputs // and where the other input is fully zero // (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the // output of the previous clock enable or reset $mux cells) subpattern out_dffe arg argD argQ clock code dff = nullptr; for (auto c : argD.chunks()) // Abandon matches when 'D' has the keep attribute set if (c.wire->get_bool_attribute(\keep)) reject; endcode // (1) Starting from an optional $mux cell that implements clock enable // semantics --- one where the given 'D' argument (partially or fully) // drives one of its two inputs match ffcemux select ffcemux->type.in($mux) // ffcemux output must have two users: ffcemux and ff.D select nusers(port(ffcemux, \Y)) == 2 choice AB {\A, \B} // keep-last-value net must have at least three users: ffcemux, ff, downstream sink(s) select nusers(port(ffcemux, AB)) >= 3 slice offset GetSize(port(ffcemux, \Y)) define BA (AB == \A ? \B : \A) index port(ffcemux, BA)[offset] === argD[0] // Check that the rest of argD is present filter GetSize(port(ffcemux, BA)) >= offset + GetSize(argD) filter port(ffcemux, BA).extract(offset, GetSize(argD)) == argD set ffoffset offset define pol (AB == \A) set ffcepol pol semioptional endmatch code argD argQ dffcemux = ffcemux; if (ffcemux) { SigSpec BA = port(ffcemux, ffcepol ? \B : \A); SigSpec Y = port(ffcemux, \Y); argQ = argD; argD.replace(BA, Y); argQ.replace(BA, port(ffcemux, ffcepol ? \A : \B)); dffcemux = ffcemux; dffcepol = ffcepol; } endcode // (2) Starting from, or continuing onto, another optional $mux cell that // implements synchronous reset semantics --- one where the given 'D' // argument (or the clock enable $mux output) drives one of its two inputs // and where the other input is fully zero match ffrstmux select ffrstmux->type.in($mux) // ffrstmux output must have two users: ffrstmux and ff.D select nusers(port(ffrstmux, \Y)) == 2 choice BA {\B, \A} // DSP48E1 only supports reset to zero select port(ffrstmux, BA).is_fully_zero() slice offset GetSize(port(ffrstmux, \Y)) define AB (BA == \B ? \A : \B) index port(ffrstmux, AB)[offset] === argD[0] // Check that offset is consistent filter !ffcemux || ffoffset == offset // Check that the rest of argD is present filter GetSize(port(ffrstmux, AB)) >= offset + GetSize(argD) filter port(ffrstmux, AB).extract(offset, GetSize(argD)) == argD set ffoffset offset define pol (AB == \A) set ffrstpol pol semioptional endmatch code argD argQ dffrstmux = ffrstmux; if (ffrstmux) { SigSpec AB = port(ffrstmux, ffrstpol ? \A : \B); SigSpec Y = port(ffrstmux, \Y); argD.replace(AB, Y); dffrstmux = ffrstmux; dffrstpol = ffrstpol; } endcode // (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the // output of the previous clock enable or reset $mux cells) match ff select ff->type.in($dff) // DSP48E1 does not support clock inversion select param(ff, \CLK_POLARITY).as_bool() slice offset GetSize(port(ff, \D)) index port(ff, \D)[offset] === argD[0] // Check that offset is consistent filter (!ffcemux && !ffrstmux) || ffoffset == offset // Check that the rest of argD is present filter GetSize(port(ff, \D)) >= offset + GetSize(argD) filter port(ff, \D).extract(offset, GetSize(argD)) == argD // Check that FF.Q is connected to CE-mux filter !ffcemux || port(ff, \Q).extract(offset, GetSize(argQ)) == argQ filter clock == SigBit() || port(ff, \CLK) == clock set ffoffset offset endmatch code argQ SigSpec D = port(ff, \D); SigSpec Q = port(ff, \Q); if (!ffcemux) { argQ = argD; argQ.replace(D, Q); } // Abandon matches when 'Q' has a non-zero init attribute set // (not supported by DSP48E1) for (auto c : argQ.chunks()) { Const init = c.wire->attributes.at(\init, Const()); if (!init.empty()) for (auto b : init.extract(c.offset, c.width)) if (b != State::Sx && b != State::S0) reject; } dff = ff; dffQ = argQ; dffclock = port(ff, \CLK); endcode