From 666c6128a90de588ab26c876a257ea48edfded30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Ko=C5=9Bcielnicki?= Date: Sun, 22 Dec 2019 20:43:39 +0100 Subject: [PATCH 1/8] xilinx_dsp: Initial DSP48A/DSP48A1 support. --- passes/pmgen/Makefile.inc | 3 +- passes/pmgen/xilinx_dsp.cc | 212 +++++++++- passes/pmgen/xilinx_dsp48a.pmg | 673 ++++++++++++++++++++++++++++++ passes/pmgen/xilinx_dsp_CREG.pmg | 9 +- techlibs/xilinx/synth_xilinx.cc | 5 +- techlibs/xilinx/xc3sda_dsp_map.v | 2 +- techlibs/xilinx/xc6s_dsp_map.v | 2 +- tests/arch/xilinx/macc.sh | 3 + tests/arch/xilinx/mul.ys | 12 + tests/arch/xilinx/mul_unsigned.ys | 14 + 10 files changed, 921 insertions(+), 14 deletions(-) create mode 100644 passes/pmgen/xilinx_dsp48a.pmg diff --git a/passes/pmgen/Makefile.inc b/passes/pmgen/Makefile.inc index 145d2ebf9..1a57bef7d 100644 --- a/passes/pmgen/Makefile.inc +++ b/passes/pmgen/Makefile.inc @@ -22,8 +22,9 @@ $(eval $(call add_extra_objs,passes/pmgen/ice40_wrapcarry_pm.h)) # -------------------------------------- OBJS += passes/pmgen/xilinx_dsp.o -passes/pmgen/xilinx_dsp.o: passes/pmgen/xilinx_dsp_pm.h passes/pmgen/xilinx_dsp_CREG_pm.h passes/pmgen/xilinx_dsp_cascade_pm.h +passes/pmgen/xilinx_dsp.o: passes/pmgen/xilinx_dsp_pm.h passes/pmgen/xilinx_dsp48a_pm.h passes/pmgen/xilinx_dsp_CREG_pm.h passes/pmgen/xilinx_dsp_cascade_pm.h $(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp_pm.h)) +$(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp48a_pm.h)) $(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp_CREG_pm.h)) $(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp_cascade_pm.h)) diff --git a/passes/pmgen/xilinx_dsp.cc b/passes/pmgen/xilinx_dsp.cc index 054e123e4..81c3c57c4 100644 --- a/passes/pmgen/xilinx_dsp.cc +++ b/passes/pmgen/xilinx_dsp.cc @@ -26,6 +26,7 @@ USING_YOSYS_NAMESPACE PRIVATE_NAMESPACE_BEGIN #include "passes/pmgen/xilinx_dsp_pm.h" +#include "passes/pmgen/xilinx_dsp48a_pm.h" #include "passes/pmgen/xilinx_dsp_CREG_pm.h" #include 
"passes/pmgen/xilinx_dsp_cascade_pm.h" @@ -487,6 +488,190 @@ void xilinx_dsp_pack(xilinx_dsp_pm &pm) pm.blacklist(cell); } +void xilinx_dsp48a_pack(xilinx_dsp48a_pm &pm) +{ + auto &st = pm.st_xilinx_dsp48a_pack; + + log("Analysing %s.%s for Xilinx DSP48A/DSP48A1 packing.\n", log_id(pm.module), log_id(st.dsp)); + + log_debug("preAdd: %s\n", log_id(st.preAdd, "--")); + log_debug("ffA1: %s %s %s\n", log_id(st.ffA1, "--"), log_id(st.ffA1cemux, "--"), log_id(st.ffA1rstmux, "--")); + log_debug("ffA0: %s %s %s\n", log_id(st.ffA0, "--"), log_id(st.ffA0cemux, "--"), log_id(st.ffA0rstmux, "--")); + log_debug("ffB1: %s %s %s\n", log_id(st.ffB1, "--"), log_id(st.ffB1cemux, "--"), log_id(st.ffB1rstmux, "--")); + log_debug("ffB0: %s %s %s\n", log_id(st.ffB0, "--"), log_id(st.ffB0cemux, "--"), log_id(st.ffB0rstmux, "--")); + log_debug("ffD: %s %s %s\n", log_id(st.ffD, "--"), log_id(st.ffDcemux, "--"), log_id(st.ffDrstmux, "--")); + log_debug("dsp: %s\n", log_id(st.dsp, "--")); + log_debug("ffM: %s %s %s\n", log_id(st.ffM, "--"), log_id(st.ffMcemux, "--"), log_id(st.ffMrstmux, "--")); + log_debug("postAdd: %s\n", log_id(st.postAdd, "--")); + log_debug("postAddMux: %s\n", log_id(st.postAddMux, "--")); + log_debug("ffP: %s %s %s\n", log_id(st.ffP, "--"), log_id(st.ffPcemux, "--"), log_id(st.ffPrstmux, "--")); + + Cell *cell = st.dsp; + SigSpec &opmode = cell->connections_.at(ID(OPMODE)); + + if (st.preAdd) { + log(" preadder %s (%s)\n", log_id(st.preAdd), log_id(st.preAdd->type)); + bool D_SIGNED = st.preAdd->getParam(ID(A_SIGNED)).as_bool(); + bool B_SIGNED = st.preAdd->getParam(ID(B_SIGNED)).as_bool(); + st.sigB.extend_u0(18, B_SIGNED); + st.sigD.extend_u0(18, D_SIGNED); + cell->setPort(ID(B), st.sigB); + cell->setPort(ID(D), st.sigD); + opmode[4] = State::S1; + if (st.preAdd->type == ID($add)) + opmode[6] = State::S0; + else if (st.preAdd->type == ID($sub)) + opmode[6] = State::S1; + else + log_assert(!"strange pre-adder type"); + + pm.autoremove(st.preAdd); + } + if (st.postAdd) 
{ + log(" postadder %s (%s)\n", log_id(st.postAdd), log_id(st.postAdd->type)); + + if (st.postAddMux) { + log_assert(st.ffP); + opmode[2] = st.postAddMux->getPort(ID(S)); + pm.autoremove(st.postAddMux); + } + else if (st.ffP && st.sigC == st.sigP) + opmode[2] = State::S0; + else + opmode[2] = State::S1; + opmode[3] = State::S1; + + if (opmode[2] != State::S0) { + if (st.postAddMuxAB == ID(A)) + st.sigC.extend_u0(48, st.postAdd->getParam(ID(B_SIGNED)).as_bool()); + else + st.sigC.extend_u0(48, st.postAdd->getParam(ID(A_SIGNED)).as_bool()); + cell->setPort(ID(C), st.sigC); + } + + pm.autoremove(st.postAdd); + } + + if (st.clock != SigBit()) + { + cell->setPort(ID(CLK), st.clock); + + auto f = [&pm,cell](SigSpec &A, Cell* ff, Cell* cemux, bool cepol, IdString ceport, Cell* rstmux, bool rstpol, IdString rstport) { + SigSpec D = ff->getPort(ID(D)); + SigSpec Q = pm.sigmap(ff->getPort(ID(Q))); + if (!A.empty()) + A.replace(Q, D); + if (rstmux) { + SigSpec Y = rstmux->getPort(ID(Y)); + SigSpec AB = rstmux->getPort(rstpol ? ID(A) : ID(B)); + if (!A.empty()) + A.replace(Y, AB); + if (rstport != IdString()) { + SigSpec S = rstmux->getPort(ID(S)); + cell->setPort(rstport, rstpol ? S : pm.module->Not(NEW_ID, S)); + } + } + else if (rstport != IdString()) + cell->setPort(rstport, State::S0); + if (cemux) { + SigSpec Y = cemux->getPort(ID(Y)); + SigSpec BA = cemux->getPort(cepol ? ID(B) : ID(A)); + SigSpec S = cemux->getPort(ID(S)); + if (!A.empty()) + A.replace(Y, BA); + cell->setPort(ceport, cepol ? 
S : pm.module->Not(NEW_ID, S)); + } + else + cell->setPort(ceport, State::S1); + + for (auto c : Q.chunks()) { + auto it = c.wire->attributes.find(ID(init)); + if (it == c.wire->attributes.end()) + continue; + for (int i = c.offset; i < c.offset+c.width; i++) { + log_assert(it->second[i] == State::S0 || it->second[i] == State::Sx); + it->second[i] = State::Sx; + } + } + }; + + if (st.ffA0 || st.ffA1) { + SigSpec A = cell->getPort(ID(A)); + if (st.ffA1) { + f(A, st.ffA1, st.ffA1cemux, st.ffAcepol, ID(CEA), st.ffA1rstmux, st.ffArstpol, ID(RSTA)); + cell->setParam(ID(A1REG), 1); + } + if (st.ffA0) { + f(A, st.ffA0, st.ffA0cemux, st.ffAcepol, ID(CEA), st.ffA0rstmux, st.ffArstpol, ID(RSTA)); + cell->setParam(ID(A0REG), 1); + } + pm.add_siguser(A, cell); + cell->setPort(ID(A), A); + } + if (st.ffB0 || st.ffB1) { + SigSpec B = cell->getPort(ID(B)); + if (st.ffB1) { + f(B, st.ffB1, st.ffB1cemux, st.ffBcepol, ID(CEB), st.ffB1rstmux, st.ffBrstpol, ID(RSTB)); + cell->setParam(ID(B1REG), 1); + } + if (st.ffB0) { + f(B, st.ffB0, st.ffB0cemux, st.ffBcepol, ID(CEB), st.ffB0rstmux, st.ffBrstpol, ID(RSTB)); + cell->setParam(ID(B0REG), 1); + } + pm.add_siguser(B, cell); + cell->setPort(ID(B), B); + } + if (st.ffD) { + SigSpec D = cell->getPort(ID(D)); + f(D, st.ffD, st.ffDcemux, st.ffDcepol, ID(CED), st.ffDrstmux, st.ffDrstpol, ID(RSTD)); + pm.add_siguser(D, cell); + cell->setPort(ID(D), D); + cell->setParam(ID(DREG), 1); + } + if (st.ffM) { + SigSpec M; // unused + f(M, st.ffM, st.ffMcemux, st.ffMcepol, ID(CEM), st.ffMrstmux, st.ffMrstpol, ID(RSTM)); + st.ffM->connections_.at(ID(Q)).replace(st.sigM, pm.module->addWire(NEW_ID, GetSize(st.sigM))); + cell->setParam(ID(MREG), State::S1); + } + if (st.ffP) { + SigSpec P; // unused + f(P, st.ffP, st.ffPcemux, st.ffPcepol, ID(CEP), st.ffPrstmux, st.ffPrstpol, ID(RSTP)); + st.ffP->connections_.at(ID(Q)).replace(st.sigP, pm.module->addWire(NEW_ID, GetSize(st.sigP))); + cell->setParam(ID(PREG), State::S1); + } + + log(" clock: %s (%s)", 
log_signal(st.clock), "posedge"); + + if (st.ffA0) + log(" ffA0:%s", log_id(st.ffA0)); + if (st.ffA1) + log(" ffA1:%s", log_id(st.ffA1)); + + if (st.ffB0) + log(" ffB0:%s", log_id(st.ffB0)); + if (st.ffB1) + log(" ffB1:%s", log_id(st.ffB1)); + + if (st.ffD) + log(" ffD:%s", log_id(st.ffD)); + + if (st.ffM) + log(" ffM:%s", log_id(st.ffM)); + + if (st.ffP) + log(" ffP:%s", log_id(st.ffP)); + } + log("\n"); + + SigSpec P = st.sigP; + if (GetSize(P) < 48) + P.append(pm.module->addWire(NEW_ID, 48-GetSize(P))); + cell->setPort(ID(P), P); + + pm.blacklist(cell); +} + void xilinx_dsp_packC(xilinx_dsp_CREG_pm &pm) { auto &st = pm.st_xilinx_dsp_packC; @@ -592,33 +777,48 @@ struct XilinxDspPass : public Pass { log("P output implementing the operation \"(P >= )\" will be transformed\n"); log("into using the DSP48E1's pattern detector feature for overflow detection.\n"); log("\n"); + log(" -family {xcup|xcu|xc7|xc6v|xc5v|xc4v|xc6s|xc3sda}\n"); + log(" select the family to target\n"); + log(" default: xc7\n"); + log("\n"); } void execute(std::vector args, RTLIL::Design *design) YS_OVERRIDE { log_header(design, "Executing XILINX_DSP pass (pack resources into DSPs).\n"); + std::string family = "xc7"; size_t argidx; for (argidx = 1; argidx < args.size(); argidx++) { - // if (args[argidx] == "-singleton") { - // singleton_mode = true; - // continue; - // } + if ((args[argidx] == "-family" || args[argidx] == "-arch") && argidx+1 < args.size()) { + family = args[++argidx]; + continue; + } break; } extra_args(args, argidx, design); + // Don't bother distinguishing between those. 
+ if (family == "xc6v") + family = "xc7"; + if (family == "xcup") + family = "xcu"; + for (auto module : design->selected_modules()) { // Experimental feature: pack $add/$sub cells with // (* use_dsp48="simd" *) into DSP48E1's using its // SIMD feature - xilinx_simd_pack(module, module->selected_cells()); + if (family == "xc7") + xilinx_simd_pack(module, module->selected_cells()); // Match for all features ([ABDMP][12]?REG, pre-adder, // post-adder, pattern detector, etc.) except for CREG - { + if (family == "xc7") { xilinx_dsp_pm pm(module, module->selected_cells()); pm.run_xilinx_dsp_pack(xilinx_dsp_pack); + } else if (family == "xc6s" || family == "xc3sda") { + xilinx_dsp48a_pm pm(module, module->selected_cells()); + pm.run_xilinx_dsp48a_pack(xilinx_dsp48a_pack); } // Separating out CREG packing is necessary since there // is no guarantee that the cell ordering corresponds diff --git a/passes/pmgen/xilinx_dsp48a.pmg b/passes/pmgen/xilinx_dsp48a.pmg new file mode 100644 index 000000000..97d5c5ccd --- /dev/null +++ b/passes/pmgen/xilinx_dsp48a.pmg @@ -0,0 +1,673 @@ +// This file describes the main pattern matcher setup (of three total) that +// forms the `xilinx_dsp` pass described in xilinx_dsp.cc — version for +// DSP48A/DSP48A1 (Spartan 3A DSP, Spartan 6). 
+// At a high level, it works as follows: +// ( 1) Starting from a DSP48A/DSP48A1 cell +// ( 2) Match the driver of the 'B' input to a possible $dff cell (B1REG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using a subpattern discussed below) +// If B1REG matched, treat 'B' input as input of B1REG +// ( 3) Match the driver of the 'B' and 'D' inputs for a possible $add cell +// (pre-adder) +// ( 4) Match 'B' input for B0REG +// ( 5) Match 'A' input for A1REG +// If A1REG, then match 'A' input for A0REG +// ( 6) Match 'D' input for DREG +// ( 7) Match 'P' output that exclusively drives an MREG +// ( 8) Match 'P' output that exclusively drives one of two inputs to an $add +// cell (post-adder). +// The other input to the adder is assumed to come in from the 'C' input +// (note: 'P' -> 'C' connections that exist for accumulators are +// recognised in xilinx_dsp.cc). +// ( 9) Match 'P' output that exclusively drives a PREG +// (10) If post-adder and PREG both present, match for a $mux cell driving +// the 'C' input, where one of the $mux's inputs is the PREG output. 
+// This indicates an accumulator situation, and one where a $mux exists +// to override the accumulated value: +// +--------------------------------+ +// | ____ | +// +--| \ | +// |$mux|-+ | +// 'C' ---|____/ | | +// | /-------\ +----+ | +// +----+ +-| post- |___|PREG|---+ 'P' +// |MREG|------ | adder | +----+ +// +----+ \-------/ +// Notes: see the notes in xilinx_dsp.pmg + +pattern xilinx_dsp48a_pack + +state clock +state sigA sigB sigC sigD sigM sigP +state postAddAB postAddMuxAB +state ffAcepol ffBcepol ffDcepol ffMcepol ffPcepol +state ffArstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol +state ffA0 ffA0cemux ffA0rstmux ffA1 ffA1cemux ffA1rstmux +state ffB0 ffB0cemux ffB0rstmux ffB1 ffB1cemux ffB1rstmux +state ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux + +// Variables used for subpatterns +state argQ argD +state ffcepol ffrstpol +state ffoffset +udata dffD dffQ +udata dffclock +udata dff dffcemux dffrstmux +udata dffcepol dffrstpol + +// (1) Starting from a DSP48A/DSP48A1 cell +match dsp + select dsp->type.in(\DSP48A, \DSP48A1) +endmatch + +code sigA sigB sigC sigD sigM clock + auto unextend = [](const SigSpec &sig) { + int i; + for (i = GetSize(sig)-1; i > 0; i--) + if (sig[i] != sig[i-1]) + break; + // Do not remove non-const sign bit + if (sig[i].wire) + ++i; + return sig.extract(0, i); + }; + sigA = unextend(port(dsp, \A)); + sigB = unextend(port(dsp, \B)); + + sigC = port(dsp, \C, SigSpec()); + sigD = port(dsp, \D, SigSpec()); + + SigSpec P = port(dsp, \P); + // Only care about those bits that are used + int i; + for (i = GetSize(P)-1; i >= 0; i--) + if (nusers(P[i]) > 1) + break; + i++; + log_assert(nusers(P.extract_end(i)) <= 1); + // This sigM could have no users if downstream sinks (e.g. 
$add) is + // narrower than $mul result, for example + if (i == 0) + reject; + sigM = P.extract(0, i); + + clock = port(dsp, \CLK, SigBit()); +endcode + +// (2) Match the driver of the 'B' input to a possible $dff cell (B1REG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using a subpattern discussed above) +// If matched, treat 'B' input as input of B1REG +code argQ ffB1 ffB1cemux ffB1rstmux ffBcepol ffBrstpol sigB clock + if (param(dsp, \B1REG).as_int() == 0 && param(dsp, \B0REG).as_int() == 0 && port(dsp, \OPMODE, SigSpec()).extract(4, 1).is_fully_zero()) { + argQ = sigB; + subpattern(in_dffe); + if (dff) { + ffB1 = dff; + clock = dffclock; + if (dffrstmux) { + ffB1rstmux = dffrstmux; + ffBrstpol = dffrstpol; + } + if (dffcemux) { + ffB1cemux = dffcemux; + ffBcepol = dffcepol; + } + sigB = dffD; + } + } +endcode + +// (3) Match the driver of the 'B' and 'D' inputs for a possible $add cell +// (pre-adder) +match preAdd + if sigD.empty() || sigD.is_fully_zero() + if param(dsp, \B0REG).as_int() == 0 + // Ensure that preAdder not already used + if port(dsp, \OPMODE, SigSpec()).extract(4, 1).is_fully_zero() + + select preAdd->type.in($add, $sub) + // Output has to be 18 bits or less + select GetSize(port(preAdd, \Y)) <= 18 + select nusers(port(preAdd, \Y)) == 2 + // D port has to be 18 bits or less + select GetSize(port(preAdd, \A)) <= 18 + // B port has to be 18 bits or less + select GetSize(port(preAdd, \B)) <= 18 + index port(preAdd, \Y) === sigB + + optional +endmatch + +code sigB sigD + if (preAdd) { + sigD = port(preAdd, \A); + sigB = port(preAdd, \B); + } +endcode + +// (4) Match 'B' input for B0REG +code argQ ffB0 ffB0cemux ffB0rstmux ffBcepol ffBrstpol sigB clock + if (param(dsp, \B0REG).as_int() == 0) { + argQ = sigB; + subpattern(in_dffe); + if (dff) { + if (ffB1) { + if ((ffB1rstmux != nullptr) ^ (dffrstmux != nullptr)) + goto ffB0_end; + if ((ffB1cemux != nullptr) ^ (dffcemux != nullptr)) + goto ffB0_end; 
+ if (dffrstmux) { + if (ffBrstpol != dffrstpol) + goto ffB0_end; + if (port(ffB1rstmux, \S) != port(dffrstmux, \S)) + goto ffB0_end; + ffB0rstmux = dffrstmux; + } + if (dffcemux) { + if (ffBcepol != dffcepol) + goto ffB0_end; + if (port(ffB1cemux, \S) != port(dffcemux, \S)) + goto ffB0_end; + ffB0cemux = dffcemux; + } + } + ffB0 = dff; + clock = dffclock; + if (dffrstmux) { + ffB0rstmux = dffrstmux; + ffBrstpol = dffrstpol; + } + if (dffcemux) { + ffB0cemux = dffcemux; + ffBcepol = dffcepol; + } + sigB = dffD; + } + } +ffB0_end: +endcode + +// (5) Match 'A' input for A1REG +// If A1REG, then match 'A' input for A0REG +code argQ ffA1 ffA1cemux ffA1rstmux ffAcepol ffArstpol sigA clock ffA0 ffA0cemux ffA0rstmux + if (param(dsp, \A0REG).as_int() == 0 && param(dsp, \A1REG).as_int() == 0) { + argQ = sigA; + subpattern(in_dffe); + if (dff) { + ffA1 = dff; + clock = dffclock; + if (dffrstmux) { + ffA1rstmux = dffrstmux; + ffArstpol = dffrstpol; + } + if (dffcemux) { + ffA1cemux = dffcemux; + ffAcepol = dffcepol; + } + sigA = dffD; + + // Now attempt to match A0 + if (ffA1) { + argQ = sigA; + subpattern(in_dffe); + if (dff) { + if ((ffA1rstmux != nullptr) ^ (dffrstmux != nullptr)) + goto ffA0_end; + if ((ffA1cemux != nullptr) ^ (dffcemux != nullptr)) + goto ffA0_end; + if (dffrstmux) { + if (ffArstpol != dffrstpol) + goto ffA0_end; + if (port(ffA1rstmux, \S) != port(dffrstmux, \S)) + goto ffA0_end; + ffA0rstmux = dffrstmux; + } + if (dffcemux) { + if (ffAcepol != dffcepol) + goto ffA0_end; + if (port(ffA1cemux, \S) != port(dffcemux, \S)) + goto ffA0_end; + ffA0cemux = dffcemux; + } + + ffA0 = dff; + clock = dffclock; + + if (dffcemux) { + ffA0cemux = dffcemux; + ffAcepol = dffcepol; + } + sigA = dffD; + +ffA0_end: ; + } + } + + } + } +endcode + +// (6) Match 'D' input for DREG +code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock + if (param(dsp, \DREG).as_int() == 0) { + argQ = sigD; + subpattern(in_dffe); + if (dff) { + ffD = dff; + clock = dffclock; + if 
(dffrstmux) { + ffDrstmux = dffrstmux; + ffDrstpol = dffrstpol; + } + if (dffcemux) { + ffDcemux = dffcemux; + ffDcepol = dffcepol; + } + sigD = dffD; + } + } +endcode + +// (7) Match 'P' output that exclusively drives an MREG +code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock + if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) { + argD = sigM; + subpattern(out_dffe); + if (dff) { + ffM = dff; + clock = dffclock; + if (dffrstmux) { + ffMrstmux = dffrstmux; + ffMrstpol = dffrstpol; + } + if (dffcemux) { + ffMcemux = dffcemux; + ffMcepol = dffcepol; + } + sigM = dffQ; + } + } + sigP = sigM; +endcode + +// (8) Match 'P' output that exclusively drives one of two inputs to an $add +// cell (post-adder). +// The other input to the adder is assumed to come in from the 'C' input +// (note: 'P' -> 'C' connections that exist for accumulators are +// recognised in xilinx_dsp.cc). +match postAdd + // Ensure that Z mux is not already used + if port(dsp, \OPMODE, SigSpec()).extract(2,2).is_fully_zero() + + select postAdd->type.in($add) + select GetSize(port(postAdd, \Y)) <= 48 + choice AB {\A, \B} + select nusers(port(postAdd, AB)) <= 3 + filter ffMcemux || nusers(port(postAdd, AB)) == 2 + filter !ffMcemux || nusers(port(postAdd, AB)) == 3 + + index port(postAdd, AB)[0] === sigP[0] + filter GetSize(port(postAdd, AB)) >= GetSize(sigP) + filter port(postAdd, AB).extract(0, GetSize(sigP)) == sigP + // Check that remainder of AB is a sign- or zero-extension + filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(sigP[GetSize(sigP)-1], GetSize(port(postAdd, AB))-GetSize(sigP)) || port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(State::S0, GetSize(port(postAdd, AB))-GetSize(sigP)) + + set postAddAB AB + optional +endmatch + +code sigC sigP + if (postAdd) { + sigC = port(postAdd, postAddAB == \A ? 
\B : \A); + sigP = port(postAdd, \Y); + } +endcode + +// (9) Match 'P' output that exclusively drives a PREG +code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock + if (param(dsp, \PREG).as_int() == 0) { + int users = 2; + // If ffMcemux and no postAdd new-value net must have three users: ffMcemux, ffM and ffPcemux + if (ffMcemux && !postAdd) users++; + if (nusers(sigP) == users) { + argD = sigP; + subpattern(out_dffe); + if (dff) { + ffP = dff; + clock = dffclock; + if (dffrstmux) { + ffPrstmux = dffrstmux; + ffPrstpol = dffrstpol; + } + if (dffcemux) { + ffPcemux = dffcemux; + ffPcepol = dffcepol; + } + sigP = dffQ; + } + } + } +endcode + +// (10) If post-adder and PREG both present, match for a $mux cell driving +// the 'C' input, where one of the $mux's inputs is the PREG output. +// This indicates an accumulator situation, and one where a $mux exists +// to override the accumulated value: +// +--------------------------------+ +// | ____ | +// +--| \ | +// |$mux|-+ | +// 'C' ---|____/ | | +// | /-------\ +----+ | +// +----+ +-| post- |___|PREG|---+ 'P' +// |MREG|------ | adder | +----+ +// +----+ \-------/ +match postAddMux + if postAdd + if ffP + select postAddMux->type.in($mux) + select nusers(port(postAddMux, \Y)) == 2 + choice AB {\A, \B} + index port(postAddMux, AB) === sigP + index port(postAddMux, \Y) === sigC + set postAddMuxAB AB + optional +endmatch + +code sigC + if (postAddMux) + sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A); +endcode + +code + accept; +endcode + +// ####################### + +// Subpattern for matching against input registers, based on knowledge of the +// 'Q' input. Typically, identifying registers with clock-enable and reset +// capability would be a task would be handled by other Yosys passes such as +// dff2dffe, but since DSP inference happens much before this, these patterns +// have to be manually identified. 
+// At a high level: +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// one that exclusively drives the 'D' input of the $dff, with one of its +// $mux inputs being fully zero +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff +subpattern in_dffe +arg argD argQ clock + +code + dff = nullptr; + if (GetSize(argQ) == 0) + reject; + for (const auto &c : argQ.chunks()) { + // Abandon matches when 'Q' is a constant + if (!c.wire) + reject; + // Abandon matches when 'Q' has the keep attribute set + if (c.wire->get_bool_attribute(\keep)) + reject; + // Abandon matches when 'Q' has a non-zero init attribute set + // (not supported by DSP48E1) + Const init = c.wire->attributes.at(\init, Const()); + if (!init.empty()) + for (auto b : init.extract(c.offset, c.width)) + if (b != State::Sx && b != State::S0) + reject; + } +endcode + +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument +match ff + select ff->type.in($dff) + // DSP48E1 does not support clock inversion + select param(ff, \CLK_POLARITY).as_bool() + + slice offset GetSize(port(ff, \D)) + index port(ff, \Q)[offset] === argQ[0] + + // Check that the rest of argQ is present + filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ) + filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + + filter clock == SigBit() || port(ff, \CLK) == clock + + set ffoffset offset +endmatch + +code argQ argD + SigSpec Q = port(ff, \Q); + dff = ff; + dffclock = port(ff, \CLK); + dffD = argQ; + argD = port(ff, \D); + argQ = Q; + dffD.replace(argQ, argD); + // Only search for ffrstmux if dffD only + // has two (ff, ffrstmux) users + if (nusers(dffD) > 2) + argD = SigSpec(); 
+endcode + +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// exclusively drives the 'D' input of the $dff, with one of the $mux +// inputs being fully zero +match ffrstmux + if !argD.empty() + select ffrstmux->type.in($mux) + index port(ffrstmux, \Y) === argD + + choice BA {\B, \A} + // DSP48E1 only supports reset to zero + select port(ffrstmux, BA).is_fully_zero() + + define pol (BA == \B) + set ffrstpol pol + semioptional +endmatch + +code argD + if (ffrstmux) { + dffrstmux = ffrstmux; + dffrstpol = ffrstpol; + argD = port(ffrstmux, ffrstpol ? \A : \B); + dffD.replace(port(ffrstmux, \Y), argD); + + // Only search for ffcemux if argQ has at + // least 3 users (ff, , ffrstmux) and + // dffD only has two (ff, ffrstmux) + if (!(nusers(argQ) >= 3 && nusers(dffD) == 2)) + argD = SigSpec(); + } + else + dffrstmux = nullptr; +endcode + +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff +match ffcemux + if !argD.empty() + select ffcemux->type.in($mux) + index port(ffcemux, \Y) === argD + choice AB {\A, \B} + index port(ffcemux, AB) === argQ + define pol (AB == \A) + set ffcepol pol + semioptional +endmatch + +code argD + if (ffcemux) { + dffcemux = ffcemux; + dffcepol = ffcepol; + argD = port(ffcemux, ffcepol ? \B : \A); + dffD.replace(port(ffcemux, \Y), argD); + } + else + dffcemux = nullptr; +endcode + +// ####################### + +// Subpattern for matching against output registers, based on knowledge of the +// 'D' input. 
+// At a high level: +// (1) Starting from an optional $mux cell that implements clock enable +// semantics --- one where the given 'D' argument (partially or fully) +// drives one of its two inputs +// (2) Starting from, or continuing onto, another optional $mux cell that +// implements synchronous reset semantics --- one where the given 'D' +// argument (or the clock enable $mux output) drives one of its two inputs +// and where the other input is fully zero +// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the +// output of the previous clock enable or reset $mux cells) +subpattern out_dffe +arg argD argQ clock + +code + dff = nullptr; + for (auto c : argD.chunks()) + // Abandon matches when 'D' has the keep attribute set + if (c.wire->get_bool_attribute(\keep)) + reject; +endcode + +// (1) Starting from an optional $mux cell that implements clock enable +// semantics --- one where the given 'D' argument (partially or fully) +// drives one of its two inputs +match ffcemux + select ffcemux->type.in($mux) + // ffcemux output must have two users: ffcemux and ff.D + select nusers(port(ffcemux, \Y)) == 2 + + choice AB {\A, \B} + // keep-last-value net must have at least three users: ffcemux, ff, downstream sink(s) + select nusers(port(ffcemux, AB)) >= 3 + + slice offset GetSize(port(ffcemux, \Y)) + define BA (AB == \A ? \B : \A) + index port(ffcemux, BA)[offset] === argD[0] + + // Check that the rest of argD is present + filter GetSize(port(ffcemux, BA)) >= offset + GetSize(argD) + filter port(ffcemux, BA).extract(offset, GetSize(argD)) == argD + + set ffoffset offset + define pol (AB == \A) + set ffcepol pol + + semioptional +endmatch + +code argD argQ + dffcemux = ffcemux; + if (ffcemux) { + SigSpec BA = port(ffcemux, ffcepol ? \B : \A); + SigSpec Y = port(ffcemux, \Y); + argQ = argD; + argD.replace(BA, Y); + argQ.replace(BA, port(ffcemux, ffcepol ? 
\A : \B)); + + dffcemux = ffcemux; + dffcepol = ffcepol; + } +endcode + +// (2) Starting from, or continuing onto, another optional $mux cell that +// implements synchronous reset semantics --- one where the given 'D' +// argument (or the clock enable $mux output) drives one of its two inputs +// and where the other input is fully zero +match ffrstmux + select ffrstmux->type.in($mux) + // ffrstmux output must have two users: ffrstmux and ff.D + select nusers(port(ffrstmux, \Y)) == 2 + + choice BA {\B, \A} + // DSP48E1 only supports reset to zero + select port(ffrstmux, BA).is_fully_zero() + + slice offset GetSize(port(ffrstmux, \Y)) + define AB (BA == \B ? \A : \B) + index port(ffrstmux, AB)[offset] === argD[0] + + // Check that offset is consistent + filter !ffcemux || ffoffset == offset + // Check that the rest of argD is present + filter GetSize(port(ffrstmux, AB)) >= offset + GetSize(argD) + filter port(ffrstmux, AB).extract(offset, GetSize(argD)) == argD + + set ffoffset offset + define pol (AB == \A) + set ffrstpol pol + + semioptional +endmatch + +code argD argQ + dffrstmux = ffrstmux; + if (ffrstmux) { + SigSpec AB = port(ffrstmux, ffrstpol ? 
\A : \B); + SigSpec Y = port(ffrstmux, \Y); + argD.replace(AB, Y); + + dffrstmux = ffrstmux; + dffrstpol = ffrstpol; + } +endcode + +// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the +// output of the previous clock enable or reset $mux cells) +match ff + select ff->type.in($dff) + // DSP48E1 does not support clock inversion + select param(ff, \CLK_POLARITY).as_bool() + + slice offset GetSize(port(ff, \D)) + index port(ff, \D)[offset] === argD[0] + + // Check that offset is consistent + filter (!ffcemux && !ffrstmux) || ffoffset == offset + // Check that the rest of argD is present + filter GetSize(port(ff, \D)) >= offset + GetSize(argD) + filter port(ff, \D).extract(offset, GetSize(argD)) == argD + // Check that FF.Q is connected to CE-mux + filter !ffcemux || port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + + filter clock == SigBit() || port(ff, \CLK) == clock + + set ffoffset offset +endmatch + +code argQ + SigSpec D = port(ff, \D); + SigSpec Q = port(ff, \Q); + if (!ffcemux) { + argQ = argD; + argQ.replace(D, Q); + } + + // Abandon matches when 'Q' has a non-zero init attribute set + // (not supported by DSP48E1) + for (auto c : argQ.chunks()) { + Const init = c.wire->attributes.at(\init, Const()); + if (!init.empty()) + for (auto b : init.extract(c.offset, c.width)) + if (b != State::Sx && b != State::S0) + reject; + } + + dff = ff; + dffQ = argQ; + dffclock = port(ff, \CLK); +endcode diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index 5cd34162e..b20e4f458 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -1,7 +1,7 @@ // This file describes the second of three pattern matcher setups that // forms the `xilinx_dsp` pass described in xilinx_dsp.cc // At a high level, it works as follows: -// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already, +// (1) Starting from a DSP48* cell that (a) doesn't have a CREG already, // and (b) uses the 'C' port 
// (2) Match the driver of the 'C' input to a possible $dff cell (CREG) // (attached to at most two $mux cells that implement clock-enable or @@ -38,10 +38,10 @@ udata dffclock udata dff dffcemux dffrstmux udata dffcepol dffrstpol -// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already, +// (1) Starting from a DSP48* cell that (a) doesn't have a CREG already, // and (b) uses the 'C' port match dsp - select dsp->type.in(\DSP48E1) + select dsp->type.in(\DSP48A, \DSP48A1, \DSP48E1) select param(dsp, \CREG, 1).as_int() == 0 select nusers(port(dsp, \C, SigSpec())) > 1 endmatch @@ -60,7 +60,8 @@ code sigC sigP clock sigC = unextend(port(dsp, \C, SigSpec())); SigSpec P = port(dsp, \P); - if (param(dsp, \USE_MULT, Const("MULTIPLY")).decode_string() == "MULTIPLY") { + if (!dsp->type.in(\DSP48E1) || + param(dsp, \USE_MULT, Const("MULTIPLY")).decode_string() == "MULTIPLY") { // Only care about those bits that are used int i; for (i = GetSize(P)-1; i >= 0; i--) diff --git a/techlibs/xilinx/synth_xilinx.cc b/techlibs/xilinx/synth_xilinx.cc index 971089b28..a19046911 100644 --- a/techlibs/xilinx/synth_xilinx.cc +++ b/techlibs/xilinx/synth_xilinx.cc @@ -387,7 +387,10 @@ struct SynthXilinxPass : public ScriptPass run("opt_expr -fine"); run("wreduce"); run("select -clear"); - run("xilinx_dsp"); + if (help_mode) + run("xilinx_dsp -family "); + else + run("xilinx_dsp -family " + family); run("chtype -set $mul t:$__soft_mul"); } } diff --git a/techlibs/xilinx/xc3sda_dsp_map.v b/techlibs/xilinx/xc3sda_dsp_map.v index 87348a173..258f90395 100644 --- a/techlibs/xilinx/xc3sda_dsp_map.v +++ b/techlibs/xilinx/xc3sda_dsp_map.v @@ -27,7 +27,7 @@ module \$__MUL18X18 (input [17:0] A, input [17:0] B, output [35:0] Y); .D(18'b0), .P(P_48), - .OPMODE(8'b0000010) + .OPMODE(8'b0000001) ); assign Y = P_48; endmodule diff --git a/techlibs/xilinx/xc6s_dsp_map.v b/techlibs/xilinx/xc6s_dsp_map.v index e8705723b..bdce60c14 100644 --- a/techlibs/xilinx/xc6s_dsp_map.v +++ 
b/techlibs/xilinx/xc6s_dsp_map.v @@ -27,7 +27,7 @@ module \$__MUL18X18 (input [17:0] A, input [17:0] B, output [35:0] Y); .D(18'b0), .P(P_48), - .OPMODE(8'b0000010) + .OPMODE(8'b0000001) ); assign Y = P_48; endmodule diff --git a/tests/arch/xilinx/macc.sh b/tests/arch/xilinx/macc.sh index 154a29848..58b97b646 100644 --- a/tests/arch/xilinx/macc.sh +++ b/tests/arch/xilinx/macc.sh @@ -1,3 +1,6 @@ ../../../yosys -qp "synth_xilinx -top macc2; rename -top macc2_uut" -o macc_uut.v macc.v iverilog -o test_macc macc_tb.v macc_uut.v macc.v ../../../techlibs/xilinx/cells_sim.v vvp -N ./test_macc +../../../yosys -qp "synth_xilinx -family xc6s -top macc2; rename -top macc2_uut" -o macc_uut.v macc.v +iverilog -o test_macc macc_tb.v macc_uut.v macc.v ../../../techlibs/xilinx/cells_sim.v +vvp -N ./test_macc diff --git a/tests/arch/xilinx/mul.ys b/tests/arch/xilinx/mul.ys index d76814966..6cf994fbf 100644 --- a/tests/arch/xilinx/mul.ys +++ b/tests/arch/xilinx/mul.ys @@ -7,3 +7,15 @@ cd top # Constrain all select calls below inside the top module select -assert-count 1 t:DSP48E1 select -assert-none t:DSP48E1 %% t:* %D + +design -reset + +read_verilog ../common/mul.v +hierarchy -top top +proc +equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx -family xc6s # equivalency check +design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design) +cd top # Constrain all select calls below inside the top module + +select -assert-count 1 t:DSP48A1 +select -assert-none t:DSP48A1 %% t:* %D diff --git a/tests/arch/xilinx/mul_unsigned.ys b/tests/arch/xilinx/mul_unsigned.ys index 62495b90c..c714680af 100644 --- a/tests/arch/xilinx/mul_unsigned.ys +++ b/tests/arch/xilinx/mul_unsigned.ys @@ -9,3 +9,17 @@ select -assert-count 1 t:BUFG select -assert-count 1 t:DSP48E1 select -assert-count 30 t:FDRE select -assert-none t:DSP48E1 t:FDRE t:BUFG %% t:* %D + +design -reset + +read_verilog mul_unsigned.v +hierarchy -top mul_unsigned +proc + +equiv_opt -assert -map 
+/xilinx/cells_sim.v synth_xilinx -family xc6s # equivalency check +design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design) +cd mul_unsigned # Constrain all select calls below inside the top module +select -assert-count 1 t:BUFG +select -assert-count 1 t:DSP48A1 +select -assert-count 30 t:FDRE +select -assert-none t:DSP48A1 t:FDRE t:BUFG %% t:* %D From d00533eaa81b0c9dd80679bdde4aba60c8b1eece Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Mon, 23 Dec 2019 11:42:46 -0800 Subject: [PATCH 2/8] Add DSP48A* PCOUT -> PCIN cascade support --- passes/pmgen/xilinx_dsp_cascade.pmg | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/passes/pmgen/xilinx_dsp_cascade.pmg b/passes/pmgen/xilinx_dsp_cascade.pmg index 7a32df2b7..9763facdf 100644 --- a/passes/pmgen/xilinx_dsp_cascade.pmg +++ b/passes/pmgen/xilinx_dsp_cascade.pmg @@ -62,12 +62,11 @@ code #define MAX_DSP_CASCADE 20 endcode -// (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer -// (controlled by OPMODE[6:4]) set to zero and (b) doesn't already -// use the 'PCOUT' port +// (1) Starting from a DSP48* cell that (a) has the Z multiplexer +// (controlled by OPMODE[3:2] for DSP48A*, by OPMODE[6:4] for DSP48E1) +// set to zero and (b) doesn't already use the 'PCOUT' port match first - select first->type.in(\DSP48E1) - select port(first, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("000") + select (first->type.in(\DSP48A, \DSP48A1) && port(first, \OPMODE, Const(0, 7)).extract(2,2) == Const::from_string("00")) || (first->type.in(\DSP48E1) && port(first, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("000")) select nusers(port(first, \PCOUT, SigSpec())) <= 1 endmatch @@ -156,22 +155,21 @@ subpattern tail arg first arg next -// (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled, +// (2.1) Match another DSP48* cell that (a) does not have the CREG enabled, // (b) has its Z multiplexer output set to 
the 'C' port, which is // driven by the 'P' output of the previous DSP cell, and (c) has its // 'PCIN' port unused match nextP - select nextP->type.in(\DSP48E1) select !param(nextP, \CREG, State::S1).as_bool() - select port(nextP, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("011") + select (nextP->type.in(\DSP48A, \DSP48A1) && port(nextP, \OPMODE, Const(0, 7)).extract(2,2) == Const::from_string("11")) || (nextP->type.in(\DSP48E1) && port(nextP, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("011")) select nusers(port(nextP, \C, SigSpec())) > 1 select nusers(port(nextP, \PCIN, SigSpec())) == 0 index port(nextP, \C)[0] === port(std::get<0>(chain.back()), \P)[0] semioptional endmatch -// (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the -// previous DSP cell right-shifted by 17 bits +// (2.2) For DSP48E1 only, same as (2.1) but with the 'C' port driven +// by the 'P' output of the previous DSP cell right-shifted by 17 bits match nextP_shift17 if !nextP select nextP_shift17->type.in(\DSP48E1) @@ -188,6 +186,8 @@ code next if (!nextP) next = nextP_shift17; if (next) { + if (next->type != first->type) + reject; unextend = [](const SigSpec &sig) { int i; for (i = GetSize(sig)-1; i > 0; i--) From 71cac30309ec19bb72ff64ae5f5471ba0ecfaf46 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Mon, 23 Dec 2019 12:38:18 -0800 Subject: [PATCH 3/8] Support unregistered cascades for A and B inputs --- passes/pmgen/xilinx_dsp_cascade.pmg | 121 +++++++++++++++++----------- 1 file changed, 74 insertions(+), 47 deletions(-) diff --git a/passes/pmgen/xilinx_dsp_cascade.pmg b/passes/pmgen/xilinx_dsp_cascade.pmg index 9763facdf..7a310764c 100644 --- a/passes/pmgen/xilinx_dsp_cascade.pmg +++ b/passes/pmgen/xilinx_dsp_cascade.pmg @@ -119,21 +119,42 @@ finally add_siguser(cascade, dsp_pcin); add_siguser(cascade, dsp); - dsp->setParam(ID(ACASCREG), AREG); + if (dsp->type.in(\DSP48E1)) + dsp->setParam(ID(ACASCREG), AREG); 
dsp_pcin->setParam(ID(A_INPUT), Const("CASCADE")); log_debug("ACOUT -> ACIN cascade for %s -> %s\n", log_id(dsp), log_id(dsp_pcin)); } if (BREG >= 0) { Wire *cascade = module->addWire(NEW_ID, 18); - dsp_pcin->setPort(ID(B), Const(0, 18)); - dsp_pcin->setPort(ID(BCIN), cascade); + if (dsp->type.in(\DSP48A, \DSP48A1)) { + // According to UG389 p9 [https://www.xilinx.com/support/documentation/user_guides/ug389.pdf] + // "The DSP48A1 component uses this input when cascading + // BCOUT from an adjacent DSP48A1 slice. The tools then + // translate BCOUT cascading to the dedicated BCIN input + // and set the B_INPUT attribute for implementation." + dsp_pcin->setPort(ID(B), cascade); + } + else { + dsp_pcin->setPort(ID(B), Const(0, 18)); + dsp_pcin->setPort(ID(BCIN), cascade); + } dsp->setPort(ID(BCOUT), cascade); add_siguser(cascade, dsp_pcin); add_siguser(cascade, dsp); - dsp->setParam(ID(BCASCREG), BREG); - dsp_pcin->setParam(ID(B_INPUT), Const("CASCADE")); + if (dsp->type.in(\DSP48E1)) { + dsp->setParam(ID(BCASCREG), BREG); + // According to UG389 p13 [https://www.xilinx.com/support/documentation/user_guides/ug389.pdf] + // "The attribute is only used by place and route tools and + // is not necessary for the users to set for synthesis. The + // attribute is determined by the connection to the B port + // of the DSP48A1 slice. If the B port is connected to the + // BCOUT of another DSP48A1 slice, then the tools automatically + // set the attribute to 'CASCADE', otherwise it is set to + // 'DIRECT'". + dsp_pcin->setParam(ID(B_INPUT), Const("CASCADE")); + } log_debug("BCOUT -> BCIN cascade for %s -> %s\n", log_id(dsp), log_id(dsp_pcin)); } @@ -202,36 +223,39 @@ code next endcode // (3) For this subequent DSP48E1 match (i.e. 
PCOUT -> PCIN cascade exists) -// if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this -// DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already -// have an ACOUT -> ACIN cascade, (d) the previous DSP does not already -// use its ACOUT port, then examine if an ACOUT -> ACIN cascade -// opportunity exists by matching for a $dff-with-optional-clock-enable- -// or-reset and checking that the 'D' input of this register is the same -// as the 'A' input of the previous DSP +// if (a) this DSP48 does not use A2REG nor A1REG, (b) this DSP48E1 does +// not already have an ACOUT -> ACIN cascade, (c) the previous DSP does +// not already use its ACOUT port, then examine if an ACOUT -> ACIN cascade +// opportunity exists if (i) A ports are identical, or (ii) separated by a +// $dff-with-optional-clock-enable-or-reset and checking that the 'D' input +// of this register is the same as the 'A' input of the previous DSP +// TODO: Check for two levels of flops, instead of just one code argQ clock AREG AREG = -1; - if (next) { + if (next && next->type.in(\DSP48E1)) { Cell *prev = std::get<0>(chain.back()); - if (param(prev, \AREG, 2).as_int() > 0 && - param(next, \AREG, 2).as_int() > 0 && + if (param(next, \AREG, 2).as_int() == 0 && param(next, \A_INPUT, Const("DIRECT")).decode_string() == "DIRECT" && nusers(port(prev, \ACOUT, SigSpec())) <= 1) { - argQ = unextend(port(next, \A)); - clock = port(prev, \CLK); - subpattern(in_dffe); - if (dff) { - if (!dffrstmux && port(prev, \RSTA, State::S0) != State::S0) - goto reject_AREG; - if (dffrstmux && port(dffrstmux, \S) != port(prev, \RSTA, State::S0)) - goto reject_AREG; - if (!dffcemux && port(prev, \CEA2, State::S0) != State::S0) - goto reject_AREG; - if (dffcemux && port(dffcemux, \S) != port(prev, \CEA2, State::S0)) - goto reject_AREG; - if (dffD == unextend(port(prev, \A))) - AREG = 1; -reject_AREG: ; + if (port(prev, \A) == port(next, \A)) + AREG = 0; + else { + argQ = unextend(port(next, \A)); + 
clock = port(prev, \CLK); + subpattern(in_dffe); + if (dff) { + if (!dffrstmux && port(prev, \RSTA, State::S0) != State::S0) + goto reject_AREG; + if (dffrstmux && port(dffrstmux, \S) != port(prev, \RSTA, State::S0)) + goto reject_AREG; + if (!dffcemux && port(prev, \CEA2, State::S0) != State::S0) + goto reject_AREG; + if (dffcemux && port(dffcemux, \S) != port(prev, \CEA2, State::S0)) + goto reject_AREG; + if (dffD == unextend(port(prev, \A))) + AREG = 1; +reject_AREG: ; + } } } } @@ -242,26 +266,29 @@ code argQ clock BREG BREG = -1; if (next) { Cell *prev = std::get<0>(chain.back()); - if (param(prev, \BREG, 2).as_int() > 0 && - param(next, \BREG, 2).as_int() > 0 && + if (((next->type.in(\DSP48A, \DSP48A1) && param(next, \B1REG, 1) == 0) || (next->type.in(\DSP48E1) && param(next, \BREG, 2).as_int() == 0)) && param(next, \B_INPUT, Const("DIRECT")).decode_string() == "DIRECT" && port(next, \BCIN, SigSpec()).is_fully_zero() && nusers(port(prev, \BCOUT, SigSpec())) <= 1) { - argQ = unextend(port(next, \B)); - clock = port(prev, \CLK); - subpattern(in_dffe); - if (dff) { - if (!dffrstmux && port(prev, \RSTB, State::S0) != State::S0) - goto reject_BREG; - if (dffrstmux && port(dffrstmux, \S) != port(prev, \RSTB, State::S0)) - goto reject_BREG; - if (!dffcemux && port(prev, \CEB2, State::S0) != State::S0) - goto reject_BREG; - if (dffcemux && port(dffcemux, \S) != port(prev, \CEB2, State::S0)) - goto reject_BREG; - if (dffD == unextend(port(prev, \B))) - BREG = 1; -reject_BREG: ; + if (port(prev, \B) == port(next, \B)) + BREG = 0; + else { + argQ = unextend(port(next, \B)); + clock = port(prev, \CLK); + subpattern(in_dffe); + if (dff) { + if (!dffrstmux && port(prev, \RSTB, State::S0) != State::S0) + goto reject_BREG; + if (dffrstmux && port(dffrstmux, \S) != port(prev, \RSTB, State::S0)) + goto reject_BREG; + if (!dffcemux && port(prev, \CEB2, State::S0) != State::S0) + goto reject_BREG; + if (dffcemux && port(dffcemux, \S) != port(prev, \CEB2, State::S0)) + goto 
reject_BREG; + if (dffD == unextend(port(prev, \B))) + BREG = 1; +reject_BREG: ; + } } } } From edabe73377e08ebdc1315d9a907f0a4ff8bfddd3 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Mon, 23 Dec 2019 13:41:26 -0800 Subject: [PATCH 4/8] Fix checking CE[AB] and for direct connections --- passes/pmgen/xilinx_dsp_cascade.pmg | 58 ++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/passes/pmgen/xilinx_dsp_cascade.pmg b/passes/pmgen/xilinx_dsp_cascade.pmg index 7a310764c..1116afd41 100644 --- a/passes/pmgen/xilinx_dsp_cascade.pmg +++ b/passes/pmgen/xilinx_dsp_cascade.pmg @@ -223,10 +223,10 @@ code next endcode // (3) For this subequent DSP48E1 match (i.e. PCOUT -> PCIN cascade exists) -// if (a) this DSP48 does not use A2REG nor A1REG, (b) this DSP48E1 does -// not already have an ACOUT -> ACIN cascade, (c) the previous DSP does -// not already use its ACOUT port, then examine if an ACOUT -> ACIN cascade -// opportunity exists if (i) A ports are identical, or (ii) separated by a +// if (a) this DSP48E1 does not already have an ACOUT -> ACIN cascade, +// (b) the previous DSP does not already use its ACOUT port, then +// examine if an ACOUT -> ACIN cascade opportunity exists if +// (i) A ports are identical, or (ii) separated by a // $dff-with-optional-clock-enable-or-reset and checking that the 'D' input // of this register is the same as the 'A' input of the previous DSP // TODO: Check for two levels of flops, instead of just one @@ -234,11 +234,14 @@ code argQ clock AREG AREG = -1; if (next && next->type.in(\DSP48E1)) { Cell *prev = std::get<0>(chain.back()); - if (param(next, \AREG, 2).as_int() == 0 && - param(next, \A_INPUT, Const("DIRECT")).decode_string() == "DIRECT" && + + if (param(next, \A_INPUT, Const("DIRECT")).decode_string() == "DIRECT" && + port(next, \ACIN, SigSpec()).is_fully_zero() && nusers(port(prev, \ACOUT, SigSpec())) <= 1) { - if (port(prev, \A) == port(next, \A)) - AREG = 0; + if (param(prev, \AREG, 2) == 
0) { + if (port(prev, \A) == port(next, \A)) + AREG = 0; + } else { argQ = unextend(port(next, \A)); clock = port(prev, \CLK); @@ -248,16 +251,22 @@ code argQ clock AREG goto reject_AREG; if (dffrstmux && port(dffrstmux, \S) != port(prev, \RSTA, State::S0)) goto reject_AREG; - if (!dffcemux && port(prev, \CEA2, State::S0) != State::S0) + IdString CEA; + if (param(prev, \AREG, 2) == 1) + CEA = \CEA2; + else if (param(prev, \AREG, 2) == 2) + CEA = \CEA1; + else log_abort(); + if (!dffcemux && port(prev, CEA, State::S0) != State::S0) goto reject_AREG; - if (dffcemux && port(dffcemux, \S) != port(prev, \CEA2, State::S0)) + if (dffcemux && port(dffcemux, \S) != port(prev, CEA, State::S0)) goto reject_AREG; if (dffD == unextend(port(prev, \A))) AREG = 1; -reject_AREG: ; } } } +reject_AREG: ; } endcode @@ -266,12 +275,14 @@ code argQ clock BREG BREG = -1; if (next) { Cell *prev = std::get<0>(chain.back()); - if (((next->type.in(\DSP48A, \DSP48A1) && param(next, \B1REG, 1) == 0) || (next->type.in(\DSP48E1) && param(next, \BREG, 2).as_int() == 0)) && - param(next, \B_INPUT, Const("DIRECT")).decode_string() == "DIRECT" && + if (param(next, \B_INPUT, Const("DIRECT")).decode_string() == "DIRECT" && port(next, \BCIN, SigSpec()).is_fully_zero() && nusers(port(prev, \BCOUT, SigSpec())) <= 1) { - if (port(prev, \B) == port(next, \B)) - BREG = 0; + if ((next->type.in(\DSP48A, \DSP48A1) && param(prev, \B0REG, 0) == 0 && param(prev, \B1REG, 1) == 0) || + (next->type.in(\DSP48E1) && param(prev, \BREG, 2) == 0)) { + if (port(prev, \B) == port(next, \B)) + BREG = 0; + } else { argQ = unextend(port(next, \B)); clock = port(prev, \CLK); @@ -281,16 +292,27 @@ code argQ clock BREG goto reject_BREG; if (dffrstmux && port(dffrstmux, \S) != port(prev, \RSTB, State::S0)) goto reject_BREG; - if (!dffcemux && port(prev, \CEB2, State::S0) != State::S0) + IdString CEB; + if (next->type.in(\DSP48A, \DSP48A1)) + CEB = \CEB; + else if (next->type.in(\DSP48E1)) { + if (param(prev, \BREG, 2) == 1) + CEB 
= \CEB2; + else if (param(prev, \BREG, 2) == 2) + CEB = \CEB1; + else log_abort(); + } + else log_abort(); + if (!dffcemux && port(prev, CEB, State::S0) != State::S0) goto reject_BREG; - if (dffcemux && port(dffcemux, \S) != port(prev, \CEB2, State::S0)) + if (dffcemux && port(dffcemux, \S) != port(prev, CEB, State::S0)) goto reject_BREG; if (dffD == unextend(port(prev, \B))) BREG = 1; -reject_BREG: ; } } } +reject_BREG: ; } endcode From 75acaff6f5416137fdf515bda5c214ccc228df98 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Mon, 23 Dec 2019 14:22:13 -0800 Subject: [PATCH 5/8] Fix CEA/CEB check --- passes/pmgen/xilinx_dsp_cascade.pmg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/passes/pmgen/xilinx_dsp_cascade.pmg b/passes/pmgen/xilinx_dsp_cascade.pmg index 1116afd41..9fdefff31 100644 --- a/passes/pmgen/xilinx_dsp_cascade.pmg +++ b/passes/pmgen/xilinx_dsp_cascade.pmg @@ -257,7 +257,7 @@ code argQ clock AREG else if (param(prev, \AREG, 2) == 2) CEA = \CEA1; else log_abort(); - if (!dffcemux && port(prev, CEA, State::S0) != State::S0) + if (!dffcemux && port(prev, CEA, State::S0) != State::S1) goto reject_AREG; if (dffcemux && port(dffcemux, \S) != port(prev, CEA, State::S0)) goto reject_AREG; @@ -303,7 +303,7 @@ code argQ clock BREG else log_abort(); } else log_abort(); - if (!dffcemux && port(prev, CEB, State::S0) != State::S0) + if (!dffcemux && port(prev, CEB, State::S0) != State::S1) goto reject_BREG; if (dffcemux && port(dffcemux, \S) != port(prev, CEB, State::S0)) goto reject_BREG; From 1d0ac659ad37af7fa3d32a95bf04c4ce0e009792 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Mon, 23 Dec 2019 14:40:59 -0800 Subject: [PATCH 6/8] Fix OPMODE for PCIN->PCOUT cascades in xc6s, check B[01]REG too --- passes/pmgen/xilinx_dsp_cascade.pmg | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/passes/pmgen/xilinx_dsp_cascade.pmg b/passes/pmgen/xilinx_dsp_cascade.pmg index 9fdefff31..b4c2b348f 100644 --- 
a/passes/pmgen/xilinx_dsp_cascade.pmg +++ b/passes/pmgen/xilinx_dsp_cascade.pmg @@ -99,14 +99,21 @@ finally add_siguser(cascade, dsp); SigSpec opmode = port(dsp_pcin, \OPMODE, Const(0, 7)); - if (P == 17) - opmode[6] = State::S1; - else if (P == 0) - opmode[6] = State::S0; - else log_abort(); + if (dsp->type.in(\DSP48A, \DSP48A1)) { + log_assert(P == 0); + opmode[3] = State::S0; + opmode[2] = State::S1; + } + else if (dsp->type.in(\DSP48E1)) { + if (P == 17) + opmode[6] = State::S1; + else if (P == 0) + opmode[6] = State::S0; + else log_abort(); - opmode[5] = State::S0; - opmode[4] = State::S1; + opmode[5] = State::S0; + opmode[4] = State::S1; + } dsp_pcin->setPort(\OPMODE, opmode); log_debug("PCOUT -> PCIN cascade for %s -> %s\n", log_id(dsp), log_id(dsp_pcin)); @@ -307,8 +314,11 @@ code argQ clock BREG goto reject_BREG; if (dffcemux && port(dffcemux, \S) != port(prev, CEB, State::S0)) goto reject_BREG; - if (dffD == unextend(port(prev, \B))) + if (dffD == unextend(port(prev, \B))) { + if (next->type.in(\DSP48A, \DSP48A1) && param(prev, \B0REG, 0) != 0) + goto reject_BREG; BREG = 1; + } } } } From 2e21aa59a296c666f8e8fa0033efce4504ebd9ba Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Mon, 23 Dec 2019 14:58:06 -0800 Subject: [PATCH 7/8] Add DSP cascade tests --- tests/arch/xilinx/dsp_cascade.ys | 89 ++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 tests/arch/xilinx/dsp_cascade.ys diff --git a/tests/arch/xilinx/dsp_cascade.ys b/tests/arch/xilinx/dsp_cascade.ys new file mode 100644 index 000000000..f9185551b --- /dev/null +++ b/tests/arch/xilinx/dsp_cascade.ys @@ -0,0 +1,89 @@ +design -reset +read_verilog < DSP48E1.PCIN +# (i.e. 
Take all DSP48E1s, expand to find all wires connected +# to its PCOUT port, then remove all DSP48E1s from this +# selection, then expand again to find all cells where +# those wires are connected to the PCIN port, then remove +# all wires from this selection, and lastly intersect +# this selection with all DSP48E1 cells (to check that +# the connected cells are indeed DSPs) +select -assert-count 2 t:DSP48E1 %co:+[PCOUT] t:DSP48E1 %d %co:+[PCIN] w:* %d t:DSP48E1 %i + +design -load read +equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx -family xc6s +design -load postopt +cd cascade +select -assert-count 3 t:DSP48A1 +select -assert-count 5 t:FDRE # No cascade for A input +select -assert-none t:DSP48A1 t:BUFG t:FDRE %% t:* %D +# Very crude method of checking that DSP48A1.PCOUT -> DSP48A1.PCIN +# (see above for explanation) +select -assert-count 2 t:DSP48A1 %co:+[PCOUT] t:DSP48A1 %d %co:+[PCIN] w:* %d t:DSP48A1 %i + +design -reset +read_verilog < DSP48E1.PCIN +# (see above for explanation) +select -assert-count 1 t:DSP48E1 %co:+[PCOUT] t:DSP48E1 %d %co:+[PCIN] w:* %d t:DSP48E1 %i + +design -load read +equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx -family xc6s +design -load postopt +cd cascade +select -assert-count 2 t:DSP48A1 +select -assert-count 10 t:FDRE # Cannot cascade because first 'm' DSP + # uses both B0REG and B1REG, whereas 'o' + # only requires 1 +select -assert-none t:DSP48A1 t:BUFG t:FDRE %% t:* %D +# Very crude method of checking that DSP48A1.PCOUT -> DSP48A1.PCIN +# (see above for explanation) +select -assert-count 1 t:DSP48A1 %co:+[PCOUT] t:DSP48A1 %d %co:+[PCIN] w:* %d t:DSP48A1 %i + From e226a8f7f1e2fa55102890462fc2a0097a04092b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Ko=C5=9Bcielnicki?= Date: Wed, 25 Dec 2019 15:39:40 +0100 Subject: [PATCH 8/8] Minor nit fixes --- passes/pmgen/xilinx_dsp_cascade.pmg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/passes/pmgen/xilinx_dsp_cascade.pmg
b/passes/pmgen/xilinx_dsp_cascade.pmg index b4c2b348f..b14a1ee0a 100644 --- a/passes/pmgen/xilinx_dsp_cascade.pmg +++ b/passes/pmgen/xilinx_dsp_cascade.pmg @@ -66,7 +66,7 @@ endcode // (controlled by OPMODE[3:2] for DSP48A*, by OPMODE[6:4] for DSP48E1) // set to zero and (b) doesn't already use the 'PCOUT' port match first - select (first->type.in(\DSP48A, \DSP48A1) && port(first, \OPMODE, Const(0, 7)).extract(2,2) == Const::from_string("00")) || (first->type.in(\DSP48E1) && port(first, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("000")) + select (first->type.in(\DSP48A, \DSP48A1) && port(first, \OPMODE, Const(0, 8)).extract(2,2) == Const::from_string("00")) || (first->type.in(\DSP48E1) && port(first, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("000")) select nusers(port(first, \PCOUT, SigSpec())) <= 1 endmatch @@ -189,7 +189,7 @@ arg next // 'PCIN' port unused match nextP select !param(nextP, \CREG, State::S1).as_bool() - select (nextP->type.in(\DSP48A, \DSP48A1) && port(nextP, \OPMODE, Const(0, 7)).extract(2,2) == Const::from_string("11")) || (nextP->type.in(\DSP48E1) && port(nextP, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("011")) + select (nextP->type.in(\DSP48A, \DSP48A1) && port(nextP, \OPMODE, Const(0, 8)).extract(2,2) == Const::from_string("11")) || (nextP->type.in(\DSP48E1) && port(nextP, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("011")) select nusers(port(nextP, \C, SigSpec())) > 1 select nusers(port(nextP, \PCIN, SigSpec())) == 0 index port(nextP, \C)[0] === port(std::get<0>(chain.back()), \P)[0]