quicklogic: ql_dsp_simd add dspv2 support, fix dspv1

This commit is contained in:
Emil J. Tywoniak 2025-03-05 21:39:18 +01:00
parent 434334ba63
commit a8c10eea03
2 changed files with 251 additions and 72 deletions

View File

@ -29,7 +29,7 @@ PRIVATE_NAMESPACE_BEGIN
struct QlDspSimdPass : public Pass {
QlDspSimdPass() : Pass("ql_dsp_simd", "merge QuickLogic K6N10f DSP pairs to operate in SIMD mode") {}
QlDspSimdPass() : Pass("ql_dsp_simd", "merge QuickLogic K6N10f DSP pairs to operate in fractured mode") {}
void help() override
{
@ -37,16 +37,17 @@ struct QlDspSimdPass : public Pass {
log("\n");
log(" ql_dsp_simd [selection]\n");
log("\n");
log("This pass identifies K6N10f DSP cells with identical configuration and pack pairs\n");
log("of them together into other DSP cells that can perform SIMD operation.\n");
log("This pass identifies K6N10f DSP cells with identical configuration and merges\n");
log("pairs of them, enabling fractured mode.\n");
}
// ..........................................
/// Describes DSP config unique to a whole DSP cell
/// Describes DSP config unique to a DSP cell
struct DspConfig {
// Port connections
dict<RTLIL::IdString, RTLIL::SigSpec> connections;
dict<RTLIL::IdString, RTLIL::Const> parameters;
DspConfig() = default;
@ -55,60 +56,149 @@ struct QlDspSimdPass : public Pass {
[[nodiscard]] Hasher hash_into(Hasher h) const { h.eat(connections); return h; }
bool operator==(const DspConfig &ref) const { return connections == ref.connections; }
bool operator==(const DspConfig &ref) const { return connections == ref.connections && parameters == ref.parameters; }
};
// ..........................................
const int m_ModeBitsSize = 80;
// DSP parameters
const std::vector<std::string> m_DspParams = {"COEFF_3", "COEFF_2", "COEFF_1", "COEFF_0"};
const int m_Dspv1ModeBitsSize = 80;
const int m_Dspv2ModeBitsSize = 68;
/// Temporary SigBit to SigBit helper map.
SigMap sigmap;
static bool is_cascade(const Cell* cell)
{
std::array cascade_ports {
ID(a_cout_o),
ID(b_cout_o),
ID(z_cout_o),
ID(a_cin_i),
ID(b_cin_i),
ID(z_cin_i)
};
for (auto p : cascade_ports) {
if (cell->hasPort(p) && !cell->getPort(p).is_fully_undef())
return true;
}
return false;
}
// ..........................................
void execute(std::vector<std::string> a_Args, RTLIL::Design *a_Design) override
{
log_header(a_Design, "Executing QL_DSP_SIMD pass.\n");
// DSP control and config ports to consider and how to map them to ports
// of the target DSP cell
static const std::vector<std::pair<IdString, IdString>> m_DspCfgPorts = {
std::make_pair(ID(clock_i), ID(clk)),
std::make_pair(ID(reset_i), ID(reset)),
std::make_pair(ID(feedback_i), ID(feedback)),
std::make_pair(ID(load_acc_i), ID(load_acc)),
std::make_pair(ID(unsigned_a_i), ID(unsigned_a)),
std::make_pair(ID(unsigned_b_i), ID(unsigned_b)),
std::make_pair(ID(subtract_i), ID(subtract)),
std::make_pair(ID(output_select_i), ID(output_select)),
std::make_pair(ID(saturate_enable_i), ID(saturate_enable)),
std::make_pair(ID(shift_right_i), ID(shift_right)),
std::make_pair(ID(round_i), ID(round)),
std::make_pair(ID(register_inputs_i), ID(register_inputs))
// The following lists have to match simulation model interfaces.
// DSP control and config ports that must be equal between
// merged half-blocks
// In addition to functional differences,
// v1 and v2 have different balance between shared functionality
// in ports vs params.
static const std::vector<IdString> m_Dspv1CfgPorts = {
ID(acc_fir_i),
ID(feedback_i),
ID(load_acc_i),
ID(unsigned_a_i),
ID(unsigned_b_i),
ID(clock_i),
ID(s_reset),
ID(saturate_enable_i),
ID(output_select_i),
ID(round_i),
ID(shift_right_i),
ID(subtract_i),
ID(register_inputs_i),
ID(coeff_0_i),
ID(coeff_1_i),
ID(coeff_2_i),
ID(coeff_3_i),
};
static const std::vector<IdString> m_Dspv1CfgParams = {};
static const std::vector<IdString> m_Dspv2CfgPorts = {
ID(clock_i),
ID(reset_i),
ID(acc_reset_i),
ID(feedback_i),
ID(load_acc_i),
ID(output_select_i),
};
static const std::vector<IdString> m_Dspv2CfgParams = {
ID(COEFF_0),
ID(ACC_FIR),
ID(ROUND),
ID(ZC_SHIFT),
ID(ZREG_SHIFT),
ID(SHIFT_REG),
ID(SATURATE),
ID(SUBTRACT),
ID(PRE_ADD),
ID(A_SEL),
ID(A_REG),
ID(B_SEL),
ID(B_REG),
ID(C_REG),
ID(BC_REG),
ID(M_REG),
ID(FRAC_MODE),
};
// DSP data ports and how to map them to ports of the target DSP cell
static const std::vector<std::pair<IdString, IdString>> m_DspDataPorts = {
std::make_pair(ID(a_i), ID(a)),
std::make_pair(ID(b_i), ID(b)),
std::make_pair(ID(acc_fir_i), ID(acc_fir)),
std::make_pair(ID(z_o), ID(z)),
std::make_pair(ID(dly_b_o), ID(dly_b))
// Data ports to be concatenated into merged cell
static const std::vector<IdString> m_Dspv1DataPorts = {
ID(a_i),
ID(b_i),
ID(z_o),
ID(dly_b_o),
};
static const std::vector<IdString> m_Dspv2DataPorts = {
ID(a_i),
ID(b_i),
ID(c_i),
ID(z_o),
};
// Params to serialize into MODE_BITS param
static const std::vector<IdString> m_Dspv1ModeBitParams = {
ID(COEFF_3),
ID(COEFF_2),
ID(COEFF_1),
ID(COEFF_0),
};
static const std::vector<IdString> m_Dspv2ModeBitParams = {};
// Source DSP cell type (SISD)
static const IdString m_SisdDspType = ID(dsp_t1_10x9x32);
// Source DSP cell type (half-block)
static const IdString m_Dspv1SisdType = ID(dsp_t1_10x9x32_cfg_ports);
static const IdString m_Dspv2SisdType = ID(dspv2_16x9x32_cfg_ports);
// Target DSP cell types for the SIMD mode
static const IdString m_SimdDspType = ID(QL_DSP2);
// Target DSP cell types (full-block)
static const IdString m_Dspv1SimdType = ID(dsp_t1_20x18x64_cfg_ports);
static const IdString m_Dspv2SimdType = ID(dspv2_32x18x64_cfg_ports);
// Parse args
extra_args(a_Args, 1, a_Design);
int dsp_version = 1;
size_t argidx;
for (argidx = 1; argidx < a_Args.size(); argidx++) {
if (a_Args[argidx] == "-dspv2") {
dsp_version = 2;
continue;
}
break;
}
extra_args(a_Args, argidx, a_Design);
log_assert(dsp_version < 3);
log_assert(dsp_version > 0);
const auto& cfg_ports = (dsp_version == 1) ? m_Dspv1CfgPorts : m_Dspv2CfgPorts;
const auto& cfg_params = (dsp_version == 1) ? m_Dspv1CfgParams : m_Dspv2CfgParams;
const auto& data_ports = (dsp_version == 1) ? m_Dspv1DataPorts : m_Dspv2DataPorts;
auto half_dsp = (dsp_version == 1) ? m_Dspv1SisdType : m_Dspv2SisdType;
auto full_dsp = (dsp_version == 1) ? m_Dspv1SimdType : m_Dspv2SimdType;
auto mode_bit_params = (dsp_version == 1) ? m_Dspv1ModeBitParams : m_Dspv2ModeBitParams;
auto mode_bits_size = (dsp_version == 1) ? m_Dspv1ModeBitsSize : m_Dspv2ModeBitsSize;
int cellsMerged = 0;
// Process modules
for (auto module : a_Design->selected_modules()) {
// Setup the SigMap
@ -118,25 +208,33 @@ struct QlDspSimdPass : public Pass {
dict<DspConfig, std::vector<RTLIL::Cell *>> groups;
for (auto cell : module->selected_cells()) {
// Check if this is a DSP cell we are looking for (type starts with m_SisdDspType)
if (cell->type != m_SisdDspType)
if (cell->type != half_dsp)
continue;
// Skip if it has the (* keep *) attribute set
if (cell->has_keep_attr())
if (cell->has_keep_attr()) {
log_debug("skip %s because it's marked keep\n", log_id(cell));
continue;
}
// Skip if it has cascading
if (is_cascade(cell)) {
log_debug("skip %s because it's cascading\n", log_id(cell));
continue;
}
// Add to a group
const auto key = getDspConfig(cell, m_DspCfgPorts);
const auto key = getDspConfig(cell, cfg_ports, cfg_params);
groups[key].push_back(cell);
}
log_debug("Checking %zu detected mode-equivalent DSP cell classes\n", groups.size());
std::vector<Cell *> cellsToRemove;
// Map cell pairs to the target DSP SIMD cell
for (const auto &it : groups) {
const auto &group = it.second;
const auto &config = it.first;
log_debug("Checking %zu half-blocks\n", group.size());
// Ensure an even number
size_t count = group.size();
if (count & 1)
@ -148,7 +246,7 @@ struct QlDspSimdPass : public Pass {
Cell *dsp_b = group[i + 1];
// Create the new cell
Cell *simd = module->addCell(NEW_ID, m_SimdDspType);
Cell *simd = module->addCell(NEW_ID, full_dsp);
log(" SIMD: %s (%s) + %s (%s) => %s (%s)\n", log_id(dsp_a), log_id(dsp_a->type),
log_id(dsp_b), log_id(dsp_b->type), log_id(simd), log_id(simd->type));
@ -157,26 +255,35 @@ struct QlDspSimdPass : public Pass {
// its port widths)
if (!simd->known())
log_error(" The target cell type '%s' is not known!", log_id(simd));
// Connect common ports
for (const auto &it : m_DspCfgPorts)
simd->setPort(it.first, config.connections.at(it.second));
for (auto port : cfg_ports) {
if (config.connections.count(port))
simd->setPort(port, config.connections.at(port));
}
for (auto param : cfg_params) {
if (config.parameters.count(param))
simd->setParam(param, config.parameters.at(param));
}
// Connect data ports
for (const auto &it : m_DspDataPorts) {
for (auto port : data_ports) {
size_t width;
bool isOutput;
std::tie(width, isOutput) = getPortInfo(simd, it.second);
std::tie(width, isOutput) = getPortInfo(simd, port);
if (!width)
log_error("Can't determine portinfo for %s\n", log_id(port));
auto getConnection = [&](const RTLIL::Cell *cell) {
RTLIL::SigSpec sigspec;
if (cell->hasPort(it.first)) {
const auto &sig = cell->getPort(it.first);
if (cell->hasPort(port)) {
const auto &sig = cell->getPort(port);
sigspec.append(sig);
}
int padding = width / 2 - sigspec.size();
log_assert(padding >= 0);
if (padding) {
if (!isOutput)
@ -190,27 +297,32 @@ struct QlDspSimdPass : public Pass {
RTLIL::SigSpec sigspec;
sigspec.append(getConnection(dsp_a));
sigspec.append(getConnection(dsp_b));
simd->setPort(it.second, sigspec);
simd->setPort(port, sigspec);
}
// Concatenate FIR coefficient parameters into the single
// MODE_BITS parameter
Const mode_bits;
for (const auto &it : m_DspParams) {
auto val_a = dsp_a->getParam(it);
auto val_b = dsp_b->getParam(it);
mode_bits.bits().insert(mode_bits.bits().end(),
val_a.begin(), val_a.end());
mode_bits.bits().insert(mode_bits.bits().end(),
val_b.begin(), val_b.end());
if (mode_bit_params.size()) {
// Concatenate FIR coefficient parameters into the single
// MODE_BITS parameter
Const mode_bits;
for (const auto &it : mode_bit_params) {
auto val_a = dsp_a->getParam(it);
auto val_b = dsp_b->getParam(it);
mode_bits.bits().insert(mode_bits.bits().end(),
val_a.begin(), val_a.end());
mode_bits.bits().insert(mode_bits.bits().end(),
val_b.begin(), val_b.end());
}
simd->setParam(ID(MODE_BITS), mode_bits);
log_assert(mode_bits.size() == mode_bits_size);
}
// Enable the fractured mode by connecting the control
// port.
simd->setPort(ID(f_mode), State::S1);
simd->setParam(ID(MODE_BITS), mode_bits);
log_assert(mode_bits.size() == m_ModeBitsSize);
// Enable the fractured mode
if (dsp_version == 1)
simd->setPort(ID(f_mode), State::S1);
else
simd->setParam(ID(FRAC_MODE), State::S1);
// Handle the "is_inferred" attribute. If one of the fragments
// is not inferred mark the whole DSP as not inferred
@ -223,11 +335,12 @@ struct QlDspSimdPass : public Pass {
cellsToRemove.push_back(dsp_b);
}
}
cellsMerged += cellsToRemove.size();
// Remove old cells
for (auto cell : cellsToRemove)
module->remove(cell);
}
log("Merged %d half-block cells\n", cellsMerged);
}
// ..........................................
@ -257,19 +370,24 @@ struct QlDspSimdPass : public Pass {
}
/// Given a DSP cell populates and returns a DspConfig struct for it.
DspConfig getDspConfig(RTLIL::Cell *a_Cell, const std::vector<std::pair<IdString, IdString>> &dspCfgPorts)
DspConfig getDspConfig(RTLIL::Cell *a_Cell, const std::vector<IdString> &dspCfgPorts, const std::vector<IdString> &dspCfgParams)
{
DspConfig config;
for (const auto &it : dspCfgPorts) {
auto port = it.first;
for (auto port : dspCfgPorts) {
// Port unconnected
if (!a_Cell->hasPort(port))
continue;
config.connections[port] = sigmap(a_Cell->getPort(port));
}
for (auto param : dspCfgParams) {
// Param unset?
if (!a_Cell->hasParam(param))
continue;
config.parameters[param] = a_Cell->getParam(param);
}
return config;
}

View File

@ -0,0 +1,61 @@
read_verilog <<EOT
module foo(
input [7:0] A,
input [7:0] B,
input [7:0] C,
input [7:0] D,
output reg [7:0] X,
output reg [7:0] Y);
assign X = A * B;
assign Y = C * D;
endmodule
EOT
synth_quicklogic -dspv2 -run :map_dsp
# this is just taken from map_dsp step
wreduce t:$mul
ql_dsp_macc -dspv2
techmap -map +/mul2dsp.v -map +/quicklogic/qlf_k6n10f/dspv2_map.v -D USE_DSP_CFG_PARAMS=0 -D DSP_SIGNEDONLY -D DSP_A_MAXWIDTH=32 -D DSP_B_MAXWIDTH=18 -D DSP_A_MINWIDTH=10 -D DSP_B_MINWIDTH=10 -D DSP_NAME=$__MUL32X18
chtype -set $mul t:$__soft_mul
techmap -map +/mul2dsp.v -map +/quicklogic/qlf_k6n10f/dspv2_map.v -D USE_DSP_CFG_PARAMS=0 -D DSP_SIGNEDONLY -D DSP_A_MAXWIDTH=16 -D DSP_B_MAXWIDTH=9 -D DSP_A_MINWIDTH=4 -D DSP_B_MINWIDTH=4 -D DSP_NAME=$__MUL16X9
# TODO is this missing from synth_quicklogic?
read_verilog -lib +/quicklogic/qlf_k6n10f/dspv2_sim.v
select -assert-count 2 t:dspv2_16x9x32_cfg_ports
select -assert-count 0 t:dspv2_32x18x64_cfg_ports
equiv_opt -assert -async2sync -map +/quicklogic/qlf_k6n10f/dspv2_sim.v ql_dsp_simd -dspv2
design -load postopt
select -assert-count 0 t:dspv2_16x9x32_cfg_ports
select -assert-count 1 t:dspv2_32x18x64_cfg_ports
design -reset
read_verilog <<EOT
module foo(
input [7:0] A,
input [7:0] B,
input [7:0] C,
input [7:0] D,
input [7:0] E,
output reg [7:0] X,
output reg [7:0] Y);
assign X = A * B;
assign Y = C * (D + E); // <-- look here
endmodule
EOT
synth_quicklogic -dspv2 -run :map_dsp
wreduce t:$mul
ql_dsp_macc -dspv2
techmap -map +/mul2dsp.v -map +/quicklogic/qlf_k6n10f/dspv2_map.v -D USE_DSP_CFG_PARAMS=0 -D DSP_SIGNEDONLY -D DSP_A_MAXWIDTH=32 -D DSP_B_MAXWIDTH=18 -D DSP_A_MINWIDTH=10 -D DSP_B_MINWIDTH=10 -D DSP_NAME=$__MUL32X18
chtype -set $mul t:$__soft_mul
techmap -map +/mul2dsp.v -map +/quicklogic/qlf_k6n10f/dspv2_map.v -D USE_DSP_CFG_PARAMS=0 -D DSP_SIGNEDONLY -D DSP_A_MAXWIDTH=16 -D DSP_B_MAXWIDTH=9 -D DSP_A_MINWIDTH=4 -D DSP_B_MINWIDTH=4 -D DSP_NAME=$__MUL16X9
# TODO is this missing from synth_quicklogic?
read_verilog -lib +/quicklogic/qlf_k6n10f/dspv2_sim.v
select -assert-count 2 t:dspv2_16x9x32_cfg_ports
select -assert-count 0 t:dspv2_32x18x64_cfg_ports
equiv_opt -assert -async2sync -map +/quicklogic/qlf_k6n10f/dspv2_sim.v ql_dsp_simd -dspv2
design -load postopt
select -assert-count 0 t:dspv2_16x9x32_cfg_ports
select -assert-count 1 t:dspv2_32x18x64_cfg_ports