diff --git a/passes/opt/opt_lut.cpp b/passes/opt/opt_lut.cpp index 0eb6b13e5..befe346a3 100644 --- a/passes/opt/opt_lut.cpp +++ b/passes/opt/opt_lut.cpp @@ -26,12 +26,15 @@ PRIVATE_NAMESPACE_BEGIN struct OptLutWorker { + dict> &dlogic; RTLIL::Module *module; ModIndex index; SigMap sigmap; pool luts; dict luts_arity; + dict> luts_dlogics; + dict> luts_dlogic_inputs; int combined_count = 0; @@ -61,23 +64,37 @@ struct OptLutWorker void show_stats_by_arity() { dict arity_counts; + dict dlogic_counts; int max_arity = 0; + for (auto lut_arity : luts_arity) { max_arity = max(max_arity, lut_arity.second); arity_counts[lut_arity.second]++; } - log("Number of LUTs: %6zu\n", luts.size()); + for (auto &lut_dlogics : luts_dlogics) + { + for (auto &lut_dlogic : lut_dlogics.second) + { + dlogic_counts[lut_dlogic->type]++; + } + } + + log("Number of LUTs: %8zu\n", luts.size()); for (int arity = 1; arity <= max_arity; arity++) { if (arity_counts[arity]) - log(" %d-LUT: %13d\n", arity, arity_counts[arity]); + log(" %d-LUT %16d\n", arity, arity_counts[arity]); + } + for (auto &dlogic_count : dlogic_counts) + { + log(" with %-12s %4d\n", dlogic_count.first.c_str(), dlogic_count.second); } } - OptLutWorker(RTLIL::Module *module) : - module(module), index(module), sigmap(module) + OptLutWorker(dict> &dlogic, RTLIL::Module *module) : + dlogic(dlogic), module(module), index(module), sigmap(module) { log("Discovering LUTs.\n"); for (auto cell : module->selected_cells()) @@ -88,15 +105,84 @@ struct OptLutWorker SigSpec lut_input = cell->getPort("\\A"); int lut_arity = 0; - for (auto &bit : lut_input) + log("Found $lut\\WIDTH=%d cell %s.%s.\n", lut_width, log_id(module), log_id(cell)); + luts.insert(cell); + + // First, find all dedicated logic we're connected to. This results in an overapproximation + // of such connections. + pool lut_all_dlogics; + for (int i = 0; i < lut_width; i++) { - if (bit.wire) + SigBit bit = lut_input[i]; + for (auto &port : index.query_ports(bit)) + { + if (dlogic.count(port.cell->type)) + { + auto &dlogic_map = dlogic[port.cell->type]; + if (dlogic_map.count(i)) + { + if (port.port == dlogic_map[i]) + { + lut_all_dlogics.insert(port.cell); + } + } + } + } + } + + // Second, make sure that the connection to dedicated logic is legal. If it is not legal, + // it means one of the two things: + // * The connection is spurious. I.e. this is dedicated logic that will be packed + // with some other LUT, and it just happens to be conected to this LUT as well. + // * The connection is illegal. + // In either of these cases, we don't need to concern ourselves with preserving the connection + // between this LUT and this dedicated logic cell. + pool lut_legal_dlogics; + pool lut_dlogic_inputs; + for (auto lut_dlogic : lut_all_dlogics) + { + auto &dlogic_map = dlogic[lut_dlogic->type]; + bool legal = true; + for (auto &dlogic_conn : dlogic_map) + { + if (lut_width <= dlogic_conn.first) + { + log(" LUT has illegal connection to %s cell %s.%s.\n", lut_dlogic->type.c_str(), log_id(module), log_id(lut_dlogic)); + log(" LUT input A[%d] not present.\n", dlogic_conn.first); + legal = false; + break; + } + if (sigmap(lut_input[dlogic_conn.first]) != sigmap(lut_dlogic->getPort(dlogic_conn.second))) + { + log(" LUT has illegal connection to %s cell %s.%s.\n", lut_dlogic->type.c_str(), log_id(module), log_id(lut_dlogic)); + log(" LUT input A[%d] (wire %s) not connected to %s port %s (wire %s).\n", dlogic_conn.first, log_signal(lut_input[dlogic_conn.first]), lut_dlogic->type.c_str(), dlogic_conn.second.c_str(), log_signal(lut_dlogic->getPort(dlogic_conn.second))); + legal = false; + break; + } + } + + if (legal) + { + log(" LUT has legal connection to %s cell %s.%s.\n", lut_dlogic->type.c_str(), log_id(module), log_id(lut_dlogic)); + lut_legal_dlogics.insert(lut_dlogic); + for (auto &dlogic_conn : dlogic_map) + lut_dlogic_inputs.insert(dlogic_conn.first); + } + } + + // Third, determine LUT arity. An n-wide LUT that has k constant inputs and m inputs shared with dedicated + // logic implements an (n-k-m)-ary function. + for (int i = 0; i < lut_width; i++) + { + SigBit bit = lut_input[i]; + if (bit.wire || lut_dlogic_inputs.count(i)) lut_arity++; } - log("Found $lut cell %s.%s with WIDTH=%d implementing %d-LUT.\n", log_id(module), log_id(cell), lut_width, lut_arity); - luts.insert(cell); + log(" Cell implements a %d-LUT.\n", lut_arity); luts_arity[cell] = lut_arity; + luts_dlogics[cell] = lut_legal_dlogics; + luts_dlogic_inputs[cell] = lut_dlogic_inputs; } } show_stats_by_arity(); @@ -111,12 +197,13 @@ struct OptLutWorker SigSpec lutA_output = sigmap(lutA->getPort("\\Y")[0]); int lutA_width = lutA->getParam("\\WIDTH").as_int(); int lutA_arity = luts_arity[lutA]; + pool &lutA_dlogic_inputs = luts_dlogic_inputs[lutA]; auto lutA_output_ports = index.query_ports(lutA->getPort("\\Y")); if (lutA_output_ports.size() != 2) continue; - for (auto port : lutA_output_ports) + for (auto &port : lutA_output_ports) { if (port.cell == lutA) continue; @@ -128,6 +215,7 @@ struct OptLutWorker SigSpec lutB_output = sigmap(lutB->getPort("\\Y")[0]); int lutB_width = lutB->getParam("\\WIDTH").as_int(); int lutB_arity = luts_arity[lutB]; + pool &lutB_dlogic_inputs = luts_dlogic_inputs[lutB]; log("Found %s.%s (cell A) feeding %s.%s (cell B).\n", log_id(module), log_id(lutA), log_id(module), log_id(lutB)); @@ -140,7 +228,7 @@ struct OptLutWorker } for (auto &bit : lutB_input) { - if(bit.wire) + if (bit.wire) lutB_inputs.insert(sigmap(bit)); } @@ -152,7 +240,15 @@ struct OptLutWorker } int lutM_arity = lutA_arity + lutB_arity - 1 - common_inputs.size(); - log(" Cell A is a %d-LUT. Cell B is a %d-LUT. Cells share %zu input(s) and can be merged into one %d-LUT.\n", lutA_arity, lutB_arity, common_inputs.size(), lutM_arity); + if (lutA_dlogic_inputs.size()) + log(" Cell A is a %d-LUT with %zu dedicated connections. ", lutA_arity, lutA_dlogic_inputs.size()); + else + log(" Cell A is a %d-LUT. ", lutA_arity); + if (lutB_dlogic_inputs.size()) + log("Cell B is a %d-LUT with %zu dedicated connections.\n", lutB_arity, lutB_dlogic_inputs.size()); + else + log("Cell B is a %d-LUT.\n", lutB_arity); + log(" Cells share %zu input(s) and can be merged into one %d-LUT.\n", common_inputs.size(), lutM_arity); const int COMBINE_A = 1, COMBINE_B = 2, COMBINE_EITHER = COMBINE_A | COMBINE_B; int combine_mask = 0; @@ -160,6 +256,10 @@ struct OptLutWorker { log(" Not combining LUTs into cell A (combined LUT wider than cell A).\n"); } + else if (lutB_dlogic_inputs.size() > 0) + { + log(" Not combining LUTs into cell A (cell B is connected to dedicated logic).\n"); + } else if (lutB->get_bool_attribute("\\lut_keep")) { log(" Not combining LUTs into cell A (cell B has attribute \\lut_keep).\n"); @@ -172,6 +272,10 @@ struct OptLutWorker { log(" Not combining LUTs into cell B (combined LUT wider than cell B).\n"); } + else if (lutA_dlogic_inputs.size() > 0) + { + log(" Not combining LUTs into cell B (cell A is connected to dedicated logic).\n"); + } else if (lutA->get_bool_attribute("\\lut_keep")) { log(" Not combining LUTs into cell B (cell A has attribute \\lut_keep).\n"); @@ -204,11 +308,13 @@ struct OptLutWorker RTLIL::Cell *lutM, *lutR; pool lutM_inputs, lutR_inputs; + pool lutM_dlogic_inputs; if (combine == COMBINE_A) { log(" Combining LUTs into cell A.\n"); lutM = lutA; lutM_inputs = lutA_inputs; + lutM_dlogic_inputs = lutA_dlogic_inputs; lutR = lutB; lutR_inputs = lutB_inputs; } @@ -217,6 +323,7 @@ struct OptLutWorker log(" Combining LUTs into cell B.\n"); lutM = lutB; lutM_inputs = lutB_inputs; + lutM_dlogic_inputs = lutB_dlogic_inputs; lutR = lutA; lutR_inputs = lutA_inputs; } @@ -238,7 +345,13 @@ struct OptLutWorker std::vector lutM_new_inputs; for (int i = 0; i < lutM_width; i++) { - if ((!lutM_input[i].wire || sigmap(lutM_input[i]) == lutA_output) && lutR_unique.size()) + bool input_unused = false; + if (sigmap(lutM_input[i]) == lutA_output) + input_unused = true; + if (!lutM_input[i].wire && !lutM_dlogic_inputs.count(i)) + input_unused = true; + + if (input_unused && lutR_unique.size()) { SigBit new_input = lutR_unique.pop(); log(" Connecting input %d as %s.\n", i, log_signal(new_input)); @@ -246,7 +359,7 @@ struct OptLutWorker } else if (sigmap(lutM_input[i]) == lutA_output) { - log(" Disconnecting input %d.\n", i); + log(" Disconnecting cascade input %d.\n", i); lutM_new_inputs.push_back(SigBit()); } else @@ -292,26 +405,59 @@ struct OptLutWorker } }; +static void split(std::vector &tokens, const std::string &text, char sep) +{ + size_t start = 0, end = 0; + while ((end = text.find(sep, start)) != std::string::npos) { + tokens.push_back(text.substr(start, end - start)); + start = end + 1; + } + tokens.push_back(text.substr(start)); +} + struct OptLutPass : public Pass { OptLutPass() : Pass("opt_lut", "optimize LUT cells") { } void help() YS_OVERRIDE { + // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---| log("\n"); log(" opt_lut [options] [selection]\n"); log("\n"); log("This pass combines cascaded $lut cells with unused inputs.\n"); log("\n"); + log(" -dlogic :=[:=...]\n"); + log(" preserve connections to dedicated logic cell that has ports\n"); + log(" connected to LUT inputs . this includes\n"); + log(" the case where both LUT and dedicated logic input are connected to\n"); + log(" the same constant.\n"); + log("\n"); } void execute(std::vector args, RTLIL::Design *design) YS_OVERRIDE { log_header(design, "Executing OPT_LUT pass (optimize LUTs).\n"); + dict> dlogic; + size_t argidx; for (argidx = 1; argidx < args.size(); argidx++) { - // if (args[argidx] == "-???") { - // continue; - // } + if (args[argidx] == "-dlogic" && argidx+1 < args.size()) { + std::vector tokens; + split(tokens, args[++argidx], ':'); + if (tokens.size() < 2) + log_cmd_error("The -dlogic option requires at least one connection.\n"); + IdString type = "\\" + tokens[0]; + for (auto it = tokens.begin() + 1; it != tokens.end(); ++it) { + std::vector conn_tokens; + split(conn_tokens, *it, '='); + if (conn_tokens.size() != 2) + log_cmd_error("Invalid format of -dlogic signal mapping.\n"); + IdString logic_port = "\\" + conn_tokens[0]; + int lut_input = atoi(conn_tokens[1].c_str()); + dlogic[type][lut_input] = logic_port; + } + continue; + } break; } extra_args(args, argidx, design); @@ -319,7 +465,7 @@ struct OptLutPass : public Pass { int total_count = 0; for (auto module : design->selected_modules()) { - OptLutWorker worker(module); + OptLutWorker worker(dlogic, module); total_count += worker.combined_count; } if (total_count) diff --git a/techlibs/ice40/synth_ice40.cc b/techlibs/ice40/synth_ice40.cc index 7094db50d..cc4627cd3 100644 --- a/techlibs/ice40/synth_ice40.cc +++ b/techlibs/ice40/synth_ice40.cc @@ -268,8 +268,8 @@ struct SynthIce40Pass : public ScriptPass run("abc -lut 4"); run("clean"); if (relut || help_mode) { - run("ice40_unlut", "(only if -relut)"); - run("opt_lut", " (only if -relut)"); + run("ice40_unlut", " (only if -relut)"); + run("opt_lut -dlogic SB_CARRY:I0=1:I1=2:CI=3", "(only if -relut)"); } } diff --git a/tests/opt/opt_lut.ys b/tests/opt/opt_lut.ys index 1c7dc1473..86ad93bb3 100644 --- a/tests/opt/opt_lut.ys +++ b/tests/opt/opt_lut.ys @@ -3,7 +3,7 @@ synth_ice40 ice40_unlut design -save preopt -opt_lut +opt_lut -dlogic SB_CARRY:I0=1:I1=2:CI=3 design -stash postopt design -copy-from preopt -as preopt top