diff --git a/passes/techmap/flowmap.cc b/passes/techmap/flowmap.cc index cdca78db8..be70b579b 100644 --- a/passes/techmap/flowmap.cc +++ b/passes/techmap/flowmap.cc @@ -19,7 +19,7 @@ // [[CITE]] // Jason Cong; Yuzheng Ding, "An Optimal Technology Mapping Algorithm for Delay Optimization in Lookup-Table Based FPGA Designs," -// Computer-Aided Design of Integrated Circuits and Systems, IEEE Transactions on, vol. 13, no. 1, Jan 1994 +// Computer-Aided Design of Integrated Circuits and Systems, IEEE Transactions on, Vol. 13, pp. 1-12, Jan. 1994. // doi: 10.1109/43.273754 // Required reading material: @@ -27,7 +27,8 @@ // Min-cut max-flow theorem: // https://www.coursera.org/lecture/algorithms-part2/maxflow-mincut-theorem-beb9G // FlowMap paper: -// http://cadlab.cs.ucla.edu/~cong/papers/iccad92.pdf +// http://cadlab.cs.ucla.edu/~cong/papers/iccad92.pdf (short version) +// https://limsk.ece.gatech.edu/book/papers/flowmap.pdf (long version) // Notes on implementation: // @@ -50,7 +51,8 @@ // 3. The paper ambiguously states: "Moreover, we can find such a cut (X′′, X̅′′) by performing a depth first search starting at the source s, // and including in X′′ all the nodes which are reachable from s." This actually refers to a specific kind of search, mincut computation. // Mincut computation involves computing the set of nodes reachable from s by an undirected path with no full (i.e. zero capacity) forward -// edges or empty (i.e. no flow) backward edges. +// edges or empty (i.e. no flow) backward edges. In addition, the depth first search is required to compute a max-volume max-flow min-cut +// specifically, because a max-flow min-cut is not, in general, unique. #include "kernel/yosys.h" #include "kernel/sigtools.h" @@ -63,10 +65,10 @@ PRIVATE_NAMESPACE_BEGIN struct GraphStyle { string label; - string color; + string color, fillcolor; - GraphStyle(string label = "", string color = "black") : - label(label), color(color) {} + GraphStyle(string label = "", string color = "black", string fillcolor = "") : + label(label), color(color), fillcolor(fillcolor) {} }; static string dot_escape(string value) @@ -109,13 +111,11 @@ static void dump_dot_graph(string filename, if (outputs[node]) shape = "octagon"; auto prop = node_style(node); - string id; - if (node == SigBit()) - id = "(source)"; - else - id = log_signal(node); - fprintf(f, " n%d [ shape=%s, fontname=\"Monospace\", label=\"%s%s\", color=\"%s\" ];\n", - ids[node], shape.c_str(), dot_escape(id).c_str(), dot_escape(prop.label.c_str()).c_str(), prop.color.c_str()); + string style = ""; + if (!prop.fillcolor.empty()) + style = "filled"; + fprintf(f, " n%d [ shape=%s, fontname=\"Monospace\", label=\"%s\", color=\"%s\", fillcolor=\"%s\", style=\"%s\" ];\n", + ids[node], shape.c_str(), dot_escape(prop.label.c_str()).c_str(), prop.color.c_str(), prop.fillcolor.c_str(), style.c_str()); } fprintf(f, " { rank=\"source\"; "); @@ -137,8 +137,8 @@ static void dump_dot_graph(string filename, if (nodes[source] && nodes[sink]) { auto prop = edge_style(source, sink); - fprintf(f, " n%d -> n%d [ label=\"%s\", color=\"%s\" ];\n", - ids[source], ids[sink], dot_escape(prop.label.c_str()).c_str(), prop.color.c_str()); + fprintf(f, " n%d -> n%d [ label=\"%s\", color=\"%s\", fillcolor=\"%s\" ];\n", + ids[source], ids[sink], dot_escape(prop.label.c_str()).c_str(), prop.color.c_str(), prop.fillcolor.c_str()); } } } @@ -163,7 +163,7 @@ struct FlowGraph void dump_dot_graph(string filename) { auto node_style = [&](RTLIL::SigBit node) { - string label; + string label = (node == source) ? "(source)" : log_signal(node); for (auto collapsed_node : collapsed[node]) label += stringf(" %s", log_signal(collapsed_node)); int flow = node_flow[node]; @@ -357,10 +357,12 @@ struct FlowGraph NodePrime source_prime = {source, true}; NodePrime sink_prime = {sink, false}; - pool worklist = {source_prime}, visited; + pool visited; + vector worklist = {source_prime}; while (!worklist.empty()) { - auto node_prime = worklist.pop(); + auto node_prime = worklist.back(); + worklist.pop_back(); if (visited[node_prime]) continue; visited.insert(node_prime); @@ -373,18 +375,18 @@ struct FlowGraph if (!node_prime.is_bottom) // top { if (node_flow[node_prime.node] < MAX_NODE_FLOW) - worklist.insert(node_prime.as_bottom()); + worklist.push_back(node_prime.as_bottom()); for (auto node_pred : edges_bw[node_prime.node]) if (edge_flow[{node_pred, node_prime.node}] > 0) - worklist.insert(NodePrime::bottom(node_pred)); + worklist.push_back(NodePrime::bottom(node_pred)); } else // bottom { if (node_flow[node_prime.node] > 0) - worklist.insert(node_prime.as_top()); + worklist.push_back(node_prime.as_top()); for (auto node_succ : edges_fw[node_prime.node]) if (true /* edge_flow[...] < ∞ */) - worklist.insert(NodePrime::top(node_succ)); + worklist.push_back(NodePrime::top(node_succ)); } } @@ -403,47 +405,73 @@ struct FlowGraph struct FlowmapWorker { int order; - pool cell_types; bool debug; RTLIL::Module *module; SigMap sigmap; ModIndex index; - pool cells; + + dict node_origins; pool nodes, inputs, outputs; dict> edges_fw, edges_bw; dict labels; - dict> lut_gates, lut_inputs; + pool lut_nodes; + dict> lut_gates; + dict> lut_edges_fw, lut_edges_bw; - dict node_origins; - dict> cell_fanout; + int gate_count = 0, lut_count = 0, packed_count = 0; + int gate_area = 0, lut_area = 0; - int mapped_count = 0, packed_count = 0, unique_packed_count = 0; + enum class GraphMode { + Label, + Cut, + }; - void dump_dot_graph(string filename, pool subgraph = {}, pair, pool> cut = {}) + void dump_dot_graph(string filename, GraphMode mode, + pool subgraph_nodes = {}, dict> subgraph_edges = {}, + dict> collapsed = {}, + pair, pool> cut = {}) { - if (subgraph.empty()) - subgraph = nodes; + if (subgraph_nodes.empty()) + subgraph_nodes = nodes; + if (subgraph_edges.empty()) + subgraph_edges = edges_fw; auto node_style = [&](RTLIL::SigBit node) { - string label, color; - if (labels[node] == -1) - label = string("\n"); - else - label = stringf("\nl=%d", labels[node]); - color = "black"; - if (cut.first[node]) - color = "blue"; - if (cut.second[node]) - color = "red"; - return GraphStyle{label, color}; + string label = log_signal(node); + for (auto collapsed_node : collapsed[node]) + if (collapsed_node != node) + label += stringf(" %s", log_signal(collapsed_node)); + switch (mode) + { + case GraphMode::Label: + if (labels[node] == -1) + { + label += "\nl=?"; + return GraphStyle{label}; + } + else + { + label += stringf("\nl=%d", labels[node]); + string fillcolor = stringf("/set311/%d", 1 + labels[node] % 11); + return GraphStyle{label, "", fillcolor}; + } + + case GraphMode::Cut: + if (cut.first[node]) + return GraphStyle{label, "blue"}; + if (cut.second[node]) + return GraphStyle{label, "red"}; + return GraphStyle{label}; + } + return GraphStyle{label}; }; auto edge_style = [&](RTLIL::SigBit, RTLIL::SigBit) { return GraphStyle{}; }; - ::dump_dot_graph(filename, subgraph, edges_fw, inputs, outputs, node_style, edge_style, module->name.str()); + ::dump_dot_graph(filename, subgraph_nodes, subgraph_edges, inputs, outputs, node_style, edge_style, module->name.str()); } pool find_subgraph(RTLIL::SigBit sink) @@ -502,57 +530,56 @@ struct FlowmapWorker return flow_graph; } - FlowmapWorker(int order, pool cell_types, bool debug, RTLIL::Module *module) : - order(order), cell_types(cell_types), debug(debug), module(module), sigmap(module), index(module) + void discover_nodes(pool cell_types) { - log("Labeling cells.\n"); for (auto cell : module->selected_cells()) { - if (cell_types[cell->type]) + if (!cell_types[cell->type]) + continue; + + if (!cell->known()) + log_error("Cell %s (%s.%s) is unknown.\n", cell->type.c_str(), log_id(module), log_id(cell)); + + pool fanout; + for (auto conn : cell->connections()) { - if (!cell->known()) + if (!cell->output(conn.first)) continue; + int offset = -1; + for (auto bit : conn.second) { - log_error("Cell %s (%s.%s) is unknown.\n", cell->type.c_str(), log_id(module), log_id(cell)); + offset++; + if (!bit.wire) continue; + auto mapped_bit = sigmap(bit); + if (nodes[mapped_bit]) + log_error("Multiple drivers found for wire %s.\n", log_signal(mapped_bit)); + nodes.insert(mapped_bit); + node_origins[mapped_bit] = ModIndex::PortInfo(cell, conn.first, offset); + fanout.insert(mapped_bit); } - cells.insert(cell); - - for (auto conn : cell->connections()) - { - if (!cell->output(conn.first)) continue; - int offset = -1; - for (auto bit : conn.second) - { - offset++; - if (!bit.wire) continue; - auto mapped_bit = sigmap(bit); - if (nodes[mapped_bit]) - log_error("Multiple drivers found for wire %s.\n", log_signal(mapped_bit)); - nodes.insert(mapped_bit); - node_origins[mapped_bit] = ModIndex::PortInfo(cell, conn.first, offset); - cell_fanout[cell].insert(mapped_bit); - } - } - - int fanin = 0; - for (auto conn : cell->connections()) - { - if (!cell->input(conn.first)) continue; - for (auto bit : sigmap(conn.second)) - { - if (!bit.wire) continue; - for (auto fanout_bit : cell_fanout[cell]) - { - edges_fw[bit].insert(fanout_bit); - edges_bw[fanout_bit].insert(bit); - } - fanin++; - } - } - - if (fanin > order) - log_error("Cell %s (%s.%s) with fan-in %d cannot be mapped to a %d-LUT.\n", - cell->type.c_str(), log_id(module), log_id(cell), fanin, order); } + + int fanin = 0; + for (auto conn : cell->connections()) + { + if (!cell->input(conn.first)) continue; + for (auto bit : sigmap(conn.second)) + { + if (!bit.wire) continue; + for (auto fanout_bit : fanout) + { + edges_fw[bit].insert(fanout_bit); + edges_bw[fanout_bit].insert(bit); + } + fanin++; + } + } + + if (fanin > order) + log_error("Cell %s (%s.%s) with fan-in %d cannot be mapped to a %d-LUT.\n", + cell->type.c_str(), log_id(module), log_id(cell), fanin, order); + + gate_count++; + gate_area += 1 << fanin; } for (auto edge : edges_fw) @@ -574,15 +601,23 @@ struct FlowmapWorker outputs.insert(node); } + if (debug) + { + dump_dot_graph("flowmap-initial.dot", GraphMode::Label); + log("Dumped initial graph to `flowmap-initial.dot`.\n"); + } + } + + void label_nodes() + { for (auto node : nodes) labels[node] = -1; for (auto input : inputs) - labels[input] = 0; - - if (debug) { - dump_dot_graph("flowmap-init.dot"); - log("Dumped complete combinatorial graph to `flowmap-init.dot`.\n"); + if (input.wire->attributes.count("\\$flowmap_level")) + labels[input] = input.wire->attributes["\\$flowmap_level"].as_int(); + else + labels[input] = 0; } pool worklist = nodes; @@ -644,23 +679,25 @@ struct FlowmapWorker k.insert(xi_node_pred); } log_assert((int)k.size() <= order); - lut_inputs[sink] = k; + lut_edges_bw[sink] = k; + for (auto k_node : k) + lut_edges_fw[k_node].insert(sink); if (debug) { log(" Maximum flow: %d. Assigned label %d.\n", flow, labels[sink]); - dump_dot_graph(stringf("flowmap-%d-sub.dot", debug_num), subgraph, {x, xi}); + dump_dot_graph(stringf("flowmap-%d-sub.dot", debug_num), GraphMode::Cut, subgraph, {}, {}, {x, xi}); log(" Dumped subgraph to `flowmap-%d-sub.dot`.\n", debug_num); flow_graph.dump_dot_graph(stringf("flowmap-%d-flow.dot", debug_num)); log(" Dumped flow graph to `flowmap-%d-flow.dot`.\n", debug_num); - log(" LUT packed:"); - for (auto xi_node : xi) - log(" %s", log_signal(xi_node)); - log(".\n"); log(" LUT inputs:"); for (auto k_node : k) log(" %s", log_signal(k_node)); log(".\n"); + log(" LUT packed gates:"); + for (auto xi_node : xi) + log(" %s", log_signal(xi_node)); + log(".\n"); } for (auto sink_succ : edges_fw[sink]) @@ -669,27 +706,49 @@ struct FlowmapWorker if (debug) { - dump_dot_graph("flowmap-done.dot"); - log("Dumped complete combinatorial graph to `flowmap-done.dot`.\n"); + dump_dot_graph("flowmap-labeled.dot", GraphMode::Label); + log("Dumped labeled graph to `flowmap-labeled.dot`.\n"); + } + } + + int pack_luts() + { + pool worklist = outputs; + while (!worklist.empty()) + { + auto lut_node = worklist.pop(); + lut_nodes.insert(lut_node); + for (auto input_node : lut_edges_bw[lut_node]) + if (!lut_nodes[input_node] && !inputs[input_node]) + worklist.insert(input_node); } int depth = 0; for (auto label : labels) depth = max(depth, label.second); - log("Maximum depth: %d levels.\n", depth); + log("Solved to %d LUTs in %d levels.\n", (int)lut_nodes.size(), depth); + if (debug) + { + pool lut_and_input_nodes; + lut_and_input_nodes.insert(lut_nodes.begin(), lut_nodes.end()); + lut_and_input_nodes.insert(inputs.begin(), inputs.end()); + dump_dot_graph("flowmap-packed.dot", GraphMode::Label, lut_and_input_nodes, lut_edges_fw, lut_gates); + log("Dumped packed graph to `flowmap-packed.dot`.\n"); + } + + return depth; + } + + void map_cells(int minlut) + { ConstEval ce(module); for (auto input_node : inputs) ce.stop(input_node); - log("\n"); - log("Mapping cells.\n"); - pool mapped_nodes; - worklist = outputs; - while (!worklist.empty()) + for (auto node : lut_nodes) { - auto node = worklist.pop(); if (node_origins.count(node)) { auto origin = node_origins[node]; @@ -721,8 +780,8 @@ struct FlowmapWorker log_id(module), log_id(gate_origin.cell), gate_origin.port.c_str(), gate_origin.offset, log_signal(gate_node)); } - vector input_nodes(lut_inputs[node].begin(), lut_inputs[node].end()); - RTLIL::Const lut_table(State::Sx, 1 << input_nodes.size()); + vector input_nodes(lut_edges_bw[node].begin(), lut_edges_bw[node].end()); + RTLIL::Const lut_table(State::Sx, max(1 << input_nodes.size(), 1 << minlut)); for (unsigned i = 0; i < (1 << input_nodes.size()); i++) { ce.push(); @@ -746,29 +805,25 @@ struct FlowmapWorker RTLIL::SigSpec lut_a, lut_y = node; for (auto input_node : input_nodes) lut_a.append_bit(input_node); + lut_a.append(RTLIL::Const(State::Sx, minlut - input_nodes.size())); RTLIL::Cell *lut = module->addLut(NEW_ID, lut_a, lut_y, lut_table); - mapped_count++; - + mapped_nodes.insert(node); for (auto gate_node : lut_gates[node]) { auto gate_origin = node_origins[gate_node]; lut->add_strpool_attribute("\\src", gate_origin.cell->get_strpool_attribute("\\src")); packed_count++; } + lut_count++; + lut_area += lut_table.size(); - log(" Packed into a %d-LUT %s.%s.\n", (int)input_nodes.size(), log_id(module), log_id(lut)); - - mapped_nodes.insert(node); - for (auto input_node : input_nodes) - { - if (!mapped_nodes[input_node] && !inputs[input_node]) - worklist.insert(input_node); - } + if ((int)input_nodes.size() >= minlut) + log(" Packed into a %d-LUT %s.%s.\n", (int)input_nodes.size(), log_id(module), log_id(lut)); + else + log(" Packed into a %d-LUT %s.%s (implemented as %d-LUT).\n", (int)input_nodes.size(), log_id(module), log_id(lut), minlut); } - unique_packed_count += nodes.size(); - for (auto node : mapped_nodes) { auto origin = node_origins[node]; @@ -777,6 +832,19 @@ struct FlowmapWorker origin.cell->setPort(origin.port, driver); } } + + FlowmapWorker(int order, int minlut, pool cell_types, bool debug, RTLIL::Module *module) : + order(order), debug(debug), module(module), sigmap(module), index(module) + { + log("Labeling cells.\n"); + discover_nodes(cell_types); + label_nodes(); + pack_luts(); + + log("\n"); + log("Mapping cells.\n"); + map_cells(minlut); + } }; static void split(std::vector &tokens, const std::string &text, char sep) @@ -802,10 +870,13 @@ struct FlowmapPass : public Pass { log("be evaluated with the `eval` pass, including cells with multiple output ports\n"); log("and multi-bit input and output ports.\n"); log("\n"); - log(" -maxlut \n"); + log(" -maxlut k\n"); log(" perform technology mapping for a k-LUT architecture. if not specified,\n"); log(" defaults to 3.\n"); log("\n"); + log(" -minlut n\n"); + log(" only produce n-input or larger LUTs. if not specified, defaults to 1.\n"); + log("\n"); log(" -cells [,,...]\n"); log(" map specified cells. if not specified, maps $_NOT_, $_AND_, $_OR_,\n"); log(" $_XOR_ and $_MUX_, which are the outputs of the `simplemap` pass.\n"); @@ -816,9 +887,8 @@ struct FlowmapPass : public Pass { } void execute(std::vector args, RTLIL::Design *design) YS_OVERRIDE { - log_header(design, "Executing FLOWMAP pass (pack LUTs with FlowMap).\n"); - int order = 3; + int minlut = 1; vector cells; bool debug = false; @@ -830,6 +900,11 @@ struct FlowmapPass : public Pass { order = atoi(args[++argidx].c_str()); continue; } + if (args[argidx] == "-minlut" && argidx + 1 < args.size()) + { + minlut = atoi(args[++argidx].c_str()); + continue; + } if (args[argidx] == "-cells" && argidx + 1 < args.size()) { split(cells, args[++argidx], ','); @@ -855,18 +930,24 @@ struct FlowmapPass : public Pass { cell_types = {"$_NOT_", "$_AND_", "$_OR_", "$_XOR_", "$_MUX_"}; } - int mapped_count = 0, packed_count = 0, unique_packed_count = 0; + log_header(design, "Executing FLOWMAP pass (pack LUTs with FlowMap).\n"); + + int gate_count = 0, lut_count = 0, packed_count = 0; + int gate_area = 0, lut_area = 0; for (auto module : design->selected_modules()) { - FlowmapWorker worker(order, cell_types, debug, module); - mapped_count += worker.mapped_count; + FlowmapWorker worker(order, minlut, cell_types, debug, module); + gate_count += worker.gate_count; + lut_count += worker.lut_count; packed_count += worker.packed_count; - unique_packed_count += worker.unique_packed_count; + gate_area += worker.gate_area; + lut_area += worker.lut_area; } log("\n"); - log("Mapped %d LUTs.\n", mapped_count); - log("Packed %d cells %d times.\n", unique_packed_count, packed_count); + log("Mapped %d LUTs.\n", lut_count); + log("Packed %d cells; duplicated %d cells.\n", packed_count, packed_count - gate_count); + log("Solution has %.1f%% area overhead.\n", (lut_area - gate_area) * 100.0 / gate_area); } } FlowmapPass; diff --git a/passes/tests/flowmap/flow.v b/passes/tests/flowmap/flow.v new file mode 100644 index 000000000..297ef910e --- /dev/null +++ b/passes/tests/flowmap/flow.v @@ -0,0 +1,22 @@ +// Exact reproduction of Figure 2(a) from 10.1109/43.273754. +module top(...); + input a,b,c,d,e,f; + wire nA = b&c; + wire A = !nA; + wire nB = c|d; + wire B = !nB; + wire nC = e&f; + wire C = !nC; + wire D = A|B; + wire E = a&D; + wire nF = D&C; + wire F = !nF; + wire nG = F|B; + wire G = !nG; + wire H = a&F; + wire I = E|G; + wire J = G&C; + wire np = H&I; + output p = !np; + output q = A|J; +endmodule diff --git a/passes/tests/flowmap/flowp.v b/passes/tests/flowmap/flowp.v new file mode 100644 index 000000000..2fb40ffa4 --- /dev/null +++ b/passes/tests/flowmap/flowp.v @@ -0,0 +1,16 @@ +// Like flow.v, but results in a network identical to Figure 2(b). +module top(...); + input a,b,c,d,e,f; + wire A = b&c; + wire B = c|d; + wire C = e&f; + wire D = A|B; + wire E = a&D; + wire F = D&C; + wire G = F|B; + wire H = a&F; + wire I = E|G; + wire J = G&C; + output p = H&I; + output q = A|J; +endmodule