mirror of https://github.com/YosysHQ/yosys.git
Speed up TopoSort. The core sorting routine, TopoSort::sort_worker, is 11-12x faster. The complete sequence of building the graph and sorting it is about 2.5-3x faster. In the replace_const_cells optimization pass, for example, this yields a ~25% speedup. The end-to-end speedup of our synthesis flow is about 3%.
This commit is contained in:
parent
8fb807cd24
commit
e0042bdff7
184
kernel/utils.h
184
kernel/utils.h
|
@ -31,34 +31,30 @@ YOSYS_NAMESPACE_BEGIN
|
||||||
// A map-like container, but you can save and restore the state
|
// A map-like container, but you can save and restore the state
|
||||||
// ------------------------------------------------
|
// ------------------------------------------------
|
||||||
|
|
||||||
template<typename Key, typename T, typename OPS = hash_ops<Key>>
|
template <typename Key, typename T, typename OPS = hash_ops<Key>> struct stackmap {
|
||||||
struct stackmap
|
private:
|
||||||
{
|
std::vector<dict<Key, T *, OPS>> backup_state;
|
||||||
private:
|
|
||||||
std::vector<dict<Key, T*, OPS>> backup_state;
|
|
||||||
dict<Key, T, OPS> current_state;
|
dict<Key, T, OPS> current_state;
|
||||||
static T empty_tuple;
|
static T empty_tuple;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
stackmap() { }
|
stackmap() {}
|
||||||
stackmap(const dict<Key, T, OPS> &other) : current_state(other) { }
|
stackmap(const dict<Key, T, OPS> &other) : current_state(other) {}
|
||||||
|
|
||||||
template<typename Other>
|
template <typename Other> stackmap &operator=(const Other &other)
|
||||||
void operator=(const Other &other)
|
|
||||||
{
|
{
|
||||||
for (auto &it : current_state)
|
for (const auto &it : current_state)
|
||||||
if (!backup_state.empty() && backup_state.back().count(it.first) == 0)
|
if (!backup_state.empty() && backup_state.back().count(it.first) == 0)
|
||||||
backup_state.back()[it.first] = new T(it.second);
|
backup_state.back()[it.first] = new T(it.second);
|
||||||
current_state.clear();
|
current_state.clear();
|
||||||
|
|
||||||
for (auto &it : other)
|
for (const auto &it : other)
|
||||||
set(it.first, it.second);
|
set(it.first, it.second);
|
||||||
|
|
||||||
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool has(const Key &k)
|
bool has(const Key &k) { return current_state.count(k) != 0; }
|
||||||
{
|
|
||||||
return current_state.count(k) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void set(const Key &k, const T &v)
|
void set(const Key &k, const T &v)
|
||||||
{
|
{
|
||||||
|
@ -83,7 +79,7 @@ public:
|
||||||
|
|
||||||
void reset(const Key &k)
|
void reset(const Key &k)
|
||||||
{
|
{
|
||||||
for (int i = GetSize(backup_state)-1; i >= 0; i--)
|
for (int i = GetSize(backup_state) - 1; i >= 0; i--)
|
||||||
if (backup_state[i].count(k) != 0) {
|
if (backup_state[i].count(k) != 0) {
|
||||||
if (backup_state[i].at(k) == nullptr)
|
if (backup_state[i].at(k) == nullptr)
|
||||||
current_state.erase(k);
|
current_state.erase(k);
|
||||||
|
@ -94,20 +90,14 @@ public:
|
||||||
current_state.erase(k);
|
current_state.erase(k);
|
||||||
}
|
}
|
||||||
|
|
||||||
const dict<Key, T, OPS> &stdmap()
|
const dict<Key, T, OPS> &stdmap() { return current_state; }
|
||||||
{
|
|
||||||
return current_state;
|
|
||||||
}
|
|
||||||
|
|
||||||
void save()
|
void save() { backup_state.resize(backup_state.size() + 1); }
|
||||||
{
|
|
||||||
backup_state.resize(backup_state.size()+1);
|
|
||||||
}
|
|
||||||
|
|
||||||
void restore()
|
void restore()
|
||||||
{
|
{
|
||||||
log_assert(!backup_state.empty());
|
log_assert(!backup_state.empty());
|
||||||
for (auto &it : backup_state.back())
|
for (const auto &it : backup_state.back())
|
||||||
if (it.second != nullptr) {
|
if (it.second != nullptr) {
|
||||||
current_state[it.first] = *it.second;
|
current_state[it.first] = *it.second;
|
||||||
delete it.second;
|
delete it.second;
|
||||||
|
@ -123,46 +113,116 @@ public:
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// ------------------------------------------------
|
// ------------------------------------------------
|
||||||
// A simple class for topological sorting
|
// A simple class for topological sorting
|
||||||
// ------------------------------------------------
|
// ------------------------------------------------
|
||||||
|
|
||||||
template<typename T, typename C = std::less<T>>
|
template <typename T, typename C = std::less<T>, typename OPS = hash_ops<T>> class TopoSort
|
||||||
struct TopoSort
|
|
||||||
{
|
{
|
||||||
bool analyze_loops, found_loops;
|
public:
|
||||||
std::map<T, std::set<T, C>, C> database;
|
// We use this ordering of the edges in the adjacency sets for
|
||||||
std::set<std::set<T, C>> loops;
|
// exact compatibility with an older implementation.
|
||||||
std::vector<T> sorted;
|
struct IndirectCmp {
|
||||||
|
IndirectCmp(const std::vector<T> &nodes) : nodes_(nodes) {}
|
||||||
|
bool operator()(int a, int b) const
|
||||||
|
{
|
||||||
|
log_assert(static_cast<size_t>(a) < nodes_.size());
|
||||||
|
log_assert(static_cast<size_t>(b) < nodes_.size());
|
||||||
|
return node_cmp_(nodes_[a], nodes_[b]);
|
||||||
|
}
|
||||||
|
const C node_cmp_;
|
||||||
|
const std::vector<T> &nodes_;
|
||||||
|
};
|
||||||
|
|
||||||
TopoSort()
|
bool analyze_loops;
|
||||||
|
std::map<T, int, C> node_to_index;
|
||||||
|
std::vector<std::set<int, IndirectCmp>> edges;
|
||||||
|
std::vector<T> sorted;
|
||||||
|
std::set<std::set<T, C>> loops;
|
||||||
|
|
||||||
|
public:
|
||||||
|
TopoSort() : indirect_cmp(nodes)
|
||||||
{
|
{
|
||||||
analyze_loops = true;
|
analyze_loops = true;
|
||||||
found_loops = false;
|
found_loops = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void node(T n)
|
int node(T n)
|
||||||
{
|
{
|
||||||
if (database.count(n) == 0)
|
auto it = node_to_index.find(n);
|
||||||
database[n] = std::set<T, C>();
|
if (it == node_to_index.end()) {
|
||||||
|
int index = static_cast<size_t>(nodes.size());
|
||||||
|
node_to_index[n] = index;
|
||||||
|
nodes.push_back(n);
|
||||||
|
edges.push_back(std::set<int, IndirectCmp>(indirect_cmp));
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
return it->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
void edge(T left, T right)
|
void edge(int l_index, int r_index) { edges[r_index].insert(l_index); }
|
||||||
|
|
||||||
|
void edge(T left, T right) { edge(node(left), node(right)); }
|
||||||
|
|
||||||
|
bool has_edges(const T &node)
|
||||||
{
|
{
|
||||||
node(left);
|
auto it = node_to_index.find(node);
|
||||||
database[right].insert(left);
|
return it == node_to_index.end() || !edges[it->second].empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
void sort_worker(const T &n, std::set<T, C> &marked_cells, std::set<T, C> &active_cells, std::vector<T> &active_stack)
|
bool sort()
|
||||||
{
|
{
|
||||||
if (active_cells.count(n)) {
|
log_assert(GetSize(node_to_index) == GetSize(edges));
|
||||||
|
log_assert(GetSize(nodes) == GetSize(edges));
|
||||||
|
|
||||||
|
loops.clear();
|
||||||
|
sorted.clear();
|
||||||
|
found_loops = false;
|
||||||
|
|
||||||
|
std::vector<bool> marked_cells(edges.size(), false);
|
||||||
|
std::vector<bool> active_cells(edges.size(), false);
|
||||||
|
std::vector<int> active_stack;
|
||||||
|
|
||||||
|
marked_cells.reserve(edges.size());
|
||||||
|
sorted.reserve(edges.size());
|
||||||
|
|
||||||
|
for (const auto &it : node_to_index)
|
||||||
|
sort_worker(it.second, marked_cells, active_cells, active_stack);
|
||||||
|
|
||||||
|
log_assert(GetSize(sorted) == GetSize(nodes));
|
||||||
|
|
||||||
|
return !found_loops;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build the more expensive representation of edges for
|
||||||
|
// a few passes that use it directly.
|
||||||
|
std::map<T, std::set<T, C>, C> get_database()
|
||||||
|
{
|
||||||
|
std::map<T, std::set<T, C>, C> database;
|
||||||
|
for (size_t i = 0; i < nodes.size(); ++i) {
|
||||||
|
std::set<T, C> converted_edge_set;
|
||||||
|
for (int other_node : edges[i]) {
|
||||||
|
converted_edge_set.insert(nodes[other_node]);
|
||||||
|
}
|
||||||
|
database.emplace(nodes[i], converted_edge_set);
|
||||||
|
}
|
||||||
|
return database;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool found_loops;
|
||||||
|
std::vector<T> nodes;
|
||||||
|
const IndirectCmp indirect_cmp;
|
||||||
|
void sort_worker(const int root_index, std::vector<bool> &marked_cells, std::vector<bool> &active_cells, std::vector<int> &active_stack)
|
||||||
|
{
|
||||||
|
if (active_cells[root_index]) {
|
||||||
found_loops = true;
|
found_loops = true;
|
||||||
if (analyze_loops) {
|
if (analyze_loops) {
|
||||||
std::set<T, C> loop;
|
std::set<T, C> loop;
|
||||||
for (int i = GetSize(active_stack)-1; i >= 0; i--) {
|
for (int i = GetSize(active_stack) - 1; i >= 0; i--) {
|
||||||
loop.insert(active_stack[i]);
|
const int index = active_stack[i];
|
||||||
if (active_stack[i] == n)
|
loop.insert(nodes[index]);
|
||||||
|
if (index == root_index)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
loops.insert(loop);
|
loops.insert(loop);
|
||||||
|
@ -170,42 +230,24 @@ struct TopoSort
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (marked_cells.count(n))
|
if (marked_cells[root_index])
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (!database.at(n).empty())
|
if (!edges[root_index].empty()) {
|
||||||
{
|
|
||||||
if (analyze_loops)
|
if (analyze_loops)
|
||||||
active_stack.push_back(n);
|
active_stack.push_back(root_index);
|
||||||
active_cells.insert(n);
|
active_cells[root_index] = true;
|
||||||
|
|
||||||
for (auto &left_n : database.at(n))
|
for (int left_n : edges[root_index])
|
||||||
sort_worker(left_n, marked_cells, active_cells, active_stack);
|
sort_worker(left_n, marked_cells, active_cells, active_stack);
|
||||||
|
|
||||||
if (analyze_loops)
|
if (analyze_loops)
|
||||||
active_stack.pop_back();
|
active_stack.pop_back();
|
||||||
active_cells.erase(n);
|
active_cells[root_index] = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
marked_cells.insert(n);
|
marked_cells[root_index] = true;
|
||||||
sorted.push_back(n);
|
sorted.push_back(nodes[root_index]);
|
||||||
}
|
|
||||||
|
|
||||||
bool sort()
|
|
||||||
{
|
|
||||||
loops.clear();
|
|
||||||
sorted.clear();
|
|
||||||
found_loops = false;
|
|
||||||
|
|
||||||
std::set<T, C> marked_cells;
|
|
||||||
std::set<T, C> active_cells;
|
|
||||||
std::vector<T> active_stack;
|
|
||||||
|
|
||||||
for (auto &it : database)
|
|
||||||
sort_worker(it.first, marked_cells, active_cells, active_stack);
|
|
||||||
|
|
||||||
log_assert(GetSize(sorted) == GetSize(database));
|
|
||||||
return !found_loops;
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -582,7 +582,7 @@ struct GliftPass : public Pass {
|
||||||
for (auto cell : module->selected_cells()) {
|
for (auto cell : module->selected_cells()) {
|
||||||
RTLIL::Module *tpl = design->module(cell->type);
|
RTLIL::Module *tpl = design->module(cell->type);
|
||||||
if (tpl != nullptr) {
|
if (tpl != nullptr) {
|
||||||
if (topo_modules.database.count(tpl) == 0)
|
if (!topo_modules.has_edges(tpl))
|
||||||
worklist.push_back(tpl);
|
worklist.push_back(tpl);
|
||||||
topo_modules.edge(tpl, module);
|
topo_modules.edge(tpl, module);
|
||||||
non_top_modules.insert(cell->type);
|
non_top_modules.insert(cell->type);
|
||||||
|
|
|
@ -424,13 +424,18 @@ void replace_const_cells(RTLIL::Design *design, RTLIL::Module *module, bool cons
|
||||||
for (auto &bit : sig)
|
for (auto &bit : sig)
|
||||||
outbit_to_cell[bit].insert(cell);
|
outbit_to_cell[bit].insert(cell);
|
||||||
}
|
}
|
||||||
cells.node(cell);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto &it_right : cell_to_inbit)
|
// Build the graph for the topological sort.
|
||||||
for (auto &it_sigbit : it_right.second)
|
for (auto &it_right : cell_to_inbit) {
|
||||||
for (auto &it_left : outbit_to_cell[it_sigbit])
|
const int r_index = cells.node(it_right.first);
|
||||||
cells.edge(it_left, it_right.first);
|
for (auto &it_sigbit : it_right.second) {
|
||||||
|
for (auto &it_left : outbit_to_cell[it_sigbit]) {
|
||||||
|
const int l_index = cells.node(it_left);
|
||||||
|
cells.edge(l_index, r_index);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
cells.sort();
|
cells.sort();
|
||||||
|
|
||||||
|
|
|
@ -1032,7 +1032,7 @@ struct ShareWorker
|
||||||
}
|
}
|
||||||
|
|
||||||
bool found_scc = !toposort.sort();
|
bool found_scc = !toposort.sort();
|
||||||
topo_cell_drivers = std::move(toposort.database);
|
topo_cell_drivers = toposort.get_database();
|
||||||
|
|
||||||
if (found_scc && toposort.analyze_loops)
|
if (found_scc && toposort.analyze_loops)
|
||||||
for (auto &loop : toposort.loops) {
|
for (auto &loop : toposort.loops) {
|
||||||
|
|
|
@ -312,7 +312,7 @@ struct FlattenPass : public Pass {
|
||||||
for (auto cell : module->selected_cells()) {
|
for (auto cell : module->selected_cells()) {
|
||||||
RTLIL::Module *tpl = design->module(cell->type);
|
RTLIL::Module *tpl = design->module(cell->type);
|
||||||
if (tpl != nullptr) {
|
if (tpl != nullptr) {
|
||||||
if (topo_modules.database.count(tpl) == 0)
|
if (!topo_modules.has_edges(tpl))
|
||||||
worklist.insert(tpl);
|
worklist.insert(tpl);
|
||||||
topo_modules.edge(tpl, module);
|
topo_modules.edge(tpl, module);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue