mirror of https://github.com/YosysHQ/yosys.git
Added frequent subcircuit miner to subcircuit library
This commit is contained in:
parent
a338d1a082
commit
84cdfa55fc
|
@ -0,0 +1,2 @@
|
|||
demo
|
||||
scshell
|
|
@ -39,6 +39,7 @@ scshell: scshell.o subcircuit.o
|
|||
|
||||
test: scshell
|
||||
./scshell < test_macc22.txt
|
||||
./scshell < test_mine.txt
|
||||
perl test_perm.pl | ./scshell
|
||||
splrun test_shorts.spl | ./scshell
|
||||
splrun test_large.spl | ./scshell
|
||||
|
|
|
@ -14,20 +14,12 @@ Introduction
|
|||
|
||||
This is a library that implements a modified Ullmann Subgraph Isomorphism
|
||||
Algorithm with additional features aimed at working with coarse grain logic
|
||||
networks.
|
||||
networks. It also contains a simple frequent subcircuit mining algorithm.
|
||||
|
||||
A simple command line tool that exposes the features of the library is also
|
||||
included.
|
||||
|
||||
|
||||
Under-Construction Warning
|
||||
--------------------------
|
||||
|
||||
This work is under constructions. It is likely that they are bugs in the
|
||||
library that need fixing. Feel free to contact me at clifford@clifford.at
|
||||
if you have found a bug.
|
||||
|
||||
|
||||
C++11 Warning
|
||||
-------------
|
||||
|
||||
|
@ -97,6 +89,9 @@ Algorithm are provided by the library.
|
|||
|
||||
* Support for finding only non-overlapping matches.
|
||||
|
||||
* A simple miner for frequent subcircuits that operates on the same circuit
|
||||
description format.
|
||||
|
||||
* The public API of the library is using std::string identifiers for
|
||||
nodes, node types and ports. Internally the costly part of the
|
||||
algorithm is only using integer values, thus speeding up the
|
||||
|
@ -328,6 +323,32 @@ bool userCheckSolution(result):
|
|||
ignored. The default implementation always returns true.
|
||||
|
||||
|
||||
Mining for frequent SubCircuits
|
||||
-------------------------------
|
||||
|
||||
The solver also contains a miner for frequent subcircuits. The following code
|
||||
fragment will find all frequent subcircuits with at least minNodes nodes and
|
||||
at most maxNodes nodes that occur at least minMatches times:
|
||||
|
||||
std::vector<SubCircuit::Solver::MineResult> results;
|
||||
mySolver.mine(results, minNodes, maxNodes, minMatches);
|
||||
|
||||
The miner works by finding frequent pairs of nodes and then combining them
|
||||
into larger subcircuits. Because of this incremental strategy, the miner only
|
||||
works as expected on graphs with markAllExtern() set.
|
||||
|
||||
The mine() method has an optional fifth parameter that limits the number
|
||||
of matches counted in one graph. This can be useful when mining for circuits
|
||||
that are found in at least a number of graphs. E.g. the following call
|
||||
would find all subcircuits with 5 nodes that are found in at least 7 of
|
||||
the registered graphs:
|
||||
|
||||
mySolver.mine(results, 5, 5, 7, 1);
|
||||
|
||||
Note that this miner is not very efficient and therefore its use is not
|
||||
recommended for large circuits.
|
||||
|
||||
|
||||
Debugging
|
||||
---------
|
||||
|
||||
|
@ -420,6 +441,10 @@ The following commands can be used in scshell outside a graph ... endgraph block
|
|||
Call Solver::solve(). The <allow_overlap> must be "1" or "true"
|
||||
for true and "0" or "false" for false.
|
||||
|
||||
mine <min_nodes> <max_nodes> <min_matches> [<limit_matches_per_graph>]
|
||||
|
||||
Call Solver::mine().
|
||||
|
||||
expect <number>
|
||||
|
||||
Print all results so far since the last call to expect. Expect
|
||||
|
|
|
@ -26,6 +26,7 @@ int main()
|
|||
SubCircuit::Solver solver;
|
||||
std::map<std::string, std::set<std::string>> initialMappings;
|
||||
std::vector<SubCircuit::Solver::Result> results;
|
||||
std::vector<SubCircuit::Solver::MineResult> mineResults;
|
||||
std::vector<std::string> cmdBuffer;
|
||||
bool lastCommandExpect = false;
|
||||
|
||||
|
@ -162,6 +163,12 @@ int main()
|
|||
continue;
|
||||
}
|
||||
|
||||
if (cmdBuffer[0] == "mine" && 4 <= cmdBuffer.size() && cmdBuffer.size() <= 5) {
|
||||
solver.mine(mineResults, atoi(cmdBuffer[1].c_str()), atoi(cmdBuffer[2].c_str()),
|
||||
atoi(cmdBuffer[3].c_str()), cmdBuffer.size() == 5 ? atoi(cmdBuffer[4].c_str()) : -1);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cmdBuffer[0] == "clearoverlap" && cmdBuffer.size() == 1) {
|
||||
solver.clearOverlapHistory();
|
||||
continue;
|
||||
|
@ -179,7 +186,7 @@ int main()
|
|||
|
||||
if (cmdBuffer[0] == "expect" && cmdBuffer.size() == 2) {
|
||||
int expected = atoi(cmdBuffer[1].c_str());
|
||||
printf("\n-- Expected %d, Got %d --\n", expected, int(results.size()));
|
||||
printf("\n-- Expected %d, Got %d --\n", expected, int(results.size()) + int(mineResults.size()));
|
||||
for (int i = 0; i < int(results.size()); i++) {
|
||||
printf("\nMatch #%d: (%s in %s)\n", i, results[i].needleGraphId.c_str(), results[i].haystackGraphId.c_str());
|
||||
for (const auto &it : results[i].mappings) {
|
||||
|
@ -189,9 +196,18 @@ int main()
|
|||
printf("\n");
|
||||
}
|
||||
}
|
||||
for (auto &result : mineResults) {
|
||||
printf("\nFrequent SubCircuit with %d nodes and %d matches:\n", int(result.nodes.size()), result.totalMatchesAfterLimits);
|
||||
printf(" primary match in %s:", result.graphId.c_str());
|
||||
for (auto &node : result.nodes)
|
||||
printf(" %s", node.nodeId.c_str());
|
||||
printf("\n");
|
||||
for (auto &it : result.matchesPerGraph)
|
||||
printf(" matches in %s: %d\n", it.first.c_str(), it.second);
|
||||
}
|
||||
printf("\n");
|
||||
if (expected != int(results.size())) {
|
||||
printf("^^ expected %d, Got %d ^^\n\n", expected, int(results.size()));
|
||||
if (expected != int(results.size()) + int(mineResults.size())) {
|
||||
printf("^^ expected %d, Got %d ^^\n\n", expected, int(results.size()) + int(mineResults.size()));
|
||||
printf(" +----------------+\n");
|
||||
printf(" | \\|/ ____ \\|/ |\n");
|
||||
printf(" | \"@'/ ,. \\`@\" |\n");
|
||||
|
@ -202,6 +218,7 @@ int main()
|
|||
return 1;
|
||||
}
|
||||
results.clear();
|
||||
mineResults.clear();
|
||||
lastCommandExpect = true;
|
||||
continue;
|
||||
}
|
||||
|
@ -215,7 +232,7 @@ int main()
|
|||
delete graph;
|
||||
|
||||
if (!lastCommandExpect) {
|
||||
printf("\n-- Got %d --\n", int(results.size()));
|
||||
printf("\n-- Got %d --\n", int(results.size()) + int(mineResults.size()));
|
||||
for (int i = 0; i < int(results.size()); i++) {
|
||||
printf("\nMatch #%d: (%s in %s)\n", i, results[i].needleGraphId.c_str(), results[i].haystackGraphId.c_str());
|
||||
for (const auto &it : results[i].mappings) {
|
||||
|
@ -225,6 +242,15 @@ int main()
|
|||
printf("\n");
|
||||
}
|
||||
}
|
||||
for (auto &result : mineResults) {
|
||||
printf("\nFrequent SubCircuit with %d nodes and %d matches:\n", int(result.nodes.size()), result.totalMatchesAfterLimits);
|
||||
printf(" primary match in %s:", result.graphId.c_str());
|
||||
for (auto &node : result.nodes)
|
||||
printf(" %s", node.nodeId.c_str());
|
||||
printf("\n");
|
||||
for (auto &it : result.matchesPerGraph)
|
||||
printf(" matches in %s: %d\n", it.first.c_str(), it.second);
|
||||
}
|
||||
} else
|
||||
printf("PASSED.\n");
|
||||
|
||||
|
|
|
@ -46,6 +46,42 @@ static std::string stringf(const char *fmt, ...)
|
|||
return string;
|
||||
}
|
||||
|
||||
// Construct a graph that contains only the nodes of `other` whose ids are
// listed in `otherNodes`, together with every edge those nodes are attached to.
//
// The node named otherNodes[i] becomes node index i in the new graph. Each
// copied edge keeps only the port bits that belong to copied nodes; its
// constValue and isExtern attributes are taken over unchanged. Every id in
// `otherNodes` must exist in `other` (checked with assert).
SubCircuit::Graph::Graph(const Graph &other, const std::vector<std::string> &otherNodes)
{
	allExtern = other.allExtern;

	// node index in `other` -> node index in this graph
	std::map<int, int> other2this;
	for (int i = 0; i < int(otherNodes.size()); i++) {
		assert(other.nodeMap.count(otherNodes[i]) > 0);
		other2this[other.nodeMap.at(otherNodes[i])] = i;
		nodeMap[otherNodes[i]] = i;
	}

	// edge index in `other` -> edge index in this graph, numbered 0..N-1 in
	// order of first use by one of the copied nodes
	std::map<int, int> edges2this;
	for (auto &i1 : other2this)
	for (auto &i2 : other.nodes[i1.first].ports)
	for (auto &i3 : i2.bits)
		if (edges2this.count(i3.edgeIdx) == 0) {
			// Read the size *before* operator[] inserts the new key. Writing
			// "edges2this[k] = edges2this.size()" leaves the evaluation order
			// unspecified before C++17 and can number the edges 1..N, which
			// then indexes past the end of `edges` below.
			const int nextEdgeIdx = int(edges2this.size());
			edges2this[i3.edgeIdx] = nextEdgeIdx;
		}

	edges.resize(edges2this.size());
	for (auto &it : edges2this) {
		// keep only the connections to nodes that are part of this graph
		for (auto &bit : other.edges[it.first].portBits)
			if (other2this.count(bit.nodeIdx) > 0)
				edges[it.second].portBits.insert(BitRef(other2this.at(bit.nodeIdx), bit.portIdx, bit.bitIdx));
		edges[it.second].constValue = other.edges[it.first].constValue;
		edges[it.second].isExtern = other.edges[it.first].isExtern;
	}

	nodes.resize(other2this.size());
	for (auto &it : other2this) {
		nodes[it.second] = other.nodes[it.first];
		// the copied node still carries edge indices of `other`; translate them
		for (auto &i2 : nodes[it.second].ports)
		for (auto &i3 : i2.bits)
			i3.edgeIdx = edges2this.at(i3.edgeIdx);
	}
}
|
||||
|
||||
bool SubCircuit::Graph::BitRef::operator < (const BitRef &other) const
|
||||
{
|
||||
if (nodeIdx != other.nodeIdx)
|
||||
|
@ -1072,6 +1108,197 @@ class SubCircuit::SolverWorker
|
|||
}
|
||||
}
|
||||
|
||||
// additional data structures and functions for mining
|
||||
|
||||
// A set of node indices that all belong to the graph named graphId.
struct NodeSet {
	std::string graphId;
	std::set<int> nodes;

	// Create a set holding node1 and node2 (a single entry if both are equal).
	NodeSet(std::string graphId, int node1, int node2) : graphId(graphId) {
		nodes.insert(node1);
		nodes.insert(node2);
	}

	// Create a set from a list of node indices; duplicates collapse.
	NodeSet(std::string graphId, const std::vector<int> &nodes) : graphId(graphId), nodes(nodes.begin(), nodes.end()) {
	}

	// Add all nodes of `other` to this set. Both sets must be from the same graph.
	void extend(const NodeSet &other) {
		assert(this->graphId == other.graphId);
		nodes.insert(other.nodes.begin(), other.nodes.end());
	}

	// Return how many nodes extend(other) would add to this set. Returns 0
	// when the sets are from different graphs or have no node in common.
	int extendCandidate(const NodeSet &other) const {
		if (graphId != other.graphId)
			return 0;

		int sharedNodes = 0;
		for (int node : other.nodes)
			if (nodes.count(node) != 0)
				sharedNodes++;

		if (sharedNodes == 0)
			return 0;
		return int(other.nodes.size()) - sharedNodes;
	}

	// Order by graph id first and by node set second.
	bool operator <(const NodeSet &other) const {
		if (graphId == other.graphId)
			return nodes < other.nodes;
		return graphId < other.graphId;
	}
};
|
||||
|
||||
// Search for `needle` in every registered graph and append all matches to
// `results`. Overlapping matches are allowed and the number of solutions is
// not limited. Every registered graph must have allExtern set (asserted).
void solveForMining(std::vector<Solver::Result> &results, const GraphData &needle)
{
	// silence the solver for the duration of the search, restore afterwards
	const bool savedVerbose = verbose;
	verbose = false;

	for (auto &entry : graphData)
	{
		GraphData &haystack = entry.second;
		assert(haystack.graph.allExtern);

		// no user-supplied restrictions on which nodes may be mapped
		std::map<std::string, std::set<std::string>> noInitialMappings;
		std::vector<std::set<int>> enumerationMatrix;
		generateEnumerationMatrix(enumerationMatrix, needle, haystack, noInitialMappings);

		haystack.usedNodes.resize(haystack.graph.nodes.size());
		ullmannRecursion(results, enumerationMatrix, 0, needle, haystack, true, -1);
	}

	verbose = savedVerbose;
}
|
||||
|
||||
// Test whether the nodes in `testSet` form a frequent subcircuit.
//
// The nodes of `testSet` are copied out of `graph` (registered as `graphId`)
// into a needle graph, which is then searched for in all registered graphs.
// Each distinct node set that matches and is not yet in `usedSets` is added to
// `usedSets` and counted once.
//
//   results              - receives a MineResult if the set is frequent and
//                          has at least minNodes nodes
//   usedSets             - node sets already attributed to a candidate; updated
//                          even when the candidate turns out not to be frequent
//   nextPool             - receives the set of newly found matches if the
//                          candidate is frequent
//   limitMatchesPerGraph - at most this many matches per graph count towards
//                          the total; a negative value disables the limit
//
// Returns the number of counted matches, or 0 if it is below minMatches.
int testForMining(std::vector<Solver::MineResult> &results, std::set<NodeSet> &usedSets, std::vector<std::set<NodeSet>> &nextPool, NodeSet &testSet,
		const std::string &graphId, const Graph &graph, int minNodes, int minMatches, int limitMatchesPerGraph)
{
	// build the needle graph from the candidate nodes
	GraphData needle;
	std::vector<std::string> needle_nodes;
	for (int nodeIdx : testSet.nodes)
		needle_nodes.push_back(graph.nodes[nodeIdx].nodeId);
	needle.graph = Graph(graph, needle_nodes);
	diCache.add(needle.graph, needle.adjMatrix, graphId, userSolver);

	std::vector<Solver::Result> ullmannResults;
	solveForMining(ullmannResults, needle);

	int matches = 0;
	std::map<std::string, int> matchesPerGraph;
	std::set<NodeSet> thisNodeSetSet;

	for (auto &it : ullmannResults)
	{
		// translate the match into the set of haystack node indices it covers
		std::vector<int> resultNodes;
		for (auto &i2 : it.mappings)
			resultNodes.push_back(graphData[it.haystackGraphId].graph.nodeMap[i2.second.haystackNodeId]);
		NodeSet resultSet(it.haystackGraphId, resultNodes);

		// the same node set may be reported more than once; count it only once
		if (usedSets.count(resultSet) > 0) {
			assert(thisNodeSetSet.count(resultSet) > 0);
			continue;
		}
		usedSets.insert(resultSet);
		thisNodeSetSet.insert(resultSet);

		// The per-graph counter is incremented before the comparison, so the
		// limit has to be tested with "<=": with "<" a limit of N would only
		// let N-1 matches count, and a limit of 1 would never count anything.
		int &graphMatches = matchesPerGraph[it.haystackGraphId];
		graphMatches++;
		if (limitMatchesPerGraph < 0 || graphMatches <= limitMatchesPerGraph)
			matches++;
	}

	if (matches < minMatches)
		return 0;

	if (minNodes <= int(testSet.nodes.size()))
	{
		Solver::MineResult result;
		result.graphId = graphId;
		result.totalMatchesAfterLimits = matches;
		result.matchesPerGraph = matchesPerGraph;
		for (int nodeIdx : testSet.nodes) {
			Solver::MineResultNode resultNode;
			resultNode.nodeId = graph.nodes[nodeIdx].nodeId;
			resultNode.userData = graph.nodes[nodeIdx].userData;
			result.nodes.push_back(resultNode);
		}
		results.push_back(result);
	}

	nextPool.push_back(thisNodeSetSet);
	return matches;
}
|
||||
|
||||
// Seed the mining pool: test every pair of adjacent nodes in every registered
// graph with testForMining(). Frequent pairs end up in `nodePairs` (and in
// `results` if minNodes allows two-node results).
void findNodePairs(std::vector<Solver::MineResult> &results, std::vector<std::set<NodeSet>> &nodePairs, int minNodes, int minMatches, int limitMatchesPerGraph)
{
	// pairs that were already seen as a match of an earlier candidate
	std::set<NodeSet> usedPairs;

	if (verbose)
		printf("\nFind frequent node pairs:\n");

	for (auto &graphEntry : graphData)
	{
		const std::string &graphId = graphEntry.first;
		const auto &graph = graphEntry.second.graph;

		for (int node1 = 0; node1 < int(graph.nodes.size()); node1++)
		{
			for (auto &adjEntry : graphEntry.second.adjMatrix.at(node1))
			{
				const int node2 = adjEntry.first;
				NodeSet pair(graphId, node1, node2);

				if (usedPairs.count(pair) != 0)
					continue;

				const int matches = testForMining(results, usedPairs, nodePairs, pair, graphId, graph, minNodes, minMatches, limitMatchesPerGraph);

				if (matches > 0 && verbose)
					printf("Pair %s[%s,%s] -> %d\n", graphId.c_str(), graph.nodes[node1].nodeId.c_str(),
							graph.nodes[node2].nodeId.c_str(), matches);
			}
		}
	}
}
|
||||
|
||||
// Grow the candidate pool by one step: merge every two sets of the same graph
// in `pool` whose union adds exactly `increment` nodes, test each distinct
// union with testForMining() and replace `pool` by the frequent ones.
// `oldSetSize` is only used for the verbose message.
void findNextPool(std::vector<Solver::MineResult> &results, std::vector<std::set<NodeSet>> &pool,
		int oldSetSize, int increment, int minNodes, int minMatches, int limitMatchesPerGraph)
{
	std::vector<std::set<NodeSet>> nextPool;

	// flatten the pool into one list of sets per graph
	std::map<std::string, std::vector<const NodeSet*>> poolPerGraph;
	for (auto &group : pool)
		for (auto &nodeSet : group)
			poolPerGraph[nodeSet.graphId].push_back(&nodeSet);

	if (verbose)
		printf("\nFind frequent subcircuits of size %d using increment %d:\n", oldSetSize+increment, increment);

	std::set<NodeSet> usedSets;
	for (auto &perGraph : poolPerGraph)
	{
		const std::vector<const NodeSet*> &sets = perGraph.second;
		const int numSets = int(sets.size());

		for (int idx1 = 0; idx1 < numSets; idx1++)
		{
			for (int idx2 = idx1; idx2 < numSets; idx2++)
			{
				const NodeSet &base = *sets[idx1];
				const NodeSet &addition = *sets[idx2];

				if (base.extendCandidate(addition) != increment)
					continue;

				NodeSet mergedSet = base;
				mergedSet.extend(addition);

				if (usedSets.count(mergedSet) != 0)
					continue;

				const std::string &graphId = perGraph.first;
				const auto &graph = graphData[perGraph.first].graph;

				const int matches = testForMining(results, usedSets, nextPool, mergedSet, graphId, graph, minNodes, minMatches, limitMatchesPerGraph);

				if (!verbose)
					continue;

				printf("Set %s[", graphId.c_str());
				bool first = true;
				for (int nodeIdx : mergedSet.nodes) {
					printf("%s%s", first ? "" : ",", graph.nodes[nodeIdx].nodeId.c_str());
					first = false;
				}
				printf("] -> %d\n", matches);
			}
		}
	}

	pool.swap(nextPool);
}
|
||||
|
||||
// interface to the public Solver class
|
||||
|
||||
protected:
|
||||
|
@ -1151,6 +1378,25 @@ protected:
|
|||
ullmannRecursion(results, enumerationMatrix, 0, needle, haystack, allowOverlap, maxSolutions > 0 ? results.size() + maxSolutions : -1);
|
||||
}
|
||||
|
||||
// Mine all registered graphs for frequent subcircuits and append them to
// `results`. Starts from frequent node pairs and repeatedly merges the
// current pool into larger sets until maxNodes is reached.
void mine(std::vector<Solver::MineResult> &results, int minNodes, int maxNodes, int minMatches, int limitMatchesPerGraph)
{
	std::vector<std::set<NodeSet>> pool;
	findNodePairs(results, pool, minNodes, minMatches, limitMatchesPerGraph);

	int nodeSetSize = 2;
	while (nodeSetSize < maxNodes)
	{
		int increment;
		if (nodeSetSize >= minNodes) {
			// result sizes reached: grow one node at a time
			increment = 1;
		} else {
			// below minNodes: grow by nodeSetSize-1, but do not jump past minNodes
			const int fullStep = nodeSetSize - 1;
			const int stepToMinNodes = minNodes - nodeSetSize;
			increment = (nodeSetSize + fullStep >= minNodes) ? stepToMinNodes : fullStep;
		}

		findNextPool(results, pool, nodeSetSize, increment, minNodes, minMatches, limitMatchesPerGraph);
		nodeSetSize += increment;
	}
}
|
||||
|
||||
void clearOverlapHistory()
|
||||
{
|
||||
for (auto &it : graphData)
|
||||
|
@ -1252,6 +1498,11 @@ void SubCircuit::Solver::solve(std::vector<Result> &results, std::string needleG
|
|||
worker->solve(results, needleGraphId, haystackGraphId, initialMappings, allowOverlap, maxSolutions);
|
||||
}
|
||||
|
||||
// Public entry point of the frequent subcircuit miner: forwards all arguments
// unchanged to SolverWorker::mine(), which appends its findings to `results`.
void SubCircuit::Solver::mine(std::vector<MineResult> &results, int minNodes, int maxNodes, int minMatches, int limitMatchesPerGraph)
|
||||
{
|
||||
worker->mine(results, minNodes, maxNodes, minMatches, limitMatchesPerGraph);
|
||||
}
|
||||
|
||||
void SubCircuit::Solver::clearOverlapHistory()
|
||||
{
|
||||
worker->clearOverlapHistory();
|
||||
|
|
|
@ -73,6 +73,7 @@ namespace SubCircuit
|
|||
|
||||
public:
|
||||
Graph() : allExtern(false) { };
|
||||
Graph(const Graph &other, const std::vector<std::string> &otherNodes);
|
||||
|
||||
void createNode(std::string nodeId, std::string typeId, void *userData = NULL);
|
||||
void createPort(std::string nodeId, std::string portId, int width = 1, int minWidth = -1);
|
||||
|
@ -100,6 +101,17 @@ namespace SubCircuit
|
|||
std::map<std::string, ResultNodeMapping> mappings;
|
||||
};
|
||||
|
||||
// One node of a subcircuit reported by the miner.
struct MineResultNode {
|
||||
// id of the node in the graph the subcircuit was taken from
std::string nodeId;
|
||||
// userData pointer copied from that graph node
void *userData;
|
||||
};
|
||||
// One frequent subcircuit reported by Solver::mine().
struct MineResult {
|
||||
// id of the graph that the nodes listed in `nodes` belong to
std::string graphId;
|
||||
// number of matches counted after applying the per-graph match limit
int totalMatchesAfterLimits;
|
||||
// number of distinct matches found per graph, keyed by graph id
std::map<std::string, int> matchesPerGraph;
|
||||
// the nodes that make up the subcircuit in graph `graphId`
std::vector<MineResultNode> nodes;
|
||||
};
|
||||
|
||||
private:
|
||||
SolverWorker *worker;
|
||||
|
||||
|
@ -131,6 +143,9 @@ namespace SubCircuit
|
|||
void solve(std::vector<Result> &results, std::string needleGraphId, std::string haystackGraphId, bool allowOverlap = true, int maxSolutions = -1);
|
||||
void solve(std::vector<Result> &results, std::string needleGraphId, std::string haystackGraphId,
|
||||
const std::map<std::string, std::set<std::string>> &initialMapping, bool allowOverlap = true, int maxSolutions = -1);
|
||||
|
||||
void mine(std::vector<MineResult> &results, int minNodes, int maxNodes, int minMatches, int limitMatchesPerGraph = -1);
|
||||
|
||||
void clearOverlapHistory();
|
||||
void clearConfig();
|
||||
};
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
|
||||
# verbose
|
||||
|
||||
graph macc22
|
||||
node mul_1 mul A 32 B 32 Y 32
|
||||
node mul_2 mul A 32 B 32 Y 32
|
||||
node add_1 add A 32 B 32 Y 32
|
||||
connect mul_1 Y add_1 A
|
||||
connect mul_2 Y add_1 B
|
||||
allextern
|
||||
endgraph
|
||||
|
||||
graph macc4x2
|
||||
node mul_1 mul A 32 B 32 Y 32
|
||||
node mul_2 mul A 32 B 32 Y 32
|
||||
node mul_3 mul A 32 B 32 Y 32
|
||||
node mul_4 mul A 32 B 32 Y 32
|
||||
node add_1 add A 32 B 32 Y 32
|
||||
node add_2 add A 32 B 32 Y 32
|
||||
node add_3 add A 32 B 32 Y 32
|
||||
connect mul_1 Y add_1 A
|
||||
connect mul_2 Y add_1 B
|
||||
connect mul_3 Y add_2 A
|
||||
connect mul_4 Y add_2 B
|
||||
connect add_1 Y add_3 A
|
||||
connect add_2 Y add_3 B
|
||||
allextern
|
||||
endgraph
|
||||
|
||||
swapgroup mul A B
|
||||
swapgroup add A B
|
||||
|
||||
mine 2 10 2
|
||||
expect 5
|
||||
|
Loading…
Reference in New Issue