Coloquinte tool included in Coriolis

This commit is contained in:
Gabriel Gouvine 2015-04-08 10:45:11 +02:00
parent 71e1c188ea
commit a1256175cf
30 changed files with 45091 additions and 0 deletions

16
coloquinte/CMakeLists.txt Normal file
View File

@ -0,0 +1,16 @@
# -*- explicit-buffer-name: "CMakeLists.txt<Coloquinte/src>" -*-

# Top-level build description of the Coloquinte library.
#
# cmake_minimum_required() has to be the first command: it establishes the
# policy defaults that project() and everything after it rely on.
cmake_minimum_required(VERSION 2.4.0)
project(Coloquinte)

# Per-configuration compiler flags. They are forced into the cache, so they
# replace whatever project() initialised and any value given on the command
# line. Only the Release flags enable OpenMP.
set(CMAKE_C_FLAGS_DEBUG " -Wall -O2 -g" CACHE STRING "C Compiler Debug options." FORCE)
set(CMAKE_C_FLAGS_RELEASE " -Wall -O3 -fopenmp" CACHE STRING "C Compiler Release options." FORCE)
set(CMAKE_CXX_FLAGS_DEBUG " -Wall -Og -g" CACHE STRING "C++ Compiler Debug options." FORCE)
set(CMAKE_CXX_FLAGS_RELEASE " -Wall -O3 -fopenmp" CACHE STRING "C++ Compiler Release options." FORCE)

# The sources are written in C++11.
add_definitions(-std=c++11)

add_subdirectory(cmake_modules)
add_subdirectory(src)

View File

@ -0,0 +1,2 @@
# Ship the find-module so that dependent projects can locate an installed Coloquinte.
install(
  FILES FindColoquinte.cmake
  DESTINATION share/cmake/Modules
)

View File

@ -0,0 +1,129 @@
# - Find the Coloquinte includes and libraries.
# The following variables are set if Coloquinte is found. If Coloquinte is not
# found, COLOQUINTE_FOUND is set to false.
# COLOQUINTE_FOUND - True when both the Coloquinte include directory and library are found.
# COLOQUINTE_INCLUDE_DIR - The path to where the Coloquinte include files are.
# COLOQUINTE_LIBRARIES - The path to where the Coloquinte library files are.
# =============================================================================
# Duplicated from <bootstrap> as <COLOQUINTE> is standalone.
#
# Set up <PROJECT>_DIR_SEARCH, the list of directories where includes &
# libraries are looked for.
# Usage: setup_search_dir(<PROJECT>)
#
# The environment variables <PROJECT>_TOP and <PROJECT>_USER_TOP, when set to a
# non-empty value, are each inserted (prefixed by ${DESTDIR}) at the front of
# the list; <PROJECT>_USER_TOP is inserted last and therefore comes first.
# Duplicates are then removed and the resulting list is printed.
#
# This is a macro because <PROJECT>_DIR_SEARCH is set in the caller's scope.
#
macro(setup_search_dir project)
  if( NOT("$ENV{${project}_TOP}" STREQUAL "") )
    message("-- ${project}_TOP is set to $ENV{${project}_TOP}")
    list(INSERT ${project}_DIR_SEARCH 0 "${DESTDIR}$ENV{${project}_TOP}")
  endif()
  if( NOT("$ENV{${project}_USER_TOP}" STREQUAL "") )
    message("-- ${project}_USER_TOP is set to $ENV{${project}_USER_TOP}")
    list(INSERT ${project}_DIR_SEARCH 0 "${DESTDIR}$ENV{${project}_USER_TOP}")
  endif()
  # When neither environment variable is set the list may not exist at all;
  # older CMake releases reject list(REMOVE_DUPLICATES) on a missing list.
  if(DEFINED ${project}_DIR_SEARCH)
    list(REMOVE_DUPLICATES ${project}_DIR_SEARCH)
  endif()
  message("-- Components of ${project}_DIR_SEARCH:")
  foreach(PATH ${${project}_DIR_SEARCH})
    message("-- ${PATH}")
  endforeach()
endmacro()
#
# Build <PROJECT>_INCLUDE_DIR & <PROJECT>_LIBRARIES and set up <PROJECT>_FOUND.
# Usage: set_libraries_path(<PROJECT> <library>)
#
# Reads <library>_LIBRARY_PATH and <library>_INCLUDE_PATH. When the library
# path is set, it is prepended to <PROJECT>_LIBRARIES and the include path is
# copied to <PROJECT>_INCLUDE_DIR. <PROJECT>_FOUND is "YES" only when both
# paths are set, "NOTFOUND" otherwise.
#
# May be used any number of times on the same <PROJECT> to create a list of
# <library>; note that <PROJECT>_FOUND only reflects the last call.
#
macro(set_libraries_path configname library)
  if(${library}_LIBRARY_PATH)
    set(${configname}_INCLUDE_DIR ${${library}_INCLUDE_PATH})
    set(${configname}_LIBRARIES ${${library}_LIBRARY_PATH} ${${configname}_LIBRARIES})
    mark_as_advanced(${configname}_INCLUDE_DIR ${configname}_LIBRARIES)
  endif()
  if(${library}_LIBRARY_PATH AND ${library}_INCLUDE_PATH)
    set(${configname}_FOUND "YES")
  else()
    set(${configname}_FOUND "NOTFOUND")
  endif()
endmacro()
#
# Checks if a set of libraries has been found, could be blocking or not.
# Usage: hurricane_check_libraries(<PROJECT> <REQUIRED>)
#
# If <REQUIRED> is omitted, it is taken from the value of
# <PROJECT>_FIND_REQUIRED.
#
# When <PROJECT>_FOUND is true and <PROJECT>_FIND_QUIETLY is false, the content
# of <PROJECT>_LIBRARIES is printed. When <PROJECT>_FOUND is false and the
# package is required, configuration stops with a fatal error that includes
# <PROJECT>_DIR_MESSAGE.
#
# Sets the variable REQUIRED in the caller's scope (this is a macro).
#
macro(hurricane_check_libraries)
  # Inside a macro, ARGC and ARGV<n> are textual substitutions and not
  # variables: they are only expanded when written as ${ARGC} / ${ARGV<n>}.
  if(${ARGC} LESS 2)
    # Use the value of <PROJECT>_FIND_REQUIRED, not its name.
    set(REQUIRED ${${ARGV0}_FIND_REQUIRED})
  else()
    set(REQUIRED ${ARGV1})
  endif()
  if(${ARGV0}_FOUND)
    if(NOT ${ARGV0}_FIND_QUIETLY)
      message(STATUS "Found ${ARGV0}:")
      foreach(library ${${ARGV0}_LIBRARIES})
        message(STATUS " ${library}")
      endforeach()
    endif()
  else()
    if(REQUIRED)
      message(FATAL_ERROR "${ARGV0} was not found. ${${ARGV0}_DIR_MESSAGE}")
    endif()
  endif()
endmacro()
# End of <bootstrap> duplication.
# =============================================================================
#find_package(Eigen3 REQUIRED)
set(COLOQUINTE_INCLUDE_PATH_DESCRIPTION "directory containing the Coloquinte include files. E.g /usr/local/include/coloquinte or /soc/coriolis/include/Coloquinte")
set(COLOQUINTE_DIR_MESSAGE "Set the COLOQUINTE_INCLUDE_DIR cmake cache entry to the ${COLOQUINTE_INCLUDE_PATH_DESCRIPTION}")

# don't even bother under WIN32
if(UNIX)
  # Coloquinte is searched in the directories designated by the IMPORTEDS_TOP
  # and IMPORTEDS_USER_TOP environment variables.
  setup_search_dir(IMPORTEDS)
  message(STATUS "IMPORTEDS_DIR_SEARCH: ${IMPORTEDS_DIR_SEARCH}")

  #
  # Look for an installation.
  #
  find_path(COLOQUINTE_INCLUDE_PATH
    NAMES coloquinte/circuit.hxx
    # Look in other places.
    PATHS ${IMPORTEDS_DIR_SEARCH}
    PATH_SUFFIXES include
    # Help the user find it if we cannot.
    DOC "The ${COLOQUINTE_INCLUDE_PATH_DESCRIPTION}"
  )

  find_library(COLOQUINTE_LIBRARY_PATH
    NAMES coloquinte
    PATHS ${IMPORTEDS_DIR_SEARCH}
    PATH_SUFFIXES lib${LIB_SUFFIX}
    # Help the user find it if we cannot.
    DOC "The Coloquinte library file"
  )

  # The found libraries are always reported, whatever the caller asked for.
  set(COLOQUINTE_FIND_QUIETLY FALSE)
  set_libraries_path(COLOQUINTE COLOQUINTE)
  hurricane_check_libraries(COLOQUINTE)

  if(COLOQUINTE_FOUND)
    add_definitions( -DHAVE_COLOQUINTE )
  endif()

  message(STATUS "COLOQUINTE_INCLUDE_PATH: ${COLOQUINTE_INCLUDE_PATH}")
  # The cache entry filled by find_library() is COLOQUINTE_LIBRARY_PATH.
  message(STATUS "COLOQUINTE_LIBRARY_PATH: ${COLOQUINTE_LIBRARY_PATH}")
endif()

View File

@ -0,0 +1,35 @@
# -*- explicit-buffer-name: "CMakeLists.txt<Coloquinte/src>" -*-

# Public headers, installed under include/coloquinte.
set(includes
  coloquinte/circuit.hxx
  coloquinte/circuit_helper.hxx
  coloquinte/common.hxx
  coloquinte/netlist.hxx
  coloquinte/solvers.hxx
  coloquinte/rough_legalizers.hxx
  coloquinte/legalizer.hxx
  coloquinte/detailed.hxx
  coloquinte/topologies.hxx
  coloquinte/optimization_subproblems.hxx
  coloquinte/piecewise_linear.hxx
)

# Translation units compiled into the coloquinte library.
set(cpps
  circuit.cxx
  checkers.cxx
  rough_legalizers.cxx
  solvers.cxx
  optimization_subproblems.cxx
  piecewise_linear.cxx
  orientation.cxx
  detailed.cxx
  cell_swapping.cxx
  MCF_opt.cxx
  row_opt.cxx
  topologies.cxx
  lookup_table.cxx
  legalizer.cxx
)

add_library(coloquinte ${cpps})

install(TARGETS coloquinte DESTINATION lib${LIB_SUFFIX})
install(FILES ${includes} DESTINATION include/coloquinte)

147
coloquinte/src/MCF_opt.cxx Normal file
View File

@ -0,0 +1,147 @@
#include "coloquinte/detailed.hxx"
#include "coloquinte/circuit_helper.hxx"
#include <lemon/smart_graph.h>
#include <lemon/network_simplex.h>
#include <cassert>
namespace coloquinte{
namespace dp{
// Optimizes the x coordinates of the x-movable cells at fixed topology, i.e.
// aligns the pins to minimize the wirelength while keeping the neighbour
// relations and row boundaries stored in pl.
//
// The problem is solved as a minimum cost flow problem with the Lemon network
// simplex solver from the Coin-OR initiative, which should scale well up to
// hundreds of thousands of cells. The new positions are read back from the
// node potentials of the solution.
//
// circuit: the netlist (cells, nets and pins).
// pl:      the placement; pl.plt_.positions_[c].x_ is overwritten for every
//          x-movable cell and pl.selfcheck() is called at the end.
//
// Calls abort() if the solver does not report an optimal solution.
void optimize_on_topology_HPWL(netlist const & circuit, detailed_placement & pl){
    using namespace lemon;
    DIGRAPH_TYPEDEFS(SmartDigraph);

    // Create a graph with the cells and bounds of the nets as nodes
    SmartDigraph g;

    // One node per x-movable cell; other cells keep a default-constructed Node
    std::vector<Node> cell_nodes(circuit.cell_cnt());
    for(index_t i=0; i<circuit.cell_cnt(); ++i){
        if((circuit.get_cell(i).attributes & XMovable) != 0)
            cell_nodes[i] = g.addNode();
    }

    // Two nodes (lower and upper bound) for every net with at least one pin
    std::vector<Node> Lnet_nodes(circuit.net_cnt()), Unet_nodes(circuit.net_cnt());
    for(index_t i=0; i<circuit.net_cnt(); ++i){
        if(circuit.get_net(i).pin_cnt > 0){
            Lnet_nodes[i] = g.addNode();
            Unet_nodes[i] = g.addNode();
        }
    }

    // A single node used as the reference for every fixed position
    Node fixed = g.addNode();

    typedef std::pair<SmartDigraph::Arc, int_t> arc_pair;
    typedef std::pair<SmartDigraph::Node, int_t> node_pair;

    // The arcs corresponding to constraints of the original problem
    std::vector<arc_pair> constraint_arcs;

    // Now we add every positional constraint, which becomes an arc in the min-cost flow problem
    for(index_t i=0; i<circuit.cell_cnt(); ++i){ // The cells
        for(index_t l = pl.neighbours_limits_[i]; l < pl.neighbours_limits_[i+1]; ++l){
            index_t oi = pl.neighbours_[l].second;
            if(oi == null_ind) continue;

            if((circuit.get_cell(i).attributes & XMovable) != 0 and (circuit.get_cell(oi).attributes & XMovable) != 0){
                // Two movable cells: OK
                auto A = g.addArc(cell_nodes[oi], cell_nodes[i]);
                constraint_arcs.push_back(arc_pair(A, -circuit.get_cell(i).size.x_));
            }
            else if((circuit.get_cell( i).attributes & XMovable) != 0){
                // The cell i is movable and constrained on the right
                auto A = g.addArc(fixed, cell_nodes[i]);
                constraint_arcs.push_back(arc_pair(A, pl.plt_.positions_[oi].x_ - circuit.get_cell(i).size.x_));
            }
            else if((circuit.get_cell(oi).attributes & XMovable) != 0){
                // The cell oi is movable and constrained on the left
                auto A = g.addArc(cell_nodes[oi], fixed);
                constraint_arcs.push_back(arc_pair(A, -pl.plt_.positions_[i].x_ - circuit.get_cell(i).size.x_));
            }
        }
    }

    for(index_t r=0; r<pl.row_cnt(); ++r){ // The left boundary of each row
        index_t lc = pl.row_first_cells_[r];
        if(lc != null_ind and (circuit.get_cell(lc).attributes & XMovable) != 0){
            auto Al = g.addArc(cell_nodes[lc], fixed);
            constraint_arcs.push_back(arc_pair(Al, -pl.min_x_));
        }
    }
    for(index_t r=0; r<pl.row_cnt(); ++r){ // The right boundary of each row
        index_t rc = pl.row_last_cells_[r];
        if(rc != null_ind and (circuit.get_cell(rc).attributes & XMovable) != 0){
            auto Ar = g.addArc(fixed, cell_nodes[rc]);
            constraint_arcs.push_back(arc_pair(Ar, pl.max_x_ - circuit.get_cell(rc).size.x_));
        }
    }

    // And every pin of every net: arcs too
    for(index_t n=0; n<circuit.net_cnt(); ++n){
        for(auto p : circuit.get_net(n)){
            index_t c = p.cell_ind;
            int_t pin_offs = (pl.plt_.orientations_[c].x_ ? p.offset.x_ : circuit.get_cell(c).size.x_ - p.offset.x_); // Offset to the beginning of the cell
            if((circuit.get_cell(c).attributes & XMovable) != 0){
                Arc Al = g.addArc(cell_nodes[c], Lnet_nodes[n]);
                constraint_arcs.push_back(arc_pair(Al, pin_offs));
                Arc Ar = g.addArc(Unet_nodes[n], cell_nodes[c]);
                constraint_arcs.push_back(arc_pair(Ar, -pin_offs));
            }
            else{ // Fixed offset
                auto Al = g.addArc(fixed, Lnet_nodes[n]);
                constraint_arcs.push_back(arc_pair(Al, pl.plt_.positions_[c].x_ + pin_offs));
                auto Ar = g.addArc(Unet_nodes[n], fixed);
                constraint_arcs.push_back(arc_pair(Ar, - pl.plt_.positions_[c].x_ - pin_offs));
            }
        }
    }

    // The net weights become supplies: +weight on the upper bound node of
    // each non-empty net, -weight on its lower bound node
    std::vector<node_pair> net_supplies;
    for(index_t n=0; n<circuit.net_cnt(); ++n){
        if(circuit.get_net(n).pin_cnt > 0){
            net_supplies.push_back(node_pair(Unet_nodes[n], circuit.get_net(n).weight));
            net_supplies.push_back(node_pair(Lnet_nodes[n], -circuit.get_net(n).weight));
        }
    }

    // Create the maps holding the arc costs and the node supplies; no
    // capacity map is given to the solver, so the arcs are uncapacitated
    IntArcMap cost(g, 0);
    IntNodeMap supply(g, 0);

    for(arc_pair A : constraint_arcs){
        cost[A.first] = A.second;
    }
    for(node_pair N : net_supplies){
        supply[N.first] = N.second;
    }

    // Then we (hope the solver can) solve it
    NetworkSimplex<SmartDigraph> ns(g);
    ns.supplyMap(supply).costMap(cost);
    auto res = ns.run();
    if(res != ns.OPTIMAL){
        abort();
    }

    // And we get the new positions as the dual values of the current solution (compared to the fixed pin)
    for(index_t c=0; c<circuit.cell_cnt(); ++c){ // The cells
        if((circuit.get_cell(c).attributes & XMovable) != 0){
            pl.plt_.positions_[c].x_ = ns.potential(cell_nodes[c]) - ns.potential(fixed);
        }
    }

    pl.selfcheck();
}
} // namespace dp
} // namespace coloquinte

View File

@ -0,0 +1,183 @@
#include "coloquinte/detailed.hxx"
#include "coloquinte/circuit_helper.hxx"
namespace coloquinte{
namespace dp{
namespace{
// Tries to exchange the positions of the two standard cells c1 and c2.
//
// Each cell is tentatively centred in the free interval currently available
// around the other one and takes its y coordinate. The exchange is kept only
// if get_nets_cost, evaluated on the nets touching either cell, strictly
// decreases. When try_flip is set and both cells are XFlippable, the four
// combinations of x orientations are evaluated and the best one is kept.
//
// Returns true when the swap was committed (the topology stored in pl is then
// updated), false when the placement was left as it was.
inline bool try_swap(netlist const & circuit, detailed_placement & pl, index_t c1, index_t c2, bool try_flip,
std::function<std::int64_t(netlist const &, detailed_placement const &, std::vector<index_t> const &)> get_nets_cost){
    assert(pl.cell_height(c1) == 1 and pl.cell_height(c2) == 1);
    assert( (circuit.get_cell(c1).attributes & XMovable) != 0 and (circuit.get_cell(c1).attributes & YMovable) != 0);
    assert( (circuit.get_cell(c2).attributes & XMovable) != 0 and (circuit.get_cell(c2).attributes & YMovable) != 0);

    auto limits_c1 = pl.get_limit_positions(circuit, c1);
    auto limits_c2 = pl.get_limit_positions(circuit, c2);

    // Range of positions each cell could take in the slot of the other one
    int_t lo_c1 = limits_c2.first;
    int_t lo_c2 = limits_c1.first;
    int_t hi_c1 = limits_c2.second - circuit.get_cell(c1).size.x_;
    int_t hi_c2 = limits_c1.second - circuit.get_cell(c2).size.x_;

    // We just cannot swap those two cells without pushing anything
    if(hi_c1 < lo_c1 or hi_c2 < lo_c2){
        return false;
    }

    // Collect every net with a pin on one of the cells, each net only once
    std::vector<index_t> nets;
    for(netlist::pin_t pin : circuit.get_cell(c1)){
        nets.push_back(pin.net_ind);
    }
    for(netlist::pin_t pin : circuit.get_cell(c2)){
        nets.push_back(pin.net_ind);
    }
    std::sort(nets.begin(), nets.end());
    nets.erase(std::unique(nets.begin(), nets.end()), nets.end());

    // Reference cost, before anything is modified
    std::int64_t best_cost = get_nets_cost(circuit, pl, nets);

    // Remember the current state so that it can be rolled back
    point<int_t> const saved_pos1 = pl.plt_.positions_[c1];
    point<int_t> const saved_pos2 = pl.plt_.positions_[c2];
    point<bool> const saved_or1 = pl.plt_.orientations_[c1];
    point<bool> const saved_or2 = pl.plt_.orientations_[c2];
    auto const rollback = [&](){
        pl.plt_.positions_[c1] = saved_pos1;
        pl.plt_.positions_[c2] = saved_pos2;
        pl.plt_.orientations_[c1] = saved_or1;
        pl.plt_.orientations_[c2] = saved_or2;
    };

    // Warning: won't work if the two cells don't have the same height
    pl.plt_.positions_[c1].x_ = (lo_c1 + hi_c1) / 2;
    pl.plt_.positions_[c2].x_ = (lo_c2 + hi_c2) / 2;
    pl.plt_.positions_[c1].y_ = saved_pos2.y_;
    pl.plt_.positions_[c2].y_ = saved_pos1.y_;

    // For standard cell placement, we want all the rows to be aligned in the same way
    if( (circuit.get_cell(c1).attributes & YFlippable) != 0 and (circuit.get_cell(c2).attributes & YFlippable) != 0)
        std::swap(pl.plt_.orientations_[c1].y_, pl.plt_.orientations_[c2].y_);

    if(try_flip and (circuit.get_cell(c1).attributes & XFlippable) != 0 and (circuit.get_cell(c2).attributes & XFlippable) != 0){
        // Evaluate the four x orientation pairs; 4 means "none was better"
        index_t best_combo = 4;
        for(index_t combo=0; combo<4; ++combo){
            pl.plt_.orientations_[c1].x_ = combo % 2;
            pl.plt_.orientations_[c2].x_ = combo / 2;
            std::int64_t candidate = get_nets_cost(circuit, pl, nets);
            if(candidate < best_cost){
                best_cost = candidate;
                best_combo = combo;
            }
        }

        if(best_combo >= 4){
            rollback();
            return false;
        }

        // One of the orientations with the new positions was better: keep the swap
        pl.swap_standard_cell_topologies(c1, c2);
        pl.plt_.orientations_[c1].x_ = best_combo % 2;
        pl.plt_.orientations_[c2].x_ = best_combo / 2;
        return true;
    }

    if(get_nets_cost(circuit, pl, nets) < best_cost){
        pl.swap_standard_cell_topologies(c1, c2);
        return true;
    }

    // Reset the old values since we didn't swap anything
    rollback();
    return false;

    // A better solution would be
    // Check the cost on y depending on the position (extremely simple: two positions for each cell)
    // Check the cost on x depending on the position: piecewise linear and relatively complex
    // * Get all external pins
    // * Get all nets involving only one of the cells: piecewise linear cost for each of them
    // * For nets involving the two cells, we have an additional cost
}
// Tries to swap standard cells located on different rows.
//
// Every row (main_row) is paired with each of the following rows up to
// row_extent rows further. For every x-movable cell c of main_row, the
// x-movable cells of the other row are scanned starting from first_oc, and
// try_swap() is attempted on each of them with the given cost function.
// pl.selfcheck() is called once at the end.
//
// row_extent:    how many rows after main_row are considered.
// cell_extent:   bound on the number of cells lying entirely before the
//                search window that are kept as candidates for the next cell.
// try_flip:      forwarded to try_swap().
// get_nets_cost: cost of a set of nets for a given placement.
inline void generic_swaps_global(netlist const & circuit, detailed_placement & pl, index_t row_extent, index_t cell_extent, bool try_flip,
std::function<std::int64_t(netlist const &, detailed_placement const &, std::vector<index_t> const &)> get_nets_cost){
for(index_t main_row = 0; main_row < pl.row_cnt(); ++main_row){
for(index_t other_row = main_row+1; other_row <= std::min(pl.row_cnt()-1, main_row+row_extent) ; ++other_row){
index_t first_oc = pl.get_first_standard_cell_on_row(other_row); // The first candidate cell to be examined
for(index_t c = pl.get_first_standard_cell_on_row(main_row); c != null_ind; c = pl.get_next_standard_cell_on_row(c, main_row)){
assert(pl.cell_rows_[c] == main_row);
if( (circuit.get_cell(c).attributes & XMovable) == 0) continue; // Don't touch fixed cells
// Number of cells after/before the end of the cell
index_t nb_after = 0;
index_t nb_before = 0;
// Window of x positions around c: one cell width on each side of the cell
int_t pos_low = pl.plt_.positions_[c].x_ - circuit.get_cell(c).size.x_,
pos_hgh = pl.plt_.positions_[c].x_ + 2*circuit.get_cell(c).size.x_;
// NOTE(review): nb_after is compared with row_extent here while nb_before is
// compared with cell_extent below; this looks like it was meant to be
// cell_extent - confirm before changing.
for(index_t oc=first_oc; oc != null_ind and nb_after <= row_extent; oc = pl.get_next_standard_cell_on_row(oc, other_row)){
assert(pl.cell_rows_[oc] == other_row);
if( (circuit.get_cell(oc).attributes & XMovable) == 0) continue; // Don't touch fixed cells
// Count the cells which should trigger stop or shouldn't be used at the next iteration
if(pl.plt_.positions_[oc].x_ >= pos_hgh) ++nb_after;
if(pl.plt_.positions_[oc].x_ + circuit.get_cell(oc).size.x_ <= pos_low) ++ nb_before;
if(try_swap(circuit, pl, c, oc, try_flip, get_nets_cost)){
// The two cells exchanged rows: exchange the cursors too, so that c keeps
// designating a cell of main_row and oc a cell of other_row
std::swap(c, oc);
if(c == first_oc) first_oc = oc;
}
}
// Skip the surplus of candidate cells that were entirely before the window
while(nb_before > cell_extent){
nb_before--;
first_oc = pl.get_next_standard_cell_on_row(first_oc, other_row);
}
}
}
}
pl.selfcheck();
}
} // End anonymous namespace
// Swaps cells between neighbouring rows whenever this reduces the sum of the
// HPWL lengths of the nets connected to the swapped cells.
// The parameters are forwarded unchanged to generic_swaps_global().
void swaps_global_HPWL(netlist const & circuit, detailed_placement & pl, index_t row_extent, index_t cell_extent, bool try_flip){
    // Cost of a set of nets: sum of their HPWL lengths, nets with at most one pin skipped
    auto const hpwl_cost = [](netlist const & nl, detailed_placement const & placement, std::vector<index_t> const & nets) -> std::int64_t{
        std::int64_t total = 0;
        for(auto it = nets.begin(); it != nets.end(); ++it){
            if(nl.get_net(*it).pin_cnt > 1){
                total += get_HPWL_length(nl, placement.plt_, *it);
            }
        }
        return total;
    };
    generic_swaps_global(circuit, pl, row_extent, cell_extent, try_flip, hpwl_cost);
}
// Swaps cells between neighbouring rows whenever this reduces the sum of the
// RSMT lengths of the nets connected to the swapped cells.
// The parameters are forwarded unchanged to generic_swaps_global().
void swaps_global_RSMT(netlist const & circuit, detailed_placement & pl, index_t row_extent, index_t cell_extent, bool try_flip){
    // Cost of a set of nets: sum of their RSMT lengths, nets with at most one pin skipped
    auto const rsmt_cost = [](netlist const & nl, detailed_placement const & placement, std::vector<index_t> const & nets) -> std::int64_t{
        std::int64_t total = 0;
        for(auto it = nets.begin(); it != nets.end(); ++it){
            if(nl.get_net(*it).pin_cnt > 1){
                total += get_RSMT_length(nl, placement.plt_, *it);
            }
        }
        return total;
    };
    generic_swaps_global(circuit, pl, row_extent, cell_extent, try_flip, rsmt_cost);
}
} // namespace dp
} // namespace coloquinte

View File

@ -0,0 +1,96 @@
#include "coloquinte/circuit.hxx"
#include <map>
namespace coloquinte{
void netlist::selfcheck() const{
index_t cell_cnt = cell_areas_.size();
assert(cell_cnt+1 == cell_limits_.size());
assert(cell_cnt == cell_sizes_.size());
assert(cell_cnt == cell_attributes_.size());
assert(cell_cnt == cell_internal_mapping_.size());
index_t net_cnt = net_weights_.size();
assert(net_cnt+1 == net_limits_.size());
assert(net_cnt == net_internal_mapping_.size());
index_t pin_cnt = pin_offsets_.size();
assert(pin_cnt == cell_indexes_.size());
assert(pin_cnt == pin_indexes_.size());
assert(pin_cnt == net_indexes_.size());
for(auto const p : pin_offsets_){
assert(std::isfinite(p.x_) and std::isfinite(p.y_));
}
}
// For compatibility reasons: this check is intentionally empty and verifies
// nothing.
void placement_t::selfcheck() const{
}
// Asserts that a placement is legal: every cell that is movable in x or y lies
// within surface, and no two cells with a non-empty area overlap.
//
// circuit: the netlist providing the cell sizes and attributes.
// pl:      the placement providing the cell positions.
// surface: the placement area.
//
// All the verifications are assert()s: nothing is reported when asserts are
// disabled.
void verify_placement_legality(netlist const & circuit, placement_t const & pl, box<int_t> surface){
    std::vector<box<int_t> > cells;
    cells.reserve(circuit.cell_cnt());
    for(index_t i=0; i<circuit.cell_cnt(); ++i){
        auto S = circuit.get_cell(i).size;
        cells.push_back(box<int_t>(pl.positions_[i], pl.positions_[i] + S));

        // Verify that they are within the placement surface; doesn't take fixed macros into account
        if( (circuit.get_cell(i).attributes & XMovable) != 0 or (circuit.get_cell(i).attributes & YMovable) != 0){
            assert(cells[i].in(surface));
        }
    }

    // Simple sweepline algorithm (sweeping along y) to verify that there is no overlap
    struct event{
        int_t x_min, x_max, y;
        index_t cell;
        bool removal;
        bool operator<(event const & o) const{
            return y < o.y
                or (y == o.y and removal and not o.removal); // Remove before inserting
        }
    };

    // One insertion and one removal event per cell with a non-empty area
    std::vector<event> all_events;
    all_events.reserve(2 * cells.size());
    for(index_t i=0; i<circuit.cell_cnt(); ++i){
        event b, e;
        b.cell = i; e.cell = i;
        b.x_min = cells[i].x_min_; e.x_min = cells[i].x_min_;
        b.x_max = cells[i].x_max_; e.x_max = cells[i].x_max_;
        b.y = cells[i].y_min_; b.removal = false;
        e.y = cells[i].y_max_; e.removal = true;
        if(b.x_max > b.x_min and e.y != b.y){
            all_events.push_back(b);
            all_events.push_back(e);
        }
    }
    std::sort(all_events.begin(), all_events.end());

    // Indexed by beginning of interval, with end of interval and cell within
    std::map<int_t, std::pair<int_t, index_t> > active_rectangles;
    for(event const & E : all_events){
        if(E.removal){
            auto it = active_rectangles.find(E.x_min);
            assert(it != active_rectangles.end());
            // Erasing end() is undefined behaviour: stay safe when asserts are disabled
            if(it != active_rectangles.end()){
                active_rectangles.erase(it);
            }
        }
        else{ // Find anything that intersects with E; if not, add it
            auto it = active_rectangles.lower_bound(E.x_min); // First interval beginning at or after E.x_min
            if(it != active_rectangles.end()){
                assert(it->first >= E.x_max); // Otherwise: intersection between E.cell and it->second.second
            }
            if(it != active_rectangles.begin()){
                --it; // Last interval beginning before E.x_min
                assert(it->second.first <= E.x_min); // Otherwise: intersection between E.cell and it->second.second
            }
            active_rectangles.insert(std::pair<int_t, std::pair<int_t, index_t> >(E.x_min, std::pair<int_t, index_t>(E.x_max, E.cell)));
        }
    }
}
} // namespace coloquinte

407
coloquinte/src/circuit.cxx Normal file
View File

@ -0,0 +1,407 @@
#include "coloquinte/circuit_helper.hxx"
#include "coloquinte/circuit.hxx"
namespace coloquinte{
// Returns the half-perimeter wirelength of the net net_ind for the placement
// pl: the extent of its pins along x plus their extent along y.
// Nets with at most one pin have a length of 0.
std::int64_t get_HPWL_length(netlist const & circuit, placement_t const & pl, index_t net_ind){
    if(circuit.get_net(net_ind).pin_cnt <= 1) return 0;
    auto pins = get_pins_1D(circuit, pl, net_ind);
    auto minmaxX = std::minmax_element(pins.x_.begin(), pins.x_.end()), minmaxY = std::minmax_element(pins.y_.begin(), pins.y_.end());
    // Widen before subtracting and adding, so that the result cannot overflow
    // the coordinate type (assumes pin positions are integers - TODO confirm)
    std::int64_t const x_span = static_cast<std::int64_t>(minmaxX.second->pos) - static_cast<std::int64_t>(minmaxX.first->pos);
    std::int64_t const y_span = static_cast<std::int64_t>(minmaxY.second->pos) - static_cast<std::int64_t>(minmaxY.first->pos);
    return x_span + y_span;
}
// Returns the length computed by RSMT_length() on the pin positions of the net
// net_ind for the placement pl. Nets with at most one pin have a length of 0.
std::int64_t get_RSMT_length(netlist const & circuit, placement_t const & pl, index_t net_ind){
    if(circuit.get_net(net_ind).pin_cnt <= 1){
        return 0;
    }

    // Only the positions of the pins are needed
    std::vector<point<int_t> > locations;
    auto net_pins = get_pins_2D(circuit, pl, net_ind);
    for(pin_2D const & cur : net_pins){
        locations.push_back(cur.pos);
    }
    return RSMT_length(locations, 8);
}
namespace gp{
// Adds to L a force of the given strength between the pins p1 and p2.
// Two movable pins get a force between their cells; when only one pin is
// movable its cell is attached to the position of the other pin; nothing is
// added when neither pin is movable.
void add_force(pin_1D const p1, pin_1D const p2, linear_system & L, float_t force){
    if(not p1.movable and not p2.movable){
        return;
    }
    if(p1.movable and p2.movable){
        L.add_force(force, p1.cell_ind, p2.cell_ind, p1.offs, p2.offs);
        return;
    }
    // Exactly one of the two pins is movable
    pin_1D const & free_pin = p1.movable ? p1 : p2;
    pin_1D const & anchor_pin = p1.movable ? p2 : p1;
    L.add_fixed_force(force, free_pin.cell_ind, anchor_pin.pos, free_pin.offs);
}
// Adds to L a force between p1 and p2 whose strength is scale divided by the
// distance between the two pins, the distance being taken as at least tol.
void add_force(pin_1D const p1, pin_1D const p2, linear_system & L, float_t tol, float_t scale){
    float_t const distance = static_cast<float_t>(std::abs(p2.pos-p1.pos));
    float_t const bounded_distance = std::max(tol, distance);
    add_force(p1, p2, L, scale/bounded_distance);
}
// Builds the x and y linear systems, with one variable per cell, that the
// force models start from. A cell that is not movable in a direction, or that
// is connected to no net with more than one pin, gets a unit diagonal entry
// and its current position as right-hand side in that direction.
point<linear_system> empty_linear_systems(netlist const & circuit, placement_t const & pl){
    point<linear_system> systems(linear_system(circuit.cell_cnt()), linear_system(circuit.cell_cnt()));

    for(index_t cell=0; cell<circuit.cell_cnt(); ++cell){
        // Is the cell on at least one net with more than one pin?
        bool connected = false;
        for(auto pin : circuit.get_cell(cell)){
            connected = circuit.get_net(pin.net_ind).pin_cnt > 1;
            if(connected) break;
        }

        bool const hold_x = (XMovable & circuit.get_cell(cell).attributes) == 0 or not connected;
        bool const hold_y = (YMovable & circuit.get_cell(cell).attributes) == 0 or not connected;

        if(hold_x){
            systems.x_.add_triplet(cell, cell, 1.0f);
            systems.x_.add_doublet(cell, pl.positions_[cell].x_);
        }
        if(hold_y){
            systems.y_.add_triplet(cell, cell, 1.0f);
            systems.y_.add_doublet(cell, pl.positions_[cell].y_);
        }
    }
    return systems;
}
namespace{ // Anonymous namespace for helper functions
// Adds the forces of one net along one axis: every pin except the minimum one
// is connected to the minimum pin, and every pin that is neither the minimum
// nor the maximum one is also connected to the maximum pin. Each force has a
// scale of 1/(pin count - 1). Nets with fewer than two pins add nothing.
void get_HPWLF(std::vector<pin_1D> const & pins, linear_system & L, float_t tol){
    if(pins.size() < 2){
        return;
    }

    auto const lowest = std::min_element(pins.begin(), pins.end());
    auto const highest = std::max_element(pins.begin(), pins.end());
    auto const scale = 1.0f/(pins.size()-1);

    // The extreme pins are recognised by iterator; the original author notes
    // that this is poorer because of redundancies in the benchmarks
    for(auto cur = pins.begin(); cur != pins.end(); ++cur){
        if(cur == lowest) continue;
        add_force(*cur, *lowest, L, tol, scale);
        if(cur != highest){ // Hopefully only one connexion between the min and max pins
            add_force(*cur, *highest, L, tol, scale);
        }
    }
}
void get_HPWLR(std::vector<pin_1D> const & pins, linear_system & L, float_t tol){
std::vector<pin_1D> sorted_pins = pins;
std::sort(sorted_pins.begin(), sorted_pins.end());
// Pins are connected to the pin two places away
for(index_t i=0; i+2<sorted_pins.size(); ++i){
add_force(sorted_pins[i], sorted_pins[i+2], L, tol, 0.5f);
}
// The extreme pins are connected with their direct neighbour too
if(sorted_pins.size() > 1){
add_force(sorted_pins[0], sorted_pins[1], L, tol, 0.5f);
add_force(sorted_pins[sorted_pins.size()-1], sorted_pins[sorted_pins.size()-2], L, tol, 0.5f);
}
}
void get_star(std::vector<pin_1D> const & pins, linear_system & L, float_t tol, index_t star_index){
// The net is empty, but we still populate the diagonal to avoid divide by zeros
if(pins.size() < 2){
L.add_triplet(star_index, star_index, 1.0f);
return;
}
for(pin_1D p : pins){
pin_1D star_pin = pin_1D(star_index, 0, 0, true);
add_force(p, star_pin, L, 1.0/pins.size());
}
}
void get_clique(std::vector<pin_1D> const & pins, linear_system & L, float_t tol){
// Pins are connected to the pin two places away
for(index_t i=0; i+1<pins.size(); ++i){
for(index_t j=i+1; j<pins.size(); ++j){
add_force(pins[i], pins[j], L, tol, 1.0f/(pins.size()-1));
}
}
}
} // End anonymous namespace
// Builds the x and y linear systems obtained by applying get_HPWLF() to every
// net whose pin count is in [min_s, max_s). tol is forwarded to get_HPWLF().
point<linear_system> get_HPWLF_linear_system (netlist const & circuit, placement_t const & pl, float_t tol, index_t min_s, index_t max_s){
    point<linear_system> systems = empty_linear_systems(circuit, pl);
    for(index_t net=0; net<circuit.net_cnt(); ++net){
        // Only the nets with the right pin count are modelled
        index_t degree = circuit.get_net(net).pin_cnt;
        if(degree >= min_s and degree < max_s){
            auto net_pins = get_pins_1D(circuit, pl, net);
            get_HPWLF(net_pins.x_, systems.x_, tol);
            get_HPWLF(net_pins.y_, systems.y_, tol);
        }
    }
    return systems;
}
// Builds the x and y linear systems obtained by applying get_HPWLR() to every
// net whose pin count is in [min_s, max_s). tol is forwarded to get_HPWLR().
point<linear_system> get_HPWLR_linear_system (netlist const & circuit, placement_t const & pl, float_t tol, index_t min_s, index_t max_s){
    point<linear_system> systems = empty_linear_systems(circuit, pl);
    for(index_t net=0; net<circuit.net_cnt(); ++net){
        // Only the nets with the right pin count are modelled
        index_t degree = circuit.get_net(net).pin_cnt;
        if(degree >= min_s and degree < max_s){
            auto net_pins = get_pins_1D(circuit, pl, net);
            get_HPWLR(net_pins.x_, systems.x_, tol);
            get_HPWLR(net_pins.y_, systems.y_, tol);
        }
    }
    return systems;
}
// Builds the x and y linear systems of the star model: one extra variable per
// net is appended after the cell variables, and get_star() is applied to every
// net whose pin count is in [min_s, max_s). The extra variable of any other
// net only gets a unit diagonal entry.
point<linear_system> get_star_linear_system (netlist const & circuit, placement_t const & pl, float_t tol, index_t min_s, index_t max_s){
    point<linear_system> systems = empty_linear_systems(circuit, pl);
    systems.x_.add_variables(circuit.net_cnt());
    systems.y_.add_variables(circuit.net_cnt());

    for(index_t net=0; net<circuit.net_cnt(); ++net){
        // Index of the star's central pin in the linear system
        auto const star_var = net+circuit.cell_cnt();
        index_t degree = circuit.get_net(net).pin_cnt;

        if(degree >= min_s and degree < max_s){
            auto net_pins = get_pins_1D(circuit, pl, net);
            get_star(net_pins.x_, systems.x_, tol, star_var);
            get_star(net_pins.y_, systems.y_, tol, star_var);
        }
        else{
            // Put a one in the intermediate variable in order to avoid non-invertible matrices
            systems.x_.add_triplet(star_var, star_var, 1.0f);
            systems.y_.add_triplet(star_var, star_var, 1.0f);
        }
    }
    return systems;
}
// Builds the x and y linear systems obtained by applying get_clique() to every
// net whose pin count is in [min_s, max_s). tol is forwarded to get_clique().
point<linear_system> get_clique_linear_system (netlist const & circuit, placement_t const & pl, float_t tol, index_t min_s, index_t max_s){
    point<linear_system> systems = empty_linear_systems(circuit, pl);
    for(index_t net=0; net<circuit.net_cnt(); ++net){
        // Only the nets with the right pin count are modelled
        index_t degree = circuit.get_net(net).pin_cnt;
        if(degree >= min_s and degree < max_s){
            auto net_pins = get_pins_1D(circuit, pl, net);
            get_clique(net_pins.x_, systems.x_, tol);
            get_clique(net_pins.y_, systems.y_, tol);
        }
    }
    return systems;
}
// Builds the x and y linear systems where, for every net with more than one
// pin and a pin count in [min_s, max_s), each edge returned by
// get_MST_topology() becomes a force of scale 1 in both x and y.
point<linear_system> get_MST_linear_system(netlist const & circuit, placement_t const & pl, float_t tol, index_t min_s, index_t max_s){
    point<linear_system> systems = empty_linear_systems(circuit, pl);
    for(index_t net=0; net<circuit.net_cnt(); ++net){
        // Only the nets with the right pin count are modelled
        index_t degree = circuit.get_net(net).pin_cnt;
        bool const skipped = degree < min_s or degree >= max_s or degree <= 1;
        if(skipped) continue;

        // The topology is computed from the pin positions only
        auto net_pins = get_pins_2D(circuit, pl, net);
        std::vector<point<int_t> > locations;
        for(pin_2D const & cur : net_pins){
            locations.push_back(cur.pos);
        }

        auto const tree = get_MST_topology(locations);
        for(auto edge : tree){
            add_force(net_pins[edge.first].x(), net_pins[edge.second].x(), systems.x_, tol, 1.0f);
            add_force(net_pins[edge.first].y(), net_pins[edge.second].y(), systems.y_, tol, 1.0f);
        }
    }
    return systems;
}
// Builds the x and y linear systems where, for every net with more than one
// pin and a pin count in [min_s, max_s), the x edges returned by
// get_RSMT_topology() become forces of scale 1 in the x system and the y
// edges become forces of scale 1 in the y system.
point<linear_system> get_RSMT_linear_system(netlist const & circuit, placement_t const & pl, float_t tol, index_t min_s, index_t max_s){
    point<linear_system> systems = empty_linear_systems(circuit, pl);
    for(index_t net=0; net<circuit.net_cnt(); ++net){
        // Only the nets with the right pin count are modelled
        index_t degree = circuit.get_net(net).pin_cnt;
        bool const skipped = degree < min_s or degree >= max_s or degree <= 1;
        if(skipped) continue;

        // The topology is computed from the pin positions only
        auto net_pins = get_pins_2D(circuit, pl, net);
        std::vector<point<int_t> > locations;
        for(pin_2D const & cur : net_pins){
            locations.push_back(cur.pos);
        }

        auto const topology = get_RSMT_topology(locations, 8);
        for(auto edge : topology.x_){
            add_force(net_pins[edge.first].x(), net_pins[edge.second].x(), systems.x_, tol, 1.0f);
        }
        for(auto edge : topology.y_){
            add_force(net_pins[edge.first].y(), net_pins[edge.second].y(), systems.y_, tol, 1.0f);
        }
    }
    return systems;
}
// Returns the sum of get_HPWL_length() over all the nets of the circuit.
std::int64_t get_HPWL_wirelength(netlist const & circuit, placement_t const & pl){
    std::int64_t total = 0;
    index_t net = 0;
    while(net < circuit.net_cnt()){
        total += get_HPWL_length(circuit, pl, net);
        ++net;
    }
    return total;
}
// Returns the sum of MST_length() over the pin positions of all the nets.
// The original comment describes it as the true wirelength with minimum
// spanning trees, except for very small nets (<= 3) where HPWL == true WL;
// no such special case is visible here - presumably MST_length() handles it.
std::int64_t get_MST_wirelength(netlist const & circuit, placement_t const & pl){
    std::int64_t total = 0;
    for(index_t net=0; net<circuit.net_cnt(); ++net){
        // Only the positions of the pins are needed
        std::vector<point<int_t> > locations;
        auto net_pins = get_pins_2D(circuit, pl, net);
        for(pin_2D const & cur : net_pins){
            locations.push_back(cur.pos);
        }
        total += MST_length(locations);
    }
    return total;
}
// Returns the sum of get_RSMT_length() over all the nets of the circuit.
std::int64_t get_RSMT_wirelength(netlist const & circuit, placement_t const & pl){
    std::int64_t total = 0;
    index_t net = 0;
    while(net < circuit.net_cnt()){
        total += get_RSMT_length(circuit, pl, net);
        ++net;
    }
    return total;
}
// Solves the x and y linear systems of L with solve_CG(), starting from the
// current positions, and writes the solutions back into pl.
//
// circuit:  the netlist; only cells movable in a direction are updated in
//           that direction.
// pl:       the placement, read as the initial guess and updated in place.
// L:        the x and y linear systems to solve.
// nbr_iter: forwarded to solve_CG().
void solve_linear_system(netlist const & circuit, placement_t & pl, point<linear_system> & L, index_t nbr_iter){
std::vector<float_t> x_sol, y_sol;
std::vector<float_t> x_guess(pl.cell_cnt()), y_guess(pl.cell_cnt());
// The systems are expected to have exactly one variable per cell
assert(L.x_.internal_size() == x_guess.size());
assert(L.y_.internal_size() == y_guess.size());
// The initial guess is the current placement
for(index_t i=0; i<pl.cell_cnt(); ++i){
x_guess[i] = static_cast<float_t>(pl.positions_[i].x_);
y_guess[i] = static_cast<float_t>(pl.positions_[i].y_);
}
// The two systems are solved in two OpenMP sections, one for x and one for y
#pragma omp parallel sections num_threads(2)
{
#pragma omp section
x_sol = L.x_.solve_CG(x_guess, nbr_iter);
#pragma omp section
y_sol = L.y_.solve_CG(y_guess, nbr_iter);
}
// Write the solution back, converted to int_t by static_cast
for(index_t i=0; i<pl.cell_cnt(); ++i){
if( (circuit.get_cell(i).attributes & XMovable) != 0){
assert(std::isfinite(x_sol[i]));
pl.positions_[i].x_ = static_cast<int_t>(x_sol[i]);
}
if( (circuit.get_cell(i).attributes & YMovable) != 0){
assert(std::isfinite(y_sol[i]));
pl.positions_[i].y_ = static_cast<int_t>(y_sol[i]);
}
}
}
// Intended to be used by pulling forces to adapt the forces to the cell's areas.
//
// Returns, for each cell, its area divided by the average cell area. When the
// total area is zero every scale is 0 (no average area can be defined, and
// dividing by it would produce NaN values).
std::vector<float_t> get_area_scales(netlist const & circuit){
    std::vector<float_t> ret(circuit.cell_cnt());
    capacity_t int_tot_area = 0;
    for(index_t i=0; i<circuit.cell_cnt(); ++i){
        capacity_t A = circuit.get_cell(i).area;
        ret[i] = static_cast<float_t>(A);
        int_tot_area += A;
    }

    // Every area is zero (or there is no cell): ret already holds only zeros
    if(int_tot_area == 0){
        return ret;
    }

    float_t inv_average_area = circuit.cell_cnt() / static_cast<float_t>(int_tot_area);
    for(index_t i=0; i<circuit.cell_cnt(); ++i){
        ret[i] *= inv_average_area;
    }
    return ret;
}
// Builds the x and y linear systems anchoring every cell to its position in
// pl, with a force of (1 / typical_distance) scaled by the relative area of
// the cell (see get_area_scales()).
point<linear_system> get_pulling_forces (netlist const & circuit, placement_t const & pl, float_t typical_distance){
    point<linear_system> systems = empty_linear_systems(circuit, pl);
    float_t const base_force = 1.0f / typical_distance;
    std::vector<float_t> const area_scale = get_area_scales(circuit);

    for(index_t cell=0; cell<pl.cell_cnt(); ++cell){
        systems.x_.add_anchor(base_force * area_scale[cell], cell, pl.positions_[cell].x_);
        systems.y_.add_anchor(base_force * area_scale[cell], cell, pl.positions_[cell].y_);
    }
    return systems;
}
// Builds the x and y linear systems anchoring every cell to its position in
// UB_pl. The force is scaled by the relative area of the cell (see
// get_area_scales()) and divided by the distance between the positions of the
// cell in UB_pl and LB_pl, this distance being taken as at least min_distance.
point<linear_system> get_linear_pulling_forces (netlist const & circuit, placement_t const & UB_pl, placement_t const & LB_pl, float_t force, float_t min_distance){
    point<linear_system> systems = empty_linear_systems(circuit, UB_pl);
    assert(LB_pl.cell_cnt() == UB_pl.cell_cnt());
    std::vector<float_t> const area_scale = get_area_scales(circuit);

    for(index_t cell=0; cell<LB_pl.cell_cnt(); ++cell){
        // Distance between the two placements of the cell, along each axis
        float_t const gap_x = static_cast<float_t>(std::abs(UB_pl.positions_[cell].x_ - LB_pl.positions_[cell].x_));
        float_t const gap_y = static_cast<float_t>(std::abs(UB_pl.positions_[cell].y_ - LB_pl.positions_[cell].y_));

        systems.x_.add_anchor(force * area_scale[cell] / (std::max(gap_x, min_distance)), cell, UB_pl.positions_[cell].x_);
        systems.y_.add_anchor(force * area_scale[cell] / (std::max(gap_y, min_distance)), cell, UB_pl.positions_[cell].y_);
    }
    return systems;
}
// Returns the region_distribution built by
// region_distribution::uniform_density_distribution() from the placement
// surface, the circuit and the placement pl.
region_distribution get_rough_legalizer(netlist const & circuit, placement_t const & pl, box<int_t> surface){
return region_distribution::uniform_density_distribution(surface, circuit, pl);
}
// Writes the positions exported by the legalizer (linear spreading) back into pl.
// Half of the cell's size is subtracted from each exported position before it is
// converted to integer coordinates.
void get_rough_legalization(netlist const & circuit, placement_t & pl, region_distribution const & legalizer){
    auto const spread_cells = legalizer.export_spread_positions_linear();
    for(auto const & cell : spread_cells){
        index_t const ind = cell.index_in_placement_;
        point<float_t> const half_size = 0.5f * static_cast<point<float_t> >(circuit.get_cell(ind).size);
        pl.positions_[ind] = static_cast<point<int_t> >(cell.pos_ - half_size);
    }
}
// Area-weighted mean of the Manhattan distance between the positions of each cell
// in the two placements. Cells that are not movable on an axis are asserted to
// have the same coordinate on that axis in both placements.
float_t get_mean_linear_disruption(netlist const & circuit, placement_t const & LB_pl, placement_t const & UB_pl){
    float_t weighted_sum = 0.0;
    float_t area_sum = 0.0;

    index_t const cell_count = circuit.cell_cnt();
    for(index_t c=0; c<cell_count; ++c){
        auto const cell = circuit.get_cell(c);
        float_t const area = static_cast<float_t>(cell.area);
        point<int_t> const diff = LB_pl.positions_[c] - UB_pl.positions_[c];

        assert((cell.attributes & XMovable) != 0 or diff.x_ == 0);
        assert((cell.attributes & YMovable) != 0 or diff.y_ == 0);

        weighted_sum += area * (std::abs(diff.x_) + std::abs(diff.y_));
        area_sum += area;
    }
    return weighted_sum / area_sum;
}
// Square root of the area-weighted mean of the squared Manhattan distance between
// the positions of each cell in the two placements. Cells that are not movable on
// an axis are asserted to have the same coordinate on that axis in both placements.
float_t get_mean_quadratic_disruption(netlist const & circuit, placement_t const & LB_pl, placement_t const & UB_pl){
    float_t weighted_sum = 0.0;
    float_t area_sum = 0.0;

    index_t const cell_count = circuit.cell_cnt();
    for(index_t c=0; c<cell_count; ++c){
        auto const cell = circuit.get_cell(c);
        float_t const area = static_cast<float_t>(cell.area);
        point<int_t> const diff = LB_pl.positions_[c] - UB_pl.positions_[c];

        assert((cell.attributes & XMovable) != 0 or diff.x_ == 0);
        assert((cell.attributes & YMovable) != 0 or diff.y_ == 0);

        float_t const manhattan = (std::abs(diff.x_) + std::abs(diff.y_));
        weighted_sum += area * manhattan * manhattan;
        area_sum += area;
    }
    return std::sqrt(weighted_sum / area_sum);
}
} // namespace gp
} // namespace coloquinte

View File

@ -0,0 +1,59 @@
#ifndef COLOQUINTE_GP_CIRCUIT
#define COLOQUINTE_GP_CIRCUIT
#include "common.hxx"
#include "solvers.hxx"
#include "netlist.hxx"
#include "rough_legalizers.hxx"
#include <vector>
#include <cassert>
// Public interface of the global placement routines.
// Only declarations live here; unless noted otherwise the definitions are in the
// corresponding source file and are not visible from this header.
namespace coloquinte{
// NOTE(review): presumably checks that pl is a legal placement inside surface — confirm against the definition
void verify_placement_legality(netlist const & circuit, placement_t const & pl, box<int_t> surface);
namespace gp{
// Returns a pair (x, y) of linear systems for the circuit
point<linear_system> empty_linear_systems(netlist const & circuit, placement_t const & pl);
// Net models stuff
// All the net models share one signature and return the x and y systems.
// NOTE(review): tol, min_s and max_s look like a tolerance and bounds on the size
// of the nets handled by the model — confirm against the definitions
point<linear_system> get_HPWLF_linear_system (netlist const & circuit, placement_t const & pl, float_t tol, index_t min_s, index_t max_s);
point<linear_system> get_HPWLR_linear_system (netlist const & circuit, placement_t const & pl, float_t tol, index_t min_s, index_t max_s);
point<linear_system> get_star_linear_system (netlist const & circuit, placement_t const & pl, float_t tol, index_t min_s, index_t max_s);
point<linear_system> get_clique_linear_system (netlist const & circuit, placement_t const & pl, float_t tol, index_t min_s, index_t max_s);
point<linear_system> get_MST_linear_system (netlist const & circuit, placement_t const & pl, float_t tol, index_t min_s, index_t max_s);
point<linear_system> get_RSMT_linear_system (netlist const & circuit, placement_t const & pl, float_t tol, index_t min_s, index_t max_s);
// Additional forces
// Anchors pulling every cell toward its position in a given placement, scaled by cell area
point<linear_system> get_pulling_forces (netlist const & circuit, placement_t const & pl, float_t typical_distance);
point<linear_system> get_linear_pulling_forces (netlist const & circuit, placement_t const & UB_pl, placement_t const & LB_pl, float_t force, float_t min_distance);
// Solve the final linear system
void solve_linear_system(netlist const & circuit, placement_t & pl, point<linear_system> & L, index_t nbr_iter);
// Cost-related stuff, whether wirelength or disruption
std::int64_t get_HPWL_wirelength (netlist const & circuit, placement_t const & pl);
std::int64_t get_MST_wirelength (netlist const & circuit, placement_t const & pl);
std::int64_t get_RSMT_wirelength (netlist const & circuit, placement_t const & pl);
// Area-weighted mean Manhattan displacement between the two placements
float_t get_mean_linear_disruption(netlist const & circuit, placement_t const & LB_pl, placement_t const & UB_pl);
// Square root of the area-weighted mean squared Manhattan displacement between the two placements
float_t get_mean_quadratic_disruption(netlist const & circuit, placement_t const & LB_pl, placement_t const & UB_pl);
// Legalizer-related stuff
// Creates a uniform-density region_distribution over surface
region_distribution get_rough_legalizer(netlist const & circuit, placement_t const & pl, box<int_t> surface);
// Writes the positions exported by the legalizer back into pl
void get_rough_legalization(netlist const & circuit, placement_t & pl, region_distribution const & legalizer);
// Cell orientation optimization
void optimize_x_orientations(netlist const & circuit, placement_t & pl);
void optimize_y_orientations(netlist const & circuit, placement_t & pl);
void optimize_exact_orientations(netlist const & circuit, placement_t & pl);
//void spread_orientations(netlist const & circuit, placement_t & pl);
} // namespace gp
} // namespace coloquinte
#endif

View File

@ -0,0 +1,90 @@
#ifndef COLOQUINTE_GP_HELPERCIRCUIT
#define COLOQUINTE_GP_HELPERCIRCUIT
#include "common.hxx"
#include "netlist.hxx"
namespace coloquinte{
// A pin seen along a single axis: the cell it belongs to, its position, its offset
// and whether it is allowed to move on that axis.
struct pin_1D{
    index_t cell_ind;
    int_t   pos;
    int_t   offs;
    bool    movable;

    pin_1D(index_t c, int_t p, int_t o, bool m)
        : cell_ind(c)
        , pos(p)
        , offs(o)
        , movable(m)
    {}

    // Pins are ordered by position only
    bool operator<(pin_1D const o) const{
        return o.pos > pos;
    }
};
// A pin with both coordinates: the cell it belongs to, its position, its offset
// and a single movability flag shared by both axes.
struct pin_2D{
    index_t      cell_ind;
    point<int_t> pos;
    point<int_t> offs;
    bool         movable;

    pin_2D(index_t c, point<int_t> p, point<int_t> o, bool m)
        : cell_ind(c)
        , pos(p)
        , offs(o)
        , movable(m)
    {}

    // Projection of the pin on the x axis
    pin_1D x() const{
        pin_1D const projected(cell_ind, pos.x_, offs.x_, movable);
        return projected;
    }

    // Projection of the pin on the y axis
    pin_1D y() const{
        pin_1D const projected(cell_ind, pos.y_, offs.y_, movable);
        return projected;
    }
};
// Manhattan distance between the positions of two pins
inline int_t dist(pin_2D const a, pin_2D const b){
    int_t const dx = a.pos.x_ - b.pos.x_;
    int_t const dy = a.pos.y_ - b.pos.y_;
    return std::abs(dx) + std::abs(dy);
}
// Returns the pins of net net_ind with their positions in the placement pl.
//
// For each pin, the offset on an axis is the netlist offset when the cell's
// orientation flag on that axis is set, and (cell size - offset) otherwise; the
// pin position is this offset added to the cell position.
// A pin is reported movable only if its cell is movable on both axes.
//
// The former std::isfinite assertions were dropped: every value involved is an
// integer or a bool, so they could never fail, and they relied on <cmath>,
// which this header does not include.
inline std::vector<pin_2D> get_pins_2D(netlist const & circuit, placement_t const & pl, index_t net_ind){
    auto net = circuit.get_net(net_ind);
    std::vector<pin_2D> ret;
    ret.reserve(net.pin_cnt);
    for(auto const p : net){
        auto const cell = circuit.get_cell(p.cell_ind);
        point<bool> const orient = pl.orientations_[p.cell_ind];

        point<int_t> offs;
        offs.x_ = orient.x_ ? p.offset.x_ : cell.size.x_ - p.offset.x_;
        offs.y_ = orient.y_ ? p.offset.y_ : cell.size.y_ - p.offset.y_;
        point<int_t> const pos = offs + pl.positions_[p.cell_ind];

        bool const movable = (cell.attributes & XMovable) != 0 and (cell.attributes & YMovable) != 0;
        ret.push_back(pin_2D(p.cell_ind, pos, offs, movable));
    }
    return ret;
}
// Returns the pins of net net_ind, projected separately on the x and y axes.
//
// For each pin, the offset on an axis is the netlist offset when the cell's
// orientation flag on that axis is set, and (cell size - offset) otherwise; the
// pin position is this offset added to the cell position.
// Movability is reported per axis, from the XMovable and YMovable attributes.
//
// The former std::isfinite assertions were dropped: every value involved is an
// integer or a bool, so they could never fail, and they relied on <cmath>,
// which this header does not include.
inline point<std::vector<pin_1D> > get_pins_1D(netlist const & circuit, placement_t const & pl, index_t net_ind){
    auto net = circuit.get_net(net_ind);
    point<std::vector<pin_1D> > ret;
    ret.x_.reserve(net.pin_cnt);
    ret.y_.reserve(net.pin_cnt);
    for(auto const p : net){
        auto const cell = circuit.get_cell(p.cell_ind);
        point<bool> const orient = pl.orientations_[p.cell_ind];

        point<int_t> offs;
        offs.x_ = orient.x_ ? p.offset.x_ : cell.size.x_ - p.offset.x_;
        offs.y_ = orient.y_ ? p.offset.y_ : cell.size.y_ - p.offset.y_;
        point<int_t> const pos = offs + pl.positions_[p.cell_ind];

        bool const x_movable = (cell.attributes & XMovable) != 0;
        bool const y_movable = (cell.attributes & YMovable) != 0;
        ret.x_.push_back(pin_1D(p.cell_ind, pos.x_, offs.x_, x_movable));
        ret.y_.push_back(pin_1D(p.cell_ind, pos.y_, offs.y_, y_movable));
    }
    return ret;
}
// Wirelength and topology helpers; declarations only, the definitions are not in this header.
// NOTE(review): MST/RSMT presumably stand for minimum spanning tree and rectilinear
// Steiner minimal tree, and exactitude_limit for the pin count up to which the RSMT
// is computed exactly — confirm against the definitions.

// Lengths computed from a set of pin positions
std::int64_t MST_length(std::vector<point<int_t> > const & pins);
std::int64_t RSMT_length(std::vector<point<int_t> > const & pins, index_t exactitude_limit);
// Lengths computed for one net of a placed circuit
std::int64_t get_HPWL_length(netlist const & circuit, placement_t const & pl, index_t net_ind);
std::int64_t get_RSMT_length(netlist const & circuit, placement_t const & pl, index_t net_ind);
// Topologies returned as pairs of indexes; the RSMT variant returns one list per axis
std::vector<std::pair<index_t, index_t> > get_MST_topology(std::vector<point<int_t> > const & pins);
point<std::vector<std::pair<index_t, index_t> > > get_RSMT_topology(std::vector<point<int_t> > const & pins, index_t exactitude_limit);
} // namespace coloquinte
#endif

View File

@ -0,0 +1,110 @@
#ifndef COLOQUINTE_GP_COMMON
#define COLOQUINTE_GP_COMMON
#include <cstdint>
#include <algorithm>
namespace coloquinte{
// Basic numeric types shared by the whole library
using float_t = float; // Floating point coordinates and costs
using int_t = std::int32_t; // Integer coordinates and sizes
using index_t = std::uint32_t; // Indexes of cells, nets and pins
using capacity_t = std::int64_t; // Areas and capacities, wider than int_t
using mask_t = std::uint32_t; // Bitmask of cell attributes
using ext_object = std::uint64_t;
// NOTE(review): not used in the visible code — presumably selects between the two kinds of placement; confirm
enum PlacementType{
Optimist = 0,
Pessimist = 1
};
// Cell attribute flags: each value is a distinct bit, to be combined in a mask_t
// and tested with a bitwise and
enum Movability{
XMovable = 1 ,
YMovable = 1 << 1,
XFlippable = 1 << 2,
YFlippable = 1 << 3,
SoftMacro = 1 << 4
};
// A pair of values of the same type, one per axis.
template<typename T>
struct point{
    T x_, y_;

    // Leaves both coordinates uninitialized
    point(){}
    point(T x, T y): x_(x), y_(y){}

    // Conversion to a point of another coordinate type, each coordinate going
    // through a static_cast
    template<typename S>
    operator point<S>() const{
        return point<S>(static_cast<S>(x_), static_cast<S>(y_));
    }

    // Coordinate-wise addition in place.
    // Returns a reference to this point, like the built-in compound assignment,
    // so that the expression can be chained or used as a value.
    point<T> & operator+=(point<T> const o){
        x_ += o.x_;
        y_ += o.y_;
        return *this;
    }
};

// Coordinate-wise sum
template<typename T>
point<T> operator+(point<T> const a, point<T> const b){
    return point<T>(a.x_+b.x_, a.y_+b.y_);
}

// Coordinate-wise difference
template<typename T>
point<T> operator-(point<T> const a, point<T> const b){
    return point<T>(a.x_-b.x_, a.y_-b.y_);
}

// Multiplication of both coordinates by a scalar of the coordinate type
template<typename T>
point<T> operator*(T lambda, point<T> const p){
    return point<T>(lambda * p.x_, lambda * p.y_);
}

// Coordinate-wise product
template<typename T>
point<T> operator*(point<T> const a, point<T> const b){
    return point<T>(a.x_*b.x_, a.y_*b.y_);
}
template<typename T>
struct box{
T x_min_, x_max_, y_min_, y_max_;
box(){}
box(T x_mn, T x_mx, T y_mn, T y_mx) : x_min_(x_mn), x_max_(x_mx), y_min_(y_mn), y_max_(y_mx){}
box(point<T> mn, point<T> mx) : x_min_(mn.x_), x_max_(mx.x_), y_min_(mn.y_), y_max_(mx.y_){}
bool in(box<T> const o) const{
return x_max_ <= o.x_max_
&& y_max_ <= o.y_max_
&& x_min_ >= o.x_min_
&& y_min_ >= o.y_min_;
}
bool intersects(box<T> const o) const{
return x_min_ < o.x_max_
&& y_min_ < o.y_max_
&& o.x_min_ < x_max_
&& o.y_min_ < y_max_;
}
box<T> intersection(box<T> const o) const{
return box<T>(
std::max(x_min_, o.x_min_),
std::min(x_max_, o.x_max_),
std::max(y_min_, o.y_min_),
std::min(y_max_, o.y_max_)
);
}
point<T> dimensions() const{
return point<T>(x_max_-x_min_, y_max_-y_min_);
}
bool empty() const{
return dimensions().x_ <= 0 or dimensions().y_ <= 0;
}
template<typename S>
operator box<S>() const{
return box<S>(static_cast<S>(x_min_), static_cast<S>(x_max_), static_cast<S>(y_min_), static_cast<S>(y_max_));
}
};
using orientation_t = point<bool>;
} // Namespace coloquinte
#endif

View File

@ -0,0 +1,91 @@
#ifndef COLOQUINTE_DETAILED
#define COLOQUINTE_DETAILED
#include "common.hxx"
#include "netlist.hxx"
#include <vector>
#include <limits>
namespace coloquinte{
namespace dp{
// Sentinel index: the largest value of index_t
const index_t null_ind = std::numeric_limits<index_t>::max();
// A placement together with its row-based topological description.
// Only the small accessors are defined here; the other member functions are
// declared and their definitions are not in this header.
struct detailed_placement{
// All position and orientation stuff
placement_t plt_;
std::vector<index_t> cell_rows_; // One entry per cell, used as the first row of the cell by neighbour_index
// The placement region
int_t min_x_, max_x_;
int_t y_origin_;
int_t row_height_;
// Encode the topological state of the circuit: which cells are near each other
// Makes extracting part of the circuit or optimizing positions at fixed topology easy
std::vector<std::pair<index_t, index_t> > neighbours_; // The cells before and after on each row; cells spanning multiple columns use several positions
// In order to get the neighbours in the detailed placement
std::vector<index_t> neighbours_limits_; // Cell c owns the entries [neighbours_limits_[c], neighbours_limits_[c+1]) of neighbours_
std::vector<index_t> row_first_cells_, row_last_cells_; // For each row, which cells are on the boundaries
// Tests the coherency between positions, widths and topological representation
void selfcheck() const;
detailed_placement(
placement_t pl,
std::vector<index_t> placement_rows,
std::vector<index_t> cell_heights,
std::vector<std::vector<index_t> > rows,
int_t min_x, int_t max_x,
int_t y_origin,
index_t nbr_rows, int_t row_height
);
// Number of entries of neighbours_ owned by cell c
index_t cell_height(index_t c) const{ return neighbours_limits_[c+1] - neighbours_limits_[c]; }
index_t cell_cnt() const{ return cell_rows_.size(); }
index_t row_cnt() const{ return row_first_cells_.size(); }
// Index in neighbours_ of the entry of cell c for row r
index_t neighbour_index(index_t c, index_t r) const{
// index_t is unsigned: a row r below cell_rows_[c] wraps around to a huge value and fails the check too
assert(r - cell_rows_[c] < cell_height(c));
return neighbours_limits_[c] + r - cell_rows_[c];
}
void swap_standard_cell_topologies(index_t c1, index_t c2);
std::pair<int_t, int_t> get_limit_positions(netlist const & circuit, index_t c) const;
index_t get_first_cell_on_row(index_t r);
index_t get_next_cell_on_row(index_t c, index_t r);
index_t get_prev_cell_on_row(index_t c, index_t r);
index_t get_first_standard_cell_on_row(index_t r);
index_t get_next_standard_cell_on_row(index_t c, index_t r);
void reorder_standard_cells(std::vector<index_t> const old_order, std::vector<index_t> const new_order);
void reorder_cells(std::vector<index_t> const old_order, std::vector<index_t> const new_order, index_t row);
};
// Detailed placement optimization passes; declarations only, the definitions are
// not in this header. All of them modify the detailed placement pl in place.
// NOTE(review): the HPWL/RSMT suffix presumably names the wirelength model that
// the pass optimizes — confirm against the definitions.

// Swaps between cells; try_flip defaults to false
void swaps_global_HPWL(netlist const & circuit, detailed_placement & pl, index_t row_extent, index_t cell_extent, bool try_flip = false);
void swaps_global_RSMT(netlist const & circuit, detailed_placement & pl, index_t row_extent, index_t cell_extent, bool try_flip = false);
// Swaps within a row
void swaps_row_convex_HPWL(netlist const & circuit, detailed_placement & pl, index_t range);
void swaps_row_convex_RSMT(netlist const & circuit, detailed_placement & pl, index_t range);
void swaps_row_noncvx_HPWL(netlist const & circuit, detailed_placement & pl, index_t range);
void swaps_row_noncvx_RSMT(netlist const & circuit, detailed_placement & pl, index_t range);
// NOTE(review): OSRP presumably stands for "ordered single row problem" — confirm
void OSRP_convex_HPWL(netlist const & circuit, detailed_placement & pl);
void OSRP_convex_RSMT(netlist const & circuit, detailed_placement & pl);
void OSRP_noncvx_HPWL(netlist const & circuit, detailed_placement & pl);
void OSRP_noncvx_RSMT(netlist const & circuit, detailed_placement & pl);
void optimize_on_topology_HPWL(netlist const & circuit, detailed_placement & pl);
void row_compatible_orientation(netlist const & circuit, detailed_placement & pl, bool first_row_orient);
} // namespace dp
} // namespace coloquinte
#endif

View File

@ -0,0 +1,12 @@
// Include guard added so that this header can be included several times, like
// the other headers of the library.
#ifndef COLOQUINTE_LEGALIZER
#define COLOQUINTE_LEGALIZER

#include "circuit.hxx"
#include "detailed.hxx"

namespace coloquinte{
namespace dp{

// Builds a detailed placement from the placement pl, for rows of height
// row_height on the given surface. The definition is not in this header.
detailed_placement legalize(netlist const & circuit, placement_t const & pl, box<int_t> surface, int_t row_height);

// Exports the detailed placement dpl to pl. The definition is not in this header.
void get_result(netlist const & circuit, detailed_placement const & dpl, placement_t & pl);

} // namespace dp
} // namespace coloquinte

#endif

View File

@ -0,0 +1,244 @@
#ifndef COLOQUINTE_NETLIST
#define COLOQUINTE_NETLIST
#include "common.hxx"
#include <vector>
#include <cassert>
namespace coloquinte{
// Structures for construction and circuit_loader

// A pin as given to the netlist constructor: its offset and the cell and net it connects
struct temporary_pin{
    point<int_t> offset;
    index_t      cell_ind;
    index_t      net_ind;

    // Leaves the members uninitialized
    temporary_pin(){}

    temporary_pin(point<int_t> offs, index_t c, index_t n)
        : offset(offs)
        , cell_ind(c)
        , net_ind(n)
    {}
};
// A cell as given to the netlist constructor
struct temporary_cell{
    point<int_t> size;
    capacity_t   area;
    mask_t       attributes;
    index_t      list_index;

    // Leaves the members uninitialized
    temporary_cell(){}

    // The area is the product of the two dimensions, computed with the wider
    // capacity_t type rather than with int_t
    temporary_cell(point<int_t> s, mask_t attr, index_t ind)
        : size(s)
        , area(static_cast<capacity_t>(s.x_) * static_cast<capacity_t>(s.y_))
        , attributes(attr)
        , list_index(ind)
    {}
};
// A net as given to the netlist constructor
struct temporary_net{
    int_t   weight;
    index_t list_index;

    // Leaves the members uninitialized
    temporary_net(){}

    // Note the order of the arguments: the index comes first, then the weight
    temporary_net(index_t ind, int_t wght)
        : weight(wght)
        , list_index(ind)
    {}
};
// Main class
class netlist{
std::vector<int_t> net_weights_;
std::vector<capacity_t> cell_areas_;
std::vector<point<int_t> > cell_sizes_;
std::vector<mask_t> cell_attributes_;
// Mapping of the order given at construction time to the internal representation
std::vector<index_t> cell_internal_mapping_;
std::vector<index_t> net_internal_mapping_;
// Optimized sparse storage for nets
std::vector<index_t> net_limits_;
std::vector<index_t> cell_indexes_;
std::vector<point<int_t> > pin_offsets_;
// Sparse storage from cell to net appartenance
std::vector<index_t> cell_limits_;
std::vector<index_t> net_indexes_;
std::vector<index_t> pin_indexes_;
public:
netlist(std::vector<temporary_cell> cells, std::vector<temporary_net> nets, std::vector<temporary_pin> all_pins);
netlist(){}
void selfcheck() const;
struct pin_t{
point<int_t> offset;
index_t cell_ind, net_ind;
pin_t(point<int_t> offs, index_t c, index_t n) : offset(offs), cell_ind(c), net_ind(n){}
};
class net_pin_iterator{
index_t pin_ind, net_ind;
netlist const & N;
public:
pin_t operator*() const{
return pin_t(N.pin_offsets_[pin_ind], N.cell_indexes_[pin_ind], net_ind);
}
net_pin_iterator & operator++(){
pin_ind++;
return *this;
}
bool operator!=(net_pin_iterator const o) const{
return pin_ind != o.pin_ind;
}
net_pin_iterator(index_t net_index, index_t pin_index, netlist const & orig) : pin_ind(pin_index), net_ind(net_index), N(orig){}
};
class cell_pin_iterator{
index_t pin_ind, cell_ind;
netlist const & N;
public:
pin_t operator*() const{
return pin_t(N.pin_offsets_[N.pin_indexes_[pin_ind]], cell_ind, N.net_indexes_[pin_ind]);
}
cell_pin_iterator & operator++(){
pin_ind++;
return *this;
}
bool operator!=(cell_pin_iterator const o) const{
return pin_ind != o.pin_ind;
}
cell_pin_iterator(index_t cell_index, index_t pin_index, netlist const & orig) : pin_ind(pin_index), cell_ind(cell_index), N(orig){}
};
struct internal_cell{
point<int_t> size;
capacity_t area;
mask_t attributes;
netlist const & N;
index_t index;
index_t pin_cnt;
internal_cell(index_t ind, netlist const & orig) :
size(orig.cell_sizes_[ind]),
area(orig.cell_areas_[ind]),
attributes(orig.cell_attributes_[ind]),
N(orig),
index(ind),
pin_cnt(N.cell_limits_[ind+1] - N.cell_limits_[ind])
{}
cell_pin_iterator begin(){ return cell_pin_iterator(index, N.cell_limits_[index], N); }
cell_pin_iterator end(){ return cell_pin_iterator(index, N.cell_limits_[index+1], N); }
};
struct internal_net{
int_t weight;
netlist const & N;
index_t index;
index_t pin_cnt;
internal_net(index_t ind, netlist const & orig) :
weight(orig.net_weights_[ind]),
N(orig),
index(ind),
pin_cnt(N.net_limits_[ind+1] - N.net_limits_[ind])
{}
net_pin_iterator begin(){ return net_pin_iterator(index, N.net_limits_[index], N); }
net_pin_iterator end(){ return net_pin_iterator(index, N.net_limits_[index+1], N); }
};
internal_cell get_cell(index_t ind) const{
return internal_cell(ind, *this);
}
internal_net get_net(index_t ind) const{
return internal_net(ind, *this);
}
index_t cell_cnt() const{ return cell_internal_mapping_.size(); }
index_t net_cnt() const{ return net_internal_mapping_.size(); }
index_t pin_cnt() const{ return pin_offsets_.size(); }
index_t get_cell_ind(index_t external_ind) const{ return cell_internal_mapping_[external_ind]; }
index_t get_net_ind(index_t external_ind) const{ return net_internal_mapping_[external_ind]; }
};
// Builds the sparse storage of the netlist from the lists of cells, nets and pins.
//
// The pins are sorted by net to fill the net-ordered arrays, then by cell to fill
// the cell-ordered arrays; pin_index remembers the position each pin was given in
// the net-ordered arrays. The internal mappings are the identity.
//
// Every pin must reference an existing cell and an existing net: a pin with an
// out-of-range index would be skipped by the filling loops and leave its entries
// unset, so this is now checked by assertions.
inline netlist::netlist(std::vector<temporary_cell> cells, std::vector<temporary_net> nets, std::vector<temporary_pin> all_pins){
    struct extended_pin : public temporary_pin{
        index_t pin_index;
        // pin_index gets a defined value here and its final one in the net pass below
        extended_pin(temporary_pin const & p) : temporary_pin(p), pin_index(0){}
    };

    std::vector<extended_pin> pins;
    pins.reserve(all_pins.size());
    for(temporary_pin const & p : all_pins){
        assert(p.net_ind < nets.size());
        assert(p.cell_ind < cells.size());
        pins.push_back(extended_pin(p));
    }

    cell_limits_.resize(cells.size()+1);
    net_limits_.resize(nets.size()+1);

    net_weights_.resize(nets.size());

    cell_areas_.resize(cells.size());
    cell_sizes_.resize(cells.size());
    cell_attributes_.resize(cells.size());

    cell_internal_mapping_.resize(cells.size());
    net_internal_mapping_.resize(nets.size());

    cell_indexes_.resize(pins.size());
    pin_offsets_.resize(pins.size());
    net_indexes_.resize(pins.size());
    pin_indexes_.resize(pins.size());

    for(index_t i=0; i<nets.size(); ++i){
        net_internal_mapping_[i] = i;
    }
    for(index_t i=0; i<cells.size(); ++i){
        cell_internal_mapping_[i] = i;
    }

    // Net-ordered storage
    std::sort(pins.begin(), pins.end(), [](extended_pin const & a, extended_pin const & b){ return a.net_ind < b.net_ind; });
    for(index_t n=0, p=0; n<nets.size(); ++n){
        net_weights_[n] = nets[n].weight;
        net_limits_[n] = p;
        while(p<pins.size() && pins[p].net_ind == n){
            cell_indexes_[p] = pins[p].cell_ind;
            pin_offsets_[p] = pins[p].offset;
            pins[p].pin_index = p;
            ++p;
        }
    }
    net_limits_.back() = pins.size();

    // Cell-ordered storage
    std::sort(pins.begin(), pins.end(), [](extended_pin const & a, extended_pin const & b){ return a.cell_ind < b.cell_ind; });
    for(index_t c=0, p=0; c<cells.size(); ++c){
        cell_areas_[c] = cells[c].area;
        cell_attributes_[c] = cells[c].attributes;
        cell_sizes_[c] = cells[c].size;
        cell_limits_[c] = p;
        while(p<pins.size() && pins[p].cell_ind == c){
            net_indexes_[p] = pins[p].net_ind;
            pin_indexes_[p] = pins[p].pin_index;
            ++p;
        }
    }
    cell_limits_.back() = pins.size();
}
struct placement_t{
std::vector<point<int_t> > positions_;
std::vector<point<bool> > orientations_;
index_t cell_cnt() const{
assert(positions_.size() == orientations_.size());
return positions_.size();
}
void selfcheck() const;
};
} // namespace coloquinte
#endif

View File

@ -0,0 +1,158 @@
#ifndef COLOQUINTE_GP_OPTSUBPROBLEMS
#define COLOQUINTE_GP_OPTSUBPROBLEMS
#include "common.hxx"
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <limits>
#include <numeric>
#include <queue>
#include <utility>
#include <vector>
namespace coloquinte{
// Transportation problems; declarations only, the definitions are not in this header.
// NOTE(review): a t1D_elt presumably pairs a position with a capacity — confirm against transport_1D
typedef std::pair<int_t, capacity_t> t1D_elt;
std::vector<capacity_t> transport_1D(std::vector<t1D_elt> sources, std::vector<t1D_elt> sinks);
// Both take the capacities, the demands and a matrix of costs, and return a matrix of capacity_t
std::vector<std::vector<capacity_t> > transport_convex(std::vector<capacity_t> const & capacities, std::vector<capacity_t> const & demands, std::vector<std::vector<float_t> > const & costs);
std::vector<std::vector<capacity_t> > transport_generic(std::vector<capacity_t> const & capacities, std::vector<capacity_t> const & demands, std::vector<std::vector<float_t> > const & costs);
// A cell to push on a row: its width, the position it would like to get and its index
template<typename T>
struct legalizable_task{
    T       width;
    T       target_pos;
    index_t ind;

    legalizable_task(T w, T p, index_t i)
        : width(w)
        , target_pos(p)
        , ind(i)
    {}

    // Tasks are ordered by target position only
    bool operator<(legalizable_task<T> const o) const{
        return o.target_pos > target_pos;
    }
};
// A class to obtain the optimal positions minimizing total weighted displacement along a row
// It is an ordered single row problem/fixed order single machine scheduling problem, solved by the clumping/specialized cascading descent algorithm
// The cost is linear in the distance to the target position, weighted by the width of the cells
//
// Cells are pushed one after the other with push(); get_cost() evaluates a push
// without modifying the object, and get_placement() returns the resulting positions.
template<typename T>
class OSRP_leg{
struct OSRP_bound{
T absolute_pos; // Will be the target absolute position of the cell
T weight; // Will be the width of the cell
bool operator<(OSRP_bound const o) const{ return absolute_pos < o.absolute_pos; }
// Note the order of the arguments: the weight comes first, then the position
OSRP_bound(T w, T abs_pos) : absolute_pos(abs_pos), weight(w) {}
};
T begin, end; // Limits of the row
std::vector<index_t> cells; // The indexes in the circuit
std::vector<T> constraining_pos; // Where the cells have been pushed and constrain the positions of preceding cells
std::vector<T> prev_width; // Cumulative width of the cells: calculates the absolute position of new cells
std::priority_queue<OSRP_bound> bounds; // The bound with the largest absolute position is on top
// Get the cost of pushing a cell on the row
T get_displacement(legalizable_task<T> const newly_pushed, bool update);
public:
// These three accessors read the last element of a vector.
// NOTE(review): a default-constructed object has empty vectors, and
// constraining_pos stays empty until the first push — confirm callers never hit these cases
T current_width() const{ return prev_width.back(); }
T remaining_space() const{ return end - begin - current_width(); }
T last_available_pos() const{ return constraining_pos.back() + current_width(); }
// Cost of pushing the task, leaving the object in its current state
T get_cost(legalizable_task<T> const task){ return get_displacement(task, false); }
// Pushes the task at the end of the row
void push(legalizable_task<T> const task){ get_displacement(task, true); }
// Initialize
// prev_width starts with a single 0: the cumulative width before any cell
OSRP_leg(T b, T e) : begin(b), end(e), prev_width(1, 0) {}
OSRP_leg(){}
typedef std::pair<index_t, T> result_t;
// Get the resulting placement
std::vector<result_t> get_placement() const;
};
// A bound attached to a cell: the cell, a position and a slope
struct cell_bound{
    index_t c;
    int_t   pos;
    int_t   slope;

    cell_bound(index_t order, int_t p, int_t s)
        : c(order)
        , pos(p)
        , slope(s)
    {}

    // Bounds are ordered by cell only
    bool operator<(cell_bound const o) const{
        return o.c > c;
    }
};
// Single row placement; declarations only, the definitions are not in this header.
// The result is written to positions (and to flippings for the non-convex variant).
// NOTE(review): the returned bool presumably tells whether a placement was found — confirm against the definitions
bool place_convex_single_row(std::vector<int_t> const & widths, std::vector<std::pair<int_t, int_t> > const & ranges, std::vector<cell_bound> bounds, std::vector<int_t> const & const_slopes, std::vector<int_t> & positions);
bool place_noncvx_single_row(std::vector<int_t> const & widths, std::vector<std::pair<int_t, int_t> > const & ranges, std::vector<int> const & flippables, std::vector<cell_bound> bounds, std::vector<int_t> const & const_slopes, std::vector<int_t> & positions, std::vector<int> & flippings);
// Computes the cost of pushing a new cell at the end of the row.
// If update is true the cell is actually pushed and the bounds are modified;
// otherwise the object is restored to the state it had before the call.
// Returns the cost accumulated while passing the bounds plus the cost of the new
// cell, which is its width times its distance to its target position.
template<typename T>
inline T OSRP_leg<T>::get_displacement(legalizable_task<T> const newly_pushed, bool update){
// Positions are handled with the cumulative width of the previous cells subtracted
T target_abs_pos = newly_pushed.target_pos - current_width();
T width = newly_pushed.width;
T slope = - width;
T cur_pos = end;
T cur_cost = 0;
std::vector<OSRP_bound> passed_bounds;
// The bounds are popped from the largest absolute position downwards
while( not bounds.empty() and
((slope < 0 and bounds.top().absolute_pos > target_abs_pos) // Not reached equilibrium
or bounds.top().absolute_pos > end - current_width() - width) // Still not a legal position
){
T old_pos = cur_pos;
cur_pos = bounds.top().absolute_pos;
cur_cost += (old_pos - cur_pos) * (slope + width); // The additional cost for the other cells encountered
slope += bounds.top().weight;
// Remember which bounds we encountered in order to reset the object to its initial state
if(not update)
passed_bounds.push_back(bounds.top());
bounds.pop();
}
T final_abs_pos = std::min(end - current_width() - width, // Always before the end and after the beginning
std::max(begin, slope >= 0 ? cur_pos : target_abs_pos) // but did we stop before reaching the target position?
);
cur_cost += (cur_pos - final_abs_pos) * (slope + width); // The additional cost for the other cells encountered
// The legality of the position is only checked for integer types
if(std::numeric_limits<T>::is_integer){
assert(final_abs_pos >= begin);
assert(final_abs_pos <= end - current_width() - width);
}
if(update){
prev_width.push_back(width + current_width());
cells.push_back(newly_pushed.ind);
constraining_pos.push_back(final_abs_pos);
if(slope > 0){ // Remaining capacity of an encountered bound
bounds.push(OSRP_bound(slope, cur_pos));
}
// The new bound, minus what it absorbs of the remaining slope
if(target_abs_pos > begin){
bounds.push(OSRP_bound(2*width + std::min(slope, static_cast<T>(0) ), target_abs_pos));
}
}
else{
// Evaluation only: put back every bound that was popped
for(OSRP_bound b : passed_bounds){
bounds.push(b);
}
}
return cur_cost + width * std::abs(final_abs_pos - target_abs_pos); // Add the cost of the new cell
}
// Returns, for every pushed cell, its index paired with its final position.
// A cell cannot be further than the cells pushed after it allow: its constraining
// position is replaced by the minimum over itself and all the following cells,
// then the cumulative width of the preceding cells is added back.
template<typename T>
inline std::vector<std::pair<index_t, T> > OSRP_leg<T>::get_placement() const{
    // Running minimum from the last cell down to the first one
    std::vector<T> final_abs_pos = constraining_pos;
    for(auto i = final_abs_pos.size(); i > 1; --i){
        final_abs_pos[i-2] = std::min(final_abs_pos[i-1], final_abs_pos[i-2]);
    }

    std::vector<result_t> ret;
    ret.reserve(cells.size());
    for(index_t i=0; i<cells.size(); ++i){
        ret.push_back(result_t(cells[i], final_abs_pos[i] + prev_width[i]));

        // The legality of the position is only checked for integer types
        if(std::numeric_limits<T>::is_integer){
            assert(final_abs_pos[i] >= begin);
            assert(final_abs_pos[i] + prev_width[i+1] <= end);
        }
    }
    return ret;
}
}
#endif

View File

@ -0,0 +1,29 @@
// Include guard added: this header defines a struct, so including it twice in
// the same translation unit was a redefinition error.
#ifndef COLOQUINTE_PIECEWISE_LINEAR
#define COLOQUINTE_PIECEWISE_LINEAR

#include "common.hxx"

#include <utility>
#include <vector>

namespace coloquinte{

typedef std::pair<int_t, int_t> p_v;

// A function stored as a list of integer pairs.
// Only the interface is declared here; the member functions are defined elsewhere.
// NOTE(review): each p_v presumably holds a position and the value of the function
// at that position — confirm against the definitions.
struct piecewise_linear_function{
    std::vector<p_v> point_values;

    static piecewise_linear_function minimum(piecewise_linear_function const & a, piecewise_linear_function const & b);

    piecewise_linear_function previous_min_of_sum(piecewise_linear_function const & o, int_t added_cell_width) const;
    piecewise_linear_function previous_min() const;

    int_t value_at(int_t pos) const;
    int_t last_before(int_t pos) const;

    void add_monotone(int_t slope, int_t offset);
    void add_bislope(int_t s_l, int_t s_r, int_t pos);

    piecewise_linear_function(){}
    piecewise_linear_function(int_t min_def, int_t max_def);
};

} // End namespace coloquinte

#endif

View File

@ -0,0 +1,252 @@
#ifndef COLOQUINTE_GP_ROUGH_LEGALIZER
#define COLOQUINTE_GP_ROUGH_LEGALIZER
#include "common.hxx"
#include "netlist.hxx"
#include <vector>
#include <cassert>
#include <cmath>
#include <functional>
/*
* A simple class to perform approximate legalization with extreme efficiency
*
* To be called during global placement or before an exact legalization
*
*/
namespace coloquinte{
namespace gp{
// Distributes the cells of a placement among a grid of rectangular regions.
// Only the interface and a few trivial members are defined in this class; the
// other member functions are defined elsewhere.
class region_distribution{
/*
* Coordinates are mostly float but obstacles and areas are integers for correctness
*/
public:
// A cell, or a part of a cell, as imported from and exported to a placement
struct movable_cell{
capacity_t demand_; // == area; No FP!!!
point<float_t> pos_; // Target position, determining the cost to allocate it
// int_t x_size, y_size; // May split cells
index_t index_in_placement_;
movable_cell();
movable_cell(capacity_t demand, point<float_t> p, index_t ind);
};
// Specifies a maximum density of movable cells per usable area
// Representing either a macroblock or a routing congestion
struct density_limit{
box<int_t> box_;
float_t density_; // from 0.0 for a macro to 1.0 if it does nothing
};
private:
struct region;
// The part of a cell that is allocated to a region
struct cell_ref{
capacity_t allocated_capacity_;
point<float_t> pos_;
index_t index_in_list_;
// Leaves the members uninitialized
cell_ref(){}
cell_ref(capacity_t demand, point<float_t> p, index_t ind) : allocated_capacity_(demand), pos_(p), index_in_list_(ind){}
friend region;
};
struct region{
public:
// Data members
capacity_t capacity_; // ==area; No floating point!!!
point<float_t> pos_;
std::vector<cell_ref> cell_references_;
// Constructors
region(){} // Necessary if we want to resize vectors
region(capacity_t cap, point<float_t> pos, std::vector<cell_ref> cells);
// Helper functions for bipartitioning
private:
static void distribute_new_cells(region & a, region & b, std::vector<cell_ref> cells); // Called by the other two to do the dirty work
public:
void distribute_cells(region & a, region & b) const; // Distribute the cells from one region to two
static void redistribute_cells(region & a, region & b); // Optimizes the distribution between two regions
// Helper functions for multipartitioning
private:
static void distribute_new_cells(std::vector<std::reference_wrapper<region_distribution::region> > regions, std::vector<cell_ref> cells);
public:
void distribute_cells(std::vector<std::reference_wrapper<region_distribution::region> > regions) const;
static void redistribute_cells(std::vector<std::reference_wrapper<region_distribution::region> > regions);
// Helper functions for 1D transportation
// coord maps a position to the coordinate used for the transportation
public:
static void distribute_new_cells(std::vector<std::reference_wrapper<region_distribution::region> > regions, std::vector<cell_ref> cells, std::function<float_t (point<float_t>)> coord);
static void redistribute_cells(std::vector<std::reference_wrapper<region_distribution::region> > & regions, std::function<float_t (point<float_t>)> coord);
public:
void uniquify_references();
void selfcheck() const;
// Accessors
capacity_t capacity() const;
capacity_t allocated_capacity() const;
capacity_t unused_capacity() const;
index_t cell_cnt() const;
float_t distance(cell_ref const & C) const;
float_t cost() const;
};
private:
// Members
index_t x_regions_cnt_, y_regions_cnt_;
std::vector<movable_cell> cell_list_;
std::vector<region> placement_regions_;
box<int_t> placement_area_;
std::vector<density_limit> density_map_;
const capacity_t full_density_mul; // Multiplier giving the grain for fractional areas for the surface
capacity_t cell_density_mul; // And for the cells
float_t density_scaling_factor_;
private:
// Helper functions
region & get_region(index_t x_coord, index_t y_coord);
region const & get_region(index_t x_coord, index_t y_coord) const;
box<int_t> get_box(index_t x, index_t y, index_t x_cnt, index_t y_cnt) const;
static void sort_uniquify(std::vector<cell_ref> & cell_references);
static void just_uniquify(std::vector<cell_ref> & cell_references);
// Prepare regions with the right positions and capacities; different levels of nesting are compatible
std::vector<region> prepare_regions(index_t x_cnt, index_t y_cnt) const;
public:
inline index_t x_regions_cnt() const;
inline index_t y_regions_cnt() const;
inline index_t regions_cnt() const;
inline index_t cell_cnt() const;
inline index_t fractional_cell_cnt() const;
/*
* Two types of export
* Region center : upper bound of legalization cost
* 1D quadratic optimization : lower bound of legalization cost
*/
std::vector<movable_cell> export_positions() const;
std::vector<movable_cell> export_spread_positions_quadratic() const;
std::vector<movable_cell> export_spread_positions_linear() const;
// The cost as seen by the partitioning algorithms (but not the export)
float_t cost() const;
/*
* Further partitions
*/
void x_bipartition();
void y_bipartition();
void x_resize(index_t sz);
void y_resize(index_t sz);
void multipartition(index_t x_width, index_t y_width);
// Same width on both axes
void multipartition(index_t width){ multipartition(width, width); }
/*
* Optimization functions
*/
// Bipartitioning: only two regions are considered at a time
void redo_adjacent_bipartitions();
void redo_diagonal_bipartitions();
void redo_bipartitions();
// Line partitioning: optimal on coordinate axis with Manhattan distance (Euclidean distance could use it in any direction)
void redo_line_partitions();
// Multipartitioning: several regions considered, slow runtimes
void redo_diag_partitions(index_t len);
void redo_multipartitions(index_t x_width, index_t y_width);
// Same width on both axes
void redo_multipartitions(index_t width){ redo_multipartitions(width, width); }
// Try to remove duplicate fractional cells
void fractions_minimization();
// Verify
void selfcheck() const;
private:
// Private: objects are obtained through the two static functions below
region_distribution(box<int_t> placement_area, netlist const & circuit, placement_t const & pl, std::vector<density_limit> const & density_map, bool full_density);
public:
/*
* Obtain a region_distribution from a placement
*
* Full density: the object tries to pack the cells as much as possible while still respecting the density limits
* Uniform density: not only are the density limits respected, the allocated capacities are proportional to the allowed densities
*
*/
static region_distribution full_density_distribution(box<int_t> placement_area, netlist const & circuit, placement_t const & pl, std::vector<density_limit> const & density_map = std::vector<density_limit>());
static region_distribution uniform_density_distribution(box<int_t> placement_area, netlist const & circuit, placement_t const & pl, std::vector<density_limit> const & density_map = std::vector<density_limit>());
void update(netlist const & circuit, placement_t const & pl);
};
// Default constructor: performs no explicit member initialization.
inline region_distribution::movable_cell::movable_cell()
{
}

// Builds a movable cell from its demand, its position and its index in the placement.
inline region_distribution::movable_cell::movable_cell(capacity_t demand, point<float_t> p, index_t ind)
    : demand_(demand)
    , pos_(p)
    , index_in_placement_(ind)
{
}
// Number of regions along the x axis.
inline index_t region_distribution::x_regions_cnt() const
{
    return x_regions_cnt_;
}

// Number of regions along the y axis.
inline index_t region_distribution::y_regions_cnt() const
{
    return y_regions_cnt_;
}

// Total number of regions (x count times y count); checked against the size of the region array.
inline index_t region_distribution::regions_cnt() const
{
    index_t const total = x_regions_cnt() * y_regions_cnt();
    assert(placement_regions_.size() == total);
    return total;
}
// Mutable access to the region at grid coordinates (x_coord, y_coord).
// Regions are stored in a flat array with x as the fastest-varying index.
inline region_distribution::region & region_distribution::get_region(index_t x_coord, index_t y_coord)
{
    auto const flat_index = y_coord * x_regions_cnt() + x_coord;
    return placement_regions_[flat_index];
}

// Read-only access to the region at grid coordinates (x_coord, y_coord).
inline region_distribution::region const & region_distribution::get_region(index_t x_coord, index_t y_coord) const
{
    auto const flat_index = y_coord * x_regions_cnt() + x_coord;
    return placement_regions_[flat_index];
}
// Number of movable cells held by the distribution.
inline index_t region_distribution::cell_cnt() const
{
    return cell_list_.size();
}

// Sum over all regions of the number of cell references they hold
// (a cell referenced by several regions is counted once per region).
inline index_t region_distribution::fractional_cell_cnt() const
{
    index_t sum = 0;
    for(auto it = placement_regions_.begin(); it != placement_regions_.end(); ++it){
        sum += it->cell_cnt();
    }
    return sum;
}
// Total capacity of the region.
inline capacity_t region_distribution::region::capacity() const{ return capacity_; }

// Capacity not yet allocated to any cell reference.
inline capacity_t region_distribution::region::unused_capacity() const{ return capacity() - allocated_capacity(); }

// Sum of the capacities allocated to the cell references held by this region.
inline capacity_t region_distribution::region::allocated_capacity() const{
    capacity_t ret = 0;
    // Iterate by const reference: only one field is read, no need to copy each cell_ref
    for(cell_ref const & C : cell_references_){
        ret += C.allocated_capacity_;
    }
    return ret;
}

// Number of cell references held by this region.
inline index_t region_distribution::region::cell_cnt() const{ return cell_references_.size(); }
// Manhattan (L1) distance between this region's position and the position of the cell reference.
inline float_t region_distribution::region::distance(region_distribution::cell_ref const & C) const
{
    auto const dx = std::abs(pos_.x_ - C.pos_.x_);
    auto const dy = std::abs(pos_.y_ - C.pos_.y_);
    return dx + dy;
    /*
    Disabled alternative: distance to the region's surface box with a small quadratic term
    float_t manhattan = std::max(static_cast<float_t>(0.0), std::max(C.pos_.x_ - surface_.x_max_, surface_.x_min_ - C.pos_.x_))
    + std::max(static_cast<float_t>(0.0), std::max(C.pos_.y_ - surface_.y_max_, surface_.y_min_ - C.pos_.y_));
    return manhattan * (1.0 + manhattan * 0.0001);
    */
}
} // Namespace gp
} // Namespace coloquinte
#endif

View File

@ -0,0 +1,88 @@
#ifndef COLOQUINE_GP_SOLVERS
#define COLOQUINE_GP_SOLVERS
#include "common.hxx"
#include <vector>
namespace coloquinte{
namespace gp{
// A (column, value) pair; ordering compares the column only.
struct matrix_doublet{
    index_t c_;   // Column index
    float val_;   // Coefficient value

    // Default construction leaves both fields uninitialized
    matrix_doublet(){}

    matrix_doublet(index_t c, float v)
        : c_(c), val_(v)
    {}

    // Strict ordering on the column index
    bool operator<(matrix_doublet const o) const{
        return o.c_ > c_;
    }
};
// A (row, column, value) entry of a sparse matrix.
struct matrix_triplet{
    index_t r_, c_;   // Row and column indices
    float_t val_;     // Coefficient value

    matrix_triplet(index_t ri, index_t ci, float_t v) : r_(ri), c_(ci), val_(v){}

    // Lexicographic ordering on (row, column).
    // Const-qualified so that const triplets can be compared, like the other comparators of the project.
    bool operator<(matrix_triplet const o) const{ return r_ < o.r_ || (r_ == o.r_ && c_ < o.c_); }
};
class linear_system{
std::vector<matrix_triplet> matrix_;
std::vector<float_t> target_;
index_t internal_size_;
public:
void add_triplet(index_t row, index_t col, float_t val){ matrix_.push_back(matrix_triplet(row, col, val)); }
linear_system operator+(linear_system const & o) const;
void add_doublet(index_t row, float_t val){
target_[row] += val;
}
void add_force(
float_t force,
index_t c1, index_t c2,
float_t offs1, float_t offs2
){
add_triplet(c1, c1, force);
add_triplet(c2, c2, force);
add_triplet(c1, c2, -force);
add_triplet(c2, c1, -force);
add_doublet(c1, force * (offs2-offs1));
add_doublet(c2, force * (offs1-offs2));
}
void add_fixed_force(
float_t force,
index_t c,
float_t fixed_pos,
float_t offs
){
add_triplet(c, c, force);
add_doublet(c, force * (fixed_pos-offs));
}
void add_anchor(
float_t scale,
index_t c,
float_t pos
){
add_triplet(c, c, scale);
add_doublet(c, scale*pos);
}
linear_system(index_t s) : target_(s, 0.0), internal_size_(s){}
linear_system(index_t s, index_t i) : target_(s, 0.0), internal_size_(i){}
index_t size() const{ return target_.size(); }
index_t internal_size() const{ return internal_size_; }
void add_variables(index_t cnt){ target_.resize(target_.size() + cnt, 0.0); }
std::vector<float_t> solve_CG(std::vector<float_t> guess, index_t nbr_iter);
};
} // namespace gp
} // namespace coloquinte
#endif

View File

@ -0,0 +1,33 @@
#include "common.hxx"
#ifndef COLOQUINTE_TOPOLOGIES
#define COLOQUINTE_TOPOLOGIES
namespace coloquinte{
namespace steiner_lookup{
// Compact description of one connection pattern for a net of pin_cnt pins.
// NOTE(review): the array below has pin_cnt-3 entries, so this presumably expects pin_cnt >= 4 - confirm.
template<int pin_cnt>
struct Hconnectivity{
// The edges and the couple of pins connected to the extreme ones are represented by one char each
// The first 4 bits represent the first pin minus one, the next 4 bits the second pin minus one
std::uint8_t connexions[pin_cnt-3];
std::uint8_t extremes;
// Takes the pin_cnt points (the parameter name indicates they are expected sorted) and returns a length as int_t.
// Defined elsewhere.
int_t get_wirelength(std::array<point<int_t>, pin_cnt> const sorted_points) const;
// Returns pin_cnt-1 pairs of indices for the same input points. Defined elsewhere.
std::array<std::pair<index_t, index_t>, pin_cnt-1> get_x_topology(std::array<point<int_t>, pin_cnt> const sorted_points) const;
};
// Constant tables of Hconnectivity entries for 4 to 10 pins, defined in another translation unit.
// The second template argument is the number of entries in each table.
extern std::array<Hconnectivity<4>, 2> const topologies_4;
extern std::array<Hconnectivity<5>, 6> const topologies_5;
extern std::array<Hconnectivity<6>, 23> const topologies_6;
extern std::array<Hconnectivity<7>, 111> const topologies_7;
extern std::array<Hconnectivity<8>, 642> const topologies_8;
extern std::array<Hconnectivity<9>, 4334> const topologies_9;
extern std::array<Hconnectivity<10>, 33510> const topologies_10;
}
}
#endif

View File

@ -0,0 +1,47 @@
#ifndef COLOQUINTE_UNION_FIND
#define COLOQUINTE_UNION_FIND
#include "common.hxx"
#include <vector>
namespace coloquinte{
class union_find{
std::vector<index_t> connex_representants;
public:
index_t size() const { return connex_representants.size(); }
void merge(index_t a, index_t b){
connex_representants[find(a)] = b;
}
index_t find(index_t ind){
if(connex_representants[ind] != ind){
connex_representants[ind] = find(connex_representants[ind]);
}
return connex_representants[ind];
}
union_find(index_t s) : connex_representants(s){
for(index_t i=0; i<size(); ++i){
connex_representants[i] = i;
}
}
bool is_connex(){
bool connex = true;
for(index_t i=0; i+1<size(); ++i){
connex = connex && (find(i) == find(i+1));
}
return connex;
}
};
} // End namespace coloquinte
#endif

260
coloquinte/src/detailed.cxx Normal file
View File

@ -0,0 +1,260 @@
#include "coloquinte/detailed.hxx"
#include "coloquinte/circuit_helper.hxx"
#include <cassert>
namespace coloquinte{
namespace dp{
// Builds the row/neighbour structure of a detailed placement.
//   pl             : positions and orientations of the cells
//   placement_rows : first row occupied by each cell
//   cell_heights   : number of rows occupied by each cell
//   rows           : for each row, the cells it contains in order
// Each cell gets one (previous, next) neighbour slot per row it occupies.
detailed_placement::detailed_placement(
        placement_t pl,
        std::vector<index_t> placement_rows,
        std::vector<index_t> cell_heights,
        std::vector<std::vector<index_t> > rows,
        int_t min_x, int_t max_x,
        int_t y_origin,
        index_t nbr_rows, int_t row_height
    )
    : plt_(pl)
    , cell_rows_(placement_rows)
    , min_x_(min_x)
    , max_x_(max_x)
    , y_origin_(y_origin)
{
    assert(row_height > 0);
    assert(min_x < max_x);
    assert(rows.size() == nbr_rows);

    // Prefix sums of the heights: the slots of cell c start at neighbours_limits_[c]
    neighbours_limits_.push_back(0);
    for(index_t k = 0; k < cell_heights.size(); ++k){
        neighbours_limits_.push_back(neighbours_limits_.back() + cell_heights[k]);
    }
    auto const slot_cnt = neighbours_limits_.back();

    neighbours_.resize(slot_cnt, std::pair<index_t, index_t>(null_ind, null_ind));
    row_first_cells_.resize(nbr_rows, null_ind);
    row_last_cells_.resize(nbr_rows, null_ind);

    // One flag per slot, to check that each one is seen exactly once
    std::vector<bool> explored(slot_cnt, false);

    // Extract the dependencies row by row
    for(index_t r = 0; r < rows.size(); ++r){
        std::vector<index_t> const & row = rows[r];

        if(not row.empty()){
            row_first_cells_[r] = row.front();
            row_last_cells_[r]  = row.back();
        }

        // A cell must not appear twice on the same row
        for(index_t c : row){
            auto const slot = neighbour_index(c, r);
            assert(not explored[slot]);
            explored[slot] = true;
        }

        // Link every pair of consecutive cells, in the internal format
        for(index_t i = 1; i < row.size(); ++i){
            index_t const left  = row[i-1];
            index_t const right = row[i];
            neighbours_[neighbour_index(left,  r)].second = right;
            neighbours_[neighbour_index(right, r)].first  = left;
        }
    }

    // Every level of every cell must have been visited
    for(index_t k = 0; k < explored.size(); ++k){
        assert(explored[k]);
    }

    // Verify that we haven't made any obvious mistake
    selfcheck();
}
void detailed_placement::selfcheck() const{
assert(row_first_cells_.size() == row_last_cells_.size());
for(index_t i=0; i<cell_cnt(); ++i){
for(index_t l=0; l<cell_height(i); ++l){
// not verified now since we don't modify the position for the obstacles
// : assert(c.position.x_ >= min_x_ and c.position.x_ + c.width <= max_x_);
index_t n_ind = l + neighbours_limits_[i];
assert(cell_rows_[i] + cell_height(i) <= row_cnt());
if(neighbours_[n_ind].first != null_ind){
index_t oi = neighbours_[n_ind].first;
// Correct neighbour position
assert(neighbours_[neighbour_index(oi, cell_rows_[i]+l)].second == i);
}
else{
// Beginning of a row
assert(row_first_cells_[cell_rows_[i] + l] == i);
}
if(neighbours_[n_ind].second != null_ind){
index_t oi = neighbours_[n_ind].second;
// Correct neighbour position
assert(neighbours_[neighbour_index(oi, cell_rows_[i]+l)].first == i);
}
else{
// End of a row
assert(row_last_cells_[cell_rows_[i] + l] == i);
}
}
}
}
// Exchanges the places of two single-row cells in the neighbour structure (positions are not touched).
// The order matters: the links held by the surrounding cells are redirected first, then the
// two cells exchange their own links and rows. This sequence also handles adjacent cells.
void detailed_placement::swap_standard_cell_topologies(index_t c1, index_t c2){
    assert(cell_height(c1) == cell_height(c2));
    assert(cell_height(c1) == 1 and cell_height(c2) == 1);

    index_t const row1 = cell_rows_[c1];
    index_t const row2 = cell_rows_[c2];

    auto & links1 = neighbours_[neighbours_limits_[c1]];
    auto & links2 = neighbours_[neighbours_limits_[c2]];

    // Snapshot of the current neighbours, before anything is modified
    index_t const prev1 = links1.first;
    index_t const prev2 = links2.first;
    index_t const next1 = links1.second;
    index_t const next2 = links2.second;

    // Redirect the links of the cells before c1 and c2 (or the row heads)
    if(prev1 == null_ind) row_first_cells_[row1] = c2;
    else neighbours_[neighbour_index(prev1, row1)].second = c2;

    if(prev2 == null_ind) row_first_cells_[row2] = c1;
    else neighbours_[neighbour_index(prev2, row2)].second = c1;

    // Redirect the links of the cells after c1 and c2 (or the row tails)
    if(next1 == null_ind) row_last_cells_[row1] = c2;
    else neighbours_[neighbour_index(next1, row1)].first = c2;

    if(next2 == null_ind) row_last_cells_[row2] = c1;
    else neighbours_[neighbour_index(next2, row2)].first = c1;

    // Finally exchange the properties of the two cells
    std::swap(links1, links2);
    std::swap(cell_rows_[c1], cell_rows_[c2]);
}
// Returns the interval (lowest x, highest x bound) left to cell c by its neighbours:
// the first value is the largest right edge among the previous cells (at least min_x_),
// the second value is the smallest left edge among the next cells (at most max_x_).
std::pair<int_t, int_t> detailed_placement::get_limit_positions(netlist const & circuit, index_t c) const{
    int_t lower = min_x_;
    int_t upper = max_x_;

    // One neighbour slot per row occupied by the cell
    for(index_t slot = neighbours_limits_[c]; slot < neighbours_limits_[c+1]; ++slot){
        index_t const before = neighbours_[slot].first;
        index_t const after  = neighbours_[slot].second;

        if(before != null_ind){
            lower = std::max(lower, plt_.positions_[before].x_ + circuit.get_cell(before).size.x_);
        }
        if(after != null_ind){
            upper = std::min(upper, plt_.positions_[after].x_);
        }
    }
    return std::pair<int_t, int_t>(lower, upper);
}
// First cell recorded on row r (null_ind when the row is empty).
index_t detailed_placement::get_first_cell_on_row(index_t r)
{
    index_t const head = row_first_cells_[r];
    return head;
}
// First cell of height 1 on row r, or null_ind if the row holds none.
index_t detailed_placement::get_first_standard_cell_on_row(index_t r){
    index_t cur = get_first_cell_on_row(r);
    for(;;){
        // Stop at the end of the row or at the first single-row cell
        if(cur == null_ind or cell_height(cur) == 1) break;
        index_t const following = get_next_cell_on_row(cur, r);
        assert(cur != following);
        cur = following;
    }
    assert(cur == null_ind or cell_rows_[cur] == r);
    return cur;
}
// Cell following c on row r (second element of its neighbour slot for that row).
index_t detailed_placement::get_next_cell_on_row(index_t c, index_t r)
{
    auto const & links = neighbours_[neighbour_index(c, r)];
    return links.second;
}

// Cell preceding c on row r (first element of its neighbour slot for that row).
index_t detailed_placement::get_prev_cell_on_row(index_t c, index_t r)
{
    auto const & links = neighbours_[neighbour_index(c, r)];
    return links.first;
}
// Next cell of height 1 strictly after c on row r, or null_ind if there is none.
index_t detailed_placement::get_next_standard_cell_on_row(index_t c, index_t r){
    index_t cur = c;
    while(true){
        // Always advance at least once, then skip the cells spanning several rows
        index_t const following = get_next_cell_on_row(cur, r);
        assert(cur != following);
        cur = following;
        if(cur == null_ind or cell_height(cur) == 1) break;
    }
    assert(cur == null_ind or cell_rows_[cur] == r);
    return cur;
}
void detailed_placement::reorder_cells(std::vector<index_t> const old_order, std::vector<index_t> const new_order, index_t r){
assert(old_order.size() == new_order.size());
assert(not old_order.empty());
index_t before_row = get_prev_cell_on_row(old_order.front(), r);
index_t after_row = get_next_cell_on_row(old_order.back(), r);
for(index_t i=0; i<new_order.size(); ++i){
auto & nghs = neighbours_[neighbour_index(new_order[i], r)];
if(i > 0){
nghs.first = new_order[i-1];
}
else{
nghs.first = before_row;
}
if(i+1 < new_order.size()){
nghs.second = new_order[i+1];
}
else{
nghs.second = after_row;
}
}
if(before_row != null_ind) neighbours_[neighbour_index(before_row, r)].second = new_order.front();
else row_first_cells_[r] = new_order.front();
if(after_row != null_ind) neighbours_[neighbour_index(after_row, r)].first = new_order.back();
else row_last_cells_[r] = new_order.back();
}
void detailed_placement::reorder_standard_cells(std::vector<index_t> const old_order, std::vector<index_t> const new_order){
assert(old_order.size() == new_order.size());
assert(not old_order.empty());
index_t before_row = neighbours_[neighbours_limits_[old_order.front()]].first;
index_t after_row = neighbours_[neighbours_limits_[old_order.back() ]].second;
index_t r = cell_rows_[new_order.front()];
for(index_t i=0; i<new_order.size(); ++i){
assert(cell_height(new_order[i]) == 1);
assert(cell_rows_[new_order[i]] == r);
auto & nghs = neighbours_[neighbours_limits_[new_order[i]]];
if(i > 0){
nghs.first = new_order[i-1];
}
else{
nghs.first = before_row;
}
if(i+1 < new_order.size()){
nghs.second = new_order[i+1];
}
else{
nghs.second = after_row;
}
}
if(before_row != null_ind) neighbours_[neighbour_index(before_row, r)].second = new_order.front();
else row_first_cells_[r] = new_order.front();
if(after_row != null_ind) neighbours_[neighbour_index(after_row, r)].first = new_order.back();
else row_last_cells_[r] = new_order.back();
}
// Sets the y orientation of every single-row, y-flippable cell from the parity of its row:
// the value is the row parity (odd = true) xor first_row_orient.
void row_compatible_orientation(netlist const & circuit, detailed_placement & pl, bool first_row_orient){
    for(index_t c = 0; c < circuit.cell_cnt(); ++c){
        bool const flippable = (circuit.get_cell(c).attributes & YFlippable) != 0;
        if(not (flippable and pl.cell_height(c) == 1)){
            continue;
        }
        bool const odd_row = (pl.cell_rows_[c] % 2 != 0);
        pl.plt_.orientations_[c].y_ = odd_row ^ first_row_orient;
    }
}
} // namespace dp
} // namespace coloquinte

View File

@ -0,0 +1,446 @@
#include "coloquinte/legalizer.hxx"
#include "coloquinte/optimization_subproblems.hxx"
#include <algorithm>
#include <cmath>
#include <queue>
namespace coloquinte{
namespace dp{
// Copies the result of the detailed placement back into gpl.
// Each coordinate/orientation is copied only if the matching attribute bit of the cell is set.
void get_result(netlist const & circuit, detailed_placement const & dpl, placement_t & gpl){
    for(index_t c = 0; c < circuit.cell_cnt(); ++c){
        auto const attrs = circuit.get_cell(c).attributes;

        if( (attrs & XMovable) != 0 ){
            gpl.positions_[c].x_ = dpl.plt_.positions_[c].x_;
        }
        if( (attrs & YMovable) != 0 ){
            gpl.positions_[c].y_ = dpl.plt_.positions_[c].y_;
        }
        if( (attrs & XFlippable) != 0 ){
            gpl.orientations_[c].x_ = dpl.plt_.orientations_[c].x_;
        }
        if( (attrs & YFlippable) != 0 ){
            gpl.orientations_[c].y_ = dpl.plt_.orientations_[c].y_;
        }
    }
}
// A movable cell waiting to be legalized; ordered by target x position.
struct cell_to_leg{
    int_t x_pos, y_pos;       // Target position
    index_t original_cell;    // Index of the cell in the circuit
    int_t width;
    index_t nbr_rows;         // Number of rows the cell spans

    cell_to_leg(int_t x, int_t y, index_t ind, int_t w, index_t rows)
        : x_pos(x), y_pos(y), original_cell(ind), width(w), nbr_rows(rows)
    {}

    // Cells are sorted from left to right
    bool operator<(cell_to_leg const o) const{
        return o.x_pos > x_pos;
    }

    // The same cell seen as a task for the single-row legalization problem
    legalizable_task<int_t> task() const{
        return legalizable_task<int_t>(width, x_pos, original_cell);
    }
};
// The x interval occupied by a fixed cell on one row.
struct fixed_cell_interval{
    int_t min_x, max_x;
    index_t cell_ind;   // Index of the fixed cell in the circuit

    fixed_cell_interval(int_t mn, int_t mx, index_t ind)
        : min_x(mn), max_x(mx), cell_ind(ind)
    {}

    // Reversed ordering on min_x: sorting puts the leftmost interval last
    bool operator<(fixed_cell_interval const o) const{
        return o.min_x < min_x;
    }
};
// Result of the legalization for one cell: x position, row and cell index.
struct cell_leg_properties{
    int_t x_pos;
    index_t row_pos;
    index_t ind;

    // Default construction leaves the fields uninitialized
    cell_leg_properties(){}

    // NOTE(review): r is received as int_t but stored in an index_t field
    cell_leg_properties(int_t x, int_t r, index_t i)
        : x_pos(x)
        , row_pos(r)
        , ind(i)
    {}
};
// Best-fit legalization that never moves an already placed cell.
//   obstacles : for each row, the fixed intervals sorted so that the leftmost one is last (consumed with pop_back)
//   cells     : the cells to legalize (sorted here by target x position)
//   rows      : output, for each row the indices of the cells and obstacles it contains, in the order they are pushed
//   x_min, x_max, y_orig, row_height, nbr_rows : geometry of the placement area
// Returns the position and row chosen for every legalized cell.
// Throws std::runtime_error if a cell spans more rows than available or if no location is found.
std::vector<cell_leg_properties> simple_legalize(
        std::vector<std::vector<fixed_cell_interval> > obstacles, std::vector<cell_to_leg> cells,
        std::vector<std::vector<index_t> > & rows,
        int_t x_min, int_t x_max, int_t y_orig,
        int_t row_height, index_t nbr_rows
    ){
    // For each row, the first x position not yet occupied by a legalized cell
    std::vector<int_t> first_available_position(nbr_rows, x_min);
    rows.resize(nbr_rows);

    // Sort the cells by x position
    std::sort(cells.begin(), cells.end());

    std::vector<cell_leg_properties> ret;

    for(cell_to_leg C : cells){
        // Dumb, quick and dirty best-fit legalization
        bool found_location = false;

        // Properties of the current best solution
        int_t best_x=0;
        int_t best_cost=0;
        index_t best_row=0;

        // Helper function: evaluates putting the cell with its first row at r and keeps it if it is the best so far
        // NOTE(review): the x displacement is not multiplied by the width here while additional_cost is - confirm the intended cost units
        auto check_row_cost = [&](index_t r, cell_to_leg const cell, int_t additional_cost){
            // Find where to put the cell in these rows
            // Simple method: get a range where we can put the cell
            assert(r + cell.nbr_rows <= nbr_rows);
            assert(additional_cost >= 0);

            // First position where we can put it
            int_t cur_pos = *std::max_element(first_available_position.begin() + r, first_available_position.begin() + r + cell.nbr_rows);
            int_t max_lim = x_max - cell.width;
            int_t interval_lim;
            do{
                interval_lim = max_lim;
                // For each row, test if obstacles prevent us from putting a cell here
                // Until we find a correct position or are beyond the maximum position
                for(index_t i = 0; i<cell.nbr_rows; ++i){
                    // Find the first obstacle which is after this position
                    // TODO: use lower/upper bound
                    auto it=obstacles[r+i].rbegin();
                    for(; it != obstacles[r+i].rend() && it->max_x <= cur_pos; ++it){
                    }
                    if(it != obstacles[r+i].rend()){ // There is an obstacle on the right
                        assert(it->min_x < it->max_x);
                        int_t cur_lim = it->min_x - cell.width; // Where the obstacle constrains us
                        interval_lim = std::min(cur_lim, interval_lim); // Constraint
                        if(cur_lim < cur_pos){ // This obstacle makes the current position impossible: move past it
                            cur_pos = std::max(it->max_x, cur_pos);
                        }
                    }
                }
                // Do it again until we find a solution
                // TODO: continue until we can't find a better solution (currently sticks before the first obstacle if there is enough whitespace)
            }while(interval_lim < cur_pos and interval_lim < max_lim and cur_pos < max_lim); // Not admissible and we encountered an obstacle and there is still hope

            if(interval_lim >= cur_pos){ // An admissible solution is found (and if cell.x_pos is between cur_pos and interval_lim it is optimal)
                int_t row_best_x = std::min(interval_lim, std::max(cur_pos, cell.x_pos));
                int_t row_cost_x = std::abs(row_best_x - cell.x_pos);
                if(not found_location or row_cost_x + additional_cost < best_cost){
                    found_location = true;
                    best_cost = row_cost_x + additional_cost;
                    best_x = row_best_x;
                    best_row = r;
                }
            }
        };

        // The row where we would prefer the cell to go
        if(C.nbr_rows > nbr_rows) throw std::runtime_error("Impossible to legalize a cell spanning more rows than are available\n");
        index_t central_row = std::min( (index_t) std::max( (C.y_pos - y_orig) / row_height, 0), nbr_rows-C.nbr_rows);

        // Try every possible row from the best one, until we can't improve the cost
        for(index_t row_dist = 0;
            (central_row + row_dist < nbr_rows or central_row >= row_dist)
            and (not found_location or (int_t) row_dist * row_height * C.width < (int_t) row_height + best_cost);
            ++row_dist
        ){
            // Upward: every first row r with r + C.nbr_rows <= nbr_rows is valid, including the last one
            if(central_row + row_dist + C.nbr_rows <= nbr_rows){
                int_t add_cost = C.width * std::abs(static_cast<int_t>(central_row + row_dist) * static_cast<int_t>(row_height) + y_orig - C.y_pos);
                check_row_cost(central_row + row_dist, C, add_cost);
            }
            // Downward: the central row itself is already handled by the branch above
            if(row_dist > 0 and central_row >= row_dist){
                int_t add_cost = C.width * std::abs(static_cast<int_t>(central_row - row_dist) * static_cast<int_t>(row_height) + y_orig - C.y_pos);
                check_row_cost(central_row - row_dist, C, add_cost);
            }
        }

        if(not found_location){ // We didn't find any whitespace to put the cell in
            throw std::runtime_error("Didn't manage to pack a cell due to dumb algorithm\n");
        }
        else{
            assert(best_x + C.width <= x_max and best_x >= x_min);
            // Update the occupied rows
            for(index_t r = best_row; r < best_row + C.nbr_rows; ++r){
                // Include the obstacles that are now on the left of the cell
                while(not obstacles[r].empty()
                    and obstacles[r].back().max_x <= best_x){
                    rows[r].push_back(obstacles[r].back().cell_ind);
                    obstacles[r].pop_back();
                }
                assert(obstacles[r].empty() or obstacles[r].back().min_x >= best_x + C.width);

                rows[r].push_back(C.original_cell);
                first_available_position[r] = best_x + C.width;
            }
            ret.push_back(cell_leg_properties(best_x, best_row, C.original_cell));
        }
    }

    // Finally, push the remaining fixed cells
    for(index_t r=0; r<nbr_rows; ++r){
        while(not obstacles[r].empty()){
            rows[r].push_back(obstacles[r].back().cell_ind);
            obstacles[r].pop_back();
        }
    }

    return ret;
}
// A better legalization function which is able to push already legalized cells
//   obstacles : for each row, the fixed intervals sorted so that the leftmost one is last (consumed with pop_back)
//   cells     : the cells to legalize (sorted here by target x position); only single-row cells are supported
//   rows      : output, for each row the indices of the cells and obstacles it contains, in the order they are pushed
//   x_min, x_max, y_orig, row_height, nbr_rows : geometry of the placement area
// Returns the position and row chosen for every legalized cell.
// Throws std::runtime_error for cells spanning several rows or when no location is found.
std::vector<cell_leg_properties> good_legalize(
        std::vector<std::vector<fixed_cell_interval> > obstacles, std::vector<cell_to_leg> cells,
        std::vector<std::vector<index_t> > & rows,
        int_t x_min, int_t x_max, int_t y_orig,
        int_t row_height, index_t nbr_rows
    ){
    // Two possibilities:
    // * Single OSRP (group of movable cells) at the current end of the row of standard cells
    // * Multiple OSRPs, between each pair of obstacles
    // -> allows pushing cells past obstacles
    // -> tricky with multiple standard cell heights
    // Therefore I chose single OSRP, which gets cleared and pushed to the final state whenever
    // * we encounter a multiple-rows cell
    // * a new standard cell gets past an obstacle

    // The current group of standard cells on the right of the row
    std::vector<OSRP_leg<int_t> > single_row_problems(nbr_rows);
    for(index_t r=0; r<nbr_rows; ++r){
        single_row_problems[r] = OSRP_leg<int_t>(x_min, obstacles[r].empty() ? x_max : obstacles[r].back().min_x);
    }
    rows.resize(nbr_rows);

    // Sort the cells by x position
    std::sort(cells.begin(), cells.end());

    std::vector<cell_leg_properties> ret;

    for(cell_to_leg C : cells){
        bool found_location = false;

        // Properties of the current best solution
        int_t best_cost=0;
        index_t best_row=0;
        index_t obstacles_passed = 0;

        // Helper function: evaluates putting the cell on row r and keeps it if it is the best so far
        auto check_row_cost = [&](index_t r, cell_to_leg const cell, int_t additional_cost){
            // Find where to put the cell in these rows
            // Check if we can put it in the current ranges and at what cost; if not or if the optimal position is beyond an obstacle, try after this obstacle too
            assert(cell.nbr_rows > 0);
            assert(r + cell.nbr_rows <= nbr_rows);
            assert(additional_cost >= 0);

            // Where can we put a standard cell if we allow to move the cells?
            if(cell.nbr_rows == 1){
                int_t cur_cost = 0;

                // Can we simply add it to the single row problem?
                bool found_here = single_row_problems[r].remaining_space() >= cell.width;
                // Number of obstacles to jump over for the best location on this row
                index_t loc_obstacles_passed = 0;
                if(found_here){
                    // Check the cost of pushing it here with possible displacement
                    cur_cost = single_row_problems[r].get_cost(cell.task()); // Don't update the row
                }

                // Other positions where we can put it, without moving other cells this time
                if(not found_here or cur_cost > 0){
                    index_t obstacles_to_throw = 0;
                    auto it = obstacles[r].rbegin();
                    while(it != obstacles[r].rend()){
                        ++ obstacles_to_throw;
                        auto prev_it = it++;
                        // The free region lies between this obstacle and the next one (or the end of the row)
                        int_t region_end = it != obstacles[r].rend() ? it->min_x : x_max;
                        if(region_end >= prev_it->max_x + cell.width){
                            int_t loc_x = std::min(region_end - cell.width, std::max(prev_it->max_x, cell.x_pos));
                            int_t loc_cost = cell.width * std::abs(cell.x_pos - loc_x);
                            if(not found_here or cur_cost > loc_cost){
                                found_here = true;
                                cur_cost = loc_cost;
                                loc_obstacles_passed = obstacles_to_throw;
                            }
                        }
                    }
                }

                if(found_here and (not found_location or cur_cost + additional_cost < best_cost)){
                    found_location = true;
                    best_cost = cur_cost + additional_cost;
                    best_row = r;
                    obstacles_passed = loc_obstacles_passed;
                    if(loc_obstacles_passed > 0) assert(not obstacles[r].empty());
                }
            }
            else{
                // If it is a fixed cell, we use fixed locations
                throw std::runtime_error("I don't handle fucking macros\n");
            }
        };

        // The row where we would prefer the cell to go
        if(C.nbr_rows > nbr_rows) throw std::runtime_error("Impossible to legalize a cell spanning more rows than are available\n");
        index_t central_row = std::min( (index_t) std::max( (C.y_pos - y_orig) / row_height, 0), nbr_rows-C.nbr_rows);

        // Try every possible row from the best one, until we can't improve the cost
        for(index_t row_dist = 0;
            (central_row + row_dist < nbr_rows or central_row >= row_dist)
            and (not found_location or (int_t) row_dist * row_height * C.width < (int_t) row_height + best_cost);
            ++row_dist
        ){
            // Upward: every first row r with r + C.nbr_rows <= nbr_rows is valid, including the last one
            if(central_row + row_dist + C.nbr_rows <= nbr_rows){
                int_t add_cost = C.width * std::abs(static_cast<int_t>(central_row + row_dist) * static_cast<int_t>(row_height) + y_orig - C.y_pos);
                check_row_cost(central_row + row_dist, C, add_cost);
            }
            // Downward: the central row itself is already handled by the branch above
            if(row_dist > 0 and central_row >= row_dist){
                int_t add_cost = C.width * std::abs(static_cast<int_t>(central_row - row_dist) * static_cast<int_t>(row_height) + y_orig - C.y_pos);
                check_row_cost(central_row - row_dist, C, add_cost);
            }
        }

        if(not found_location){ // We didn't find any whitespace to put the cell in
            throw std::runtime_error("Didn't manage to pack a cell: leave more whitespace and avoid macros near the right side\n");
        }
        else{
            // If the cell spans multiple rows, it becomes fixed
            // In this case or if the cell goes after an obstacle, push everything before the cell to the fixed state
            if(C.nbr_rows == 1){
                if(obstacles_passed == 0){ // Ok; just update the old single row problem
                    single_row_problems[best_row].push(C.task()); // Push it to the row
                }
                else{
                    assert(obstacles_passed > 0);
                    // Empty the single row problem: its cells get their final positions
                    for(auto p : single_row_problems[best_row].get_placement()){
                        rows[best_row].push_back(p.first);
                        ret.push_back(cell_leg_properties(p.second, best_row, p.first));
                    }

                    // Find where to put it: the new region starts after the last obstacle passed
                    int_t region_begin = x_min;
                    for(index_t i=0; i<obstacles_passed; ++i){
                        assert(not obstacles[best_row].empty());
                        region_begin = obstacles[best_row].back().max_x;
                        rows[best_row].push_back(obstacles[best_row].back().cell_ind);
                        obstacles[best_row].pop_back();
                    }
                    int_t region_end = obstacles[best_row].empty() ? x_max : obstacles[best_row].back().min_x;
                    single_row_problems[best_row] = OSRP_leg<int_t>(region_begin, region_end);
                    assert(region_end - region_begin >= C.width);
                    single_row_problems[best_row].push(C.task()); // Push this only cell to the single row problem
                }
            }
            else{
                throw std::runtime_error("I don't handle fucking macros\n");
            }
        }
    }

    for(index_t r=0; r<nbr_rows; ++r){
        // Finally, push the remaining standard cells in the row
        for(auto p : single_row_problems[r].get_placement()){
            rows[r].push_back(p.first);
            ret.push_back(cell_leg_properties(p.second, r, p.first));
        }

        // And the fixed cells
        while(not obstacles[r].empty()){
            rows[r].push_back(obstacles[r].back().cell_ind);
            obstacles[r].pop_back();
        }
    }

    return ret;
}
// Legalizes the placement pl on rows of height row_height inside surface.
// Cells that are both XMovable and YMovable are legalized; every other cell is a fixed obstacle
// occupying the rows it overlaps. Row i starts at surface.y_min_ + i * row_height.
// Throws std::runtime_error if row_height is not positive, if fixed cells overlap on a row,
// or if the legalization itself fails.
detailed_placement legalize(netlist const & circuit, placement_t const & pl, box<int_t> surface, int_t row_height){
    if(row_height <= 0) throw std::runtime_error("The rows' height should be positive\n");

    // Only complete rows are used: a partial strip at the top of the surface holds no row
    index_t nbr_rows = (surface.y_max_ - surface.y_min_) / row_height;
    // The position of the ith row is surface.y_min_ + i * row_height

    std::vector<std::vector<fixed_cell_interval> > row_occupation(nbr_rows);
    std::vector<cell_to_leg> cells;

    placement_t new_placement = pl;
    std::vector<index_t> placement_rows(circuit.cell_cnt());
    std::vector<index_t> cell_heights(circuit.cell_cnt());

    for(index_t i=0; i<circuit.cell_cnt(); ++i){
        auto cur = circuit.get_cell(i);

        // Assumes fixed if not both XMovable and YMovable
        if( (cur.attributes & XMovable) != 0 && (cur.attributes & YMovable) != 0){
            // Just truncate the position we target
            point<int_t> target_pos = pl.positions_[i];
            index_t cur_cell_rows = (cur.size.y_ + row_height -1) / row_height;
            cells.push_back(cell_to_leg(target_pos.x_, target_pos.y_, i, cur.size.x_, cur_cell_rows));
            cell_heights[i] = cur_cell_rows;
        }
        else{
            // In each row, we put the index of the fixed cell and the range that is already occupied
            int_t low_x_pos = pl.positions_[i].x_,
                  hgh_x_pos = pl.positions_[i].x_ + cur.size.x_,
                  low_y_pos = pl.positions_[i].y_,
                  hgh_y_pos = pl.positions_[i].y_ + cur.size.y_;

            new_placement.positions_[i] = point<int_t>(low_x_pos, low_y_pos);

            bool const outside =
                   hgh_y_pos <= surface.y_min_ or low_y_pos >= surface.y_max_
                or hgh_x_pos <= surface.x_min_ or low_x_pos >= surface.x_max_;

            // Rows [first_row, last_row) overlapped by the cell; empty when it touches no row
            index_t first_row = 0, last_row = 0;
            int_t rnd_low_x_pos = low_x_pos, rnd_hgh_x_pos = hgh_x_pos;
            if(not outside){
                assert(low_x_pos < hgh_x_pos and low_y_pos < hgh_y_pos);

                // Clip the cell to the surface
                rnd_hgh_x_pos = std::min(surface.x_max_, hgh_x_pos);
                rnd_low_x_pos = std::max(surface.x_min_, low_x_pos);
                int_t rnd_hgh_y_pos = std::min(surface.y_max_, hgh_y_pos);
                int_t rnd_low_y_pos = std::max(surface.y_min_, low_y_pos);

                first_row = (rnd_low_y_pos - surface.y_min_) / row_height;
                // Exclusive: a row that is only partially covered is included too.
                // Clamped to nbr_rows: when the surface height is not a multiple of row_height,
                // a cell reaching the top strip would otherwise index a row that doesn't exist.
                last_row = std::min(
                    static_cast<index_t>((rnd_hgh_y_pos - surface.y_min_ + row_height - 1) / row_height),
                    nbr_rows
                );
            }

            if(first_row >= last_row){
                // Outside of the surface, or entirely within the unused strip above the last row
                placement_rows[i] = null_ind;
                cell_heights[i] = 0;
            }
            else{
                assert(last_row <= nbr_rows);

                placement_rows[i] = first_row;
                cell_heights[i] = last_row - first_row;
                for(index_t r=first_row; r<last_row; ++r){
                    row_occupation[r].push_back(fixed_cell_interval(rnd_low_x_pos, rnd_hgh_x_pos, i));
                }
            }
        }
    }

    for(std::vector<fixed_cell_interval> & L : row_occupation){
        std::sort(L.begin(), L.end()); // Sorts from last to first, so that we may use pop_back()
        // Doesn't collapse them yet, which may make for bigger complexities
        for(index_t i=0; i+1<L.size(); ++i){
            if(L[i].min_x < L[i+1].max_x)
                throw std::runtime_error("Sorry, I don't handle overlapping fixed cells yet\n");
        }
    }

    std::vector<std::vector<index_t> > cells_by_rows;

    auto final_cells = good_legalize(row_occupation, cells, cells_by_rows,
        surface.x_min_, surface.x_max_, surface.y_min_,
        row_height, nbr_rows
    );

    // Write back the legalized positions and rows
    for(cell_leg_properties C : final_cells){
        new_placement.positions_[C.ind] = point<int_t>(C.x_pos, static_cast<int_t>(C.row_pos) * row_height + surface.y_min_);
        placement_rows[C.ind] = C.row_pos;
    }

    return detailed_placement(
        new_placement,
        placement_rows,
        cell_heights,
        cells_by_rows,
        surface.x_min_, surface.x_max_,
        surface.y_min_,
        nbr_rows, row_height
    );
}
} // namespace dp
} // namespace coloquinte

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,511 @@
#include "coloquinte/optimization_subproblems.hxx"
namespace coloquinte{
// Solves a one-dimensional transportation problem between `sources` and `sinks`.
// For every element, `.first` is used as a coordinate (see get_slope) and `.second` as a
// quantity: it is accumulated into prev_dem for the sources and into prev_cap for the sinks.
// Returns one value per source: the suffix minimum (taken from the last source backwards) of the
// "absolute positions" computed below, shifted by the cumulative demand of the preceding sources.
// NOTE(review): opt_r and next_r only ever advance, which suggests both inputs are expected to be
// sorted by coordinate - confirm against the callers.
std::vector<capacity_t> transport_1D(std::vector<t1D_elt> sources, std::vector<t1D_elt> sinks){
/* Description of the algorithm:
*
* For each cell, put it in its optimal region or the last region where a cell is if there is no space in it
* Push all changes in the derivative of the cost function to a priority queue; those changes occur
* when evicting the preceding cell from a region (most such changes are 0 and not considered, hence the complexity)
* when moving to a non-full region
* While the new cell overlaps with a new region, get the new slope (derivative) at this point
* and push all preceding cell until this region is freed or the slope becomes 0 (in which case the new region is now occupied)
*/
// A breakpoint of the cost function: at absolute position `pos` the slope changes by `slope_diff`
struct bound{
capacity_t pos;
int_t slope_diff;
// std::priority_queue is a max-heap, so top() is the bound with the largest position
bool operator<(bound const o) const{ return pos < o.pos; }
};
std::priority_queue<bound> bounds;
std::vector<capacity_t> constraining_pos;
// Prefix sums: prev_cap[k] is the capacity of sinks [0,k), prev_dem[k] the demand of sources [0,k)
std::vector<capacity_t> prev_cap(1, 0), prev_dem(1, 0);
for(auto const s : sinks){
prev_cap.push_back(s.second + prev_cap.back());
}
for(auto const s : sources){
prev_dem.push_back(s.second + prev_dem.back());
}
// The sinks have enough capacity to hold the whole demand
assert(prev_cap.back() >= prev_dem.back());
const capacity_t min_abs_pos = 0, max_abs_pos = prev_cap.back() - prev_dem.back();
assert(min_abs_pos <= max_abs_pos);
// Records a slope change; bounds at or below min_abs_pos are dropped
auto push_bound = [&](capacity_t p, int_t s){
assert(s >= 0);
if(p > min_abs_pos){
bound B;
B.pos = p;
B.slope_diff = s;
bounds.push(B);
}
};
// Distance to the right - distance to the left
auto get_slope = [&](index_t src, index_t boundary){
assert(boundary+1 < sinks.size());
assert(src < sources.size());
return std::abs(sources[src].first - sinks[boundary+1].first) - std::abs(sources[src].first - sinks[boundary].first);
};
capacity_t cur_abs_pos = min_abs_pos;
index_t opt_r=0, next_r=0, first_free_r=0;
for(index_t i=0; i<sources.size(); ++i){
// Update the optimal region
while(opt_r+1 < sinks.size() and (sinks[opt_r].first + sinks[opt_r+1].first)/2 < sources[i].first){
++opt_r;
}
// Update the next region
index_t prev_next_r = next_r;
while(next_r < sinks.size() and sinks[next_r].first <= sources[i].first){
++next_r;
}
index_t dest_reg = std::max(first_free_r, opt_r);
assert(dest_reg < sinks.size());
if(i>0){
// Push bounds due to changing the source crossing the boundary j/j+1
// Linear amortized complexity across all sources (next_r grows)
// get_slope(i-1, j) - get_slope(i, j) == 0 if j >= next_r
// get_slope(i-1, j) - get_slope(i, j) == 0 if j < prev_next_r-1
for(index_t j=std::max(prev_next_r,1u)-1; j<std::min(first_free_r, next_r+1); ++j){
assert(get_slope(i,j) <= get_slope(i-1,j));
push_bound(prev_cap[j+1] - prev_dem[i], get_slope(i-1, j) - get_slope(i,j));
}
}
// Add the bounds due to crossing the boundaries alone
for(index_t j=first_free_r; j<opt_r; ++j){
assert(get_slope(i,j) <= 0);
push_bound(prev_cap[j+1] - prev_dem[i], -get_slope(i, j));
}
first_free_r = std::max(first_free_r, opt_r);
capacity_t this_abs_pos = std::max(cur_abs_pos, prev_cap[first_free_r] - prev_dem[i]); // Just after the previous cell or at the beginning of the destination region
while(first_free_r+1 < sinks.size() and this_abs_pos > std::max(prev_cap[first_free_r+1] - prev_dem[i+1], min_abs_pos)){ // Absolute position that wouldn't make the cell fit in the region, and we are not in the last region yet
capacity_t end_pos = std::max(prev_cap[first_free_r+1] - prev_dem[i+1], min_abs_pos);
int_t add_slope = get_slope(i, first_free_r);
int_t slope = add_slope;
// Consume the recorded slope changes located after end_pos while the slope stays non-negative
while(not bounds.empty() and slope >= 0 and bounds.top().pos > end_pos){
this_abs_pos = bounds.top().pos;
slope -= bounds.top().slope_diff;
bounds.pop();
}
if(slope >= 0){ // We still push: the cell completely escapes the region
this_abs_pos = end_pos;
push_bound(end_pos, add_slope-slope);
}
else{ // Ok, absorbed the whole slope: push what remains and we still occupy the next region
push_bound(this_abs_pos, -slope);
++first_free_r;
}
}
cur_abs_pos = this_abs_pos;
constraining_pos.push_back(this_abs_pos);
}
assert(constraining_pos.size() == sources.size());
if(not constraining_pos.empty()){
// Calculate the final constraining_pos
constraining_pos.back() = std::min(max_abs_pos, constraining_pos.back());
}
// Suffix minimum: walking backwards, each entry becomes the minimum of itself and all later entries
std::partial_sum(constraining_pos.rbegin(), constraining_pos.rend(), constraining_pos.rbegin(), [](capacity_t a, capacity_t b)->capacity_t{ return std::min(a, b); });
// Shift each absolute position by the demand of the sources placed before it
for(index_t i=0; i<constraining_pos.size(); ++i){
constraining_pos[i] += prev_dem[i];
}
return constraining_pos;
}
namespace{ // Anonymous namespace to hide the transportation structures
// Incremental solver state for the generic transportation problem (see transport_generic).
// Sources are added one at a time with add_source(); the class keeps, for every region, the
// quantity allocated to each source, plus a "parent" structure (r_costs / r_parents / r_sources /
// arc_capacities) recomputed by dijkstra_update() that tells where to push sources out of a
// full region.
class current_allocation{
// Sentinel meaning "no region" / "no source"
static const index_t null_ind = std::numeric_limits<index_t>::max();
// Internal data structures
// Priority queue element to determine the source to be used between regions
struct movable_source{
index_t source;
float_t cost;
// Inverted on cost so that std::priority_queue (a max-heap) exposes the cheapest entry at top();
// among entries of equal cost, top() is the one with the largest source index
bool operator<(movable_source const o) const{
return cost > o.cost // Sorted by cost
|| (cost == o.cost && source < o.source); // And by index to limit the number of fractional elements between two regions
}
movable_source(index_t s, float_t c) : source(s), cost(c) {}
};
// Member data
// The current state
std::vector<std::vector<capacity_t> > sr_allocations; // For each region, for each source, the capacity allocated by the region
std::vector<std::vector<float_t> > sr_costs; // The costs from a region to a source
std::vector<capacity_t> s_demands; // The demands of the sources
std::vector<capacity_t> r_capacities; // The remaining capacities of the regions
// Shortest path data
std::vector<float_t> r_costs; // The costs of allocating to a region
std::vector<index_t> r_parents; // The parents of the regions i.e. the regions where we push sources first (or null_ind)
std::vector<index_t> r_sources; // The source involved in these edges
std::vector<capacity_t> arc_capacities; // The capacities of the edges to the parents, or of the region if no parent
// Best edges data
std::vector<std::vector<std::priority_queue<movable_source> > > best_interregions_costs; // What is the best source to move to go from region k1 to region k2?
// Incremented once per iteration of the allocation loop in add_source()
index_t dijkstra_cnt;
// Helper functions
// Number of regions
index_t region_cnt() const{
assert(sr_costs.size() == sr_allocations.size());
return sr_costs.size();
}
// Update the edge between two regions
void update_edge(index_t r1, index_t r2);
// Add a source to all heaps of a region; returns if we need to update a path
bool add_source_to_heaps(index_t r, index_t source);
// Initialize the heaps of a region
void create_heaps(index_t reg);
// Run the shortest path algorithm to update the cost of each region
void dijkstra_update();
// Update the edge and returns if we need to rerun Dijkstra
bool push_edge(index_t reg, capacity_t flow);
// Updates a full path when pushing an element; returns if we need to rerun Dijkstra
bool push_path(index_t pushed_reg, capacity_t demanded, capacity_t & flow);
public:
// Add a new source to the transportation problem; should be done in decreasing order of demand to keep low complexity
void add_source(index_t elt_ind);
// caps: capacity of each region; demands: demand of each source;
// costs: indexed as costs[region][source] (see the uses of sr_costs).
// The initializer list below follows the member declaration order.
current_allocation(std::vector<capacity_t> caps, std::vector<capacity_t> demands, std::vector<std::vector<float_t> > costs)
:
sr_allocations(caps.size()),
sr_costs(costs),
s_demands(demands),
r_capacities(caps),
r_costs(caps.size(), 0.0),
r_parents(caps.size(), null_ind),
r_sources(caps.size(), null_ind),
arc_capacities(caps),
best_interregions_costs(caps.size(), std::vector<std::priority_queue<movable_source> >(caps.size())),
dijkstra_cnt(0)
{
assert(caps.size() > 0);
assert(costs.size() == caps.size());
// Establish valid costs / parents before the first source is added
dijkstra_update();
}
// Returns a copy of the allocations, indexed [region][source]
std::vector<std::vector<capacity_t> > get_allocations() const{ return sr_allocations; }
index_t get_iterations_cnt() const { return dijkstra_cnt; }
};
// Relaxes the edge r1 -> r2 of the region graph used by dijkstra_update().
// The edge exists when some source currently allocated in r1 can be moved to r2; its weight is
// the cost difference stored in the heap best_interregions_costs[r1][r2].
// If going through r2 is cheaper than the best known way out of r1, r1's cost, parent, moved
// source and arc capacity are updated.
void current_allocation::update_edge(index_t r1, index_t r2){
    std::priority_queue<movable_source> & heap = best_interregions_costs[r1][r2];
    // Lazy deletion: discard entries whose source no longer has anything allocated in r1
    while(not heap.empty() and sr_allocations[r1][heap.top().source] == 0){
        heap.pop();
    }
    if(heap.empty()){
        return; // Nothing can be moved from r1 to r2: there is no edge
    }
    movable_source const cur = heap.top();
    float_t const new_cost = r_costs[r2] + cur.cost;
    if(new_cost < r_costs[r1]){
        // Store the cost of the whole path (edge + cost of r2), i.e. the value that was just
        // compared; storing only the edge cost would under-estimate every multi-hop path
        r_costs[r1] = new_cost;
        r_sources[r1] = cur.source;
        r_parents[r1] = r2;
        arc_capacities[r1] = sr_allocations[r1][cur.source];
    }
}
// Registers `source` as a candidate to be moved out of region `r` towards every other region.
// Returns true when, for at least one destination, the new entry ends up on top of the heap,
// meaning the best outgoing edge of `r` may have changed.
bool current_allocation::add_source_to_heaps(index_t r, index_t source){
    index_t const nbr_regions = region_cnt();
    bool top_changed = false;
    for(index_t dest=0; dest<nbr_regions; ++dest){
        if(dest != r){
            std::priority_queue<movable_source> & heap = best_interregions_costs[r][dest];
            heap.push(movable_source(source, sr_costs[dest][source] - sr_costs[r][source]));
            // Drop stale entries: sources that are no longer allocated in r
            while(sr_allocations[r][heap.top().source] == 0){
                heap.pop();
            }
            if(heap.top().source == source){
                top_changed = true;
            }
        }
    }
    return top_changed;
}
// Rebuilds from scratch the heaps of region `reg`: for every other region, the heap holds one
// entry per source currently allocated in `reg`, weighted by the cost of moving it there.
void current_allocation::create_heaps(index_t reg){
    index_t const nbr_regions = region_cnt();
    std::vector<capacity_t> const & allocated = sr_allocations[reg];
    // candidates[other] gathers the entries of the heap reg -> other
    std::vector<std::vector<movable_source> > candidates(nbr_regions);
    for(index_t src=0; src<allocated.size(); ++src){
        if(not (allocated[src] > 0)){
            continue; // This source is not present in the region
        }
        for(index_t other=0; other<nbr_regions; ++other){
            if(other != reg){
                candidates[other].push_back(movable_source(src, sr_costs[other][src] - sr_costs[reg][src]));
            }
        }
    }
    // Heapify every list in one go
    for(index_t other=0; other<nbr_regions; ++other){
        best_interregions_costs[reg][other] = std::priority_queue<movable_source>(candidates[other].begin(), candidates[other].end());
    }
}
// Moves `flow` units of the source attached to the edge reg -> parent(reg) from `reg` to its parent.
// Returns true when the shortest-path data may have become invalid and Dijkstra must be rerun.
bool current_allocation::push_edge(index_t reg, capacity_t flow){
    index_t const parent = r_parents[reg];
    index_t const moved  = r_sources[reg];
    // Remember whether the parent already held part of this source before the transfer
    bool const was_in_parent = sr_allocations[parent][moved] > 0;

    // Transfer the flow; an allocation that drops to zero is removed lazily from the heaps
    sr_allocations[reg][moved]    -= flow;
    sr_allocations[parent][moved] += flow;

    assert(sr_allocations[reg][cur_source_guard(moved)] >= 0); // The source to be pushed was indeed present in the region
    assert(r_capacities[reg] == 0); // The region is full, which explains why we need to push
    assert(flow <= arc_capacities[reg]); // The flow is not bigger than what can be sent

    // Keep the arc capacity in sync in case Dijkstra is not rerun
    arc_capacities[reg] = sr_allocations[reg][moved];

    if(arc_capacities[reg] == 0){
        // The source has left the region entirely: the edge is gone
        return true;
    }
    if(not was_in_parent and r_capacities[parent] == 0){
        // A full region received a source it did not hold: its heaps need the new entry
        return add_source_to_heaps(parent, moved);
    }
    // Same edge, same cost, still some capacity: the path remains valid
    return false;
}
void current_allocation::dijkstra_update(){
// Simple case of the regions with remaining capacity
std::vector<int> visited(region_cnt(), 0);
index_t visited_cnt = 0;
for(index_t i=0; i<region_cnt(); ++i){
r_sources[i] = null_ind;
r_parents[i] = null_ind;
if(r_capacities[i] > 0){
r_costs[i] = 0.0;
arc_capacities[i] = r_capacities[i];
visited[i] = 1;
++visited_cnt;
}
else{
r_costs[i] = std::numeric_limits<float_t>::infinity();
arc_capacities[i] = 0;
}
}
// if(visited_cnt <= 0) throw std::runtime_error("Capacity problem: no region has been marked as reachable\n");
if(visited_cnt == region_cnt()){ return; }
// Get the costs for every non-visited region
for(index_t i=0; i<region_cnt(); ++i) if(visited[i] == 0){ // For every region that is not visited yet
for(index_t j=0; j<region_cnt(); ++j) if(visited[j] == 1){ // For every already visited region
// Get the best interregion cost
update_edge(i,j);
}
}
while(visited_cnt < region_cnt()){
// Find the region with the lowest cost to visit; mark it visited
index_t best_reg = null_ind;
float_t best_cost = std::numeric_limits<float_t>::infinity();
for(index_t i=0; i<region_cnt(); ++i) if(visited[i] == 0){ // For every region that is not visited yet
if(r_costs[i] < best_cost){
best_cost = r_costs[i];
best_reg = i;
}
}
if(best_reg == null_ind) break; // Some regions are unreachable, typically because they have zero capacity at the beginning
visited[best_reg] = 1;
++visited_cnt;
// Update the cost for every unvisited region
for(index_t i=0; i<region_cnt(); ++i) if(visited[i] == 0){ // For every region that is not visited yet
update_edge(i, best_reg);
}
}
}
// Sends as much of `demanded` as possible along the parent chain starting at `pushed_reg`.
// `flow` receives the quantity actually sent (the minimum arc capacity on the chain, capped by
// the demand). Returns true when the shortest-path data must be recomputed before the next push.
bool current_allocation::push_path(index_t pushed_reg, capacity_t demanded, capacity_t & flow){
    // Bottleneck of the path
    flow = demanded;
    for(index_t reg = pushed_reg; reg != null_ind; reg = r_parents[reg]){
        if(arc_capacities[reg] < flow){
            flow = arc_capacities[reg];
        }
    }

    // Shift sources along every edge of the path, from the pushed region to the root
    bool rerun_dijkstra = false;
    index_t reg = pushed_reg;
    while(r_parents[reg] != null_ind){
        assert(r_capacities[reg] == 0);
        if(push_edge(reg, flow)){
            rerun_dijkstra = true;
        }
        reg = r_parents[reg];
    }

    // `reg` is now the root: the region with free capacity that absorbs the flow
    assert(r_capacities[reg] > 0);
    assert(arc_capacities[reg] == r_capacities[reg]);
    assert(r_capacities[reg] >= flow);
    r_capacities[reg]   -= flow;
    arc_capacities[reg] -= flow;

    if(r_capacities[reg] == 0){
        // The root just became full: from now on sources may have to be moved out of it
        create_heaps(reg);
        rerun_dijkstra = true;
    }
    assert(flow > 0);
    // The caller performs the actual Dijkstra update later, if requested
    return rerun_dijkstra;
}
// Allocates the whole demand of source `elt_ind` to the regions, pushing previously allocated
// sources along the current shortest paths when the chosen region is full.
// Throws std::runtime_error when no region is reachable.
void current_allocation::add_source(index_t elt_ind){
    index_t const nbr_regions = region_cnt();
    // One more allocation slot in every region for the new source
    for(index_t reg=0; reg<nbr_regions; ++reg){
        sr_allocations[reg].push_back(0);
    }

    bool stale_paths = false;
    for(capacity_t remaining = s_demands[elt_ind]; remaining > 0; ){
        // Refresh the paths if the previous push invalidated them
        if(stale_paths){
            dijkstra_update();
            stale_paths = false;
        }
        ++ dijkstra_cnt;

        // Cheapest region for this source: cost of freeing room there plus the source's own cost
        index_t target = null_ind;
        float_t target_cost = std::numeric_limits<float_t>::infinity();
        for(index_t reg=0; reg<nbr_regions; ++reg){
            if(r_costs[reg] + sr_costs[reg][elt_ind] < target_cost){
                target = reg;
                target_cost = r_costs[reg] + sr_costs[reg][elt_ind];
            }
        }
        if(target == null_ind){ throw std::runtime_error("No reachable region found\n"); }

        capacity_t sent = 0;
        stale_paths = push_path(target, remaining, sent);
        remaining -= sent;
        // The new source's own allocation is only recorded here; the heaps learn about it below
        sr_allocations[target][elt_ind] += sent;
    }

    // Make the new source available for future moves out of the full regions that hold it
    for(index_t reg=0; reg<nbr_regions; ++reg){
        if(r_capacities[reg] == 0 and sr_allocations[reg][elt_ind] > 0){
            if(add_source_to_heaps(reg, elt_ind)){
                stale_paths = true;
            }
        }
    }
    // Leave consistent paths behind for the next source
    if(stale_paths){
        dijkstra_update();
    }
}
} // End anonymous namespace
std::vector<std::vector<capacity_t> > transport_generic(std::vector<capacity_t> const & capacities, std::vector<capacity_t> const & demands, std::vector<std::vector<float_t> > const & costs){
current_allocation transporter(capacities, demands, costs);
for(index_t i=0; i<demands.size(); ++i){
transporter.add_source(i);
}
return transporter.get_allocations();
}
// Computes positions for the cells of one row, kept in their given order.
// widths[i] is the width of cell i, ranges[i] the (first, second) interval used to derive its
// lower and upper limits, const_slopes[i] its initial slope, and `bounds` a list of breakpoints
// (cell index `c`, position `pos`, slope change `slope`).
// Returns false, leaving `positions` untouched, as soon as a cell's upper limit falls below the
// running lower limit; otherwise fills `positions` (one entry per cell) and returns true.
// NOTE(review): the consumption loop on `bounds[j].c == i` presumably relies on cell_bound's
// operator< ordering by cell index - confirm in its declaration.
bool place_convex_single_row(std::vector<int_t> const & widths, std::vector<std::pair<int_t, int_t> > const & ranges, std::vector<cell_bound> bounds, std::vector<int_t> const & const_slopes, std::vector<int_t> & positions){
std::sort(bounds.begin(), bounds.end());
// Breakpoint expressed in "absolute" coordinates, i.e. with the preceding widths subtracted
struct bound{
int_t abs_pos;
int_t slope_diff;
// Max-heap order: top() of the priority queue is the breakpoint with the largest abs_pos
bool operator<(bound const o) const{ return abs_pos < o.abs_pos; }
bound(int_t p, int_t s) : abs_pos(p), slope_diff(s) {}
};
std::priority_queue<bound> prio_queue;
// prev_widths[i] is the total width of cells [0,i)
std::vector<int_t> prev_widths(widths.size()+1, 0);
std::partial_sum(widths.begin(), widths.end(), std::next(prev_widths.begin()));
std::vector<int_t> constraining_pos(widths.size());
int_t lower_lim = std::numeric_limits<int_t>::min();
for(index_t i=0, j=0; i<widths.size(); ++i){
int_t old_width = prev_widths[i];
int_t new_width = prev_widths[i+1];
// The lower limit only ever increases from one cell to the next
lower_lim = std::max(ranges[i].first - old_width, lower_lim);
int_t upper_lim = ranges[i].second - new_width;
// Queue the breakpoints belonging to cell i
for(; j<bounds.size() and bounds[j].c == i; ++j){
prio_queue.push(bound(bounds[j].pos - old_width, bounds[j].slope));
}
if(upper_lim < lower_lim){ // Infeasible
return false;
}
int_t cur_slope = const_slopes[i];
int_t cur_pos = upper_lim;
// Pop breakpoints while the slope is still positive or the breakpoint lies beyond the upper limit
while(not prio_queue.empty() and (cur_slope > 0 or prio_queue.top().abs_pos > upper_lim)){
cur_slope -= prio_queue.top().slope_diff;
cur_pos = prio_queue.top().abs_pos;
prio_queue.pop();
}
// Clamp into [lower_lim, upper_lim]
int_t final_abs_pos = std::max(std::min(cur_pos, upper_lim), lower_lim);
constraining_pos[i] = final_abs_pos;
if(cur_slope < 0){
// Put back the part of the last slope change that was not consumed
prio_queue.push(bound(final_abs_pos, -cur_slope));
}
}
positions.resize(constraining_pos.size());
// Suffix minimum of the constraining positions, computed from the last cell backwards
std::partial_sum(constraining_pos.rbegin(), constraining_pos.rend(), positions.rbegin(), [](int_t a, int_t b)->int_t{ return std::min(a,b); });
// Back from absolute coordinates to actual positions
for(index_t i=0; i<positions.size(); ++i){
positions[i] += prev_widths[i];
}
return true;
}
bool place_noncvx_single_row(std::vector<int_t> const & widths, std::vector<std::pair<int_t, int_t> > const & ranges, std::vector<int> const & flippables, std::vector<cell_bound> bounds, std::vector<int_t> const & const_slopes, std::vector<int_t> & positions, std::vector<int> & flippings){
flippings = std::vector<int>(positions.size(), 0);
return place_convex_single_row(widths, ranges, bounds, const_slopes, positions);
}
} // Namespace coloquinte

View File

@ -0,0 +1,166 @@
#include "coloquinte/circuit_helper.hxx"
#include <stack>
#include <functional>
#include <algorithm>
namespace coloquinte{
namespace gp{
namespace{
// Sentinel index value: the largest value representable by index_t
index_t const null_ind = std::numeric_limits<index_t>::max();
// Greedy orientation optimization along one axis.
// i_coor extracts the coordinate of interest from a point, b_coor gives access to the matching
// orientation flag, and FLIPPABLE is the attribute mask selecting the cells that may be flipped.
// Every flippable cell is put on a work stack. For each cell taken from the stack, the sum over
// its nets of the extent (max - min) is evaluated with the cell's pins at `pos + offset`
// (p_cost) and at `pos + size - offset` (n_cost); the flag is set to true when p_cost is
// smaller, to false when n_cost is smaller, and left unchanged on a tie. When the flag changed,
// flippable cells holding an extreme pin of one of the nets within the affected interval are
// pushed back on the stack.
// NOTE(review): std::minmax_element on pin_1D relies on pin_1D's operator<, presumably comparing
// positions - confirm in its declaration.
inline void opt_orient(netlist const & circuit, placement_t & pl, std::function<int_t (point<int_t>)> i_coor, std::function<bool & (point<bool> &)> b_coor,mask_t FLIPPABLE){
std::stack<index_t> opt_cells;
for(index_t cell_ind = 0; cell_ind < circuit.cell_cnt(); ++cell_ind){
if( (circuit.get_cell(cell_ind).attributes & FLIPPABLE) != 0)
opt_cells.push(cell_ind);
}
while(not opt_cells.empty()){
index_t cell_ind = opt_cells.top(); opt_cells.pop();
assert((circuit.get_cell(cell_ind).attributes & FLIPPABLE) != 0);
// What is the current orientation?
bool old_orientation = b_coor(pl.orientations_[cell_ind]);
int_t pos = i_coor(pl.positions_[cell_ind]);
int_t size = i_coor(circuit.get_cell(cell_ind).size);
// Check both orientations of the cell
std::vector<index_t> involved_nets;
for(netlist::pin_t p : circuit.get_cell(cell_ind)){
involved_nets.push_back(p.net_ind);
}
// Deal with cells with multiple pins in one net (uniquify)
std::sort(involved_nets.begin(), involved_nets.end());
involved_nets.resize(std::distance(involved_nets.begin(), std::unique(involved_nets.begin(), involved_nets.end())));
// Accumulated cost of the two orientations, and the cells to revisit if the orientation changes
std::int64_t p_cost = 0, n_cost = 0;
std::vector<index_t> extreme_elements;
for(index_t n : involved_nets){
// Pins of the net on other cells (with their current position) and offsets of this cell's own pins
std::vector<pin_1D> other_pins;
std::vector<int_t> offsets;
for(auto p : circuit.get_net(n)){
if(p.cell_ind != cell_ind){
other_pins.push_back(pin_1D(
p.cell_ind,
i_coor(pl.positions_[p.cell_ind])
+ (b_coor(pl.orientations_[p.cell_ind]) ? i_coor(p.offset) : i_coor(circuit.get_cell(p.cell_ind).size) - i_coor(p.offset)),
0, // Don't care about the offset
(circuit.get_cell(p.cell_ind).attributes & FLIPPABLE) != 0)
);
}
else{
offsets.push_back(i_coor(p.offset));
}
}
assert(offsets.size() > 0);
if(other_pins.size() > 0){ // Else the orientation of the cell doesn't change anything
auto minmaxC = std::minmax_element(other_pins.begin(), other_pins.end());
auto minmaxO = std::minmax_element(offsets.begin(), offsets.end());
// Net extent with the cell's pins at pos + offset ...
p_cost += std::max(pos + *minmaxO.second, minmaxC.second->pos) - std::min(pos + *minmaxO.first, minmaxC.first->pos);
// ... and with the cell's pins mirrored, at pos + size - offset
n_cost += std::max(pos + size - *minmaxO.first, minmaxC.second->pos) - std::min(pos + size - *minmaxO.second, minmaxC.first->pos);
int_t min_pin_pos = std::min(pos + *minmaxO.second, pos + size - *minmaxO.first),
max_pin_pos = std::max(pos + *minmaxO.second, pos + size - *minmaxO.first);
// Do the extreme elements change between the two positions?
if(minmaxC.second->movable
and (minmaxC.second->pos < max_pin_pos)
and (minmaxC.second->pos > min_pin_pos) ){
extreme_elements.push_back(minmaxC.second->cell_ind);
}
if(minmaxC.first->movable
and (minmaxC.first->pos < max_pin_pos)
and (minmaxC.first->pos > min_pin_pos) ){
extreme_elements.push_back(minmaxC.first->cell_ind);
}
}
}
// Keep the current orientation when both costs are equal
if(p_cost < n_cost)
b_coor(pl.orientations_[cell_ind]) = true;
if(p_cost > n_cost)
b_coor(pl.orientations_[cell_ind]) = false;
// If we changed the orientation, check the extreme pins which changed and try their cells again
if(b_coor(pl.orientations_[cell_ind]) != old_orientation){
std::sort(extreme_elements.begin(), extreme_elements.end());
extreme_elements.resize(std::distance(extreme_elements.begin(), std::unique(extreme_elements.begin(), extreme_elements.end())));
for(index_t extreme_cell : extreme_elements){
if( (circuit.get_cell(extreme_cell).attributes & FLIPPABLE) != 0)
opt_cells.push(extreme_cell);
}
}
}
}
/*
inline void spread_orient(netlist const & circuit, placement_t & pl, std::function<float_t & (point<float_t> &)> coor, mask_t FLIPPABLE){
std::vector<float_t> weights(circuit.cell_cnt(), 0.0);
for(index_t n=0; n<circuit.net_cnt(); ++n){
float_t min_pos=INF, max_pos=-INF;
float_t min_offs=INF, max_offs=-INF;
index_t min_ind=null_ind, max_ind=null_ind;
for(netlist::pin_t p : circuit.get_net(n)){
if( (circuit.get_cell(p.cell_ind).attributes & FLIPPABLE) != 0){
float_t pos = coor(pl.positions_[p.cell_ind]);
if(pos < min_pos){
min_pos = pos;
min_ind = p.cell_ind;
min_offs = coor(p.offset);
}
if(pos > max_pos){
max_pos = pos;
max_ind = p.cell_ind;
max_offs = coor(p.offset);
}
}
else{
float_t pos = coor(pl.positions_[p.cell_ind]) + coor(pl.orientations_[p.cell_ind]) * coor(p.offset);
if(pos < min_pos){
min_pos = pos;
min_ind = null_ind;
}
if(pos > max_pos){
max_pos = pos;
max_ind = null_ind;
}
}
}
float_t net_weight = circuit.get_net(n).weight;
if(min_ind != null_ind) weights[min_ind] += net_weight * min_offs;
if(max_ind != null_ind) weights[max_ind] -= net_weight * max_offs;
}
for(index_t c=0; c<circuit.cell_cnt(); ++c){
coor(pl.orientations_[c]) = (weights[c] >= 0.0) ? 1.0 : -1.0;
}
}
*/
} // End anonymous namespace
// Runs the orientation optimization on the x axis, for the cells flagged XFlippable.
void optimize_x_orientations(netlist const & circuit, placement_t & pl){
    std::function<int_t (point<int_t>)> const x_of =
        [](point<int_t> p) -> int_t { return p.x_; };
    std::function<bool & (point<bool> &)> const x_flag_of =
        [](point<bool> & p) -> bool & { return p.x_; };
    opt_orient(circuit, pl, x_of, x_flag_of, XFlippable);
}
// Runs the orientation optimization on the y axis, for the cells flagged YFlippable.
void optimize_y_orientations(netlist const & circuit, placement_t & pl){
    std::function<int_t (point<int_t>)> const y_of =
        [](point<int_t> p) -> int_t { return p.y_; };
    std::function<bool & (point<bool> &)> const y_flag_of =
        [](point<bool> & p) -> bool & { return p.y_; };
    opt_orient(circuit, pl, y_of, y_flag_of, YFlippable);
}
// Optimizes the orientations on both axes: a single pass on x followed by a single pass on y
// (the two passes are not repeated until convergence).
void optimize_exact_orientations(netlist const & circuit, placement_t & pl){
    optimize_x_orientations(circuit, pl); // x axis first
    optimize_y_orientations(circuit, pl); // then y axis
}
/*
void spread_orientations(netlist const & circuit, placement_t & pl){
spread_orient(circuit, pl, [](point<float_t> & p) -> float_t & { return p.x_; }, XFlippable);
spread_orient(circuit, pl, [](point<float_t> & p) -> float_t & { return p.y_; }, YFlippable);
}
*/
} // namespace gp
} // namespace coloquinte

View File

@ -0,0 +1,258 @@
#include "coloquinte/piecewise_linear.hxx"
#include <cassert>
#include <stdexcept>
namespace coloquinte{
namespace{
struct pl_edge{
p_v f, s;
static void push_intersections(pl_edge a, pl_edge b, piecewise_linear_function & lf){
// Strict, because it makes everything easier
//assert(a.f.first < b.s.first and a.s.first > b.f.first);
//assert(a.f.first < a.s.first and b.f.first < b.s.first);
assert(a.f.first <= b.s.first and a.s.first >= b.f.first);
assert(a.f.first <= a.s.first and b.f.first <= b.s.first);
// ra = (a.s.second - a.f.second) / (a.s.first - a.f.first)
// xintersect = (yb - ya - xb * rb + xa * ra) / (ra - rb)
double ra = static_cast<double>(a.s.second - a.f.second) / (a.s.first - a.f.first);
double rb = static_cast<double>(b.s.second - b.f.second) / (b.s.first - b.f.first);
double xintersect = (b.f.second - a.f.second - rb * b.f.first + ra * a.f.first) / (ra - rb);
if( not xintersect ) return;
int_t pos = xintersect;
if( std::ceil(xintersect) == std::floor(xintersect) ){ // Exact integer intersection
if(pos > std::max(a.f.first, b.f.first) and pos < std::min(a.s.first, b.s.first) ){ // Necessarily smaller than s.first due to the previous condition
lf.point_values.push_back(p_v(pos, a.value_at(pos)));
}
}
else{ // Non exact intersection: create two integers since I don't want to mess with floating point
int_t pos1 = pos;
int_t pos2 = pos + 1;
// Value_at is only an approximation, but it shouldn't be too bad
if(pos1 > std::max(a.f.first, b.f.first) and pos1 < std::min(a.s.first, b.s.first))
lf.point_values.push_back(p_v(pos1, std::min(a.value_at(pos1), b.value_at(pos1))));
if(pos2 > std::max(a.f.first, b.f.first) and pos2 < std::min(a.s.first, b.s.first))
lf.point_values.push_back(p_v(pos2, std::min(a.value_at(pos2), b.value_at(pos2))));
}
}
// Lower-rounded value
int_t value_at(int_t pos) const{
assert(pos >= f.first and pos <= s.first and s.first > f.first);
return (static_cast<std::int64_t>(f.second) * (s.first - pos) + static_cast<std::int64_t>(s.second) * (pos - f.first)) / (s.first - f.first);
}
// Lower-rounded value
int_t pos_at(int_t val) const{
assert(val <= std::max(f.second, s.second) and val >= std::min(f.second, s.second));
assert(f.second != s.second);
return (static_cast<std::int64_t>(f.first) * (s.second - val) + static_cast<std::int64_t>(s.first) * (val - f.second)) / (s.second - f.second);
}
bool above(p_v const o) const{
int_t pos = o.first;
assert(pos > f.first and pos < s.first);
return (static_cast<std::int64_t>(f.second) * (s.first - pos) + static_cast<std::int64_t>(s.second) * (pos - f.first)) > o.second * (s.first - f.first);
}
pl_edge(p_v a, p_v b) : f(a), s(b) {}
};
} // End anonymous namespace
// Adds to the function a line of the given slope; the line is zero `offset` units after the
// first point of the function.
void piecewise_linear_function::add_monotone(int_t slope, int_t offset){
    if(point_values.empty()){
        return; // Nothing to update
    }
    // The abscissa of the first point never changes below: read it once
    int_t const first_pos = point_values.front().first;
    for(auto it = point_values.begin(); it != point_values.end(); ++it){
        it->second += slope * (it->first - first_pos - offset);
    }
}
// Adds to the function a two-slope function that is zero at `pos`, with slope s_l on its left
// and s_r on its right.
// When `pos` falls strictly between two existing points, a point is first inserted at `pos`
// (interpolated on the enclosing edge) so that the break is represented. `pos` may also lie
// outside the current points, in which case no point is inserted and a single slope ends up
// being applied to every point.
void piecewise_linear_function::add_bislope(int_t s_l, int_t s_r, int_t pos){
    // First point whose abscissa is not smaller than pos
    auto const first_ge = std::lower_bound(
        point_values.begin(), point_values.end(), pos,
        [](p_v o, int_t v){ return o.first < v; }
    );
    bool const strictly_inside =
            first_ge != point_values.end()
        and first_ge != point_values.begin()
        and first_ge->first != pos;
    if(strictly_inside){
        assert(first_ge->first > pos);
        p_v const breakpoint(pos, pl_edge(*std::prev(first_ge), *first_ge).value_at(pos));
        point_values.insert(first_ge, breakpoint);
    }
    // The point at pos itself, if any, is left untouched
    for(auto & pv : point_values){
        if(pv.first > pos){
            pv.second += s_r * (pv.first - pos);
        }
        else if(pv.first < pos){
            pv.second += s_l * (pv.first - pos);
        }
    }
}
// Builds the zero function defined by two points, at min_def and max_def.
piecewise_linear_function::piecewise_linear_function(int_t min_def, int_t max_def){
    p_v const lower_end(min_def, 0);
    p_v const upper_end(max_def, 0);
    point_values.push_back(lower_end);
    point_values.push_back(upper_end);
}
// Returns the running minimum of the function: only the points that go strictly below every
// value seen so far are kept, together with the abscissa where a descending edge crosses the
// previous minimum when that abscissa is a new one.
piecewise_linear_function piecewise_linear_function::previous_min() const{
    piecewise_linear_function result;
    assert(not point_values.empty());
    auto cur = point_values.begin();
    result.point_values.push_back(*cur);
    for(++cur; cur != point_values.end(); ++cur){
        // The last kept point carries the minimum found so far
        p_v const last_kept = result.point_values.back();
        int_t const running_min = last_kept.second;
        assert(cur->first >= last_kept.first);
        if(not (cur->second < running_min)){
            continue; // Not a new minimum
        }
        auto const before = std::prev(cur);
        if(before->first != last_kept.first){ // Otherwise the edge starts at the last kept point: nothing to add
            int_t const crossing = pl_edge(*before, *cur).pos_at(running_min);
            if(crossing != last_kept.first and crossing != cur->first){
                result.point_values.push_back(p_v(crossing, running_min));
            }
        }
        result.point_values.push_back(*cur);
    }
    return result;
}
// Builds the sum of `a` and of this function translated by `shift` (a point of this function at
// x is considered at x + shift), then returns previous_min() of that sum.
// The two point lists are merged in abscissa order; at each merged abscissa the value of the
// other function is obtained by interpolation on its current edge. The merge stops once the
// points of `a` are exhausted.
piecewise_linear_function piecewise_linear_function::previous_min_of_sum(piecewise_linear_function const & a, int_t shift) const{
piecewise_linear_function ret;
// Go to the correct definition
// The b_* iterators walk this function, the a_* iterators walk `a`
auto b_begin = point_values.begin(), a_begin = a.point_values.begin();
auto b_it = b_begin, a_it = a_begin;
auto b_end = point_values.end(), a_end = a.point_values.end();
while(a_it != a_end){
if(b_it == b_end or a_it->first < b_it->first+shift){ // Ok, create an edge and calculate the value
// Points of `a` located before the first (shifted) point of this function are skipped
if(b_it != b_begin){
int_t value;
if(b_it != b_end){
pl_edge b_edge(*std::prev(b_it), *b_it);
value = b_edge.value_at(a_it->first-shift);
}
else{
// Past the last point of this function: its last value is used
value = point_values.back().second;
}
ret.point_values.push_back(p_v(a_it->first, a_it->second + value));
}
++a_it;
}
else if(a_it->first > b_it->first+shift){
// Points of this function located before the first point of `a` are skipped
if(a_it != a_begin){
pl_edge a_edge(*std::prev(a_it), *a_it);
int_t value = a_edge.value_at(b_it->first+shift);
ret.point_values.push_back(p_v(b_it->first+shift, b_it->second + value));
}
++b_it;
}
else{ // if(a_it->first == b_it->first+shift){
ret.point_values.push_back(p_v(a_it->first, a_it->second + b_it->second));
++a_it;
++b_it;
}
}
return ret.previous_min();
}
// Scans the points from the last one backwards and stops at the first point whose abscissa is
// not greater than pos. Returns pos itself when that point is followed by a point located after
// pos, and the abscissa of the point otherwise.
// pos must not be smaller than the abscissa of the first point: this is a precondition, checked
// by an assert and reported with std::out_of_range when asserts are disabled.
int_t piecewise_linear_function::last_before(int_t pos) const{
    for(auto it = point_values.rbegin(); it != point_values.rend(); ++it){
        if(it->first <= pos){
            // std::prev on a reverse iterator designates the next point in increasing order
            if(it != point_values.rbegin() and std::prev(it)->first > pos){ // On a negative slope
                return pos;
            }
            return it->first; // First point or not mapped to a negative slope in the original function
        }
    }
    assert(false); // We should have found it if the bound was correct
    // Never fall off the end of a non-void function when asserts are compiled out
    throw std::out_of_range("piecewise_linear_function::last_before: position lies before the first point\n");
}
// Value of the function at pos: the stored value when pos is the abscissa of a point, the
// interpolation on the enclosing edge otherwise.
// pos must lie between the first and the last point (checked by asserts).
int_t piecewise_linear_function::value_at(int_t pos) const{
    // First position bigger or equal than pos
    auto it = std::lower_bound(point_values.begin(), point_values.end(), pos, [](p_v o, int_t v){ return o.first < v; });
    // pos beyond the last point would make `it` the end iterator, which must not be dereferenced
    assert(it != point_values.end());
    if(pos != it->first){
        assert(it != point_values.begin());
        return pl_edge(*std::prev(it), *it).value_at(pos);
    }
    else{
        return it->second;
    }
}
// Pointwise minimum of two functions that share the same first and last abscissa (asserted).
// The edges of both functions are walked simultaneously; crossings are added by
// pl_edge::push_intersections and an edge end point is kept when the other function's current
// edge is above it.
piecewise_linear_function piecewise_linear_function::piecewise_linear_function::minimum(piecewise_linear_function const & a, piecewise_linear_function const & b){
assert(a.point_values.front().first == b.point_values.front().first);
assert(a.point_values.back().first == b.point_values.back().first);
piecewise_linear_function ret;
auto a_it = a.point_values.begin(), b_it = b.point_values.begin();
auto a_end = a.point_values.end(), b_end = b.point_values.end();
// Both functions start at the same abscissa: keep the smaller value
ret.point_values.push_back(p_v(a_it->first, std::min(a_it->second, b_it->second)));
// Each function needs at least two points, i.e. one edge
assert(std::next(a_it) != a_end and std::next(b_it) != b_end);
while(std::next(a_it) != a_end and std::next(b_it) != b_end){
pl_edge a_edge(*a_it, *std::next(a_it)), b_edge(*b_it, *std::next(b_it));
// Three cases: one of them always below, or both intersect
// Both intersect: we push the values when intersecting
pl_edge::push_intersections(a_edge, b_edge, ret);
// In any case, we push the value of the one below if it finishes, and increment the iterator
if(a_edge.s.first < b_edge.s.first){
++a_it;
if(b_edge.above(a_edge.s)){ // We push a_edge.s
ret.point_values.push_back(a_edge.s);
}
}
else if(a_edge.s.first > b_edge.s.first){
++b_it;
if(a_edge.above(b_edge.s)){ // We push b_edge.s
ret.point_values.push_back(b_edge.s);
}
}
else{
// Both edges end at the same abscissa: keep the smaller value and advance both
ret.point_values.push_back(p_v(a_edge.s.first, std::min(a_edge.s.second, b_edge.s.second)));
++a_it;
++b_it;
}
}
return ret;
}
} // End namespace coloquinte

File diff suppressed because it is too large Load Diff

599
coloquinte/src/row_opt.cxx Normal file
View File

@ -0,0 +1,599 @@
#include "coloquinte/detailed.hxx"
#include "coloquinte/circuit_helper.hxx"
#include "coloquinte/optimization_subproblems.hxx"
#include "coloquinte/union_find.hxx"
#include "coloquinte/piecewise_linear.hxx"
#include <cassert>
#include <iostream>
namespace coloquinte{
namespace dp{
namespace{
// Closed integer interval [min, max] that can be widened to include other intervals or values.
// The default constructor leaves both bounds uninitialized.
struct minmax{
    int_t min, max;

    minmax(){}
    minmax(int_t f, int_t s) : min(f), max(s){}

    // Widen the interval so that it also covers o
    void merge(minmax const o){
        if(o.min < min) min = o.min;
        if(max < o.max) max = o.max;
    }
    // Widen the interval so that it also covers the single value o
    void merge(int_t const o){
        if(o < min) min = o;
        if(max < o) max = o;
    }
};
// Pairs a cell index with its rank in a sequence; ordered by cell index only, so that a sorted
// list can be used to look up the rank of a cell.
struct order_gettr{
    index_t cell_ind;
    index_t seq_order;

    order_gettr(index_t c, index_t i) : cell_ind(c), seq_order(i) {}

    // Comparison with another entry
    bool operator<(order_gettr const o) const{
        return cell_ind < o.cell_ind;
    }
    // Comparison with a bare cell index
    bool operator<(index_t const o) const{
        return cell_ind < o;
    }
};
std::vector<order_gettr> get_sorted_ordered_cells(std::vector<index_t> const & cells){
std::vector<order_gettr> ret;
for(index_t i=0; i<cells.size(); ++i){
ret.push_back(order_gettr(cells[i],i));
}
std::sort(ret.begin(), ret.end());
return ret;
}
// Returns the indices of all the nets having a pin on one of `cells`, sorted and without duplicates.
std::vector<index_t> get_unique_nets(netlist const & circuit, std::vector<index_t> const & cells){
    std::vector<index_t> net_inds;
    for(index_t const cell : cells){
        for(netlist::pin_t const pin : circuit.get_cell(cell)){
            net_inds.push_back(pin.net_ind);
        }
    }
    // Sorting makes the duplicates adjacent so that they can be dropped in one pass
    std::sort(net_inds.begin(), net_inds.end());
    net_inds.erase(std::unique(net_inds.begin(), net_inds.end()), net_inds.end());
    return net_inds;
}
// A set of nets restricted to one group of cells, flattened for 1D cost evaluation.
// The pins of net n are pins[net_limits[n]] .. pins[net_limits[n+1]-1]; every stored net has at least one pin.
struct Hnet_group{
    struct Hpin{
        index_t cell_index; // Not indexes in the circuit!!! Rather in the internal algorithm
        minmax offset;      // Extent of the net's pins on this cell, added to the cell position in get_cost
        bool operator<(Hpin const o) const{ return cell_index < o.cell_index; }
    };
    struct Hnet{
        bool has_ext_pins; // Set as soon as one non-movable pin has been merged into ext_pins
        minmax ext_pins;   // Extent of the non-movable pins; only read when has_ext_pins is true
        int_t weight;
        Hnet(){
            has_ext_pins = false;
            // Empty interval: the first merged position must replace both bounds, whatever its sign.
            // (An upper bound starting at 0 would be wrong when every fixed pin has a negative coordinate.)
            ext_pins = minmax(std::numeric_limits<int_t>::max(), std::numeric_limits<int_t>::min());
            weight = 1;
        }
    };

    std::vector<index_t> net_limits;
    std::vector<Hnet> nets;
    std::vector<Hpin> pins;
    std::vector<int_t> cell_widths;

    Hnet_group(){
        net_limits.push_back(0);
    }

    // Adds a net of the given weight.
    // Movable pins are stored per cell (p.cell_ind is used as the internal cell index, p.offs as the offset);
    // the positions of the other pins are merged in the net's external extent.
    // A net without any movable pin is silently dropped.
    void add_net(std::vector<pin_1D> const & added_pins, int_t weight){
        Hnet cur_net;
        cur_net.weight = weight;
        std::vector<Hpin> new_pins;
        for(auto const & p : added_pins){
            if(p.movable){
                Hpin new_pin;
                new_pin.cell_index = p.cell_ind;
                new_pin.offset = minmax(p.offs, p.offs);
                new_pins.push_back(new_pin);
            }
            else{
                cur_net.has_ext_pins = true;
                cur_net.ext_pins.merge(p.pos);
            }
        }
        std::sort(new_pins.begin(), new_pins.end());
        if(not new_pins.empty()){ // Possible when generating from a Steiner topology
            // Uniquify just in case there are several pins on the net on a single cell
            index_t j=0;
            auto prev_pin = new_pins[0];
            for(auto it = new_pins.begin()+1; it != new_pins.end(); ++it){
                if(it->cell_index == prev_pin.cell_index){
                    prev_pin.offset.merge(it->offset);
                }
                else{
                    new_pins[j] = prev_pin;
                    ++j;
                    prev_pin = *it;
                }
            }
            new_pins[j]=prev_pin;
            new_pins.resize(j+1);
            nets.push_back(cur_net);
            net_limits.push_back(net_limits.back() + new_pins.size());
            pins.insert(pins.end(), new_pins.begin(), new_pins.end());
        }
    }

    // Weighted sum over the nets of (max - min) of the pin extents, for unflipped cells.
    // pos is indexed by internal cell index.
    std::int64_t get_cost(std::vector<int_t> const & pos) const{
        std::int64_t cost=0;
        for(index_t n=0; n<nets.size(); ++n){
            Hnet const & cur_net = nets[n];
            minmax mm(std::numeric_limits<int_t>::max(), std::numeric_limits<int_t>::min());
            if(cur_net.has_ext_pins){
                mm = cur_net.ext_pins;
            }
            assert(net_limits[n+1] > net_limits[n]);
            for(index_t p=net_limits[n]; p<net_limits[n+1]; ++p){
                int_t cur_pos = pos[pins[p].cell_index];
                mm.merge( minmax(cur_pos + pins[p].offset.min, cur_pos + pins[p].offset.max) );
            }
            cost += static_cast<std::int64_t>(cur_net.weight) * (mm.max - mm.min);
        }
        return cost;
    }

    // Same cost, where a cell with a non-zero flip entry has its pin offsets mirrored within the cell width.
    // pos and flip are indexed by internal cell index.
    std::int64_t get_cost(std::vector<int_t> const & pos, std::vector<int> const & flip) const{
        std::int64_t cost=0;
        for(index_t n=0; n<nets.size(); ++n){
            Hnet const & cur_net = nets[n];
            minmax mm(std::numeric_limits<int_t>::max(), std::numeric_limits<int_t>::min());
            if(cur_net.has_ext_pins){
                mm = cur_net.ext_pins;
            }
            assert(net_limits[n+1] > net_limits[n]);
            for(index_t p=net_limits[n]; p<net_limits[n+1]; ++p){
                int_t cur_pos = pos[pins[p].cell_index];
                bool flipped = flip[pins[p].cell_index];
                int_t wdth = cell_widths[pins[p].cell_index];
                mm.merge( flipped ?
                      minmax(cur_pos + wdth - pins[p].offset.max, cur_pos + wdth - pins[p].offset.min)
                    : minmax(cur_pos + pins[p].offset.min, cur_pos + pins[p].offset.max)
                );
            }
            cost += static_cast<std::int64_t>(cur_net.weight) * (mm.max - mm.min);
        }
        return cost;
    }
};
// Builds the net group of `cells` with one entry per involved net.
// Pins on one of `cells` get their cell index replaced by the rank of the cell in `cells`;
// every other pin is marked as non-movable.
Hnet_group get_B2B_netgroup(netlist const & circuit, detailed_placement const & pl, std::vector<index_t> const & cells){
    std::vector<order_gettr> const lookup = get_sorted_ordered_cells(cells);
    std::vector<index_t> const net_inds = get_unique_nets(circuit, cells);

    Hnet_group group;
    for(index_t const cell : cells){
        group.cell_widths.push_back(circuit.get_cell(cell).size.x_);
    }

    for(index_t const net : net_inds){
        std::vector<pin_1D> net_pins = get_pins_1D(circuit, pl.plt_, net).x_;
        for(pin_1D & pin : net_pins){
            auto const found = std::lower_bound(lookup.begin(), lookup.end(), pin.cell_ind);
            bool const in_group = (found != lookup.end()) and (found->cell_ind == pin.cell_ind);
            if(in_group){
                pin.cell_ind = found->seq_order;
            }
            else{
                // This pin stays where it is for this round
                pin.movable = false;
            }
        }
        group.add_net(net_pins, circuit.get_net(net).weight);
    }
    return group;
}
// Builds the net group of `cells` from the horizontal topology returned by get_RSMT_topology.
// Each involved net is split into subnets: two pins end up in the same subnet when a topology
// edge joins them and at least one of the two is on a cell of the group.
Hnet_group get_RSMT_netgroup(netlist const & circuit, detailed_placement const & pl, std::vector<index_t> const & cells){
    std::vector<order_gettr> const lookup = get_sorted_ordered_cells(cells);
    std::vector<index_t> const net_inds = get_unique_nets(circuit, cells);

    Hnet_group group;
    for(index_t const cell : cells){
        group.cell_widths.push_back(circuit.get_cell(cell).size.x_);
    }

    for(index_t const net : net_inds){
        auto vpins = get_pins_2D(circuit, pl.plt_, net);

        // Translate the pins of the group to internal indices, mark the others as fixed
        for(auto & pin : vpins){
            auto const found = std::lower_bound(lookup.begin(), lookup.end(), pin.cell_ind);
            bool const in_group = (found != lookup.end()) and (found->cell_ind == pin.cell_ind);
            if(in_group){
                pin.cell_ind = found->seq_order;
            }
            else{
                pin.movable = false;
            }
        }

        std::vector<point<int_t> > pin_locations;
        for(auto const & pin : vpins){
            pin_locations.push_back(pin.pos);
        }
        auto const Htopo = get_RSMT_topology(pin_locations, 8).x_;

        // Pins sharing an edge belong to the same subnet if one of them is movable: group them with union-find
        union_find UF(vpins.size());
        for(auto const & edge : Htopo){
            bool const touches_group = vpins[edge.first].movable or vpins[edge.second].movable;
            if(touches_group){
                UF.merge(edge.first, edge.second);
            }
        }

        // One bucket per union-find representative
        std::vector<std::vector<pin_1D> > components(vpins.size());
        for(index_t i=0; i<vpins.size(); ++i){
            components[UF.find(i)].push_back(vpins[i].x());
        }

        int_t const weight = circuit.get_net(net).weight;
        for(auto const & component : components){
            if(component.empty()) continue;
            group.add_net(component, weight);
        }
    }
    return group;
}
// Optimizes an ordered sequence of standard cells on the same row, without flipping.
// permutation[i] is the rank in the row of the cell with internal index i.
// positions is filled by place_convex_single_row, in row order.
// Returns the cost of the result, or the largest std::int64_t when the solver reports infeasibility.
inline std::int64_t optimize_convex_sequence(Hnet_group const & nets, std::vector<index_t> const & permutation, std::vector<int_t> & positions, std::vector<std::pair<int_t, int_t> > const & cell_ranges){
    index_t const cell_cnt = permutation.size();

    // Widths and ranges of the cells, reindexed in row order
    std::vector<int_t> loc_widths(cell_cnt);
    std::vector<std::pair<int_t, int_t> > loc_ranges(cell_cnt);
    for(index_t i=0; i<cell_cnt; ++i){
        index_t const row_ind = permutation[i];
        loc_widths[row_ind] = nets.cell_widths[i];
        loc_ranges[row_ind] = cell_ranges[i];
    }

    std::vector<cell_bound> bounds;
    std::vector<int_t> right_slopes(cell_cnt, 0);
    for(index_t n=0; n<nets.nets.size(); ++n){
        assert(nets.net_limits[n+1] > nets.net_limits[n]);
        auto const cur_net = nets.nets[n];

        // Leftmost and rightmost cells of the net in the row, with the relevant pin offset
        index_t fst_c = std::numeric_limits<index_t>::max();
        index_t lst_c = 0;
        int_t fst_pin_offs = 0;
        int_t lst_pin_offs = 0;
        for(index_t p=nets.net_limits[n]; p<nets.net_limits[n+1]; ++p){
            auto const & pin = nets.pins[p];
            // Permutation: index in the Hnet_group to index in the row
            index_t const cur_cell = permutation[pin.cell_index];
            if(cur_cell < fst_c){
                fst_c = cur_cell;
                fst_pin_offs = pin.offset.min;
            }
            if(cur_cell >= lst_c){
                lst_c = cur_cell;
                lst_pin_offs = pin.offset.max;
            }
        }

        // The last cell gets the weight as a slope in both cases
        right_slopes[lst_c] += cur_net.weight;
        if(cur_net.has_ext_pins){
            bounds.push_back(cell_bound(fst_c, cur_net.ext_pins.min - fst_pin_offs, cur_net.weight));
            bounds.push_back(cell_bound(lst_c, cur_net.ext_pins.max - lst_pin_offs, cur_net.weight));
        }
        else{
            right_slopes[fst_c] -= cur_net.weight;
        }
    }

    bool const feasible = place_convex_single_row(loc_widths, loc_ranges, bounds, right_slopes, positions);

    // Back to internal indices to evaluate the cost
    auto permuted_positions = positions;
    for(index_t i=0; i<cell_cnt; ++i){
        permuted_positions[i] = positions[permutation[i]];
    }

    if(not feasible){
        return std::numeric_limits<std::int64_t>::max(); // Infeasible: return a very big cost
    }
    return nets.get_cost(permuted_positions);
}
// TODO: take modified order relative to the obstacles into account
// Optimizes the positions and the flips of an ordered sequence of cells on the same row.
//   permutation[i]   rank in the row of the cell with internal index i
//   flippability[i]  non-zero if the cell with internal index i may be flipped
//   cell_ranges[i]   (lowest, highest) limits for the cell with internal index i
//   positions, flippings   outputs, indexed by rank in the row
// Returns the cost of the result. When the ranges cannot accommodate the cells in this order,
// returns the largest std::int64_t and leaves positions and flippings untouched.
inline std::int64_t optimize_noncvx_sequence(Hnet_group const & nets, std::vector<index_t> const & permutation, std::vector<int_t> & positions, std::vector<int> & flippings, std::vector<int> const & flippability, std::vector<std::pair<int_t, int_t> > const & cell_ranges){
    // Get the widths of the cells in row order
    std::vector<int_t> loc_widths(permutation.size());
    std::vector<int> loc_flipps(permutation.size());
    std::vector<std::pair<int_t, int_t> > loc_ranges(permutation.size());
    for(index_t i=0; i<permutation.size(); ++i){
        loc_widths[permutation[i]] = nets.cell_widths[i];
        loc_ranges[permutation[i]] = cell_ranges[i];
        loc_flipps[permutation[i]] = flippability[i];
    }

    // Left to right: a cell cannot start before the previous one ends
    int_t min_limit = std::numeric_limits<int_t>::min();
    for(index_t i=0; i<loc_ranges.size(); ++i){
        min_limit = std::max(loc_ranges[i].first, min_limit);
        loc_ranges[i].first = min_limit;
        min_limit += loc_widths[i];
    }
    // Right to left: from now on .second is the highest allowed position of the cell's left side
    int_t max_limit = std::numeric_limits<int_t>::max();
    for(index_t i=loc_ranges.size(); i>0; --i){
        max_limit = std::min(loc_ranges[i-1].second, max_limit);
        max_limit -= loc_widths[i-1];
        loc_ranges[i-1].second = max_limit;
    }
    for(index_t i=0; i<loc_ranges.size(); ++i){
        if(loc_ranges[i].first > loc_ranges[i].second){
            return std::numeric_limits<std::int64_t>::max(); // Infeasible: return a very big cost
        }
    }

    // One cost function per cell and per orientation, defined on the cell's range
    std::vector<piecewise_linear_function> unflipped_cost_functions, flipped_cost_functions;
    for(index_t i=0; i<loc_ranges.size(); ++i){
        auto cur = piecewise_linear_function(loc_ranges[i].first, loc_ranges[i].second);
        unflipped_cost_functions.push_back(cur);
        flipped_cost_functions.push_back(cur);
    }

    // Each net contributes to the cost functions of its first and last cells in the row
    for(index_t n=0; n<nets.nets.size(); ++n){
        index_t fst_c=std::numeric_limits<index_t>::max(), lst_c=0;
        int_t fst_pin_offs_mn=0, lst_pin_offs_mn=0,
              fst_pin_offs_mx=0, lst_pin_offs_mx=0;
        assert(nets.net_limits[n+1] > nets.net_limits[n]);
        auto const & cur_net = nets.nets[n];
        for(index_t p=nets.net_limits[n]; p<nets.net_limits[n+1]; ++p){
            // Permutation: index in the Hnet_group to index in the row
            index_t cur_cell = permutation[nets.pins[p].cell_index];
            if(cur_cell < fst_c){
                fst_c = cur_cell;
                fst_pin_offs_mn = nets.pins[p].offset.min;
                fst_pin_offs_mx = nets.pins[p].offset.max;
            }
            if(cur_cell >= lst_c){
                lst_c = cur_cell;
                lst_pin_offs_mn = nets.pins[p].offset.min;
                lst_pin_offs_mx = nets.pins[p].offset.max;
            }
        }
        // For a flipped cell, an offset o becomes (width - o), hence the swapped min/max offsets
        if(cur_net.has_ext_pins){
            unflipped_cost_functions[fst_c].add_bislope(-cur_net.weight, 0, cur_net.ext_pins.min - fst_pin_offs_mn);
            unflipped_cost_functions[lst_c].add_bislope(0, cur_net.weight, cur_net.ext_pins.max - lst_pin_offs_mx);
            flipped_cost_functions[fst_c].add_bislope(-cur_net.weight, 0, cur_net.ext_pins.min - loc_widths[fst_c] + fst_pin_offs_mx);
            flipped_cost_functions[lst_c].add_bislope(0, cur_net.weight, cur_net.ext_pins.max - loc_widths[lst_c] + lst_pin_offs_mn);
        }
        else{
            unflipped_cost_functions[fst_c].add_monotone(-cur_net.weight, -fst_pin_offs_mn);
            unflipped_cost_functions[lst_c].add_monotone( cur_net.weight, -lst_pin_offs_mx);
            flipped_cost_functions[fst_c].add_monotone(-cur_net.weight, fst_pin_offs_mx - loc_widths[fst_c] );
            flipped_cost_functions[lst_c].add_monotone( cur_net.weight, lst_pin_offs_mn - loc_widths[lst_c] );
        }
    }

    // Chain the per-cell costs from left to right
    // NOTE(review): relies on previous_min / previous_min_of_sum returning non-increasing functions, as the assertion below checks
    std::vector<piecewise_linear_function> prev_mins, merged_costs;
    for(index_t i=0; i<loc_ranges.size(); ++i){
        merged_costs.push_back(loc_flipps[i] ?
              piecewise_linear_function::minimum(unflipped_cost_functions[i], flipped_cost_functions[i])
            : unflipped_cost_functions[i]
        );
        if(i>0){
            prev_mins.push_back(prev_mins.back().previous_min_of_sum(merged_costs.back(), loc_widths[i-1]));
        }
        else{
            prev_mins.push_back(merged_costs.back().previous_min());
        }
    }
    for(auto const & M : prev_mins){
        for(index_t i=0; i+1<M.point_values.size(); ++i){
            assert(M.point_values[i].second >= M.point_values[i+1].second);
        }
    }

    // assign() rather than resize(): callers reuse the same vectors from one call to the next,
    // and a flip written by a previous call must not survive in this result
    flippings.assign(cell_ranges.size(), 0);
    positions.assign(cell_ranges.size(), 0);

    // Walk back from the last cell, each cell being constrained by the one on its right
    int_t pos = std::numeric_limits<int_t>::max();
    for(index_t i=loc_ranges.size(); i>0; --i){
        // Find the best position and flipping for each cell
        pos = prev_mins[i-1].last_before(std::min(pos - loc_widths[i-1], loc_ranges[i-1].second) );
        positions[i-1] = pos;
        if(loc_flipps[i-1] and flipped_cost_functions[i-1].value_at(pos) < unflipped_cost_functions[i-1].value_at(pos)){
            flippings[i-1] = 1;
        }
    }

    for(index_t i=0; i<loc_ranges.size(); ++i){
        assert(positions[i] >= loc_ranges[i].first);
        assert(positions[i] <= loc_ranges[i].second);
    }
    for(index_t i=0; i+1<loc_ranges.size(); ++i){
        assert(positions[i] + loc_widths[i] <= positions[i+1]);
    }

    // Back to internal indices to evaluate the cost
    auto permuted_positions = positions;
    auto permuted_flippings = flippings;
    for(index_t i=0; i<permutation.size(); ++i){
        permuted_positions[i] = positions[permutation[i]];
        permuted_flippings[i] = flippings[permutation[i]];
    }
    return nets.get_cost(permuted_positions, permuted_flippings);
}
// Returns, for each cell of `cells` (given in row order, non-empty), the (low, high) limits it may occupy.
// Cells that are not XMovable or whose height is not 1 are pinned to their current extent;
// the others get the limits of the whole group.
std::vector<std::pair<int_t, int_t> > get_cell_ranges(netlist const & circuit, detailed_placement const & pl, std::vector<index_t> const & cells){
    typedef std::pair<int_t, int_t> range_t;

    // The input is expected in row order and without overlap
    for(index_t i=0; i+1<cells.size(); ++i){
        assert(pl.plt_.positions_[cells[i]].x_ + circuit.get_cell(cells[i]).size.x_ <= pl.plt_.positions_[cells[i+1]].x_);
    }

    // Extreme limits, except macros are allowed to be beyond the limit of the placement area
    int_t const lower_lim = pl.get_limit_positions(circuit, cells.front()).first;
    int_t const upper_lim = pl.get_limit_positions(circuit, cells.back()).second;

    std::vector<range_t> lims;
    for(index_t const cell : cells){
        auto const attr = circuit.get_cell(cell).attributes;
        int_t const pos = pl.plt_.positions_[cell].x_;
        bool const pinned = (attr & XMovable) == 0 or pl.cell_height(cell) != 1;
        if(pinned){
            lims.push_back(range_t(pos, pos + circuit.get_cell(cell).size.x_));
        }
        else{
            assert(pos >= lower_lim);
            assert(pos + circuit.get_cell(cell).size.x_ <= upper_lim);
            lims.push_back(range_t(lower_lim, upper_lim));
        }
    }
    return lims;
}
// Optimizes every row as a whole, keeping the order of the cells.
//   NON_CONVEX  also allows flipping the cells marked XFlippable
//   RSMT        builds the nets from the Steiner topology rather than from the bounding box
// A row for which the optimizer reports infeasibility is left untouched.
template<bool NON_CONVEX, bool RSMT>
void OSRP_generic(netlist const & circuit, detailed_placement & pl){
    for(index_t r=0; r<pl.row_cnt(); ++r){
        // Complete optimization on a row, comprising possible obstacles
        std::vector<index_t> cells;
        std::vector<int> flippability;

        // Get the movable cells, if we can flip them, and the obstacles on the row
        for(index_t OSRP_cell = pl.get_first_cell_on_row(r); OSRP_cell != null_ind; OSRP_cell = pl.get_next_cell_on_row(OSRP_cell, r)){
            auto attr = circuit.get_cell(OSRP_cell).attributes;
            cells.push_back(OSRP_cell);
            flippability.push_back( (attr & XFlippable) != 0 ? 1 : 0);
        }
        if(cells.empty()) continue;

        std::vector<std::pair<int_t, int_t> > lims = get_cell_ranges(circuit, pl, cells); // Limit positions for each cell
        Hnet_group nets = RSMT ?
              get_RSMT_netgroup(circuit, pl, cells)
            : get_B2B_netgroup(circuit, pl, cells);

        std::vector<index_t> no_permutation(cells.size());
        for(index_t i=0; i<cells.size(); ++i) no_permutation[i] = i;

        std::vector<int_t> final_positions;
        std::vector<int> flipped;
        std::int64_t const cost = NON_CONVEX ?
              optimize_noncvx_sequence(nets, no_permutation, final_positions, flipped, flippability, lims)
            : optimize_convex_sequence(nets, no_permutation, final_positions, lims);

        // On infeasibility the outputs are not guaranteed to be filled (the non-convex optimizer
        // returns before sizing them): do not read them and keep the row as it is
        if(cost == std::numeric_limits<std::int64_t>::max()) continue;

        // Update the positions and orientations
        if(NON_CONVEX){
            for(index_t i=0; i<cells.size(); ++i){
                bool old_orient = pl.plt_.orientations_[cells[i]].x_;
                pl.plt_.orientations_[cells[i]].x_ = flipped[i] ? not old_orient : old_orient;
            }
        }
        for(index_t i=0; i<cells.size(); ++i){
            pl.plt_.positions_[cells[i]].x_ = final_positions[i];
        }
    } // Iteration on the rows
    pl.selfcheck();
}
// Reorders the cells within sliding windows of `range` consecutive cells on each row:
// every permutation of the window is optimized and the cheapest one is applied.
// After a full window, the next window starts at the middle cell (index range/2) of the
// window in its new order.
//   NON_CONVEX  also allows flipping the cells marked XFlippable
//   RSMT        builds the nets from the Steiner topology rather than from the bounding box
template<bool NON_CONVEX, bool RSMT>
void swaps_row_generic(netlist const & circuit, detailed_placement & pl, index_t range){
    assert(range >= 2);
    for(index_t r=0; r<pl.row_cnt(); ++r){
        index_t OSRP_cell = pl.get_first_cell_on_row(r);
        while(OSRP_cell != null_ind){
            // Gather the next window of at most `range` cells
            std::vector<index_t> cells;
            std::vector<int> flippables;
            for(index_t nbr_cells=0;
                    OSRP_cell != null_ind
                and nbr_cells < range;
                OSRP_cell = pl.get_next_cell_on_row(OSRP_cell, r), ++nbr_cells
            ){
                cells.push_back(OSRP_cell);
                flippables.push_back( (circuit.get_cell(OSRP_cell).attributes & XFlippable) != 0);
            }

            if(not cells.empty()){
                std::vector<std::pair<int_t, int_t> > lims = get_cell_ranges(circuit, pl, cells); // Limit positions for each cell
                Hnet_group nets = RSMT ?
                      get_RSMT_netgroup(circuit, pl, cells)
                    : get_B2B_netgroup(circuit, pl, cells);

                std::int64_t best_cost = std::numeric_limits<std::int64_t>::max();
                std::vector<int_t> positions(cells.size());
                std::vector<int> flippings(cells.size());
                std::vector<int_t> best_positions(cells.size());
                std::vector<int> best_flippings(cells.size());
                std::vector<index_t> permutation(cells.size());
                for(index_t i=0; i<cells.size(); ++i) permutation[i] = i;
                std::vector<index_t> best_permutation;

                // Check every possible permutation of the cells
                do{
                    std::int64_t cur_cost = NON_CONVEX ?
                        optimize_noncvx_sequence(nets, permutation, positions, flippings, flippables, lims) :
                        optimize_convex_sequence(nets, permutation, positions, lims);
                    // <= so that the first permutation is always recorded, and ties go to the latest one
                    if(cur_cost <= best_cost){
                        best_cost = cur_cost;
                        best_permutation = permutation;
                        best_flippings = flippings;
                        best_positions = positions;
                    }
                }while(std::next_permutation(permutation.begin(), permutation.end()));

                std::vector<index_t> new_cell_order(cells.size());
                // Update the positions and the topology
                for(index_t i=0; i<cells.size(); ++i){
                    index_t r_ind = best_permutation[i]; // In the row from in the Hnet_group
                    new_cell_order[r_ind] = cells[i];
                    pl.plt_.positions_[cells[i]].x_ = best_positions[r_ind];
                    if(NON_CONVEX){
                        bool old_orient = pl.plt_.orientations_[cells[i]].x_;
                        pl.plt_.orientations_[cells[i]].x_ = best_flippings[r_ind] ? not old_orient : old_orient;
                    }
                }
                pl.reorder_cells(cells, new_cell_order, r);
                cells = new_cell_order;
                assert(best_cost < std::numeric_limits<std::int64_t>::max());
            }

            // More cells on the row: overlap the next window with this one
            if(OSRP_cell != null_ind){
                assert(cells.size() == range);
                OSRP_cell = cells[range/2];
            }
        } // Iteration on the entire row
    } // Iteration on the rows
    pl.selfcheck();
}
} // End anonymous namespace
// Public entry points for the whole-row optimization: thin wrappers selecting
// the <NON_CONVEX, RSMT> instantiation of OSRP_generic.
void OSRP_convex_HPWL(netlist const & circuit, detailed_placement & pl){
    OSRP_generic<false, false>(circuit, pl);
}
void OSRP_convex_RSMT(netlist const & circuit, detailed_placement & pl){
    OSRP_generic<false, true>(circuit, pl);
}
void OSRP_noncvx_HPWL(netlist const & circuit, detailed_placement & pl){
    OSRP_generic<true, false>(circuit, pl);
}
void OSRP_noncvx_RSMT(netlist const & circuit, detailed_placement & pl){
    OSRP_generic<true, true>(circuit, pl);
}
// Public entry points for the windowed reordering: thin wrappers selecting
// the <NON_CONVEX, RSMT> instantiation of swaps_row_generic.
void swaps_row_convex_HPWL(netlist const & circuit, detailed_placement & pl, index_t range){
    swaps_row_generic<false, false>(circuit, pl, range);
}
void swaps_row_convex_RSMT(netlist const & circuit, detailed_placement & pl, index_t range){
    swaps_row_generic<false, true>(circuit, pl, range);
}
void swaps_row_noncvx_HPWL(netlist const & circuit, detailed_placement & pl, index_t range){
    swaps_row_generic<true, false>(circuit, pl, range);
}
void swaps_row_noncvx_RSMT(netlist const & circuit, detailed_placement & pl, index_t range){
    swaps_row_generic<true, true>(circuit, pl, range);
}
} // namespace dp
} // namespace coloquinte

381
coloquinte/src/solvers.cxx Normal file
View File

@ -0,0 +1,381 @@
#include "coloquinte/solvers.hxx"
#include <cassert>
namespace coloquinte{
namespace gp{
// Sums two systems sharing the same internal variables.
// Indices below internal_size() are shared by both operands; the remaining indices of `o`
// are shifted so that they come after all the indices of this system.
// Throws std::runtime_error when the internal sizes differ.
linear_system linear_system::operator+(linear_system const & o) const{
    if(o.internal_size() != internal_size()){ throw std::runtime_error("Mismatched system sizes"); }

    auto const shared = internal_size();
    auto const shift = target_.size() - internal_size();
    linear_system sum(target_.size() + o.target_.size() - internal_size(), internal_size());

    // Matrix: our entries unchanged, followed by the entries of o with the private indices shifted
    sum.matrix_ = matrix_;
    for(matrix_triplet t : o.matrix_){
        if(t.c_ >= shared){
            t.c_ += shift;
        }
        if(t.r_ >= shared){
            t.r_ += shift;
        }
        sum.matrix_.push_back(t);
    }

    // Target: shared part summed, then our private part, then the private part of o
    for(index_t i=0; i<shared; ++i){
        sum.target_[i] = target_[i] + o.target_[i];
    }
    for(index_t i=shared; i<target_.size(); ++i){
        sum.target_[i] = target_[i];
    }
    for(index_t i=shared; i<o.target_.size(); ++i){
        sum.target_[i + shift] = o.target_[i];
    }
    return sum;
}
// Compressed sparse row matrix with the diagonal stored apart.
// The stored entries of row i are values[row_limits[i]] .. values[row_limits[i+1]-1],
// with their columns at the same positions in col_indexes.
struct csr_matrix{
    std::vector<std::uint32_t> row_limits, col_indexes;
    std::vector<float> values, diag;

    // Copies the four arrays; checks (in debug builds) that their sizes are consistent
    csr_matrix(
        std::vector<std::uint32_t> const & row_l,
        std::vector<std::uint32_t> const & col_i,
        std::vector<float> const & vals,
        std::vector<float> const D
    )
    : row_limits(row_l)
    , col_indexes(col_i)
    , values(vals)
    , diag(D)
    {
        assert(values.size() == col_indexes.size());
        assert(diag.size()+1 == row_limits.size());
    }

    // Matrix-vector product
    std::vector<float> mul(std::vector<float> const & x) const;
    // Conjugate gradient solve starting from guess
    std::vector<float> solve_CG(std::vector<float> const & goal, std::vector<float> guess, std::uint32_t min_iter, std::uint32_t max_iter, float tol) const;
};
// A matrix where groups of unroll_len successive rows are padded to the same length and
// stored interleaved; hopefully a little better than plain CSR.
// row_limits holds one limit per group of rows; the diagonal is stored apart.
template<std::uint32_t unroll_len>
struct ellpack_matrix{
    std::vector<std::uint32_t> row_limits, col_indexes;
    std::vector<float> values, diag;

    // Copies the four arrays; checks (in debug builds) that their sizes are consistent with unroll_len
    ellpack_matrix(
        std::vector<std::uint32_t> const & row_l,
        std::vector<std::uint32_t> const & col_i,
        std::vector<float> const & vals,
        std::vector<float> const D
    )
    : row_limits(row_l)
    , col_indexes(col_i)
    , values(vals)
    , diag(D)
    {
        assert(values.size() == col_indexes.size());
        assert(diag.size() % unroll_len == 0);
        assert((row_limits.size()-1) * unroll_len == diag.size() );
        assert(row_limits.back() * unroll_len == values.size());
        assert(values.size() % unroll_len == 0);
        assert(col_indexes.size() % unroll_len == 0);
    }

    // Matrix-vector product
    std::vector<float> mul(std::vector<float> const & x) const;
    // Conjugate gradient solve starting from guess
    std::vector<float> solve_CG(std::vector<float> goal, std::vector<float> guess, std::uint32_t min_iter, std::uint32_t max_iter, float tol) const;
};
// The proxy matrix for compressed sparse storage
// Holds the (column, value) entries of a square matrix grouped by row, possibly with several
// entries for the same coefficient, and converts them to the compact formats used by the solvers.
class doublet_matrix{
// row_limits[i] .. row_limits[i+1] delimits the entries of row i in doublets
std::vector<std::uint32_t> row_limits;
// The entries, grouped by row; unsorted within a row
std::vector<matrix_doublet> doublets;
// Number of rows
std::uint32_t size;
// Sorts each row, sums the entries sharing a column and extracts the diagonal.
// limits receives the number of off-diagonal entries of each row, elements the entries themselves.
void get_compressed(std::vector<std::uint32_t> & limits, std::vector<matrix_doublet> & elements, std::vector<float> & diag) const;
public:
// Groups the triplets by row; size is the number of rows
doublet_matrix(std::vector<matrix_triplet> const & triplets, std::uint32_t size);
// Conversion to compressed sparse row storage
csr_matrix get_compressed_matrix() const;
// Conversion to the padded storage, with rows grouped by unroll_len
template<std::uint32_t unroll_len>
ellpack_matrix<unroll_len> get_ellpack_matrix() const;
};
// Groups the triplets by row with a counting sort on the row index.
// The entries of a row keep the order they had in `triplets`.
doublet_matrix::doublet_matrix(std::vector<matrix_triplet> const & triplets, std::uint32_t n) : size(n){
    // Counting pass: row_limits[r+1] receives the number of entries of row r
    row_limits.resize(size+1, 0);
    for(matrix_triplet const & t : triplets){
        ++row_limits[t.r_+1];
    }

    // Prefix sum: row_limits[r+1] becomes the position of the first entry of row r
    uint32_t running = 0;
    for(uint32_t i=1; i<n+1; ++i){
        uint32_t const row_count = row_limits[i];
        row_limits[i] = running;
        running += row_count;
    }
    assert(running == triplets.size());

    // Now we know the size and can allocate storage for the entries
    doublets.resize(running);

    // Scatter pass: each insertion advances the cursor of its row, so that row_limits[r+1]
    // ends up holding the end of row r
    for(matrix_triplet const & t : triplets){
        std::uint32_t & cursor = row_limits[t.r_+1];
        doublets[cursor] = matrix_doublet(t.c_, t.val_);
        ++cursor;
    }
}
void doublet_matrix::get_compressed(std::vector<std::uint32_t> & sizes, std::vector<matrix_doublet> & elements, std::vector<float> & diag) const{
assert(size+1 == row_limits.size());
sizes.resize(size);
diag.resize(size, 0.0);
std::vector<matrix_doublet> tmp_doublets = doublets;
for(uint32_t i=0; i<size; ++i){
// Sort the elements in the row
std::sort(tmp_doublets.begin() + row_limits[i], tmp_doublets.begin() + row_limits[i+1]);
// Compress them and extract the diagonal
std::uint32_t l=0;
matrix_doublet cur(tmp_doublets[row_limits[i]]);
for(uint32_t j=row_limits[i]+1; j<row_limits[i+1]; ++j){
if(tmp_doublets[j].c_ == cur.c_){
cur.val_ += tmp_doublets[j].val_;
}
else{
if(i != cur.c_){
elements.push_back(cur);
++l;
}
else{
diag[i] = cur.val_;
}
cur = tmp_doublets[j];
}
}
if(i != cur.c_){
elements.push_back(cur);
++l;
}
else{
diag[i] = cur.val_;
}
sizes[i] = l;
}
}
// Converts the matrix to compressed sparse row storage, the diagonal being stored apart.
csr_matrix doublet_matrix::get_compressed_matrix() const{
    std::vector<matrix_doublet> entries;
    std::vector<std::uint32_t> row_sizes;
    std::vector<float> diag;
    get_compressed(row_sizes, entries, diag);

    // Running sum of the row sizes gives the limits of each row
    std::vector<std::uint32_t> limits(row_limits.size());
    limits[0] = 0;
    for(std::uint32_t row=0; row<size; ++row){
        limits[row+1] = limits[row] + row_sizes[row];
    }

    // Split the entries into separate column and value arrays
    std::vector<std::uint32_t> col_indices;
    std::vector<float> values;
    col_indices.reserve(entries.size());
    values.reserve(entries.size());
    for(matrix_doublet const & entry : entries){
        col_indices.push_back(entry.c_);
        values.push_back(entry.val_);
    }
    return csr_matrix(limits, col_indices, values, diag);
}
// Converts the matrix to the padded storage: rows are handled in groups of unroll_len,
// each group being padded with zero coefficients to the length of its longest row.
// The matrix itself is padded with rows having 1 on the diagonal up to a multiple of unroll_len.
template<std::uint32_t unroll_len>
ellpack_matrix<unroll_len> doublet_matrix::get_ellpack_matrix() const{
    std::vector<matrix_doublet> entries;
    std::vector<std::uint32_t> sizes;
    std::vector<float> diag;
    get_compressed(sizes, entries, diag);

    // Number of groups of rows, rounded up
    std::uint32_t group_cnt = diag.size() / unroll_len;
    if(diag.size() % unroll_len != 0){
        ++group_cnt;
    }
    sizes.resize(unroll_len * group_cnt, 0);
    diag.resize(unroll_len * group_cnt, 1.0);

    // Each group is as long as its longest row
    std::vector<std::uint32_t> group_limits(group_cnt+1);
    group_limits[0] = 0;
    for(std::uint32_t g=0; g<group_cnt; ++g){
        std::uint32_t longest = sizes[unroll_len*g];
        for(std::uint32_t k=1; k<unroll_len; ++k){
            longest = std::max(longest, sizes[unroll_len*g + k]);
        }
        group_limits[g+1] = group_limits[g] + longest;
    }

    std::vector<std::uint32_t> col_indices(unroll_len * group_limits.back());
    std::vector<float> values(unroll_len * group_limits.back());

    std::uint32_t next_entry = 0; // Entries are consumed in row order
    for(std::uint32_t g=0; g<group_cnt; ++g){
        std::uint32_t const group_begin = group_limits[g];
        std::uint32_t const group_len = group_limits[g+1] - group_limits[g];
        for(std::uint32_t k=0; k<unroll_len; ++k){ // For every line of the group
            std::uint32_t const row_size = sizes[unroll_len*g + k];
            for(std::uint32_t j=0; j<row_size; ++j, ++next_entry){ // For the non-zero values
                col_indices[unroll_len * (group_begin+j) + k] = entries[next_entry].c_;
                values[unroll_len * (group_begin+j) + k] = entries[next_entry].val_;
            }
            for(std::uint32_t j=row_size; j<group_len; ++j){ // For the padding zeroes
                col_indices[unroll_len * (group_begin+j) + k] = 0;
                values[unroll_len * (group_begin+j) + k] = 0;
            }
        }
    }
    return ellpack_matrix<unroll_len>(group_limits, col_indices, values, diag);
}
// Matrix-vector product: the diagonal term first, then the stored off-diagonal terms of each row.
std::vector<float> csr_matrix::mul(std::vector<float> const & x) const{
    assert(x.size() == diag.size());
    std::vector<float> product(x.size());
    for(std::uint32_t row=0; row<diag.size(); ++row){
        float acc = diag[row] * x[row];
        std::uint32_t const row_end = row_limits[row+1];
        for(std::uint32_t j=row_limits[row]; j<row_end; ++j){
            acc += values[j] * x[col_indexes[j]];
        }
        product[row] = acc;
    }
    return product;
}
// Matrix-vector product, computed for unroll_len rows at a time.
// x must have the padded size of the matrix (a multiple of unroll_len).
template<std::uint32_t unroll_len>
std::vector<float> ellpack_matrix<unroll_len>::mul(std::vector<float> const & x) const{
    std::vector<float> product(x.size());
    assert(x.size() % unroll_len == 0);
    assert(x.size() == diag.size());

    for(std::uint32_t group=0; group+1<row_limits.size(); ++group){
        std::uint32_t const first_row = unroll_len*group;

        // Diagonal terms of the rows of the group
        float acc[unroll_len];
        for(std::uint32_t k=0; k<unroll_len; ++k){
            acc[k] = diag[first_row+k] * x[first_row+k];
        }
        // Stored terms, one per row of the group at each step
        for(std::uint32_t j=row_limits[group]; j<row_limits[group+1]; ++j){
            for(std::uint32_t k=0; k<unroll_len; ++k){
                acc[k] += values[unroll_len*j+k] * x[col_indexes[unroll_len*j+k]];
            }
        }
        for(std::uint32_t k=0; k<unroll_len; ++k){
            product[first_row+k] = acc[k];
        }
    }
    return product;
}
// Dot product of two vectors of the same size.
// The bulk of the vectors is accumulated in unroll_len independent partial sums (lane j
// receives the elements whose index is j modulo unroll_len); the partial sums are then
// added in lane order, followed by the remaining elements in index order.
// unroll_len must be at least 1.
template<std::uint32_t unroll_len>
float dot_prod(std::vector<float> const & a, std::vector<float> const & b){
    assert(a.size() == b.size());
    // Unsigned, size-wide indices throughout: no signed/unsigned comparison, no truncation
    std::size_t const total = a.size();
    std::size_t const unrolled_end = unroll_len * (total / unroll_len);

    float vals[unroll_len];
    for(std::uint32_t j=0; j<unroll_len; ++j) vals[j] = 0.0;
    for(std::size_t base=0; base<unrolled_end; base += unroll_len){
        for(std::uint32_t j=0; j<unroll_len; ++j){
            vals[j] += a[base + j] * b[base + j];
        }
    }

    float res = 0.0;
    for(std::uint32_t j=0; j<unroll_len; ++j) res += vals[j];
    // Elements left over when the size is not a multiple of unroll_len
    for(std::size_t i=unrolled_end; i<total; ++i){
        res += a[i] * b[i];
    }
    return res;
}
// Solves (this matrix) * x = goal with the conjugate gradient method, preconditioned by the
// inverse of the diagonal, starting from the guess x.
// At most max_iter iterations are done. From iteration min_iter on, the loop stops as soon as
// dot(r, z) falls to tol_ratio times its initial value, r being the residual and z the
// preconditioned residual. It also stops if the recurrence breaks down (see below).
// Returns the last iterate.
std::vector<float> csr_matrix::solve_CG(std::vector<float> const & goal, std::vector<float> x, std::uint32_t min_iter, std::uint32_t max_iter, float tol_ratio) const{
std::uint32_t n = diag.size();
assert(goal.size() == n);
assert(x.size() == n);
std::vector<float> r, p(n), z(n), mul_res, preconditioner(n);
// Initial residual, preconditioned residual and search direction
r = mul(x);
for(uint32_t i=0; i<n; ++i){
r[i] = goal[i] - r[i];
preconditioner[i] = 1.0/diag[i];
// A zero coefficient on the diagonal is not supported
assert(std::isfinite(preconditioner[i]));
z[i] = preconditioner[i] * r[i];
p[i] = z[i];
}
float cross_norm = dot_prod<16>(r, z);
assert(std::isfinite(cross_norm));
// Smallest positive normalized value: anything at or below it is treated as zero
float_t const epsilon = std::numeric_limits<float_t>::min();
float start_norm = cross_norm;
for(uint32_t k=0; k < max_iter; ++k){
mul_res = mul(p);
float_t pr_prod = dot_prod<16>(p, mul_res);
float_t alpha = cross_norm / pr_prod;
// Breakdown: stop before a non-finite, null or negative step pollutes x
if(
not std::isfinite(cross_norm) or not std::isfinite(alpha) or not std::isfinite(pr_prod)
or cross_norm <= epsilon or alpha <= epsilon or pr_prod <= epsilon
){
break;
}
// Update the result
for(uint32_t i=0; i<n; ++i){
x[i] = x[i] + alpha * p[i];
r[i] = r[i] - alpha * mul_res[i];
z[i] = preconditioner[i] * r[i];
}
float new_cross_norm = dot_prod<16>(r, z);
// Update the scaled residual and the search direction
if(k >= min_iter && new_cross_norm <= tol_ratio * start_norm){
break;
}
float beta = new_cross_norm / cross_norm;
cross_norm = new_cross_norm;
for(uint32_t i=0; i<n; ++i)
p[i] = z[i] + beta * p[i];
}
return x;
}
// Solves (this matrix) * x = goal with the conjugate gradient method, preconditioned by the
// inverse of the diagonal, starting from the guess x.
// goal and x are padded with zeroes to the padded size of the matrix for the computation;
// the result is cut back to the original size.
// At most max_iter iterations are done. From iteration min_iter on, the loop stops as soon as
// dot(r, z) falls to tol_ratio times its initial value. As in csr_matrix::solve_CG, it also
// stops when the recurrence breaks down (non-finite, null or negative quantities), which
// happens in particular when the guess already is the solution.
template<std::uint32_t unroll_len>
std::vector<float> ellpack_matrix<unroll_len>::solve_CG(std::vector<float> goal, std::vector<float> x, std::uint32_t min_iter, std::uint32_t max_iter, float tol_ratio) const{
    std::uint32_t n = diag.size();
    std::uint32_t old_n = x.size();
    assert(goal.size() == x.size());
    x.resize(diag.size(), 0.0);
    goal.resize(diag.size(), 0.0);

    // Initial residual, preconditioned residual and search direction
    std::vector<float> r, p(n), z(n), mul_res, preconditioner(n);
    r = mul(x);
    for(uint32_t i=0; i<n; ++i){
        r[i] = goal[i] - r[i];
        preconditioner[i] = 1.0/diag[i];
        z[i] = preconditioner[i] * r[i];
        p[i] = z[i];
    }

    float cross_norm = dot_prod<unroll_len>(r, z);
    // Smallest positive normalized value: anything at or below it is treated as zero
    float const epsilon = std::numeric_limits<float>::min();
    float start_norm = cross_norm;
    for(uint32_t k=0; k < max_iter; ++k){
        mul_res = mul(p);
        float pr_prod = dot_prod<unroll_len>(p, mul_res);
        float alpha = cross_norm / pr_prod;
        // Breakdown: without this check a null residual gives alpha = 0/0 and fills x with NaN
        if(
               not std::isfinite(cross_norm) or not std::isfinite(alpha) or not std::isfinite(pr_prod)
            or cross_norm <= epsilon or alpha <= epsilon or pr_prod <= epsilon
        ){
            break;
        }
        // Update the result
        for(uint32_t i=0; i<n; ++i){
            x[i] = x[i] + alpha * p[i];
            r[i] = r[i] - alpha * mul_res[i];
            z[i] = preconditioner[i] * r[i];
        }
        float new_cross_norm = dot_prod<unroll_len>(r, z);
        // Update the scaled residual and the search direction
        if(k >= min_iter && new_cross_norm <= tol_ratio * start_norm){
            break;
        }
        float beta = new_cross_norm / cross_norm;
        cross_norm = new_cross_norm;
        for(uint32_t i=0; i<n; ++i)
            p[i] = z[i] + beta * p[i];
    }
    x.resize(old_n);
    return x;
}
// Runs exactly nbr_iter iterations of conjugate gradient on the system (unless the solver
// stops on a breakdown), starting from guess padded with zeroes to the size of the target.
// Only the first internal_size() values of the result are returned.
std::vector<float_t> linear_system::solve_CG(std::vector<float_t> guess, index_t nbr_iter){
    // The CSR storage is the one in use; ellpack_matrix<16> is the alternative format
    csr_matrix const mat = doublet_matrix(matrix_, size()).get_compressed_matrix();

    guess.resize(target_.size(), 0.0);
    // min_iter == max_iter and a null tolerance: the iteration count is fixed
    auto solution = mat.solve_CG(target_, guess, nbr_iter, nbr_iter, 0.0);
    solution.resize(internal_size());
    return solution;
}
}
}

View File

@ -0,0 +1,513 @@
#include "coloquinte/topologies.hxx"
#include "coloquinte/circuit_helper.hxx"
#include "coloquinte/union_find.hxx"
#include <array>
#include <algorithm>
#include <cassert>
#include <set>
namespace coloquinte{
using edge_t = std::pair<index_t, index_t>;
namespace{
// Integer interval [min, max].
struct minmax_t{
    int_t min, max;

    // Leaves both bounds uninitialized: assign before reading them.
    minmax_t() {}
    minmax_t(int_t mn, int_t mx) : min(mn), max(mx) {}

    // Extends the interval just enough to reach o, not to cover it:
    // min is lowered to o.max when o lies entirely below, max is raised to o.min when o lies
    // entirely above, and nothing changes when the two intervals already overlap.
    void merge(minmax_t const o){
        if(o.max < min) min = o.max;
        if(o.min > max) max = o.min;
    }
    // Extends the interval so that it contains the coordinate p.
    void merge(int_t const p){
        if(p < min) min = p;
        if(p > max) max = p;
    }
};
}
namespace steiner_lookup{
// Wirelength obtained when this horizontal connectivity is applied to pins sorted by x.
// Every inner pin (positions 1 .. pin_cnt-2) owns a vertical span; the two extreme pins
// and the packed connexions (source in the low nibble, destination in the high nibble)
// extend the spans of the pins they attach to. The result is the sum of the horizontal
// edge lengths and of the vertical spans.
template<int pin_cnt>
int_t Hconnectivity<pin_cnt>::get_wirelength(std::array<point<int_t>, pin_cnt> const sorted_points) const{
    // Each inner pin starts with a span reduced to its own y coordinate
    std::array<minmax_t, pin_cnt-2> spans;
    for(index_t k=0; k<pin_cnt-2; ++k){
        int_t const y = sorted_points[k+1].y_;
        spans[k] = minmax_t(y, y);
    }

    // Attach the leftmost and rightmost pins to their inner pins
    std::uint8_t const first_con = extremes & 15u;
    std::uint8_t const last_con  = extremes >> 4;
    spans[first_con].merge(sorted_points.front().y_);
    spans[last_con].merge(sorted_points.back().y_);

    // Propagate the spans along the inner connexions, in their stored order
    for(std::uint8_t const E : connexions){
        minmax_t const & source = spans[(E & 15u)];
        spans[(E >> 4)].merge(source);
    }

    // Horizontal part: the two edges leaving the extreme pins ...
    int_t cost = sorted_points.back().x_ - sorted_points.front().x_ + sorted_points[first_con+1].x_ - sorted_points[last_con+1].x_;
    // ... and the edges between inner pins
    for(std::uint8_t const E : connexions){
        int_t const x_to   = sorted_points[(E >> 4) +1].x_;
        int_t const x_from = sorted_points[(E & 15u) +1].x_;
        cost += std::abs(x_to - x_from);
    }

    // Vertical part: one span per inner pin
    for(index_t k=0; k<pin_cnt-2; ++k){
        cost += (spans[k].max - spans[k].min);
    }
    return cost;
}
// Expand this packed connectivity into explicit edges between positions of the x-sorted pins.
// Edge 0 leaves the leftmost pin, edge 1 leaves the rightmost pin, the following ones come
// from the connexions (low nibble -> first, high nibble -> second; inner pins are shifted by one).
// The coordinates in sorted_points are not read.
template<int pin_cnt>
std::array<edge_t, pin_cnt-1> Hconnectivity<pin_cnt>::get_x_topology(std::array<point<int_t>, pin_cnt> const sorted_points) const{
    std::uint8_t const first_con = extremes & 15u;
    std::uint8_t const last_con  = extremes >> 4;

    std::array<edge_t, pin_cnt-1> edges;
    edges[0] = edge_t(0, first_con+1);
    edges[1] = edge_t(pin_cnt-1, last_con+1);
    for(index_t slot=2; slot<pin_cnt-1; ++slot){
        std::uint8_t const packed = connexions[slot-2];
        edges[slot] = edge_t((packed & 15u) +1, (packed >> 4) +1);
    }
    return edges;
}
} // End namespace steiner_lookup
namespace {
// Smallest wirelength over all the candidate connectivities in lookups,
// evaluated on the first n entries of pins.
template<int n, int array_size>
int_t get_wirelength_from_sorted(std::vector<point<int_t> > const & pins, std::array<steiner_lookup::Hconnectivity<n>, array_size> const & lookups){
    std::array<point<int_t>, n> points;
    std::copy_n(pins.begin(), n, points.begin());

    int_t best = std::numeric_limits<int_t>::max();
    for(index_t i=0; i<array_size; ++i){
        int_t const candidate = lookups[i].get_wirelength(points);
        if(candidate < best){
            best = candidate;
        }
    }
    return best;
}
// Wirelength of a tree described by its horizontal topology.
// Each point owns a vertical span starting at its own y; every edge (first, second), taken
// in order, extends the span of `second` towards the span of `first` and contributes its
// x distance. The vertical spans are added at the end.
std::int64_t get_wirelength_from_topo(std::vector<point<int_t> > const & points, std::vector<std::pair<index_t, index_t> > Htopo){
    std::vector<minmax_t> spans;
    spans.reserve(points.size());
    for(point<int_t> const & pt : points){
        spans.push_back(minmax_t(pt.y_, pt.y_));
    }

    std::int64_t cost = 0;
    for(edge_t const & E : Htopo){
        spans[E.second].merge(spans[E.first]);
        cost += std::abs(points[E.first].x_ - points[E.second].x_);
    }
    for(minmax_t const & span : spans){
        cost += (span.max - span.min);
    }
    return cost;
}
// A point that remembers an index, so that the position it had before a sort can be recovered.
struct indexed_pt : point<int_t>{
    index_t index;

    // Leaves index uninitialized
    indexed_pt(){}
    indexed_pt(point<int_t> coords, index_t original_pos) : point<int_t>(coords), index(original_pos) {}
};
// Edges of the candidate connectivity with the smallest wirelength on the first n entries
// of pins. When several candidates tie, the first one in lookups is kept.
template<int n, int array_size>
std::vector<std::pair<index_t, index_t> > get_topology_from_sorted(std::vector<point<int_t> > const & pins, std::array<steiner_lookup::Hconnectivity<n>, array_size> const & lookups){
    std::array<point<int_t>, n> points;
    std::copy_n(pins.begin(), n, points.begin());

    // Linear scan for the cheapest horizontal topology
    index_t const none = std::numeric_limits<index_t>::max();
    index_t best_ind = none;
    int_t best_cost = std::numeric_limits<int_t>::max();
    for(index_t i=0; i<array_size; ++i){
        int_t const candidate = lookups[i].get_wirelength(points);
        if(candidate >= best_cost){
            continue;
        }
        best_cost = candidate;
        best_ind = i;
    }
    assert(best_ind != none);

    auto chosen = lookups[best_ind].get_x_topology(points);
    return std::vector<std::pair<index_t, index_t> >(chosen.begin(), chosen.end());
}
// Build the vertical connexions that go with a horizontal tree topology.
//   pins  - pin coordinates; the indices used in Htopo are positions in this vector
//   Htopo - horizontal tree edges, processed in order as (first, second)
// Returns a list of edges expressed with positions in pins.
// Internally every pin is handled through its rank in y order. Each node owns a chain of
// ranks (linked through nxt_y_pin, delimited by min_y_pin / max_y_pin) which initially
// contains only the node itself.
// NOTE(review): Htopo.back() is read unconditionally near the end, so Htopo is assumed non-empty.
std::vector<edge_t> get_vertical_topology(std::vector<point<int_t> > pins, std::vector<edge_t> const & Htopo){
// Marks the end of a chain; being the largest index_t it also stops the "< middle_pin" walks below
index_t const null_ind = std::numeric_limits<index_t>::max();
std::vector<indexed_pt> ipoints(pins.size());
for(index_t i=0; i<pins.size(); ++i){
ipoints[i] = indexed_pt(pins[i], i);
}
std::sort(ipoints.begin(), ipoints.end(), [](indexed_pt a , indexed_pt b){return a.y_ < b.y_; });
// First pin with y ordering: min_y_pin[node] is the y rank of the lowest pin in the node's chain
std::vector<index_t> min_y_pin(pins.size());
for(index_t i=0; i<ipoints.size(); ++i){
min_y_pin[ipoints[i].index] = i;
}
// Rank of the highest pin in the node's chain (same as the lowest one at the start)
std::vector<index_t> max_y_pin = min_y_pin;
// Successor of a rank within its chain, null_ind when there is none
std::vector<index_t> nxt_y_pin(pins.size(), null_ind);
// Edges are collected between y ranks and translated back to pin positions at the end
std::vector<edge_t> ret;
for(auto const E : Htopo){
// Assuming a correctly ordered horizontal topology where the first node of the edge is never visited again
index_t f=E.first, s=E.second;
index_t first_yf=min_y_pin[f], first_ys=min_y_pin[s];
// Push the edges from the first and insert one of its elements in the second's linked structure
// Case 1: the two chains cover disjoint rank ranges
if(max_y_pin[f] < min_y_pin[s] or max_y_pin[s] < min_y_pin[f]){
// Emit every link of f's chain
for(index_t yf=first_yf; nxt_y_pin[yf] != null_ind; yf = nxt_y_pin[yf]){
ret.push_back(edge_t(yf, nxt_y_pin[yf]));
}
// f is entirely below s: f's highest pin becomes the new start of s's chain
if(max_y_pin[f] < min_y_pin[s]){
nxt_y_pin[max_y_pin[f]] = min_y_pin[s];
min_y_pin[s] = max_y_pin[f];
}
// f is entirely above s: f's lowest pin becomes the new end of s's chain and is cut from the rest of f's chain
else if(max_y_pin[s] < min_y_pin[f]){
nxt_y_pin[max_y_pin[s]] = min_y_pin[f];
max_y_pin[s] = min_y_pin[f];
nxt_y_pin[min_y_pin[f]] = null_ind;
}
// Cannot be reached: one of the two tests above holds whenever the enclosing condition does
else{
abort();
}
}
// Case 2: the rank ranges overlap
else{ // Need to chose a pin with two connexions because there will be no L route
// One pin from the second is in the middle of the first
if(max_y_pin[f] > max_y_pin[s]){
index_t middle_pin = max_y_pin[s];
index_t yf=first_yf;
// Make the first connexions
for(; nxt_y_pin[yf] < middle_pin; yf = nxt_y_pin[yf]){
ret.push_back(edge_t(yf, nxt_y_pin[yf]));
}
// Make the two connexions with the new pin
// NOTE(review): relies on yf having a successor here (nxt_y_pin[yf] != null_ind) - confirm
ret.push_back(edge_t(yf, middle_pin));
yf = nxt_y_pin[yf];
ret.push_back(edge_t(yf, middle_pin));
// Finish the connexions
for(; nxt_y_pin[yf] != null_ind; yf = nxt_y_pin[yf]){
ret.push_back(edge_t(yf, nxt_y_pin[yf]));
}
}
// One pin from the first is in the middle of the second
else{
// Emit every link of f's chain
for(index_t yf=first_yf; nxt_y_pin[yf] != null_ind; yf = nxt_y_pin[yf]){
ret.push_back(edge_t(yf, nxt_y_pin[yf]));
}
index_t middle_pin = max_y_pin[f];
// Find the place where we can insert this pin
index_t ys=first_ys;
for(; nxt_y_pin[ys] < middle_pin; ys = nxt_y_pin[ys]);
// Splice middle_pin into s's chain right after ys
nxt_y_pin[middle_pin] = nxt_y_pin[ys];
nxt_y_pin[ys] = middle_pin;
}
}
}
// The last visited gives the remaining connexions to push
for(index_t yf=min_y_pin[Htopo.back().second]; nxt_y_pin[yf] != null_ind; yf = nxt_y_pin[yf]){
ret.push_back(edge_t(yf, nxt_y_pin[yf]));
}
// Back to the original ordering: y ranks are replaced by positions in pins
for(auto & E : ret){
E.first = ipoints[E.first].index;
E.second = ipoints[E.second].index;
}
return ret;
}
// Append octant-neighbour candidate edges to `edges`.
//   pins  - pin coordinates (taken by value)
//   edges - output; pairs (current pin, earlier pin) of positions in pins are appended
// Pins are swept by increasing x + y. Two ordered sets hold the "active" earlier pins; when
// the swept pin finds active pins in its range, an edge is recorded for each of them and
// they are removed from the set before the swept pin is inserted.
// NOTE(review): std::function normally comes from <functional>, which is not among the
// direct includes visible at the top of this file - presumably pulled in by a project header.
inline void northeast_octant_neighbours(std::vector<point<int_t> > pins, std::vector<std::pair<index_t, index_t> > & edges){
// Tag every pin with its position so that edges can refer to positions in pins
std::vector<indexed_pt> point_list;
for(index_t i=0; i<pins.size(); ++i){
point_list.push_back(indexed_pt(pins[i], i));
}
// Sweep order: increasing x + y
std::sort(point_list.begin(), point_list.end(),
[](indexed_pt const a, indexed_pt const b){ return a.x_ + a.y_ < b.x_ + b.y_; }
);
// Decreasing order of x and y; multiset not necessary because no two elements have same coordinate
std::set<indexed_pt, std::function<bool (indexed_pt const, indexed_pt const)> >
active_upper_octant([](indexed_pt const a, indexed_pt const b)->bool{return a.x_ > b.x_;}),
active_lower_octant([](indexed_pt const a, indexed_pt const b)->bool{return a.y_ > b.y_;});
for(indexed_pt const current : point_list){
{ // North to north-east region
auto first_it = active_upper_octant.lower_bound(current); // Largest x with x <= current.x
auto it = first_it;
// Walk towards smaller x while the active pin satisfies x - y >= current.x - current.y
for(; it != active_upper_octant.end() && it->x_ - it->y_ >= current.x_ - current.y_; ++it){
edges.push_back(std::pair<index_t, index_t>(current.index, it->index));
}
// Pins that just received an edge leave the active set; `it` itself is not erased
if(first_it != active_upper_octant.end()){ active_upper_octant.erase(first_it, it); }
active_upper_octant.insert(it, current); // Hint to insert the element since it is the correct position
} // End region
{ // North-east to east region
auto first_it = active_lower_octant.lower_bound(current); // Largest y with y <= current.y
auto it = first_it;
// Walk towards smaller y while the active pin satisfies y - x >= current.y - current.x
for(; it != active_lower_octant.end() && it->y_ - it->x_ >= current.y_ - current.x_; ++it){
edges.push_back(std::pair<index_t, index_t>(current.index, it->index));
}
// Pins that just received an edge leave the active set; `it` itself is not erased
if(first_it != active_lower_octant.end()){ active_lower_octant.erase(first_it, it); }
active_lower_octant.insert(it, current); // Hint to insert the element since it is the correct position
} // End region
}
}
// Octant neighbours in the south-east quadrant: the local copy of the pins is mirrored
// (every y is negated) and handed to the north-east routine, which appends to edges.
inline void southeast_octant_neighbours(std::vector<point<int_t> > pins, std::vector<std::pair<index_t, index_t> > & edges){
    for(index_t i=0; i<pins.size(); ++i){
        pins[i].y_ = - pins[i].y_;
    }
    northeast_octant_neighbours(pins, edges);
}
// Horizontal topology for a net of at most 10 pins given in sorted order.
// 2 and 3 pins give a simple chain, 4 to 10 pins use the matching lookup table,
// anything else yields an empty edge list.
std::vector<std::pair<index_t, index_t> > get_small_horizontal_topology_from_sorted(std::vector<point<int_t> > const & pins){
    auto const pin_cnt = pins.size();
    assert(pin_cnt <= 10);
    switch(pin_cnt){
        case 2:  return std::vector<edge_t>{edge_t(0, 1)};
        case 3:  return std::vector<edge_t>{edge_t(0, 1), edge_t(1, 2)};
        case 4:  return get_topology_from_sorted<4, 2>(pins, steiner_lookup::topologies_4);
        case 5:  return get_topology_from_sorted<5, 6>(pins, steiner_lookup::topologies_5);
        case 6:  return get_topology_from_sorted<6, 23>(pins, steiner_lookup::topologies_6);
        case 7:  return get_topology_from_sorted<7, 111>(pins, steiner_lookup::topologies_7);
        case 8:  return get_topology_from_sorted<8, 642>(pins, steiner_lookup::topologies_8);
        case 9:  return get_topology_from_sorted<9, 4334>(pins, steiner_lookup::topologies_9);
        case 10: return get_topology_from_sorted<10, 33510>(pins, steiner_lookup::topologies_10);
        default: break; // Only 1 and 0 left (11 and more are protected by the assertion)
    }
    return std::vector<edge_t>();
}
// Get an ordering of the edges that is compatible with the processing functions
std::vector<edge_t> get_tree_topo_sort(std::vector<edge_t> const & topo){
std::vector<edge_t> sorted_topo;
std::vector<std::vector<index_t> > neighbours(topo.size()+1);
for(edge_t const E : topo){
neighbours[E.first].push_back(E.second);
neighbours[E.second].push_back(E.first);
}
std::vector<index_t> to_visit;
std::vector<int_t> nbr_unvisited(topo.size()+1);
for(index_t i=0; i<=topo.size(); ++i){
nbr_unvisited[i] = neighbours[i].size();
assert(topo.size() == 0 or nbr_unvisited[i] >= 1);
if(nbr_unvisited[i] == 1)
to_visit.push_back(i);
}
std::vector<int> visited(topo.size()+1, 0);
while(not to_visit.empty()){
index_t f = to_visit.back();
assert(visited[f] == 0);
visited[f] = 1;
to_visit.pop_back();
for(index_t s : neighbours[f]){
--nbr_unvisited[s];
if(visited[s] == 0){ // It is not a node we already visited
sorted_topo.push_back(edge_t(f, s));
}
if(nbr_unvisited[s] == 1){
to_visit.push_back(s);
}
}
}
assert(sorted_topo.size() == topo.size());
return sorted_topo;
}
// Horizontal topology for nets handled without the lookup tables.
// Starts from a spanning tree, derives a vertical topology from it, then derives the
// horizontal topology back from the vertical one by running the same code on pins with
// x and y exchanged. exactitude_limit is currently not used.
std::vector<edge_t> get_big_horizontal_topology_from_sorted(std::vector<point<int_t> > const & pins, index_t exactitude_limit){
    // Same pins with the two coordinates exchanged
    std::vector<point<int_t> > transposed(pins);
    for(index_t i=0; i<transposed.size(); ++i){
        std::swap(transposed[i].x_, transposed[i].y_);
    }

    // TODO: perform local optimizations on the topology using exact Steiner tree algorithms
    // Remove horizontal suboptimalities i.e. when the connexions to the left and right are unbalanced
    // Reuse existing code by translation to vertical topology
    auto const ordered_spanning = get_tree_topo_sort(get_MST_topology(pins));
    auto const ordered_vertical = get_tree_topo_sort(get_vertical_topology(pins, ordered_spanning));

    // Sort the tree so that it is usable when building an RSMT
    return get_tree_topo_sort(get_vertical_topology(transposed, ordered_vertical));
}
} // End anonymous namespace
// Minimum spanning tree of the pins for the Manhattan distance.
//   pins - pin coordinates
// Returns pins.size()-1 edges between positions in pins (no edge for 0 or 1 pin).
// Nets of 2 and 3 pins are solved directly; larger nets collect candidate edges from the
// north-east and south-east octant neighbour searches and run Kruskal's algorithm on them.
std::vector<std::pair<index_t, index_t> > get_MST_topology(std::vector<point<int_t> > const & pins){
    std::vector<edge_t> edges;
    if(pins.size() <= 3){
        if(pins.size() == 2){
            edges.push_back(edge_t(0, 1));
        }
        else if(pins.size() == 3){
            auto D = [](point<int_t> a, point<int_t> b){ return std::abs(a.x_ - b.x_) + std::abs(a.y_ - b.y_); };
            // dists[i] is the length of the edge ((i+1)%3, (i+2)%3), i.e. the one that avoids pin i
            auto dists = std::array<int_t, 3>({D(pins[1], pins[2]), D(pins[2], pins[0]), D(pins[0], pins[1])});
            // Keep the two shortest edges: drop the (first) longest one
            index_t mx = std::max_element(dists.begin(), dists.end()) - dists.begin();
            for(index_t i=0; i<3; ++i){
                if(i != mx)
                    edges.push_back(edge_t((i+1) % 3, (i+2) % 3));
            }
        }
        return edges;
    }

    // Candidate edges
    northeast_octant_neighbours(pins, edges);
    southeast_octant_neighbours(pins, edges);

    std::vector<edge_t> returned_edges;
    auto edge_length = [&](edge_t E){
        point<int_t> p1 = pins[E.first],
                     p2 = pins[E.second];
        return std::abs(p1.x_ - p2.x_) + std::abs(p1.y_ - p2.y_);
    };

    // Perform Kruskal to get the tree: shortest candidates first, keeping those that join two components
    std::sort(edges.begin(), edges.end(), [&](edge_t a, edge_t b){ return edge_length(a) < edge_length(b); });
    union_find merger(pins.size());
    for(index_t i=0; i<edges.size() && returned_edges.size()+1 < pins.size(); ++i){
        edge_t E = edges[i];
        if(merger.find(E.first) != merger.find(E.second)){
            merger.merge(E.first, E.second);
            assert(merger.find(E.first) == merger.find(E.second));
            returned_edges.push_back(E);
        }
    }
    // The candidate edges are expected to connect all the pins
    assert(returned_edges.size() + 1 == pins.size());
    assert(merger.is_connex());
    return returned_edges;
}
// Total Manhattan length of the spanning tree returned by get_MST_topology for these pins.
std::int64_t MST_length(std::vector<point<int_t> > const & pins){
    std::int64_t total = 0;
    for(edge_t const E : get_MST_topology(pins)){
        point<int_t> const & a = pins[E.first];
        point<int_t> const & b = pins[E.second];
        total += std::abs(a.x_ - b.x_);
        total += std::abs(a.y_ - b.y_);
    }
    return total;
}
// Length of a rectilinear Steiner tree connecting the pins.
//   pins             - pin coordinates
//   exactitude_limit - largest pin count handled with the lookup tables (asserted in 3..10)
// 0 or 1 pin gives 0, 2 pins their Manhattan distance, 3 pins the half-perimeter of their
// bounding box. Larger nets are sorted by x, then use either the lookup tables or the
// topology built by get_big_horizontal_topology_from_sorted.
std::int64_t RSMT_length(std::vector<point<int_t> > const & pins, index_t exactitude_limit){
    assert(exactitude_limit <= 10 and exactitude_limit >= 3);

    if(pins.size() < 2){
        return 0;
    }
    if(pins.size() == 2){
        return std::abs(pins[0].x_ - pins[1].x_) + std::abs(pins[0].y_ - pins[1].y_);
    }
    if(pins.size() == 3){
        int_t const x_lo = std::min({pins[0].x_, pins[1].x_, pins[2].x_});
        int_t const x_hi = std::max({pins[0].x_, pins[1].x_, pins[2].x_});
        int_t const y_lo = std::min({pins[0].y_, pins[1].y_, pins[2].y_});
        int_t const y_hi = std::max({pins[0].y_, pins[1].y_, pins[2].y_});
        return (x_hi - x_lo) + (y_hi - y_lo);
    }

    // Four pins or more: work on a copy sorted by x coordinate
    std::vector<point<int_t> > points = pins;
    std::sort(points.begin(), points.end(), [](point<int_t> a , point<int_t> b){return a.x_ < b.x_; });

    if(points.size() > exactitude_limit){
        // Need to create the full topology, then calculate the length back
        auto horizontal_topology = get_big_horizontal_topology_from_sorted(points, exactitude_limit);
        return get_wirelength_from_topo(points, horizontal_topology);
    }

    switch(points.size()){
        case 4:  return get_wirelength_from_sorted<4, 2>(points, steiner_lookup::topologies_4);
        case 5:  return get_wirelength_from_sorted<5, 6>(points, steiner_lookup::topologies_5);
        case 6:  return get_wirelength_from_sorted<6, 23>(points, steiner_lookup::topologies_6);
        case 7:  return get_wirelength_from_sorted<7, 111>(points, steiner_lookup::topologies_7);
        case 8:  return get_wirelength_from_sorted<8, 642>(points, steiner_lookup::topologies_8);
        case 9:  return get_wirelength_from_sorted<9, 4334>(points, steiner_lookup::topologies_9);
        case 10: return get_wirelength_from_sorted<10, 33510>(points, steiner_lookup::topologies_10);
        default: abort();
    }
}
// Horizontal and vertical topologies of a rectilinear Steiner tree connecting the pins.
//   pins             - pin coordinates
//   exactitude_limit - largest pin count handled with the lookup tables (asserted in 3..10)
// Returns a pair of edge lists (first component: horizontal, second: vertical); all edges
// use positions in pins. Fewer than 2 pins give a default-constructed result.
point<std::vector<std::pair<index_t, index_t> > > get_RSMT_topology(std::vector<point<int_t> > const & pins, index_t exactitude_limit){
    assert(exactitude_limit <= 10 and exactitude_limit >= 3);
    // For 3 pin and fewer, the topology is very simple
    if(pins.size() <= 2){
        if(pins.size() == 2){
            auto ret = std::vector<edge_t>(1, edge_t(0, 1));
            return point<std::vector<edge_t> >(ret, ret);
        }
        else{
            return point<std::vector<edge_t> >();
        }
    }
    else if(pins.size() == 3){
        // Chain the pins in x order for the horizontal part and in y order for the vertical part
        std::vector<indexed_pt> ipoints(pins.size());
        for(index_t i=0; i<pins.size(); ++i){
            ipoints[i] = indexed_pt(pins[i], i);
        }
        auto xpoints=ipoints;
        std::sort(xpoints.begin(), xpoints.end(), [](indexed_pt a , indexed_pt b){return a.x_ < b.x_; });
        auto ypoints=ipoints;
        std::sort(ypoints.begin(), ypoints.end(), [](indexed_pt a , indexed_pt b){return a.y_ < b.y_; });
        return point<std::vector<edge_t> >{{{xpoints[0].index, xpoints[1].index}, {xpoints[1].index, xpoints[2].index}}, {{ypoints[0].index, ypoints[1].index}, {ypoints[1].index, ypoints[2].index}}};
    }
    else{
        std::vector<edge_t> horizontal_topology;
        // Sort the pins by x coordinate
        std::vector<indexed_pt> ipoints(pins.size());
        for(index_t i=0; i<pins.size(); ++i){
            ipoints[i] = indexed_pt(pins[i], i);
        }
        std::sort(ipoints.begin(), ipoints.end(), [](indexed_pt a , indexed_pt b){return a.x_ < b.x_; });
        std::vector<point<int_t> > sorted_pins(pins.size());
        for(index_t i=0; i<pins.size(); ++i){
            sorted_pins[i] = ipoints[i];
        }
        // Get the topology for this ordering
        if(pins.size() <= exactitude_limit){
            horizontal_topology = get_small_horizontal_topology_from_sorted(sorted_pins);
        }
        else{
            horizontal_topology = get_big_horizontal_topology_from_sorted(sorted_pins, exactitude_limit);
        }
        // Back to the original ordering
        for(auto & E : horizontal_topology){
            E.first = ipoints[E.first].index;
            E.second = ipoints[E.second].index;
        }
        // The horizontal edges now refer to positions in pins, so the vertical topology must be
        // derived from pins as well (not from sorted_pins, which uses the x-sorted positions)
        return point<std::vector<edge_t> >(horizontal_topology, get_vertical_topology(pins, horizontal_topology));
    }
}
} // Namespace coloquinte