// yosys/kernel/hashlib.h

// This is free and unencumbered software released into the public domain.
//
// Anyone is free to copy, modify, publish, use, compile, sell, or
// distribute this software, either in source code form or as a compiled
// binary, for any purpose, commercial or non-commercial, and by any
// means.
// -------------------------------------------------------
// Written by Claire Xenia Wolf <claire@yosyshq.com> in 2014
// -------------------------------------------------------
// Include guard.
#ifndef HASHLIB_H
#define HASHLIB_H
#include <stddef.h>
#include <stdint.h>

#include <algorithm>
#include <array>
#include <cstring>
#include <functional>
#include <initializer_list>
#include <iterator>
#include <optional>
#include <stdexcept>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <variant>
#include <vector>
#define YS_HASHING_VERSION 1
namespace hashlib {
/**
* HASHING
*
* Also refer to docs/source/yosys_internals/hashing.rst
*
* The Hasher knows how to hash 32 and 64-bit integers. That's it.
* In the future, it could be expanded to do vectors with SIMD.
*
* The Hasher doesn't know how to hash common standard containers
* and compositions. However, hashlib provides centralized wrappers.
*
* Hashlib doesn't know how to hash silly Yosys-specific types.
* Hashlib doesn't depend on Yosys and can be used standalone.
* Please don't use hashlib standalone for new projects.
* Never directly include kernel/hashlib.h in Yosys code.
* Instead include kernel/yosys_common.h
*
* The hash_ops type is now always left to its default value, derived
* from templated functions through SFINAE. Providing custom ops is
* still supported.
*
* HASH TABLES
*
* We implement associative data structures with separate chaining.
* Linked lists use integers into the indirection hashtable array
* instead of pointers.
*/
2014-12-30 06:22:33 -06:00
// Load-factor tuning for the hash containers in this header:
//  - a rehash requests entries.capacity() * hashtable_size_factor buckets
//  - a lookup rehashes once entries.size() * hashtable_size_trigger exceeds
//    the current bucket count
constexpr int hashtable_size_factor = 3;
constexpr int hashtable_size_trigger = 2;
2014-12-26 20:04:50 -06:00
namespace legacy {
	/**
	 * One step of the additive djb2 hash: a * 33 + b, computed in uint32_t
	 * arithmetic (wraps modulo 2^32).
	 */
	inline uint32_t djb2_add(uint32_t a, uint32_t b) {
		return ((a << 5) + a) + b;
	}
}
/**
* Hash a type with an accumulator in a record or array context.
*
* Forward declaration only. Implementations provide
*   static bool cmp(a, b)            - equality of two values
*   static Hasher hash_into(a, h)    - returns h after absorbing a
*/
template<typename T>
struct hash_ops;
2014-12-27 05:02:57 -06:00
/**
* Hash a single instance in isolation.
* Can have explicit specialization, but the default redirects to hash_ops.
*
* Forward declaration only. This is the default OPS argument of the
* container templates (dict, idict, pool, mfp).
*/
template<typename T>
struct hash_top_ops;
2014-12-28 17:12:36 -06:00
/**
 * One round of xorshift scrambling on an unsigned int.
 *
 * The shift triple is (13, 17, 5) when unsigned int is 4 bytes wide and
 * (13, 7, 17) when it is 8 bytes wide; any other width throws
 * std::runtime_error. Zero maps to zero.
 */
inline unsigned int mkhash_xorshift(unsigned int a) {
	constexpr auto width = sizeof(a);
	if (width != 4 && width != 8)
		throw std::runtime_error("mkhash_xorshift() only implemented for 32 bit and 64 bit ints");

	const bool narrow = (width == 4);
	a ^= a << 13;
	a ^= a >> (narrow ? 17 : 7);
	a ^= a << (narrow ? 5 : 17);
	return a;
}
class HasherDJB32 {
public:
using hash_t = uint32_t;
HasherDJB32() {
// traditionally 5381 is used as starting value for the djb2 hash
state = 5381;
}
2024-10-18 05:34:25 -05:00
static void set_fudge(hash_t f) {
2024-10-01 09:02:41 -05:00
fudge = f;
}
private:
uint32_t state;
2024-10-01 09:02:41 -05:00
static uint32_t fudge;
// The XOR version of DJB2
[[nodiscard]]
static uint32_t djb2_xor(uint32_t a, uint32_t b) {
2024-10-01 09:02:41 -05:00
uint32_t hash = ((a << 5) + a) ^ b;
return hash;
2021-05-24 14:27:29 -05:00
}
public:
void hash32(uint32_t i) {
state = djb2_xor(i, state);
state = mkhash_xorshift(fudge ^ state);
return;
}
void hash64(uint64_t i) {
2024-11-20 05:11:37 -06:00
state = djb2_xor((uint32_t)(i & 0xFFFFFFFFULL), state);
state = djb2_xor((uint32_t)(i >> 32ULL), state);
state = mkhash_xorshift(fudge ^ state);
return;
2015-08-12 06:37:09 -05:00
}
2024-10-18 05:34:25 -05:00
[[nodiscard]]
hash_t yield() {
return (hash_t)state;
}
template<typename T>
2024-11-11 08:45:11 -06:00
void eat(T&& t) {
2024-11-19 13:04:19 -06:00
*this = hash_ops<std::remove_cv_t<std::remove_reference_t<T>>>::hash_into(std::forward<T>(t), *this);
2024-10-09 08:00:31 -05:00
}
template<typename T>
2024-11-11 08:45:11 -06:00
void eat(const T& t) {
2024-11-19 13:04:19 -06:00
*this = hash_ops<T>::hash_into(t, *this);
}
2024-11-11 08:45:11 -06:00
void commutative_eat(hash_t t) {
state ^= t;
}
void force(hash_t new_state) {
state = new_state;
}
};
// Hasher names the hash accumulator implementation used by the rest of this header.
using Hasher = HasherDJB32;
/**
 * Default top-level hashing policy: equality and hashing are both delegated
 * to hash_ops<T>, with hashing started from a freshly constructed Hasher.
 */
template<typename T>
struct hash_top_ops {
	static inline bool cmp(const T &a, const T &b) {
		return hash_ops<T>::cmp(a, b);
	}
	static inline Hasher hash(const T &a) {
		return hash_ops<T>::hash_into(a, Hasher());
	}
};
template<typename T>
struct hash_ops {
static inline bool cmp(const T &a, const T &b) {
return a == b;
}
2024-11-19 13:04:19 -06:00
static inline Hasher hash_into(const T &a, Hasher h) {
2024-11-20 05:11:37 -06:00
if constexpr (std::is_integral_v<T>) {
static_assert(sizeof(T) <= sizeof(uint64_t));
if (sizeof(T) == sizeof(uint64_t))
h.hash64(a);
else
h.hash32(a);
return h;
} else if constexpr (std::is_enum_v<T>) {
using u_type = std::underlying_type_t<T>;
2024-11-19 13:04:19 -06:00
return hash_ops<u_type>::hash_into((u_type) a, h);
} else if constexpr (std::is_pointer_v<T>) {
2024-11-19 13:04:19 -06:00
return hash_ops<uintptr_t>::hash_into((uintptr_t) a, h);
} else if constexpr (std::is_same_v<T, std::string>) {
for (auto c : a)
h.hash32(c);
return h;
} else {
2024-11-19 13:04:19 -06:00
return a.hash_into(h);
}
}
};
template<typename P, typename Q> struct hash_ops<std::pair<P, Q>> {
2014-12-31 06:05:33 -06:00
static inline bool cmp(std::pair<P, Q> a, std::pair<P, Q> b) {
return a == b;
}
2024-11-19 13:04:19 -06:00
static inline Hasher hash_into(std::pair<P, Q> a, Hasher h) {
h = hash_ops<P>::hash_into(a.first, h);
h = hash_ops<Q>::hash_into(a.second, h);
return h;
}
};
2015-04-07 10:23:30 -05:00
template<typename... T> struct hash_ops<std::tuple<T...>> {
static inline bool cmp(std::tuple<T...> a, std::tuple<T...> b) {
return a == b;
}
template<size_t I = 0>
2024-11-19 13:04:19 -06:00
static inline typename std::enable_if<I == sizeof...(T), Hasher>::type hash_into(std::tuple<T...>, Hasher h) {
return h;
2015-04-07 10:23:30 -05:00
}
template<size_t I = 0>
2024-11-19 13:04:19 -06:00
static inline typename std::enable_if<I != sizeof...(T), Hasher>::type hash_into(std::tuple<T...> a, Hasher h) {
2016-02-14 02:35:25 -06:00
typedef hash_ops<typename std::tuple_element<I, std::tuple<T...>>::type> element_ops_t;
2024-11-19 13:04:19 -06:00
h = hash_into<I+1>(a, h);
h = element_ops_t::hash_into(std::get<I>(a), h);
return h;
2015-04-07 10:23:30 -05:00
}
};
2014-12-30 16:45:43 -06:00
template<typename T> struct hash_ops<std::vector<T>> {
2014-12-31 06:05:33 -06:00
static inline bool cmp(std::vector<T> a, std::vector<T> b) {
2014-12-30 16:45:43 -06:00
return a == b;
}
2024-11-19 13:04:19 -06:00
static inline Hasher hash_into(std::vector<T> a, Hasher h) {
2024-11-20 05:11:37 -06:00
h.eat((uint32_t)a.size());
2014-12-30 16:45:43 -06:00
for (auto k : a)
2024-11-11 08:45:11 -06:00
h.eat(k);
2014-12-30 16:45:43 -06:00
return h;
}
};
2024-10-18 09:18:19 -05:00
template<typename T, size_t N> struct hash_ops<std::array<T, N>> {
static inline bool cmp(std::array<T, N> a, std::array<T, N> b) {
return a == b;
}
2024-11-19 13:04:19 -06:00
static inline Hasher hash_into(std::array<T, N> a, Hasher h) {
2024-10-18 09:18:19 -05:00
for (const auto& k : a)
2024-11-19 13:04:19 -06:00
h = hash_ops<T>::hash_into(k, h);
2024-10-18 09:18:19 -05:00
return h;
}
};
struct hash_cstr_ops {
2014-12-31 06:05:33 -06:00
static inline bool cmp(const char *a, const char *b) {
2024-11-20 05:11:37 -06:00
return strcmp(a, b) == 0;
}
2024-11-19 13:04:19 -06:00
static inline Hasher hash_into(const char *a, Hasher h) {
while (*a)
h.hash32(*(a++));
return h;
}
};
// char* is compared and hashed through hash_cstr_ops rather than the primary template.
template <> struct hash_ops<char*> : hash_cstr_ops {};
struct hash_ptr_ops {
2014-12-31 06:05:33 -06:00
static inline bool cmp(const void *a, const void *b) {
return a == b;
}
2024-11-19 13:04:19 -06:00
static inline Hasher hash_into(const void *a, Hasher h) {
return hash_ops<uintptr_t>::hash_into((uintptr_t)a, h);
}
};
2014-12-26 20:04:50 -06:00
struct hash_obj_ops {
2014-12-31 06:05:33 -06:00
static inline bool cmp(const void *a, const void *b) {
2014-12-26 20:04:50 -06:00
return a == b;
}
template<typename T>
2024-11-19 13:04:19 -06:00
static inline Hasher hash_into(const T *a, Hasher h) {
2024-11-20 10:06:49 -06:00
if (a)
a->hash_into(h);
else
h.eat(0);
return h;
2014-12-26 20:04:50 -06:00
}
};
/**
* If you find yourself using this function, think hard
* about if it's the right thing to do. Mixing finalized
* hashes together with XORs or worse can destroy
* desirable qualities of the hash function
*/
2015-10-25 13:31:29 -05:00
template<typename T>
2024-10-18 05:34:25 -05:00
[[nodiscard]]
Hasher::hash_t run_hash(const T& obj) {
return hash_top_ops<T>::hash(obj).yield();
2015-10-25 13:31:29 -05:00
}
/** Refer to docs/source/yosys_internals/hashing.rst */
template<typename T>
[[nodiscard]]
[[deprecated]]
inline unsigned int mkhash(const T &v) {
return (unsigned int) run_hash<T>(v);
}
template<> struct hash_ops<std::monostate> {
static inline bool cmp(std::monostate a, std::monostate b) {
return a == b;
}
2024-11-19 13:04:19 -06:00
static inline Hasher hash_into(std::monostate, Hasher h) {
return h;
}
};
template<typename... T> struct hash_ops<std::variant<T...>> {
static inline bool cmp(std::variant<T...> a, std::variant<T...> b) {
return a == b;
}
2024-11-19 13:04:19 -06:00
static inline Hasher hash_into(std::variant<T...> a, Hasher h) {
2024-11-11 08:45:11 -06:00
std::visit([& h](const auto &v) { h.eat(v); }, a);
h.eat(a.index());
return h;
}
};
template<typename T> struct hash_ops<std::optional<T>> {
static inline bool cmp(std::optional<T> a, std::optional<T> b) {
return a == b;
}
2024-11-19 13:04:19 -06:00
static inline Hasher hash_into(std::optional<T> a, Hasher h) {
if(a.has_value())
2024-11-11 08:45:11 -06:00
h.eat(*a);
else
2024-11-11 08:45:11 -06:00
h.eat(0);
return h;
}
};
/**
 * Pick the bucket count for a hash table that needs at least `min_size` buckets.
 *
 * Returns the first table entry that is >= min_size (0 for min_size == 0).
 * Throws std::length_error when min_size exceeds the largest table entry
 * and unsigned int is 4 bytes wide.
 */
inline unsigned int hashtable_size(unsigned int min_size)
{
	// Primes as generated by https://oeis.org/A175953
	// Kept as a constexpr array: no heap allocation and no mutable static state.
	static constexpr unsigned int zero_and_some_primes[] = {
		0, 23, 29, 37, 47, 59, 79, 101, 127, 163, 211, 269, 337, 431, 541, 677,
		853, 1069, 1361, 1709, 2137, 2677, 3347, 4201, 5261, 6577, 8231, 10289,
		12889, 16127, 20161, 25219, 31531, 39419, 49277, 61603, 77017, 96281,
		120371, 150473, 188107, 235159, 293957, 367453, 459317, 574157, 717697,
		897133, 1121423, 1401791, 1752239, 2190299, 2737937, 3422429, 4278037,
		5347553, 6684443, 8355563, 10444457, 13055587, 16319519, 20399411,
		25499291, 31874149, 39842687, 49803361, 62254207, 77817767, 97272239,
		121590311, 151987889, 189984863, 237481091, 296851369, 371064217,
		463830313, 579787991, 724735009, 905918777, 1132398479, 1415498113,
		1769372713, 2211715897, 2764644871, 2764644887, 3455806139
	};

	for (unsigned int p : zero_and_some_primes)
		if (p >= min_size) return p;

	if (sizeof(unsigned int) == 4)
		throw std::length_error("hash table exceeded maximum size.\nDesign is likely too large for yosys to handle, if possible try not to flatten the design.");

	// Only reached when unsigned int is wider than 4 bytes: scale the table up.
	for (unsigned int p : zero_and_some_primes)
		if (100129 * p > min_size) return 100129 * p;

	throw std::length_error("hash table exceeded maximum size.");
}
// Forward declarations of the container templates. The default OPS argument
// is specified here, so key types are hashed and compared through
// hash_top_ops<K> unless a custom OPS is given.
template<typename K, typename T, typename OPS = hash_top_ops<K>> class dict;
template<typename K, int offset = 0, typename OPS = hash_top_ops<K>> class idict;
template<typename K, typename OPS = hash_top_ops<K>> class pool;
template<typename K, typename OPS = hash_top_ops<K>> class mfp;
template<typename K, typename T, typename OPS>
class dict {
struct entry_t
{
std::pair<K, T> udata;
2014-12-30 20:58:29 -06:00
int next;
2014-12-30 20:58:29 -06:00
entry_t() { }
entry_t(const std::pair<K, T> &udata, int next) : udata(udata), next(next) { }
2015-02-09 13:11:51 -06:00
entry_t(std::pair<K, T> &&udata, int next) : udata(std::move(udata)), next(next) { }
bool operator<(const entry_t &other) const { return udata.first < other.udata.first; }
};
std::vector<int> hashtable;
std::vector<entry_t> entries;
OPS ops;
2015-02-09 13:11:51 -06:00
#ifdef NDEBUG
static inline void do_assert(bool) { }
#else
2014-12-30 20:58:29 -06:00
static inline void do_assert(bool cond) {
if (!cond) throw std::runtime_error("dict<> assert failed.");
}
2014-12-30 20:58:29 -06:00
#endif
2024-10-18 05:34:25 -05:00
Hasher::hash_t do_hash(const K &key) const
{
Hasher::hash_t hash = 0;
if (!hashtable.empty())
hash = ops.hash(key).yield() % (unsigned int)(hashtable.size());
return hash;
}
2014-12-30 20:58:29 -06:00
void do_rehash()
{
2014-12-30 20:58:29 -06:00
hashtable.clear();
hashtable.resize(hashtable_size(entries.capacity() * hashtable_size_factor), -1);
2014-12-30 20:58:29 -06:00
for (int i = 0; i < int(entries.size()); i++) {
do_assert(-1 <= entries[i].next && entries[i].next < int(entries.size()));
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(entries[i].udata.first);
2014-12-30 20:58:29 -06:00
entries[i].next = hashtable[hash];
hashtable[hash] = i;
}
}
2024-10-18 05:34:25 -05:00
int do_erase(int index, Hasher::hash_t hash)
{
2014-12-30 20:58:29 -06:00
do_assert(index < int(entries.size()));
if (hashtable.empty() || index < 0)
return 0;
int k = hashtable[hash];
2015-02-09 13:11:51 -06:00
do_assert(0 <= k && k < int(entries.size()));
2014-12-30 20:58:29 -06:00
if (k == index) {
hashtable[hash] = entries[index].next;
} else {
while (entries[k].next != index) {
k = entries[k].next;
do_assert(0 <= k && k < int(entries.size()));
}
entries[k].next = entries[index].next;
}
2014-12-30 06:22:33 -06:00
2014-12-30 20:58:29 -06:00
int back_idx = entries.size()-1;
2014-12-30 20:58:29 -06:00
if (index != back_idx)
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t back_hash = do_hash(entries[back_idx].udata.first);
2014-12-30 20:58:29 -06:00
k = hashtable[back_hash];
2015-02-09 13:11:51 -06:00
do_assert(0 <= k && k < int(entries.size()));
2014-12-30 20:58:29 -06:00
if (k == back_idx) {
hashtable[back_hash] = index;
} else {
2014-12-30 20:58:29 -06:00
while (entries[k].next != back_idx) {
k = entries[k].next;
do_assert(0 <= k && k < int(entries.size()));
}
entries[k].next = index;
}
2014-12-30 20:58:29 -06:00
entries[index] = std::move(entries[back_idx]);
}
entries.pop_back();
if (entries.empty())
hashtable.clear();
2014-12-30 20:58:29 -06:00
return 1;
}
2024-10-18 05:34:25 -05:00
int do_lookup(const K &key, Hasher::hash_t &hash) const
{
2014-12-30 20:58:29 -06:00
if (hashtable.empty())
return -1;
if (entries.size() * hashtable_size_trigger > hashtable.size()) {
((dict*)this)->do_rehash();
hash = do_hash(key);
}
2014-12-30 20:58:29 -06:00
int index = hashtable[hash];
while (index >= 0 && !ops.cmp(entries[index].udata.first, key)) {
index = entries[index].next;
do_assert(-1 <= index && index < int(entries.size()));
}
2014-12-30 20:58:29 -06:00
return index;
}
2024-10-18 05:34:25 -05:00
int do_insert(const K &key, Hasher::hash_t &hash)
2015-02-09 13:11:51 -06:00
{
if (hashtable.empty()) {
entries.emplace_back(std::pair<K, T>(key, T()), -1);
2015-02-09 13:11:51 -06:00
do_rehash();
hash = do_hash(key);
} else {
entries.emplace_back(std::pair<K, T>(key, T()), hashtable[hash]);
2015-02-09 13:11:51 -06:00
hashtable[hash] = entries.size() - 1;
}
return entries.size() - 1;
}
2024-10-18 05:34:25 -05:00
int do_insert(const std::pair<K, T> &value, Hasher::hash_t &hash)
{
2014-12-30 20:58:29 -06:00
if (hashtable.empty()) {
entries.emplace_back(value, -1);
2014-12-30 20:58:29 -06:00
do_rehash();
hash = do_hash(value.first);
} else {
entries.emplace_back(value, hashtable[hash]);
hashtable[hash] = entries.size() - 1;
}
return entries.size() - 1;
}
2024-10-18 05:34:25 -05:00
int do_insert(std::pair<K, T> &&rvalue, Hasher::hash_t &hash)
{
if (hashtable.empty()) {
auto key = rvalue.first;
entries.emplace_back(std::forward<std::pair<K, T>>(rvalue), -1);
do_rehash();
hash = do_hash(key);
} else {
entries.emplace_back(std::forward<std::pair<K, T>>(rvalue), hashtable[hash]);
2014-12-30 20:58:29 -06:00
hashtable[hash] = entries.size() - 1;
}
2014-12-30 20:58:29 -06:00
return entries.size() - 1;
}
public:
class const_iterator
{
2014-12-30 21:24:04 -06:00
friend class dict;
2014-12-30 20:58:29 -06:00
protected:
const dict *ptr;
int index;
const_iterator(const dict *ptr, int index) : ptr(ptr), index(index) { }
public:
typedef std::forward_iterator_tag iterator_category;
typedef std::pair<K, T> value_type;
typedef ptrdiff_t difference_type;
typedef std::pair<K, T>* pointer;
typedef std::pair<K, T>& reference;
const_iterator() { }
const_iterator operator++() { index--; return *this; }
const_iterator operator+=(int amt) { index -= amt; return *this; }
bool operator<(const const_iterator &other) const { return index > other.index; }
bool operator==(const const_iterator &other) const { return index == other.index; }
bool operator!=(const const_iterator &other) const { return index != other.index; }
const std::pair<K, T> &operator*() const { return ptr->entries[index].udata; }
const std::pair<K, T> *operator->() const { return &ptr->entries[index].udata; }
};
class iterator
{
2014-12-30 21:24:04 -06:00
friend class dict;
2014-12-30 20:58:29 -06:00
protected:
dict *ptr;
int index;
iterator(dict *ptr, int index) : ptr(ptr), index(index) { }
public:
typedef std::forward_iterator_tag iterator_category;
typedef std::pair<K, T> value_type;
typedef ptrdiff_t difference_type;
typedef std::pair<K, T>* pointer;
typedef std::pair<K, T>& reference;
iterator() { }
iterator operator++() { index--; return *this; }
iterator operator+=(int amt) { index -= amt; return *this; }
bool operator<(const iterator &other) const { return index > other.index; }
bool operator==(const iterator &other) const { return index == other.index; }
bool operator!=(const iterator &other) const { return index != other.index; }
std::pair<K, T> &operator*() { return ptr->entries[index].udata; }
std::pair<K, T> *operator->() { return &ptr->entries[index].udata; }
const std::pair<K, T> &operator*() const { return ptr->entries[index].udata; }
const std::pair<K, T> *operator->() const { return &ptr->entries[index].udata; }
operator const_iterator() const { return const_iterator(ptr, index); }
};
constexpr dict()
{
}
dict(const dict &other)
{
2014-12-30 20:58:29 -06:00
entries = other.entries;
do_rehash();
}
dict(dict &&other)
{
swap(other);
}
dict &operator=(const dict &other) {
2014-12-30 20:58:29 -06:00
entries = other.entries;
do_rehash();
return *this;
}
dict &operator=(dict &&other) {
clear();
swap(other);
return *this;
}
dict(const std::initializer_list<std::pair<K, T>> &list)
{
for (auto &it : list)
insert(it);
}
template<class InputIterator>
dict(InputIterator first, InputIterator last)
{
insert(first, last);
}
template<class InputIterator>
void insert(InputIterator first, InputIterator last)
{
for (; first != last; ++first)
insert(*first);
}
2015-02-09 13:11:51 -06:00
std::pair<iterator, bool> insert(const K &key)
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(key);
2015-02-09 13:11:51 -06:00
int i = do_lookup(key, hash);
if (i >= 0)
return std::pair<iterator, bool>(iterator(this, i), false);
i = do_insert(key, hash);
return std::pair<iterator, bool>(iterator(this, i), true);
}
std::pair<iterator, bool> insert(const std::pair<K, T> &value)
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(value.first);
2014-12-30 20:58:29 -06:00
int i = do_lookup(value.first, hash);
if (i >= 0)
return std::pair<iterator, bool>(iterator(this, i), false);
2014-12-30 20:58:29 -06:00
i = do_insert(value, hash);
return std::pair<iterator, bool>(iterator(this, i), true);
}
std::pair<iterator, bool> insert(std::pair<K, T> &&rvalue)
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(rvalue.first);
int i = do_lookup(rvalue.first, hash);
if (i >= 0)
return std::pair<iterator, bool>(iterator(this, i), false);
i = do_insert(std::forward<std::pair<K, T>>(rvalue), hash);
return std::pair<iterator, bool>(iterator(this, i), true);
}
std::pair<iterator, bool> emplace(K const &key, T const &value)
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(key);
int i = do_lookup(key, hash);
if (i >= 0)
return std::pair<iterator, bool>(iterator(this, i), false);
i = do_insert(std::make_pair(key, value), hash);
return std::pair<iterator, bool>(iterator(this, i), true);
}
std::pair<iterator, bool> emplace(K const &key, T &&rvalue)
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(key);
int i = do_lookup(key, hash);
if (i >= 0)
return std::pair<iterator, bool>(iterator(this, i), false);
i = do_insert(std::make_pair(key, std::forward<T>(rvalue)), hash);
return std::pair<iterator, bool>(iterator(this, i), true);
}
std::pair<iterator, bool> emplace(K &&rkey, T const &value)
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(rkey);
int i = do_lookup(rkey, hash);
if (i >= 0)
return std::pair<iterator, bool>(iterator(this, i), false);
i = do_insert(std::make_pair(std::forward<K>(rkey), value), hash);
return std::pair<iterator, bool>(iterator(this, i), true);
}
std::pair<iterator, bool> emplace(K &&rkey, T &&rvalue)
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(rkey);
int i = do_lookup(rkey, hash);
if (i >= 0)
return std::pair<iterator, bool>(iterator(this, i), false);
i = do_insert(std::make_pair(std::forward<K>(rkey), std::forward<T>(rvalue)), hash);
return std::pair<iterator, bool>(iterator(this, i), true);
}
int erase(const K &key)
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(key);
2014-12-30 20:58:29 -06:00
int index = do_lookup(key, hash);
return do_erase(index, hash);
}
iterator erase(iterator it)
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(it->first);
2014-12-30 20:58:29 -06:00
do_erase(it.index, hash);
return ++it;
}
int count(const K &key) const
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(key);
2014-12-30 20:58:29 -06:00
int i = do_lookup(key, hash);
return i < 0 ? 0 : 1;
}
int count(const K &key, const_iterator it) const
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(key);
int i = do_lookup(key, hash);
return i < 0 || i > it.index ? 0 : 1;
}
iterator find(const K &key)
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(key);
2014-12-30 20:58:29 -06:00
int i = do_lookup(key, hash);
if (i < 0)
return end();
return iterator(this, i);
}
const_iterator find(const K &key) const
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(key);
2014-12-30 20:58:29 -06:00
int i = do_lookup(key, hash);
if (i < 0)
return end();
return const_iterator(this, i);
}
T& at(const K &key)
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(key);
2014-12-30 20:58:29 -06:00
int i = do_lookup(key, hash);
if (i < 0)
throw std::out_of_range("dict::at()");
return entries[i].udata.second;
}
const T& at(const K &key) const
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(key);
2014-12-30 20:58:29 -06:00
int i = do_lookup(key, hash);
if (i < 0)
throw std::out_of_range("dict::at()");
return entries[i].udata.second;
}
const T& at(const K &key, const T &defval) const
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(key);
int i = do_lookup(key, hash);
if (i < 0)
return defval;
return entries[i].udata.second;
}
T& operator[](const K &key)
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(key);
2014-12-30 20:58:29 -06:00
int i = do_lookup(key, hash);
if (i < 0)
2014-12-30 20:58:29 -06:00
i = do_insert(std::pair<K, T>(key, T()), hash);
return entries[i].udata.second;
}
2015-01-23 17:13:27 -06:00
template<typename Compare = std::less<K>>
void sort(Compare comp = Compare())
{
std::sort(entries.begin(), entries.end(), [comp](const entry_t &a, const entry_t &b){ return comp(b.udata.first, a.udata.first); });
do_rehash();
}
void swap(dict &other)
{
hashtable.swap(other.hashtable);
entries.swap(other.entries);
}
bool operator==(const dict &other) const {
2014-12-30 20:58:29 -06:00
if (size() != other.size())
return false;
2014-12-30 20:58:29 -06:00
for (auto &it : entries) {
auto oit = other.find(it.udata.first);
2015-02-09 13:11:51 -06:00
if (oit == other.end() || !(oit->second == it.udata.second))
2014-12-30 20:58:29 -06:00
return false;
}
return true;
}
bool operator!=(const dict &other) const {
2015-02-09 13:11:51 -06:00
return !operator==(other);
}
2024-11-19 13:04:19 -06:00
Hasher hash_into(Hasher h) const {
for (auto &it : entries) {
Hasher entry_hash;
2024-11-11 08:45:11 -06:00
entry_hash.eat(it.udata.first);
entry_hash.eat(it.udata.second);
h.commutative_eat(entry_hash.yield());
}
2024-11-20 10:06:49 -06:00
h.eat(entries.size());
return h;
}
void reserve(size_t n) { entries.reserve(n); }
2014-12-30 20:58:29 -06:00
size_t size() const { return entries.size(); }
bool empty() const { return entries.empty(); }
void clear() { hashtable.clear(); entries.clear(); }
2014-12-30 20:58:29 -06:00
iterator begin() { return iterator(this, int(entries.size())-1); }
iterator element(int n) { return iterator(this, int(entries.size())-1-n); }
2014-12-30 06:30:22 -06:00
iterator end() { return iterator(nullptr, -1); }
2014-12-30 20:58:29 -06:00
const_iterator begin() const { return const_iterator(this, int(entries.size())-1); }
const_iterator element(int n) const { return const_iterator(this, int(entries.size())-1-n); }
2014-12-30 06:30:22 -06:00
const_iterator end() const { return const_iterator(nullptr, -1); }
};
template<typename K, typename OPS>
class pool
{
template<typename, int, typename> friend class idict;
2015-01-18 05:12:33 -06:00
protected:
// One stored element plus its chain link.
struct entry_t
{
K udata;
// Index of the next entry in the same bucket chain, or -1 at the end of the chain.
int next;
entry_t() { }
entry_t(const K &udata, int next) : udata(udata), next(next) { }
entry_t(K &&udata, int next) : udata(std::move(udata)), next(next) { }
};
// Per bucket: index into `entries` of the first entry of the chain, or -1.
std::vector<int> hashtable;
// All stored elements, kept contiguous.
std::vector<entry_t> entries;
// Hash/compare policy object.
OPS ops;
2015-02-09 13:11:51 -06:00
// Internal consistency check: throws std::runtime_error on failure, and is
// compiled to a no-op when NDEBUG is defined.
#ifdef NDEBUG
static inline void do_assert(bool) { }
#else
static inline void do_assert(bool cond) {
if (!cond) throw std::runtime_error("pool<> assert failed.");
}
#endif
2024-10-18 05:34:25 -05:00
// Bucket index of `key` for the current table size; 0 while the table is empty.
Hasher::hash_t do_hash(const K &key) const
{
	if (hashtable.empty())
		return 0;
	const unsigned int bucket_count = (unsigned int)(hashtable.size());
	return ops.hash(key).yield() % bucket_count;
}
void do_rehash()
{
hashtable.clear();
hashtable.resize(hashtable_size(entries.capacity() * hashtable_size_factor), -1);
for (int i = 0; i < int(entries.size()); i++) {
do_assert(-1 <= entries[i].next && entries[i].next < int(entries.size()));
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(entries[i].udata);
entries[i].next = hashtable[hash];
hashtable[hash] = i;
}
}
2024-10-18 05:34:25 -05:00
// Remove entry `index`, whose key hashes to bucket `hash`.
// Returns 1 when an entry was removed, 0 when index < 0 or the table is empty.
int do_erase(int index, Hasher::hash_t hash)
{
	do_assert(index < int(entries.size()));
	if (hashtable.empty() || index < 0)
		return 0;

	// Unlink the entry from its bucket chain. The chain head is checked the
	// same way dict<>::do_erase checks it, so a corrupted table trips the
	// assertion instead of indexing entries[-1].
	int k = hashtable[hash];
	do_assert(0 <= k && k < int(entries.size()));

	if (k == index) {
		hashtable[hash] = entries[index].next;
	} else {
		while (entries[k].next != index) {
			k = entries[k].next;
			do_assert(0 <= k && k < int(entries.size()));
		}
		entries[k].next = entries[index].next;
	}

	// Keep the entry vector dense: redirect the last entry's chain link to
	// `index` and move the last entry into the hole.
	int back_idx = entries.size()-1;

	if (index != back_idx)
	{
		Hasher::hash_t back_hash = do_hash(entries[back_idx].udata);

		k = hashtable[back_hash];
		do_assert(0 <= k && k < int(entries.size()));

		if (k == back_idx) {
			hashtable[back_hash] = index;
		} else {
			while (entries[k].next != back_idx) {
				k = entries[k].next;
				do_assert(0 <= k && k < int(entries.size()));
			}
			entries[k].next = index;
		}

		entries[index] = std::move(entries[back_idx]);
	}

	entries.pop_back();

	if (entries.empty())
		hashtable.clear();

	return 1;
}
2024-10-18 05:34:25 -05:00
int do_lookup(const K &key, Hasher::hash_t &hash) const
{
if (hashtable.empty())
return -1;
if (entries.size() * hashtable_size_trigger > hashtable.size()) {
((pool*)this)->do_rehash();
hash = do_hash(key);
}
int index = hashtable[hash];
while (index >= 0 && !ops.cmp(entries[index].udata, key)) {
index = entries[index].next;
do_assert(-1 <= index && index < int(entries.size()));
}
return index;
}
2024-10-18 05:34:25 -05:00
// Append a copy of `value` without checking for duplicates and return its
// index. `hash` is only rewritten when the table had to be created.
int do_insert(const K &value, Hasher::hash_t &hash)
{
	if (!hashtable.empty()) {
		// Link the new entry in front of the existing chain of its bucket.
		entries.emplace_back(value, hashtable[hash]);
		hashtable[hash] = entries.size() - 1;
	} else {
		entries.emplace_back(value, -1);
		do_rehash();
		hash = do_hash(value);
	}
	return entries.size() - 1;
}
2024-10-18 05:34:25 -05:00
// Append `rvalue` by move without checking for duplicates and return its
// index. `hash` is only rewritten when the table had to be created.
int do_insert(K &&rvalue, Hasher::hash_t &hash)
{
	if (hashtable.empty()) {
		entries.emplace_back(std::move(rvalue), -1);
		do_rehash();
		// rvalue has just been moved from; hash the stored element instead.
		hash = do_hash(entries.back().udata);
	} else {
		entries.emplace_back(std::move(rvalue), hashtable[hash]);
		hashtable[hash] = entries.size() - 1;
	}
	return entries.size() - 1;
}
public:
// Read-only iterator: a (container, index) pair. Advancing decrements the
// index; comparisons look at the index only.
class const_iterator
{
	friend class pool;
protected:
	const pool *ptr;
	int index;
	const_iterator(const pool *ptr, int index) : ptr(ptr), index(index) { }
public:
	typedef std::forward_iterator_tag iterator_category;
	typedef K value_type;
	typedef ptrdiff_t difference_type;
	typedef K* pointer;
	typedef K& reference;
	const_iterator() { }
	const_iterator operator++() { index--; return *this; }
	bool operator==(const const_iterator &other) const { return index == other.index; }
	bool operator!=(const const_iterator &other) const { return index != other.index; }
	const K &operator*() const { return ptr->entries[index].udata; }
	const K *operator->() const { return &ptr->entries[index].udata; }
};
// Mutable iterator: a (container, index) pair, convertible to const_iterator.
// Advancing decrements the index; comparisons look at the index only.
class iterator
{
	friend class pool;
protected:
	pool *ptr;
	int index;
	iterator(pool *ptr, int index) : ptr(ptr), index(index) { }
public:
	typedef std::forward_iterator_tag iterator_category;
	typedef K value_type;
	typedef ptrdiff_t difference_type;
	typedef K* pointer;
	typedef K& reference;
	iterator() { }
	iterator operator++() { index--; return *this; }
	bool operator==(const iterator &other) const { return index == other.index; }
	bool operator!=(const iterator &other) const { return index != other.index; }
	K &operator*() { return ptr->entries[index].udata; }
	K *operator->() { return &ptr->entries[index].udata; }
	const K &operator*() const { return ptr->entries[index].udata; }
	const K *operator->() const { return &ptr->entries[index].udata; }
	operator const_iterator() const { return const_iterator(ptr, index); }
};
// Creates an empty pool.
constexpr pool()
{
}
// Copy constructor: copies the entries of `other`, then builds a fresh
// bucket array for them.
pool(const pool &other)
{
entries = other.entries;
do_rehash();
}
// Move constructor: swaps storage with `other`, which is left holding this
// object's (empty) initial state.
pool(pool &&other)
{
swap(other);
}
// Copy assignment: replaces the entries with a copy of `other`'s and
// rebuilds the bucket array.
pool &operator=(const pool &other) {
entries = other.entries;
do_rehash();
return *this;
}
// Move assignment: drops the current contents, then swaps storage with `other`.
// NOTE(review): clear() runs first, so self-move-assignment empties the pool.
pool &operator=(pool &&other) {
clear();
swap(other);
return *this;
}
// Builds a pool from an initializer list; duplicates are inserted only once.
pool(const std::initializer_list<K> &list)
{
for (auto &it : list)
insert(it);
}
// Builds a pool from the range [first, last).
template<class InputIterator>
pool(InputIterator first, InputIterator last)
{
insert(first, last);
}
// Inserts every element of the range [first, last), one at a time.
template<class InputIterator>
void insert(InputIterator first, InputIterator last)
{
for (; first != last; ++first)
insert(*first);
}
std::pair<iterator, bool> insert(const K &value)
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(value);
int i = do_lookup(value, hash);
if (i >= 0)
return std::pair<iterator, bool>(iterator(this, i), false);
i = do_insert(value, hash);
return std::pair<iterator, bool>(iterator(this, i), true);
}
// Insert `rvalue` by move unless an equal element is already present (in
// which case `rvalue` is left untouched).
// Returns the element's iterator and whether an insertion took place.
std::pair<iterator, bool> insert(K &&rvalue)
{
	Hasher::hash_t hash = do_hash(rvalue);
	int i = do_lookup(rvalue, hash);
	if (i >= 0)
		return std::pair<iterator, bool>(iterator(this, i), false);
	i = do_insert(std::forward<K>(rvalue), hash);
	return std::pair<iterator, bool>(iterator(this, i), true);
}
2020-04-22 10:14:07 -05:00
// Constructs a K from `args` and inserts it; the temporary is built even
// when an equal element already exists.
template<typename... Args>
std::pair<iterator, bool> emplace(Args&&... args)
{
return insert(K(std::forward<Args>(args)...));
}
int erase(const K &key)
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(key);
int index = do_lookup(key, hash);
return do_erase(index, hash);
}
// Remove the element at `it` and return the iterator advanced by one step
// (i.e. pointing at the next lower index).
iterator erase(iterator it)
{
	Hasher::hash_t hash = do_hash(*it);
	do_erase(it.index, hash);
	return ++it;
}
int count(const K &key) const
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(key);
int i = do_lookup(key, hash);
return i < 0 ? 0 : 1;
}
int count(const K &key, const_iterator it) const
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(key);
int i = do_lookup(key, hash);
return i < 0 || i > it.index ? 0 : 1;
}
// Returns an iterator to the entry for `key`, or end() when do_lookup()
// reports no such entry (negative index).
iterator find(const K &key)
{
	Hasher::hash_t hash = do_hash(key);
	int slot = do_lookup(key, hash);
	return slot < 0 ? end() : iterator(this, slot);
}
// Const overload: returns a const_iterator to the entry for `key`, or
// end() when do_lookup() reports no such entry (negative index).
const_iterator find(const K &key) const
{
	Hasher::hash_t hash = do_hash(key);
	int slot = do_lookup(key, hash);
	return slot < 0 ? end() : const_iterator(this, slot);
}
bool operator[](const K &key)
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = do_hash(key);
int i = do_lookup(key, hash);
return i >= 0;
}
2015-01-23 17:13:27 -06:00
// Reorders the entries according to `comp` and then calls do_rehash().
//
// The comparator's arguments are swapped, so `entries` is stored in
// descending order with respect to `comp`; note that begin() refers to
// the LAST element of `entries`.
template<typename Compare = std::less<K>>
void sort(Compare comp = Compare())
{
	auto reversed = [comp](const entry_t &lhs, const entry_t &rhs) {
		return comp(rhs.udata, lhs.udata);
	};
	std::sort(entries.begin(), entries.end(), reversed);
	do_rehash();
}
// Removes the element begin() refers to and returns a copy of it.
// The copy is taken before erase(), because erase() still reads the
// element through the iterator.
// NOTE(review): there is no emptiness check here - callers presumably
// must not call pop() on an empty pool; confirm.
K pop()
{
	iterator first = begin();
	K value = *first;
	erase(first);
	return value;
}
// Exchanges the complete contents (entry list and hashtable) of this
// pool with those of `other`.
void swap(pool &other)
{
	entries.swap(other.entries);
	hashtable.swap(other.hashtable);
}
// Two pools compare equal when they have the same size and every
// element of this pool is found in `other` (order is irrelevant).
bool operator==(const pool &other) const
{
	if (other.size() != size())
		return false;
	for (const entry_t &entry : entries) {
		if (other.count(entry.udata) == 0)
			return false;
	}
	return true;
}
// Negation of operator==.
bool operator!=(const pool &other) const
{
	return !(*this == other);
}
// Folds this pool into the hasher `h` and returns the updated hasher.
// Each element's own hash (ops.hash(...).yield()) is fed in through
// commutative_eat(); afterwards the number of entries is fed in through
// eat().
Hasher hash_into(Hasher h) const
{
	for (const entry_t &entry : entries)
		h.commutative_eat(ops.hash(entry.udata).yield());
	h.eat(entries.size());
	return h;
}
// Forwards the capacity request to the entry list.
void reserve(size_t n)
{
	entries.reserve(n);
}

// Number of elements stored.
size_t size() const
{
	return entries.size();
}

// True when no element is stored.
bool empty() const
{
	return entries.empty();
}

// Removes all elements: both the hashtable and the entry list are cleared.
void clear()
{
	hashtable.clear();
	entries.clear();
}
// Iterator to the first element in iteration order, which is the LAST
// slot of `entries` (index size-1; -1 for an empty pool).
iterator begin()
{
	return iterator(this, int(entries.size()) - 1);
}

// Iterator to the entry at index size-1-n, i.e. n steps of index below
// the one begin() refers to.
iterator element(int n)
{
	return iterator(this, int(entries.size()) - 1 - n);
}

// Past-the-end iterator: null owner and index -1.
iterator end()
{
	return iterator(nullptr, -1);
}

// Const counterpart of begin().
const_iterator begin() const
{
	return const_iterator(this, int(entries.size()) - 1);
}

// Const counterpart of element().
const_iterator element(int n) const
{
	return const_iterator(this, int(entries.size()) - 1 - n);
}

// Const counterpart of end().
const_iterator end() const
{
	return const_iterator(nullptr, -1);
}
};
template<typename K, int offset, typename OPS>
2015-01-18 05:12:33 -06:00
class idict
{
pool<K, OPS> database;
2015-01-18 05:12:33 -06:00
public:
class const_iterator
{
friend class idict;
protected:
const idict &container;
int index;
const_iterator(const idict &container, int index) : container(container), index(index) { }
public:
typedef std::forward_iterator_tag iterator_category;
typedef K value_type;
typedef ptrdiff_t difference_type;
typedef K* pointer;
typedef K& reference;
const_iterator() { }
const_iterator operator++() { index++; return *this; }
bool operator==(const const_iterator &other) const { return index == other.index; }
bool operator!=(const const_iterator &other) const { return index != other.index; }
const K &operator*() const { return container[index]; }
const K *operator->() const { return &container[index]; }
};
2015-01-18 05:12:33 -06:00
constexpr idict()
{
}
2015-01-18 05:12:33 -06:00
int operator()(const K &key)
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = database.do_hash(key);
2015-01-18 05:12:33 -06:00
int i = database.do_lookup(key, hash);
if (i < 0)
i = database.do_insert(key, hash);
return i + offset;
}
int at(const K &key) const
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = database.do_hash(key);
2015-01-18 05:12:33 -06:00
int i = database.do_lookup(key, hash);
if (i < 0)
throw std::out_of_range("idict::at()");
return i + offset;
}
int at(const K &key, int defval) const
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = database.do_hash(key);
int i = database.do_lookup(key, hash);
if (i < 0)
return defval;
return i + offset;
}
2015-01-18 05:12:33 -06:00
int count(const K &key) const
{
2024-10-18 05:34:25 -05:00
Hasher::hash_t hash = database.do_hash(key);
2015-01-18 05:12:33 -06:00
int i = database.do_lookup(key, hash);
return i < 0 ? 0 : 1;
}
void expect(const K &key, int i)
{
int j = (*this)(key);
if (i != j)
throw std::out_of_range("idict::expect()");
}
const K &operator[](int index) const
{
return database.entries.at(index - offset).udata;
}
2015-10-27 09:04:47 -05:00
void swap(idict &other)
{
database.swap(other.database);
}
void reserve(size_t n) { database.reserve(n); }
2015-10-27 09:04:47 -05:00
size_t size() const { return database.size(); }
bool empty() const { return database.empty(); }
void clear() { database.clear(); }
const_iterator begin() const { return const_iterator(*this, offset); }
const_iterator element(int n) const { return const_iterator(*this, n); }
const_iterator end() const { return const_iterator(*this, offset + size()); }
2015-01-18 05:12:33 -06:00
};
2024-07-18 09:02:11 -05:00
/**
* Union-find data structure with a promotion method
* mfp stands for "merge, find, promote"
* i-prefixed methods operate on indices in parents
*/
template<typename K, typename OPS>
2015-10-27 09:04:47 -05:00
class mfp
{
mutable idict<K, 0, OPS> database;
2015-10-27 09:04:47 -05:00
mutable std::vector<int> parents;
public:
typedef typename idict<K, 0>::const_iterator const_iterator;
2015-11-30 12:43:52 -06:00
constexpr mfp()
{
}
2024-07-18 09:02:11 -05:00
// Finds a given element's index. If it isn't in the data structure,
// it is added as its own set
2015-10-27 09:04:47 -05:00
int operator()(const K &key) const
{
int i = database(key);
2024-07-18 09:02:11 -05:00
// If the lookup caused the database to grow,
// also add a corresponding entry in parents initialized to -1 (no parent)
2015-10-27 09:04:47 -05:00
parents.resize(database.size(), -1);
return i;
}
2024-07-18 09:02:11 -05:00
// Finds an element at given index
2015-10-27 09:04:47 -05:00
const K &operator[](int index) const
{
return database[index];
}
int ifind(int i) const
{
int p = i, k = i;
while (parents[p] != -1)
p = parents[p];
2024-07-18 09:02:11 -05:00
// p is now the representative of i
// Now we traverse from i up to the representative again
// and make p the parent of all the nodes along the way.
// This is a side effect and doesn't affect the return value.
// It speeds up future find operations
2015-10-27 09:04:47 -05:00
while (k != p) {
int next_k = parents[k];
parents[k] = p;
k = next_k;
}
return p;
}
2024-07-18 09:02:11 -05:00
// Merge sets if the given indices belong to different sets
2015-10-27 09:04:47 -05:00
void imerge(int i, int j)
{
i = ifind(i);
j = ifind(j);
if (i != j)
parents[i] = j;
}
void ipromote(int i)
{
int k = i;
while (k != -1) {
int next_k = parents[k];
parents[k] = i;
k = next_k;
}
parents[i] = -1;
}
2015-10-28 05:21:55 -05:00
int lookup(const K &a) const
{
return ifind((*this)(a));
}
2015-10-27 09:04:47 -05:00
const K &find(const K &a) const
{
2016-02-01 03:03:03 -06:00
int i = database.at(a, -1);
if (i < 0)
return a;
return (*this)[ifind(i)];
2015-10-27 09:04:47 -05:00
}
void merge(const K &a, const K &b)
{
imerge((*this)(a), (*this)(b));
}
void promote(const K &a)
{
2016-02-01 03:03:03 -06:00
int i = database.at(a, -1);
if (i >= 0)
ipromote(i);
2015-10-27 09:04:47 -05:00
}
void swap(mfp &other)
{
database.swap(other.database);
parents.swap(other.parents);
}
void reserve(size_t n) { database.reserve(n); }
2015-10-27 09:04:47 -05:00
size_t size() const { return database.size(); }
bool empty() const { return database.empty(); }
void clear() { database.clear(); parents.clear(); }
2015-11-30 12:43:52 -06:00
const_iterator begin() const { return database.begin(); }
const_iterator element(int n) const { return database.element(n); }
2015-11-30 12:43:52 -06:00
const_iterator end() const { return database.end(); }
2015-10-27 09:04:47 -05:00
};
} /* namespace hashlib */
#endif