swarm/bmt: async section writer interface to BMT (#778)
- AsyncHasher implements AsyncWriter interface - add extra level for zerohashes in pool to lookup empty data hash - remove unused segment, hash and depth fields from Tree - Hash pkg function -> syncHash moved to test - add asyncHash helper func to tests using shuffle - add TestAsyncCorrectness to tests - add BenchmarkBMTAsync to tests - refactor benchmarks using subbenchmarks - improved comments - preinitialise base hashers on the nodes
This commit is contained in:
parent
526abe2736
commit
fd982d3f3b
354
swarm/bmt/bmt.go
354
swarm/bmt/bmt.go
|
@ -14,7 +14,7 @@
|
||||||
// You should have received a copy of the GNU Lesser General Public License
|
// You should have received a copy of the GNU Lesser General Public License
|
||||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
// Package bmt provides a binary merkle tree implementation
|
// Package bmt provides a binary merkle tree implementation used for swarm chunk hash
|
||||||
package bmt
|
package bmt
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@ -26,16 +26,16 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Binary Merkle Tree Hash is a hash function over arbitrary datachunks of limited size
|
Binary Merkle Tree Hash is a hash function over arbitrary datachunks of limited size.
|
||||||
It is defined as the root hash of the binary merkle tree built over fixed size segments
|
It is defined as the root hash of the binary merkle tree built over fixed size segments
|
||||||
of the underlying chunk using any base hash function (e.g keccak 256 SHA3).
|
of the underlying chunk using any base hash function (e.g., keccak 256 SHA3).
|
||||||
Chunk with data shorter than the fixed size are hashed as if they had zero padding
|
Chunks with data shorter than the fixed size are hashed as if they had zero padding.
|
||||||
|
|
||||||
BMT hash is used as the chunk hash function in swarm which in turn is the basis for the
|
BMT hash is used as the chunk hash function in swarm which in turn is the basis for the
|
||||||
128 branching swarm hash http://swarm-guide.readthedocs.io/en/latest/architecture.html#swarm-hash
|
128 branching swarm hash http://swarm-guide.readthedocs.io/en/latest/architecture.html#swarm-hash
|
||||||
|
|
||||||
The BMT is optimal for providing compact inclusion proofs, i.e. prove that a
|
The BMT is optimal for providing compact inclusion proofs, i.e. prove that a
|
||||||
segment is a substring of a chunk starting at a particular offset
|
segment is a substring of a chunk starting at a particular offset.
|
||||||
The size of the underlying segments is fixed to the size of the base hash (called the resolution
|
The size of the underlying segments is fixed to the size of the base hash (called the resolution
|
||||||
of the BMT hash), Using Keccak256 SHA3 hash is 32 bytes, the EVM word size to optimize for on-chain BMT verification
|
of the BMT hash), Using Keccak256 SHA3 hash is 32 bytes, the EVM word size to optimize for on-chain BMT verification
|
||||||
as well as the hash size optimal for inclusion proofs in the merkle tree of the swarm hash.
|
as well as the hash size optimal for inclusion proofs in the merkle tree of the swarm hash.
|
||||||
|
@ -46,11 +46,12 @@ Two implementations are provided:
|
||||||
that is simple to understand
|
that is simple to understand
|
||||||
* Hasher is optimized for speed taking advantage of concurrency with minimalistic
|
* Hasher is optimized for speed taking advantage of concurrency with minimalistic
|
||||||
control structure to coordinate the concurrent routines
|
control structure to coordinate the concurrent routines
|
||||||
It implements the following interfaces
|
|
||||||
* standard golang hash.Hash
|
BMT Hasher implements the following interfaces
|
||||||
* SwarmHash
|
* standard golang hash.Hash - synchronous, reusable
|
||||||
* io.Writer
|
* SwarmHash - SumWithSpan provided
|
||||||
* TODO: SegmentWriter
|
* io.Writer - synchronous left-to-right datawriter
|
||||||
|
* AsyncWriter - concurrent section writes and asynchronous Sum call
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -69,7 +70,7 @@ type BaseHasherFunc func() hash.Hash
|
||||||
// Hasher a reusable hasher for fixed maximum size chunks representing a BMT
|
// Hasher a reusable hasher for fixed maximum size chunks representing a BMT
|
||||||
// - implements the hash.Hash interface
|
// - implements the hash.Hash interface
|
||||||
// - reuses a pool of trees for amortised memory allocation and resource control
|
// - reuses a pool of trees for amortised memory allocation and resource control
|
||||||
// - supports order-agnostic concurrent segment writes (TODO:)
|
// - supports order-agnostic concurrent segment writes and section (double segment) writes
|
||||||
// as well as sequential read and write
|
// as well as sequential read and write
|
||||||
// - the same hasher instance must not be called concurrently on more than one chunk
|
// - the same hasher instance must not be called concurrently on more than one chunk
|
||||||
// - the same hasher instance is synchronously reuseable
|
// - the same hasher instance is synchronously reuseable
|
||||||
|
@ -81,8 +82,7 @@ type Hasher struct {
|
||||||
bmt *tree // prebuilt BMT resource for flowcontrol and proofs
|
bmt *tree // prebuilt BMT resource for flowcontrol and proofs
|
||||||
}
|
}
|
||||||
|
|
||||||
// New creates a reusable Hasher
|
// New creates a reusable BMT Hasher that
|
||||||
// implements the hash.Hash interface
|
|
||||||
// pulls a new tree from a resource pool for hashing each chunk
|
// pulls a new tree from a resource pool for hashing each chunk
|
||||||
func New(p *TreePool) *Hasher {
|
func New(p *TreePool) *Hasher {
|
||||||
return &Hasher{
|
return &Hasher{
|
||||||
|
@ -90,9 +90,9 @@ func New(p *TreePool) *Hasher {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TreePool provides a pool of trees used as resources by Hasher
|
// TreePool provides a pool of trees used as resources by the BMT Hasher.
|
||||||
// a tree popped from the pool is guaranteed to have clean state
|
// A tree popped from the pool is guaranteed to have a clean state ready
|
||||||
// for hashing a new chunk
|
// for hashing a new chunk.
|
||||||
type TreePool struct {
|
type TreePool struct {
|
||||||
lock sync.Mutex
|
lock sync.Mutex
|
||||||
c chan *tree // the channel to obtain a resource from the pool
|
c chan *tree // the channel to obtain a resource from the pool
|
||||||
|
@ -101,7 +101,7 @@ type TreePool struct {
|
||||||
SegmentCount int // the number of segments on the base level of the BMT
|
SegmentCount int // the number of segments on the base level of the BMT
|
||||||
Capacity int // pool capacity, controls concurrency
|
Capacity int // pool capacity, controls concurrency
|
||||||
Depth int // depth of the bmt trees = int(log2(segmentCount))+1
|
Depth int // depth of the bmt trees = int(log2(segmentCount))+1
|
||||||
Datalength int // the total length of the data (count * size)
|
Size int // the total length of the data (count * size)
|
||||||
count int // current count of (ever) allocated resources
|
count int // current count of (ever) allocated resources
|
||||||
zerohashes [][]byte // lookup table for predictable padding subtrees for all levels
|
zerohashes [][]byte // lookup table for predictable padding subtrees for all levels
|
||||||
}
|
}
|
||||||
|
@ -112,12 +112,12 @@ func NewTreePool(hasher BaseHasherFunc, segmentCount, capacity int) *TreePool {
|
||||||
// initialises the zerohashes lookup table
|
// initialises the zerohashes lookup table
|
||||||
depth := calculateDepthFor(segmentCount)
|
depth := calculateDepthFor(segmentCount)
|
||||||
segmentSize := hasher().Size()
|
segmentSize := hasher().Size()
|
||||||
zerohashes := make([][]byte, depth)
|
zerohashes := make([][]byte, depth+1)
|
||||||
zeros := make([]byte, segmentSize)
|
zeros := make([]byte, segmentSize)
|
||||||
zerohashes[0] = zeros
|
zerohashes[0] = zeros
|
||||||
h := hasher()
|
h := hasher()
|
||||||
for i := 1; i < depth; i++ {
|
for i := 1; i < depth+1; i++ {
|
||||||
zeros = doHash(h, nil, zeros, zeros)
|
zeros = doSum(h, nil, zeros, zeros)
|
||||||
zerohashes[i] = zeros
|
zerohashes[i] = zeros
|
||||||
}
|
}
|
||||||
return &TreePool{
|
return &TreePool{
|
||||||
|
@ -126,7 +126,7 @@ func NewTreePool(hasher BaseHasherFunc, segmentCount, capacity int) *TreePool {
|
||||||
SegmentSize: segmentSize,
|
SegmentSize: segmentSize,
|
||||||
SegmentCount: segmentCount,
|
SegmentCount: segmentCount,
|
||||||
Capacity: capacity,
|
Capacity: capacity,
|
||||||
Datalength: segmentCount * segmentSize,
|
Size: segmentCount * segmentSize,
|
||||||
Depth: depth,
|
Depth: depth,
|
||||||
zerohashes: zerohashes,
|
zerohashes: zerohashes,
|
||||||
}
|
}
|
||||||
|
@ -155,7 +155,7 @@ func (p *TreePool) reserve() *tree {
|
||||||
select {
|
select {
|
||||||
case t = <-p.c:
|
case t = <-p.c:
|
||||||
default:
|
default:
|
||||||
t = newTree(p.SegmentSize, p.Depth)
|
t = newTree(p.SegmentSize, p.Depth, p.hasher)
|
||||||
p.count++
|
p.count++
|
||||||
}
|
}
|
||||||
return t
|
return t
|
||||||
|
@ -173,13 +173,10 @@ func (p *TreePool) release(t *tree) {
|
||||||
// the tree is 'locked' while not in the pool
|
// the tree is 'locked' while not in the pool
|
||||||
type tree struct {
|
type tree struct {
|
||||||
leaves []*node // leaf nodes of the tree, other nodes accessible via parent links
|
leaves []*node // leaf nodes of the tree, other nodes accessible via parent links
|
||||||
cur int // index of rightmost currently open segment
|
cursor int // index of rightmost currently open segment
|
||||||
offset int // offset (cursor position) within currently open segment
|
offset int // offset (cursor position) within currently open segment
|
||||||
segment []byte // the rightmost open segment (not complete)
|
|
||||||
section []byte // the rightmost open section (double segment)
|
section []byte // the rightmost open section (double segment)
|
||||||
depth int // number of levels
|
|
||||||
result chan []byte // result channel
|
result chan []byte // result channel
|
||||||
hash []byte // to record the result
|
|
||||||
span []byte // The span of the data subsumed under the chunk
|
span []byte // The span of the data subsumed under the chunk
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -188,14 +185,16 @@ type node struct {
|
||||||
isLeft bool // whether it is left side of the parent double segment
|
isLeft bool // whether it is left side of the parent double segment
|
||||||
parent *node // pointer to parent node in the BMT
|
parent *node // pointer to parent node in the BMT
|
||||||
state int32 // atomic increment impl concurrent boolean toggle
|
state int32 // atomic increment impl concurrent boolean toggle
|
||||||
left, right []byte // this is where the content segment is set
|
left, right []byte // this is where the two children sections are written
|
||||||
|
hasher hash.Hash // preconstructed hasher on nodes
|
||||||
}
|
}
|
||||||
|
|
||||||
// newNode constructs a segment hasher node in the BMT (used by newTree)
|
// newNode constructs a segment hasher node in the BMT (used by newTree)
|
||||||
func newNode(index int, parent *node) *node {
|
func newNode(index int, parent *node, hasher hash.Hash) *node {
|
||||||
return &node{
|
return &node{
|
||||||
parent: parent,
|
parent: parent,
|
||||||
isLeft: index%2 == 0,
|
isLeft: index%2 == 0,
|
||||||
|
hasher: hasher,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -253,16 +252,21 @@ func (t *tree) draw(hash []byte) string {
|
||||||
|
|
||||||
// newTree initialises a tree by building up the nodes of a BMT
|
// newTree initialises a tree by building up the nodes of a BMT
|
||||||
// - segment size is stipulated to be the size of the hash
|
// - segment size is stipulated to be the size of the hash
|
||||||
func newTree(segmentSize, depth int) *tree {
|
func newTree(segmentSize, depth int, hashfunc func() hash.Hash) *tree {
|
||||||
n := newNode(0, nil)
|
n := newNode(0, nil, hashfunc())
|
||||||
prevlevel := []*node{n}
|
prevlevel := []*node{n}
|
||||||
// iterate over levels and creates 2^(depth-level) nodes
|
// iterate over levels and creates 2^(depth-level) nodes
|
||||||
|
// the 0 level is on double segment sections so we start at depth - 2 since
|
||||||
count := 2
|
count := 2
|
||||||
for level := depth - 2; level >= 0; level-- {
|
for level := depth - 2; level >= 0; level-- {
|
||||||
nodes := make([]*node, count)
|
nodes := make([]*node, count)
|
||||||
for i := 0; i < count; i++ {
|
for i := 0; i < count; i++ {
|
||||||
parent := prevlevel[i/2]
|
parent := prevlevel[i/2]
|
||||||
nodes[i] = newNode(i, parent)
|
var hasher hash.Hash
|
||||||
|
if level == 0 {
|
||||||
|
hasher = hashfunc()
|
||||||
|
}
|
||||||
|
nodes[i] = newNode(i, parent, hasher)
|
||||||
}
|
}
|
||||||
prevlevel = nodes
|
prevlevel = nodes
|
||||||
count *= 2
|
count *= 2
|
||||||
|
@ -270,13 +274,12 @@ func newTree(segmentSize, depth int) *tree {
|
||||||
// the datanode level is the nodes on the last level
|
// the datanode level is the nodes on the last level
|
||||||
return &tree{
|
return &tree{
|
||||||
leaves: prevlevel,
|
leaves: prevlevel,
|
||||||
result: make(chan []byte, 1),
|
result: make(chan []byte),
|
||||||
segment: make([]byte, segmentSize),
|
|
||||||
section: make([]byte, 2*segmentSize),
|
section: make([]byte, 2*segmentSize),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// methods needed by hash.Hash
|
// methods needed to implement hash.Hash
|
||||||
|
|
||||||
// Size returns the size
|
// Size returns the size
|
||||||
func (h *Hasher) Size() int {
|
func (h *Hasher) Size() int {
|
||||||
|
@ -285,63 +288,40 @@ func (h *Hasher) Size() int {
|
||||||
|
|
||||||
// BlockSize returns the block size
|
// BlockSize returns the block size
|
||||||
func (h *Hasher) BlockSize() int {
|
func (h *Hasher) BlockSize() int {
|
||||||
return h.pool.SegmentSize
|
return 2 * h.pool.SegmentSize
|
||||||
}
|
}
|
||||||
|
|
||||||
// Hash hashes the data and the span using the bmt hasher
|
// Sum returns the BMT root hash of the buffer
|
||||||
func Hash(h *Hasher, span, data []byte) []byte {
|
// using Sum presupposes sequential synchronous writes (io.Writer interface)
|
||||||
h.ResetWithLength(span)
|
|
||||||
h.Write(data)
|
|
||||||
return h.Sum(nil)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Datalength returns the maximum data size that is hashed by the hasher =
|
|
||||||
// segment count times segment size
|
|
||||||
func (h *Hasher) DataLength() int {
|
|
||||||
return h.pool.Datalength
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sum returns the hash of the buffer
|
|
||||||
// hash.Hash interface Sum method appends the byte slice to the underlying
|
// hash.Hash interface Sum method appends the byte slice to the underlying
|
||||||
// data before it calculates and returns the hash of the chunk
|
// data before it calculates and returns the hash of the chunk
|
||||||
// caller must make sure Sum is not called concurrently with Write, writeSection
|
// caller must make sure Sum is not called concurrently with Write, writeSection
|
||||||
// and WriteSegment (TODO:)
|
func (h *Hasher) Sum(b []byte) (s []byte) {
|
||||||
func (h *Hasher) Sum(b []byte) (r []byte) {
|
t := h.getTree()
|
||||||
return h.sum(b, true, true)
|
// write the last section with final flag set to true
|
||||||
}
|
go h.writeSection(t.cursor, t.section, true, true)
|
||||||
|
// wait for the result
|
||||||
// sum implements Sum taking parameters
|
s = <-t.result
|
||||||
// * if the tree is released right away
|
|
||||||
// * if sequential write is used (can read sections)
|
|
||||||
func (h *Hasher) sum(b []byte, release, section bool) (r []byte) {
|
|
||||||
t := h.bmt
|
|
||||||
bh := h.pool.hasher()
|
|
||||||
go h.writeSection(t.cur, t.section, true)
|
|
||||||
bmtHash := <-t.result
|
|
||||||
span := t.span
|
span := t.span
|
||||||
// fmt.Println(t.draw(bmtHash))
|
// release the tree resource back to the pool
|
||||||
if release {
|
|
||||||
h.releaseTree()
|
h.releaseTree()
|
||||||
}
|
|
||||||
// b + sha3(span + BMT(pure_chunk))
|
// b + sha3(span + BMT(pure_chunk))
|
||||||
if span == nil {
|
if len(span) == 0 {
|
||||||
return append(b, bmtHash...)
|
return append(b, s...)
|
||||||
}
|
}
|
||||||
return doHash(bh, b, span, bmtHash)
|
return doSum(h.pool.hasher(), b, span, s)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Hasher implements the SwarmHash interface
|
// methods needed to implement the SwarmHash and the io.Writer interfaces
|
||||||
|
|
||||||
// Hasher implements the io.Writer interface
|
// Write calls sequentially add to the buffer to be hashed,
|
||||||
|
// with every full segment calls writeSection in a go routine
|
||||||
// Write fills the buffer to hash,
|
|
||||||
// with every full segment calls writeSection
|
|
||||||
func (h *Hasher) Write(b []byte) (int, error) {
|
func (h *Hasher) Write(b []byte) (int, error) {
|
||||||
l := len(b)
|
l := len(b)
|
||||||
if l <= 0 {
|
if l == 0 {
|
||||||
return 0, nil
|
return 0, nil
|
||||||
}
|
}
|
||||||
t := h.bmt
|
t := h.getTree()
|
||||||
secsize := 2 * h.pool.SegmentSize
|
secsize := 2 * h.pool.SegmentSize
|
||||||
// calculate length of missing bit to complete current open section
|
// calculate length of missing bit to complete current open section
|
||||||
smax := secsize - t.offset
|
smax := secsize - t.offset
|
||||||
|
@ -359,20 +339,21 @@ func (h *Hasher) Write(b []byte) (int, error) {
|
||||||
return l, nil
|
return l, nil
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if t.cur == h.pool.SegmentCount*2 {
|
// if end of a section
|
||||||
|
if t.cursor == h.pool.SegmentCount*2 {
|
||||||
return 0, nil
|
return 0, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// read full segments and the last possibly partial segment from the input buffer
|
// read full sections and the last possibly partial section from the input buffer
|
||||||
for smax < l {
|
for smax < l {
|
||||||
// section complete; push to tree asynchronously
|
// section complete; push to tree asynchronously
|
||||||
go h.writeSection(t.cur, t.section, false)
|
go h.writeSection(t.cursor, t.section, true, false)
|
||||||
// reset section
|
// reset section
|
||||||
t.section = make([]byte, secsize)
|
t.section = make([]byte, secsize)
|
||||||
// copy from imput buffer at smax to right half of section
|
// copy from input buffer at smax to right half of section
|
||||||
copy(t.section, b[smax:])
|
copy(t.section, b[smax:])
|
||||||
// advance cursor
|
// advance cursor
|
||||||
t.cur++
|
t.cursor++
|
||||||
// smax here represents successive offsets in the input buffer
|
// smax here represents successive offsets in the input buffer
|
||||||
smax += secsize
|
smax += secsize
|
||||||
}
|
}
|
||||||
|
@ -382,83 +363,225 @@ func (h *Hasher) Write(b []byte) (int, error) {
|
||||||
|
|
||||||
// Reset needs to be called before writing to the hasher
|
// Reset needs to be called before writing to the hasher
|
||||||
func (h *Hasher) Reset() {
|
func (h *Hasher) Reset() {
|
||||||
h.getTree()
|
h.releaseTree()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Hasher implements the SwarmHash interface
|
// methods needed to implement the SwarmHash interface
|
||||||
|
|
||||||
// ResetWithLength needs to be called before writing to the hasher
|
// ResetWithLength needs to be called before writing to the hasher
|
||||||
// the argument is supposed to be the byte slice binary representation of
|
// the argument is supposed to be the byte slice binary representation of
|
||||||
// the length of the data subsumed under the hash, i.e., span
|
// the length of the data subsumed under the hash, i.e., span
|
||||||
func (h *Hasher) ResetWithLength(span []byte) {
|
func (h *Hasher) ResetWithLength(span []byte) {
|
||||||
h.Reset()
|
h.Reset()
|
||||||
h.bmt.span = span
|
h.getTree().span = span
|
||||||
}
|
}
|
||||||
|
|
||||||
// releaseTree gives back the Tree to the pool whereby it unlocks
|
// releaseTree gives back the Tree to the pool whereby it unlocks
|
||||||
// it resets tree, segment and index
|
// it resets tree, segment and index
|
||||||
func (h *Hasher) releaseTree() {
|
func (h *Hasher) releaseTree() {
|
||||||
t := h.bmt
|
t := h.bmt
|
||||||
if t != nil {
|
if t == nil {
|
||||||
t.cur = 0
|
return
|
||||||
|
}
|
||||||
|
h.bmt = nil
|
||||||
|
go func() {
|
||||||
|
t.cursor = 0
|
||||||
t.offset = 0
|
t.offset = 0
|
||||||
t.span = nil
|
t.span = nil
|
||||||
t.hash = nil
|
|
||||||
h.bmt = nil
|
|
||||||
t.section = make([]byte, h.pool.SegmentSize*2)
|
t.section = make([]byte, h.pool.SegmentSize*2)
|
||||||
t.segment = make([]byte, h.pool.SegmentSize)
|
select {
|
||||||
|
case <-t.result:
|
||||||
|
default:
|
||||||
|
}
|
||||||
h.pool.release(t)
|
h.pool.release(t)
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewAsyncWriter extends Hasher with an interface for concurrent segment/section writes
|
||||||
|
func (h *Hasher) NewAsyncWriter(double bool) *AsyncHasher {
|
||||||
|
secsize := h.pool.SegmentSize
|
||||||
|
if double {
|
||||||
|
secsize *= 2
|
||||||
|
}
|
||||||
|
write := func(i int, section []byte, final bool) {
|
||||||
|
h.writeSection(i, section, double, final)
|
||||||
|
}
|
||||||
|
return &AsyncHasher{
|
||||||
|
Hasher: h,
|
||||||
|
double: double,
|
||||||
|
secsize: secsize,
|
||||||
|
write: write,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: writeSegment writes the ith segment into the BMT tree
|
// SectionWriter is an asynchronous segment/section writer interface
|
||||||
// func (h *Hasher) writeSegment(i int, s []byte) {
|
type SectionWriter interface {
|
||||||
// go h.run(h.bmt.leaves[i/2], h.pool.hasher(), i%2 == 0, s)
|
Reset() // standard init to be called before reuse
|
||||||
// }
|
Write(index int, data []byte) // write into section of index
|
||||||
|
Sum(b []byte, length int, span []byte) []byte // returns the hash of the buffer
|
||||||
|
SectionSize() int // size of the async section unit to use
|
||||||
|
}
|
||||||
|
|
||||||
|
// AsyncHasher extends BMT Hasher with an asynchronous segment/section writer interface
|
||||||
|
// AsyncHasher is unsafe and does not check indexes and section data lengths
|
||||||
|
// it must be used with the right indexes and length and the right number of sections
|
||||||
|
//
|
||||||
|
// behaviour is undefined if
|
||||||
|
// * non-final sections are shorter or longer than secsize
|
||||||
|
// * if final section does not match length
|
||||||
|
// * write a section with index that is higher than length/secsize
|
||||||
|
// * set length in Sum call when length/secsize < maxsec
|
||||||
|
//
|
||||||
|
// * if Sum() is not called on a Hasher that is fully written
|
||||||
|
// a process will block, can be terminated with Reset
|
||||||
|
// * it will not leak processes if not all sections are written but it blocks
|
||||||
|
// and keeps the resource which can be released calling Reset()
|
||||||
|
type AsyncHasher struct {
|
||||||
|
*Hasher // extends the Hasher
|
||||||
|
mtx sync.Mutex // to lock the cursor access
|
||||||
|
double bool // whether to use double segments (call Hasher.writeSection)
|
||||||
|
secsize int // size of base section (size of hash or double)
|
||||||
|
write func(i int, section []byte, final bool)
|
||||||
|
}
|
||||||
|
|
||||||
|
// methods needed to implement AsyncWriter
|
||||||
|
|
||||||
|
// SectionSize returns the size of async section unit to use
|
||||||
|
func (sw *AsyncHasher) SectionSize() int {
|
||||||
|
return sw.secsize
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write writes the i-th section of the BMT base
|
||||||
|
// this function can and is meant to be called concurrently
|
||||||
|
// it sets max segment threadsafely
|
||||||
|
func (sw *AsyncHasher) Write(i int, section []byte) {
|
||||||
|
sw.mtx.Lock()
|
||||||
|
defer sw.mtx.Unlock()
|
||||||
|
t := sw.getTree()
|
||||||
|
// cursor keeps track of the rightmost section written so far
|
||||||
|
// if index is lower than cursor then just write non-final section as is
|
||||||
|
if i < t.cursor {
|
||||||
|
// if index is not the rightmost, safe to write section
|
||||||
|
go sw.write(i, section, false)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// if there is a previous rightmost section safe to write section
|
||||||
|
if t.offset > 0 {
|
||||||
|
if i == t.cursor {
|
||||||
|
// i==cursor implies cursor was set by Hash call so we can write section as final one
|
||||||
|
// since it can be shorter, first we copy it to the padded buffer
|
||||||
|
t.section = make([]byte, sw.secsize)
|
||||||
|
copy(t.section, section)
|
||||||
|
go sw.write(i, t.section, true)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// the rightmost section just changed, so we write the previous one as non-final
|
||||||
|
go sw.write(t.cursor, t.section, false)
|
||||||
|
}
|
||||||
|
// set i as the index of the righmost section written so far
|
||||||
|
// set t.offset to cursor*secsize+1
|
||||||
|
t.cursor = i
|
||||||
|
t.offset = i*sw.secsize + 1
|
||||||
|
t.section = make([]byte, sw.secsize)
|
||||||
|
copy(t.section, section)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sum can be called any time once the length and the span is known
|
||||||
|
// potentially even before all segments have been written
|
||||||
|
// in such cases Sum will block until all segments are present and
|
||||||
|
// the hash for the length can be calculated.
|
||||||
|
//
|
||||||
|
// b: digest is appended to b
|
||||||
|
// length: known length of the input (unsafe; undefined if out of range)
|
||||||
|
// meta: metadata to hash together with BMT root for the final digest
|
||||||
|
// e.g., span for protection against existential forgery
|
||||||
|
func (sw *AsyncHasher) Sum(b []byte, length int, meta []byte) (s []byte) {
|
||||||
|
sw.mtx.Lock()
|
||||||
|
t := sw.getTree()
|
||||||
|
if length == 0 {
|
||||||
|
sw.mtx.Unlock()
|
||||||
|
s = sw.pool.zerohashes[sw.pool.Depth]
|
||||||
|
} else {
|
||||||
|
// for non-zero input the rightmost section is written to the tree asynchronously
|
||||||
|
// if the actual last section has been written (t.cursor == length/t.secsize)
|
||||||
|
maxsec := (length - 1) / sw.secsize
|
||||||
|
if t.offset > 0 {
|
||||||
|
go sw.write(t.cursor, t.section, maxsec == t.cursor)
|
||||||
|
}
|
||||||
|
// set cursor to maxsec so final section is written when it arrives
|
||||||
|
t.cursor = maxsec
|
||||||
|
t.offset = length
|
||||||
|
result := t.result
|
||||||
|
sw.mtx.Unlock()
|
||||||
|
// wait for the result or reset
|
||||||
|
s = <-result
|
||||||
|
}
|
||||||
|
// relesase the tree back to the pool
|
||||||
|
sw.releaseTree()
|
||||||
|
// if no meta is given just append digest to b
|
||||||
|
if len(meta) == 0 {
|
||||||
|
return append(b, s...)
|
||||||
|
}
|
||||||
|
// hash together meta and BMT root hash using the pools
|
||||||
|
return doSum(sw.pool.hasher(), b, meta, s)
|
||||||
|
}
|
||||||
|
|
||||||
// writeSection writes the hash of i-th section into level 1 node of the BMT tree
|
// writeSection writes the hash of i-th section into level 1 node of the BMT tree
|
||||||
func (h *Hasher) writeSection(i int, section []byte, final bool) {
|
func (h *Hasher) writeSection(i int, section []byte, double bool, final bool) {
|
||||||
// select the leaf node for the section
|
// select the leaf node for the section
|
||||||
n := h.bmt.leaves[i]
|
var n *node
|
||||||
isLeft := n.isLeft
|
var isLeft bool
|
||||||
|
var hasher hash.Hash
|
||||||
|
var level int
|
||||||
|
t := h.getTree()
|
||||||
|
if double {
|
||||||
|
level++
|
||||||
|
n = t.leaves[i]
|
||||||
|
hasher = n.hasher
|
||||||
|
isLeft = n.isLeft
|
||||||
n = n.parent
|
n = n.parent
|
||||||
bh := h.pool.hasher()
|
|
||||||
// hash the section
|
// hash the section
|
||||||
s := doHash(bh, nil, section)
|
section = doSum(hasher, nil, section)
|
||||||
|
} else {
|
||||||
|
n = t.leaves[i/2]
|
||||||
|
hasher = n.hasher
|
||||||
|
isLeft = i%2 == 0
|
||||||
|
}
|
||||||
// write hash into parent node
|
// write hash into parent node
|
||||||
if final {
|
if final {
|
||||||
// for the last segment use writeFinalNode
|
// for the last segment use writeFinalNode
|
||||||
h.writeFinalNode(1, n, bh, isLeft, s)
|
h.writeFinalNode(level, n, hasher, isLeft, section)
|
||||||
} else {
|
} else {
|
||||||
h.writeNode(n, bh, isLeft, s)
|
h.writeNode(n, hasher, isLeft, section)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// writeNode pushes the data to the node
|
// writeNode pushes the data to the node
|
||||||
// if it is the first of 2 sisters written the routine returns
|
// if it is the first of 2 sisters written, the routine terminates
|
||||||
// if it is the second, it calculates the hash and writes it
|
// if it is the second, it calculates the hash and writes it
|
||||||
// to the parent node recursively
|
// to the parent node recursively
|
||||||
|
// since hashing the parent is synchronous the same hasher can be used
|
||||||
func (h *Hasher) writeNode(n *node, bh hash.Hash, isLeft bool, s []byte) {
|
func (h *Hasher) writeNode(n *node, bh hash.Hash, isLeft bool, s []byte) {
|
||||||
level := 1
|
level := 1
|
||||||
for {
|
for {
|
||||||
// at the root of the bmt just write the result to the result channel
|
// at the root of the bmt just write the result to the result channel
|
||||||
if n == nil {
|
if n == nil {
|
||||||
h.bmt.result <- s
|
h.getTree().result <- s
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// otherwise assign child hash to branc
|
// otherwise assign child hash to left or right segment
|
||||||
if isLeft {
|
if isLeft {
|
||||||
n.left = s
|
n.left = s
|
||||||
} else {
|
} else {
|
||||||
n.right = s
|
n.right = s
|
||||||
}
|
}
|
||||||
// the child-thread first arriving will quit
|
// the child-thread first arriving will terminate
|
||||||
if n.toggle() {
|
if n.toggle() {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// the thread coming later now can be sure both left and right children are written
|
// the thread coming second now can be sure both left and right children are written
|
||||||
// it calculates the hash of left|right and pushes it to the parent
|
// so it calculates the hash of left|right and pushes it to the parent
|
||||||
s = doHash(bh, nil, n.left, n.right)
|
s = doSum(bh, nil, n.left, n.right)
|
||||||
isLeft = n.isLeft
|
isLeft = n.isLeft
|
||||||
n = n.parent
|
n = n.parent
|
||||||
level++
|
level++
|
||||||
|
@ -476,7 +599,7 @@ func (h *Hasher) writeFinalNode(level int, n *node, bh hash.Hash, isLeft bool, s
|
||||||
// at the root of the bmt just write the result to the result channel
|
// at the root of the bmt just write the result to the result channel
|
||||||
if n == nil {
|
if n == nil {
|
||||||
if s != nil {
|
if s != nil {
|
||||||
h.bmt.result <- s
|
h.getTree().result <- s
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -485,25 +608,28 @@ func (h *Hasher) writeFinalNode(level int, n *node, bh hash.Hash, isLeft bool, s
|
||||||
// coming from left sister branch
|
// coming from left sister branch
|
||||||
// when the final section's path is going via left child node
|
// when the final section's path is going via left child node
|
||||||
// we include an all-zero subtree hash for the right level and toggle the node.
|
// we include an all-zero subtree hash for the right level and toggle the node.
|
||||||
// when the path is going through right child node, nothing to do
|
|
||||||
n.right = h.pool.zerohashes[level]
|
n.right = h.pool.zerohashes[level]
|
||||||
if s != nil {
|
if s != nil {
|
||||||
n.left = s
|
n.left = s
|
||||||
// if a left final node carries a hash, it must be the first (and only thread)
|
// if a left final node carries a hash, it must be the first (and only thread)
|
||||||
// so the toggle is already in passive state no need no call
|
// so the toggle is already in passive state no need no call
|
||||||
// yet thread needs to carry on pushing hash to parent
|
// yet thread needs to carry on pushing hash to parent
|
||||||
|
noHash = false
|
||||||
} else {
|
} else {
|
||||||
// if again first thread then propagate nil and calculate no hash
|
// if again first thread then propagate nil and calculate no hash
|
||||||
noHash = n.toggle()
|
noHash = n.toggle()
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// right sister branch
|
// right sister branch
|
||||||
// if s is nil, then thread arrived first at previous node and here there will be two,
|
|
||||||
// so no need to do anything
|
|
||||||
if s != nil {
|
if s != nil {
|
||||||
|
// if hash was pushed from right child node, write right segment change state
|
||||||
n.right = s
|
n.right = s
|
||||||
|
// if toggle is true, we arrived first so no hashing just push nil to parent
|
||||||
noHash = n.toggle()
|
noHash = n.toggle()
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
// if s is nil, then thread arrived first at previous node and here there will be two,
|
||||||
|
// so no need to do anything and keep s = nil for parent
|
||||||
noHash = true
|
noHash = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -513,15 +639,16 @@ func (h *Hasher) writeFinalNode(level int, n *node, bh hash.Hash, isLeft bool, s
|
||||||
if noHash {
|
if noHash {
|
||||||
s = nil
|
s = nil
|
||||||
} else {
|
} else {
|
||||||
s = doHash(bh, nil, n.left, n.right)
|
s = doSum(bh, nil, n.left, n.right)
|
||||||
}
|
}
|
||||||
|
// iterate to parent
|
||||||
isLeft = n.isLeft
|
isLeft = n.isLeft
|
||||||
n = n.parent
|
n = n.parent
|
||||||
level++
|
level++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// getTree obtains a BMT resource by reserving one from the pool
|
// getTree obtains a BMT resource by reserving one from the pool and assigns it to the bmt field
|
||||||
func (h *Hasher) getTree() *tree {
|
func (h *Hasher) getTree() *tree {
|
||||||
if h.bmt != nil {
|
if h.bmt != nil {
|
||||||
return h.bmt
|
return h.bmt
|
||||||
|
@ -539,7 +666,7 @@ func (n *node) toggle() bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
// calculates the hash of the data using hash.Hash
|
// calculates the hash of the data using hash.Hash
|
||||||
func doHash(h hash.Hash, b []byte, data ...[]byte) []byte {
|
func doSum(h hash.Hash, b []byte, data ...[]byte) []byte {
|
||||||
h.Reset()
|
h.Reset()
|
||||||
for _, v := range data {
|
for _, v := range data {
|
||||||
h.Write(v)
|
h.Write(v)
|
||||||
|
@ -547,6 +674,7 @@ func doHash(h hash.Hash, b []byte, data ...[]byte) []byte {
|
||||||
return h.Sum(b)
|
return h.Sum(b)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// hashstr is a pretty printer for bytes used in tree.draw
|
||||||
func hashstr(b []byte) string {
|
func hashstr(b []byte) string {
|
||||||
end := len(b)
|
end := len(b)
|
||||||
if end > 4 {
|
if end > 4 {
|
||||||
|
|
|
@ -39,13 +39,12 @@ var counts = []int{1, 2, 3, 4, 5, 8, 9, 15, 16, 17, 32, 37, 42, 53, 63, 64, 65,
|
||||||
// calculates the Keccak256 SHA3 hash of the data
|
// calculates the Keccak256 SHA3 hash of the data
|
||||||
func sha3hash(data ...[]byte) []byte {
|
func sha3hash(data ...[]byte) []byte {
|
||||||
h := sha3.NewKeccak256()
|
h := sha3.NewKeccak256()
|
||||||
return doHash(h, nil, data...)
|
return doSum(h, nil, data...)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestRefHasher tests that the RefHasher computes the expected BMT hash for
|
// TestRefHasher tests that the RefHasher computes the expected BMT hash for
|
||||||
// all data lengths between 0 and 256 bytes
|
// some small data lengths
|
||||||
func TestRefHasher(t *testing.T) {
|
func TestRefHasher(t *testing.T) {
|
||||||
|
|
||||||
// the test struct is used to specify the expected BMT hash for
|
// the test struct is used to specify the expected BMT hash for
|
||||||
// segment counts between from and to and lengths from 1 to datalength
|
// segment counts between from and to and lengths from 1 to datalength
|
||||||
type test struct {
|
type test struct {
|
||||||
|
@ -129,7 +128,7 @@ func TestRefHasher(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// tests if hasher responds with correct hash
|
// tests if hasher responds with correct hash comparing the reference implementation return value
|
||||||
func TestHasherEmptyData(t *testing.T) {
|
func TestHasherEmptyData(t *testing.T) {
|
||||||
hasher := sha3.NewKeccak256
|
hasher := sha3.NewKeccak256
|
||||||
var data []byte
|
var data []byte
|
||||||
|
@ -140,7 +139,7 @@ func TestHasherEmptyData(t *testing.T) {
|
||||||
bmt := New(pool)
|
bmt := New(pool)
|
||||||
rbmt := NewRefHasher(hasher, count)
|
rbmt := NewRefHasher(hasher, count)
|
||||||
refHash := rbmt.Hash(data)
|
refHash := rbmt.Hash(data)
|
||||||
expHash := Hash(bmt, nil, data)
|
expHash := syncHash(bmt, nil, data)
|
||||||
if !bytes.Equal(expHash, refHash) {
|
if !bytes.Equal(expHash, refHash) {
|
||||||
t.Fatalf("hash mismatch with reference. expected %x, got %x", refHash, expHash)
|
t.Fatalf("hash mismatch with reference. expected %x, got %x", refHash, expHash)
|
||||||
}
|
}
|
||||||
|
@ -148,7 +147,8 @@ func TestHasherEmptyData(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestHasherCorrectness(t *testing.T) {
|
// tests sequential write with entire max size written in one go
|
||||||
|
func TestSyncHasherCorrectness(t *testing.T) {
|
||||||
data := newData(BufferSize)
|
data := newData(BufferSize)
|
||||||
hasher := sha3.NewKeccak256
|
hasher := sha3.NewKeccak256
|
||||||
size := hasher().Size()
|
size := hasher().Size()
|
||||||
|
@ -157,7 +157,7 @@ func TestHasherCorrectness(t *testing.T) {
|
||||||
for _, count := range counts {
|
for _, count := range counts {
|
||||||
t.Run(fmt.Sprintf("segments_%v", count), func(t *testing.T) {
|
t.Run(fmt.Sprintf("segments_%v", count), func(t *testing.T) {
|
||||||
max := count * size
|
max := count * size
|
||||||
incr := 1
|
var incr int
|
||||||
capacity := 1
|
capacity := 1
|
||||||
pool := NewTreePool(hasher, count, capacity)
|
pool := NewTreePool(hasher, count, capacity)
|
||||||
defer pool.Drain(0)
|
defer pool.Drain(0)
|
||||||
|
@ -173,6 +173,44 @@ func TestHasherCorrectness(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// tests order-neutral concurrent writes with entire max size written in one go
|
||||||
|
func TestAsyncCorrectness(t *testing.T) {
|
||||||
|
data := newData(BufferSize)
|
||||||
|
hasher := sha3.NewKeccak256
|
||||||
|
size := hasher().Size()
|
||||||
|
whs := []whenHash{first, last, random}
|
||||||
|
|
||||||
|
for _, double := range []bool{false, true} {
|
||||||
|
for _, wh := range whs {
|
||||||
|
for _, count := range counts {
|
||||||
|
t.Run(fmt.Sprintf("double_%v_hash_when_%v_segments_%v", double, wh, count), func(t *testing.T) {
|
||||||
|
max := count * size
|
||||||
|
var incr int
|
||||||
|
capacity := 1
|
||||||
|
pool := NewTreePool(hasher, count, capacity)
|
||||||
|
defer pool.Drain(0)
|
||||||
|
for n := 1; n <= max; n += incr {
|
||||||
|
incr = 1 + rand.Intn(5)
|
||||||
|
bmt := New(pool)
|
||||||
|
d := data[:n]
|
||||||
|
rbmt := NewRefHasher(hasher, count)
|
||||||
|
exp := rbmt.Hash(d)
|
||||||
|
got := syncHash(bmt, nil, d)
|
||||||
|
if !bytes.Equal(got, exp) {
|
||||||
|
t.Fatalf("wrong sync hash for datalength %v: expected %x (ref), got %x", n, exp, got)
|
||||||
|
}
|
||||||
|
sw := bmt.NewAsyncWriter(double)
|
||||||
|
got = asyncHashRandom(sw, nil, d, wh)
|
||||||
|
if !bytes.Equal(got, exp) {
|
||||||
|
t.Fatalf("wrong async hash for datalength %v: expected %x, got %x", n, exp, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Tests that the BMT hasher can be synchronously reused with poolsizes 1 and PoolSize
|
// Tests that the BMT hasher can be synchronously reused with poolsizes 1 and PoolSize
|
||||||
func TestHasherReuse(t *testing.T) {
|
func TestHasherReuse(t *testing.T) {
|
||||||
t.Run(fmt.Sprintf("poolsize_%d", 1), func(t *testing.T) {
|
t.Run(fmt.Sprintf("poolsize_%d", 1), func(t *testing.T) {
|
||||||
|
@ -183,6 +221,7 @@ func TestHasherReuse(t *testing.T) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// tests if bmt reuse is not corrupting result
|
||||||
func testHasherReuse(poolsize int, t *testing.T) {
|
func testHasherReuse(poolsize int, t *testing.T) {
|
||||||
hasher := sha3.NewKeccak256
|
hasher := sha3.NewKeccak256
|
||||||
pool := NewTreePool(hasher, SegmentCount, poolsize)
|
pool := NewTreePool(hasher, SegmentCount, poolsize)
|
||||||
|
@ -191,7 +230,7 @@ func testHasherReuse(poolsize int, t *testing.T) {
|
||||||
|
|
||||||
for i := 0; i < 100; i++ {
|
for i := 0; i < 100; i++ {
|
||||||
data := newData(BufferSize)
|
data := newData(BufferSize)
|
||||||
n := rand.Intn(bmt.DataLength())
|
n := rand.Intn(bmt.Size())
|
||||||
err := testHasherCorrectness(bmt, hasher, data, n, SegmentCount)
|
err := testHasherCorrectness(bmt, hasher, data, n, SegmentCount)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
|
@ -199,8 +238,8 @@ func testHasherReuse(poolsize int, t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tests if pool can be cleanly reused even in concurrent use
|
// Tests if pool can be cleanly reused even in concurrent use by several hasher
|
||||||
func TestBMTHasherConcurrentUse(t *testing.T) {
|
func TestBMTConcurrentUse(t *testing.T) {
|
||||||
hasher := sha3.NewKeccak256
|
hasher := sha3.NewKeccak256
|
||||||
pool := NewTreePool(hasher, SegmentCount, PoolSize)
|
pool := NewTreePool(hasher, SegmentCount, PoolSize)
|
||||||
defer pool.Drain(0)
|
defer pool.Drain(0)
|
||||||
|
@ -211,7 +250,7 @@ func TestBMTHasherConcurrentUse(t *testing.T) {
|
||||||
go func() {
|
go func() {
|
||||||
bmt := New(pool)
|
bmt := New(pool)
|
||||||
data := newData(BufferSize)
|
data := newData(BufferSize)
|
||||||
n := rand.Intn(bmt.DataLength())
|
n := rand.Intn(bmt.Size())
|
||||||
errc <- testHasherCorrectness(bmt, hasher, data, n, 128)
|
errc <- testHasherCorrectness(bmt, hasher, data, n, 128)
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
@ -234,7 +273,7 @@ LOOP:
|
||||||
|
|
||||||
// Tests BMT Hasher io.Writer interface is working correctly
|
// Tests BMT Hasher io.Writer interface is working correctly
|
||||||
// even multiple short random write buffers
|
// even multiple short random write buffers
|
||||||
func TestBMTHasherWriterBuffers(t *testing.T) {
|
func TestBMTWriterBuffers(t *testing.T) {
|
||||||
hasher := sha3.NewKeccak256
|
hasher := sha3.NewKeccak256
|
||||||
|
|
||||||
for _, count := range counts {
|
for _, count := range counts {
|
||||||
|
@ -247,7 +286,7 @@ func TestBMTHasherWriterBuffers(t *testing.T) {
|
||||||
data := newData(n)
|
data := newData(n)
|
||||||
rbmt := NewRefHasher(hasher, count)
|
rbmt := NewRefHasher(hasher, count)
|
||||||
refHash := rbmt.Hash(data)
|
refHash := rbmt.Hash(data)
|
||||||
expHash := Hash(bmt, nil, data)
|
expHash := syncHash(bmt, nil, data)
|
||||||
if !bytes.Equal(expHash, refHash) {
|
if !bytes.Equal(expHash, refHash) {
|
||||||
t.Fatalf("hash mismatch with reference. expected %x, got %x", refHash, expHash)
|
t.Fatalf("hash mismatch with reference. expected %x, got %x", refHash, expHash)
|
||||||
}
|
}
|
||||||
|
@ -308,57 +347,65 @@ func testHasherCorrectness(bmt *Hasher, hasher BaseHasherFunc, d []byte, n, coun
|
||||||
data := d[:n]
|
data := d[:n]
|
||||||
rbmt := NewRefHasher(hasher, count)
|
rbmt := NewRefHasher(hasher, count)
|
||||||
exp := sha3hash(span, rbmt.Hash(data))
|
exp := sha3hash(span, rbmt.Hash(data))
|
||||||
got := Hash(bmt, span, data)
|
got := syncHash(bmt, span, data)
|
||||||
if !bytes.Equal(got, exp) {
|
if !bytes.Equal(got, exp) {
|
||||||
return fmt.Errorf("wrong hash: expected %x, got %x", exp, got)
|
return fmt.Errorf("wrong hash: expected %x, got %x", exp, got)
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkSHA3_4k(t *testing.B) { benchmarkSHA3(4096, t) }
|
//
|
||||||
func BenchmarkSHA3_2k(t *testing.B) { benchmarkSHA3(4096/2, t) }
|
func BenchmarkBMT(t *testing.B) {
|
||||||
func BenchmarkSHA3_1k(t *testing.B) { benchmarkSHA3(4096/4, t) }
|
for size := 4096; size >= 128; size /= 2 {
|
||||||
func BenchmarkSHA3_512b(t *testing.B) { benchmarkSHA3(4096/8, t) }
|
t.Run(fmt.Sprintf("%v_size_%v", "SHA3", size), func(t *testing.B) {
|
||||||
func BenchmarkSHA3_256b(t *testing.B) { benchmarkSHA3(4096/16, t) }
|
benchmarkSHA3(t, size)
|
||||||
func BenchmarkSHA3_128b(t *testing.B) { benchmarkSHA3(4096/32, t) }
|
})
|
||||||
|
t.Run(fmt.Sprintf("%v_size_%v", "Baseline", size), func(t *testing.B) {
|
||||||
|
benchmarkBMTBaseline(t, size)
|
||||||
|
})
|
||||||
|
t.Run(fmt.Sprintf("%v_size_%v", "REF", size), func(t *testing.B) {
|
||||||
|
benchmarkRefHasher(t, size)
|
||||||
|
})
|
||||||
|
t.Run(fmt.Sprintf("%v_size_%v", "BMT", size), func(t *testing.B) {
|
||||||
|
benchmarkBMT(t, size)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkBMTBaseline_4k(t *testing.B) { benchmarkBMTBaseline(4096, t) }
|
type whenHash = int
|
||||||
func BenchmarkBMTBaseline_2k(t *testing.B) { benchmarkBMTBaseline(4096/2, t) }
|
|
||||||
func BenchmarkBMTBaseline_1k(t *testing.B) { benchmarkBMTBaseline(4096/4, t) }
|
|
||||||
func BenchmarkBMTBaseline_512b(t *testing.B) { benchmarkBMTBaseline(4096/8, t) }
|
|
||||||
func BenchmarkBMTBaseline_256b(t *testing.B) { benchmarkBMTBaseline(4096/16, t) }
|
|
||||||
func BenchmarkBMTBaseline_128b(t *testing.B) { benchmarkBMTBaseline(4096/32, t) }
|
|
||||||
|
|
||||||
func BenchmarkRefHasher_4k(t *testing.B) { benchmarkRefHasher(4096, t) }
|
const (
|
||||||
func BenchmarkRefHasher_2k(t *testing.B) { benchmarkRefHasher(4096/2, t) }
|
first whenHash = iota
|
||||||
func BenchmarkRefHasher_1k(t *testing.B) { benchmarkRefHasher(4096/4, t) }
|
last
|
||||||
func BenchmarkRefHasher_512b(t *testing.B) { benchmarkRefHasher(4096/8, t) }
|
random
|
||||||
func BenchmarkRefHasher_256b(t *testing.B) { benchmarkRefHasher(4096/16, t) }
|
)
|
||||||
func BenchmarkRefHasher_128b(t *testing.B) { benchmarkRefHasher(4096/32, t) }
|
|
||||||
|
|
||||||
func BenchmarkBMTHasher_4k(t *testing.B) { benchmarkBMTHasher(4096, t) }
|
func BenchmarkBMTAsync(t *testing.B) {
|
||||||
func BenchmarkBMTHasher_2k(t *testing.B) { benchmarkBMTHasher(4096/2, t) }
|
whs := []whenHash{first, last, random}
|
||||||
func BenchmarkBMTHasher_1k(t *testing.B) { benchmarkBMTHasher(4096/4, t) }
|
for size := 4096; size >= 128; size /= 2 {
|
||||||
func BenchmarkBMTHasher_512b(t *testing.B) { benchmarkBMTHasher(4096/8, t) }
|
for _, wh := range whs {
|
||||||
func BenchmarkBMTHasher_256b(t *testing.B) { benchmarkBMTHasher(4096/16, t) }
|
for _, double := range []bool{false, true} {
|
||||||
func BenchmarkBMTHasher_128b(t *testing.B) { benchmarkBMTHasher(4096/32, t) }
|
t.Run(fmt.Sprintf("double_%v_hash_when_%v_size_%v", double, wh, size), func(t *testing.B) {
|
||||||
|
benchmarkBMTAsync(t, size, wh, double)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkBMTHasherNoPool_4k(t *testing.B) { benchmarkBMTHasherPool(1, 4096, t) }
|
func BenchmarkPool(t *testing.B) {
|
||||||
func BenchmarkBMTHasherNoPool_2k(t *testing.B) { benchmarkBMTHasherPool(1, 4096/2, t) }
|
caps := []int{1, PoolSize}
|
||||||
func BenchmarkBMTHasherNoPool_1k(t *testing.B) { benchmarkBMTHasherPool(1, 4096/4, t) }
|
for size := 4096; size >= 128; size /= 2 {
|
||||||
func BenchmarkBMTHasherNoPool_512b(t *testing.B) { benchmarkBMTHasherPool(1, 4096/8, t) }
|
for _, c := range caps {
|
||||||
func BenchmarkBMTHasherNoPool_256b(t *testing.B) { benchmarkBMTHasherPool(1, 4096/16, t) }
|
t.Run(fmt.Sprintf("poolsize_%v_size_%v", c, size), func(t *testing.B) {
|
||||||
func BenchmarkBMTHasherNoPool_128b(t *testing.B) { benchmarkBMTHasherPool(1, 4096/32, t) }
|
benchmarkPool(t, c, size)
|
||||||
|
})
|
||||||
func BenchmarkBMTHasherPool_4k(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096, t) }
|
}
|
||||||
func BenchmarkBMTHasherPool_2k(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096/2, t) }
|
}
|
||||||
func BenchmarkBMTHasherPool_1k(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096/4, t) }
|
}
|
||||||
func BenchmarkBMTHasherPool_512b(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096/8, t) }
|
|
||||||
func BenchmarkBMTHasherPool_256b(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096/16, t) }
|
|
||||||
func BenchmarkBMTHasherPool_128b(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096/32, t) }
|
|
||||||
|
|
||||||
// benchmarks simple sha3 hash on chunks
|
// benchmarks simple sha3 hash on chunks
|
||||||
func benchmarkSHA3(n int, t *testing.B) {
|
func benchmarkSHA3(t *testing.B, n int) {
|
||||||
data := newData(n)
|
data := newData(n)
|
||||||
hasher := sha3.NewKeccak256
|
hasher := sha3.NewKeccak256
|
||||||
h := hasher()
|
h := hasher()
|
||||||
|
@ -366,9 +413,7 @@ func benchmarkSHA3(n int, t *testing.B) {
|
||||||
t.ReportAllocs()
|
t.ReportAllocs()
|
||||||
t.ResetTimer()
|
t.ResetTimer()
|
||||||
for i := 0; i < t.N; i++ {
|
for i := 0; i < t.N; i++ {
|
||||||
h.Reset()
|
doSum(h, nil, data)
|
||||||
h.Write(data)
|
|
||||||
h.Sum(nil)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -377,7 +422,7 @@ func benchmarkSHA3(n int, t *testing.B) {
|
||||||
// doing it on n PoolSize each reusing the base hasher
|
// doing it on n PoolSize each reusing the base hasher
|
||||||
// the premise is that this is the minimum computation needed for a BMT
|
// the premise is that this is the minimum computation needed for a BMT
|
||||||
// therefore this serves as a theoretical optimum for concurrent implementations
|
// therefore this serves as a theoretical optimum for concurrent implementations
|
||||||
func benchmarkBMTBaseline(n int, t *testing.B) {
|
func benchmarkBMTBaseline(t *testing.B, n int) {
|
||||||
hasher := sha3.NewKeccak256
|
hasher := sha3.NewKeccak256
|
||||||
hashSize := hasher().Size()
|
hashSize := hasher().Size()
|
||||||
data := newData(hashSize)
|
data := newData(hashSize)
|
||||||
|
@ -394,9 +439,7 @@ func benchmarkBMTBaseline(n int, t *testing.B) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
h := hasher()
|
h := hasher()
|
||||||
for atomic.AddInt32(&i, 1) < count {
|
for atomic.AddInt32(&i, 1) < count {
|
||||||
h.Reset()
|
doSum(h, nil, data)
|
||||||
h.Write(data)
|
|
||||||
h.Sum(nil)
|
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
@ -405,21 +448,39 @@ func benchmarkBMTBaseline(n int, t *testing.B) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// benchmarks BMT Hasher
|
// benchmarks BMT Hasher
|
||||||
func benchmarkBMTHasher(n int, t *testing.B) {
|
func benchmarkBMT(t *testing.B, n int) {
|
||||||
data := newData(n)
|
data := newData(n)
|
||||||
hasher := sha3.NewKeccak256
|
hasher := sha3.NewKeccak256
|
||||||
pool := NewTreePool(hasher, SegmentCount, PoolSize)
|
pool := NewTreePool(hasher, SegmentCount, PoolSize)
|
||||||
|
bmt := New(pool)
|
||||||
|
|
||||||
t.ReportAllocs()
|
t.ReportAllocs()
|
||||||
t.ResetTimer()
|
t.ResetTimer()
|
||||||
for i := 0; i < t.N; i++ {
|
for i := 0; i < t.N; i++ {
|
||||||
bmt := New(pool)
|
syncHash(bmt, nil, data)
|
||||||
Hash(bmt, nil, data)
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// benchmarks BMT hasher with asynchronous concurrent segment/section writes
|
||||||
|
func benchmarkBMTAsync(t *testing.B, n int, wh whenHash, double bool) {
|
||||||
|
data := newData(n)
|
||||||
|
hasher := sha3.NewKeccak256
|
||||||
|
pool := NewTreePool(hasher, SegmentCount, PoolSize)
|
||||||
|
bmt := New(pool).NewAsyncWriter(double)
|
||||||
|
idxs, segments := splitAndShuffle(bmt.SectionSize(), data)
|
||||||
|
shuffle(len(idxs), func(i int, j int) {
|
||||||
|
idxs[i], idxs[j] = idxs[j], idxs[i]
|
||||||
|
})
|
||||||
|
|
||||||
|
t.ReportAllocs()
|
||||||
|
t.ResetTimer()
|
||||||
|
for i := 0; i < t.N; i++ {
|
||||||
|
asyncHash(bmt, nil, n, wh, idxs, segments)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// benchmarks 100 concurrent bmt hashes with pool capacity
|
// benchmarks 100 concurrent bmt hashes with pool capacity
|
||||||
func benchmarkBMTHasherPool(poolsize, n int, t *testing.B) {
|
func benchmarkPool(t *testing.B, poolsize, n int) {
|
||||||
data := newData(n)
|
data := newData(n)
|
||||||
hasher := sha3.NewKeccak256
|
hasher := sha3.NewKeccak256
|
||||||
pool := NewTreePool(hasher, SegmentCount, poolsize)
|
pool := NewTreePool(hasher, SegmentCount, poolsize)
|
||||||
|
@ -434,7 +495,7 @@ func benchmarkBMTHasherPool(poolsize, n int, t *testing.B) {
|
||||||
go func() {
|
go func() {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
bmt := New(pool)
|
bmt := New(pool)
|
||||||
Hash(bmt, nil, data)
|
syncHash(bmt, nil, data)
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
|
@ -442,7 +503,7 @@ func benchmarkBMTHasherPool(poolsize, n int, t *testing.B) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// benchmarks the reference hasher
|
// benchmarks the reference hasher
|
||||||
func benchmarkRefHasher(n int, t *testing.B) {
|
func benchmarkRefHasher(t *testing.B, n int) {
|
||||||
data := newData(n)
|
data := newData(n)
|
||||||
hasher := sha3.NewKeccak256
|
hasher := sha3.NewKeccak256
|
||||||
rbmt := NewRefHasher(hasher, 128)
|
rbmt := NewRefHasher(hasher, 128)
|
||||||
|
@ -462,3 +523,93 @@ func newData(bufferSize int) []byte {
|
||||||
}
|
}
|
||||||
return data
|
return data
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Hash hashes the data and the span using the bmt hasher
|
||||||
|
func syncHash(h *Hasher, span, data []byte) []byte {
|
||||||
|
h.ResetWithLength(span)
|
||||||
|
h.Write(data)
|
||||||
|
return h.Sum(nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
func splitAndShuffle(secsize int, data []byte) (idxs []int, segments [][]byte) {
|
||||||
|
l := len(data)
|
||||||
|
n := l / secsize
|
||||||
|
if l%secsize > 0 {
|
||||||
|
n++
|
||||||
|
}
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
idxs = append(idxs, i)
|
||||||
|
end := (i + 1) * secsize
|
||||||
|
if end > l {
|
||||||
|
end = l
|
||||||
|
}
|
||||||
|
section := data[i*secsize : end]
|
||||||
|
segments = append(segments, section)
|
||||||
|
}
|
||||||
|
shuffle(n, func(i int, j int) {
|
||||||
|
idxs[i], idxs[j] = idxs[j], idxs[i]
|
||||||
|
})
|
||||||
|
return idxs, segments
|
||||||
|
}
|
||||||
|
|
||||||
|
// splits the input data performs a random shuffle to mock async section writes
|
||||||
|
func asyncHashRandom(bmt SectionWriter, span []byte, data []byte, wh whenHash) (s []byte) {
|
||||||
|
idxs, segments := splitAndShuffle(bmt.SectionSize(), data)
|
||||||
|
return asyncHash(bmt, span, len(data), wh, idxs, segments)
|
||||||
|
}
|
||||||
|
|
||||||
|
// mock for async section writes for BMT SectionWriter
|
||||||
|
// requires a permutation (a random shuffle) of list of all indexes of segments
|
||||||
|
// and writes them in order to the appropriate section
|
||||||
|
// the Sum function is called according to the wh parameter (first, last, random [relative to segment writes])
|
||||||
|
func asyncHash(bmt SectionWriter, span []byte, l int, wh whenHash, idxs []int, segments [][]byte) (s []byte) {
|
||||||
|
bmt.Reset()
|
||||||
|
if l == 0 {
|
||||||
|
return bmt.Sum(nil, l, span)
|
||||||
|
}
|
||||||
|
c := make(chan []byte, 1)
|
||||||
|
hashf := func() {
|
||||||
|
c <- bmt.Sum(nil, l, span)
|
||||||
|
}
|
||||||
|
maxsize := len(idxs)
|
||||||
|
var r int
|
||||||
|
if wh == random {
|
||||||
|
r = rand.Intn(maxsize)
|
||||||
|
}
|
||||||
|
for i, idx := range idxs {
|
||||||
|
bmt.Write(idx, segments[idx])
|
||||||
|
if (wh == first || wh == random) && i == r {
|
||||||
|
go hashf()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if wh == last {
|
||||||
|
return bmt.Sum(nil, l, span)
|
||||||
|
}
|
||||||
|
return <-c
|
||||||
|
}
|
||||||
|
|
||||||
|
// this is also in swarm/network_test.go
|
||||||
|
// shuffle pseudo-randomizes the order of elements.
|
||||||
|
// n is the number of elements. Shuffle panics if n < 0.
|
||||||
|
// swap swaps the elements with indexes i and j.
|
||||||
|
func shuffle(n int, swap func(i, j int)) {
|
||||||
|
if n < 0 {
|
||||||
|
panic("invalid argument to Shuffle")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fisher-Yates shuffle: https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle
|
||||||
|
// Shuffle really ought not be called with n that doesn't fit in 32 bits.
|
||||||
|
// Not only will it take a very long time, but with 2³¹! possible permutations,
|
||||||
|
// there's no way that any PRNG can have a big enough internal state to
|
||||||
|
// generate even a minuscule percentage of the possible permutations.
|
||||||
|
// Nevertheless, the right API signature accepts an int n, so handle it as best we can.
|
||||||
|
i := n - 1
|
||||||
|
for ; i > 1<<31-1-1; i-- {
|
||||||
|
j := int(rand.Int63n(int64(i + 1)))
|
||||||
|
swap(i, j)
|
||||||
|
}
|
||||||
|
for ; i > 0; i-- {
|
||||||
|
j := int(rand.Int31n(int32(i + 1)))
|
||||||
|
swap(i, j)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue