This commit is contained in:
Martin HS 2024-11-28 14:45:32 +01:00 committed by GitHub
commit 012d2fc058
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 360 additions and 49 deletions

View File

@ -81,6 +81,9 @@ func prefixedRlpHash(prefix byte, x interface{}) (h common.Hash) {
type TrieHasher interface {
Reset()
Update([]byte, []byte) error
// UpdateSafe is identical to Update, except that this method will copy the
// value slice. The caller is free to modify the value bytes after this method returns.
UpdateSafe([]byte, []byte) error
Hash() common.Hash
}
@ -95,10 +98,7 @@ type DerivableList interface {
func encodeForDerive(list DerivableList, i int, buf *bytes.Buffer) []byte {
buf.Reset()
list.EncodeIndex(i, buf)
// It's really unfortunate that we need to perform this copy.
// StackTrie holds onto the values until Hash is called, so the values
// written to it must not alias.
return common.CopyBytes(buf.Bytes())
return buf.Bytes()
}
// DeriveSha creates the tree hashes of transactions, receipts, and withdrawals in a block header.
@ -118,17 +118,17 @@ func DeriveSha(list DerivableList, hasher TrieHasher) common.Hash {
for i := 1; i < list.Len() && i <= 0x7f; i++ {
indexBuf = rlp.AppendUint64(indexBuf[:0], uint64(i))
value := encodeForDerive(list, i, valueBuf)
hasher.Update(indexBuf, value)
hasher.UpdateSafe(indexBuf, value)
}
if list.Len() > 0 {
indexBuf = rlp.AppendUint64(indexBuf[:0], 0)
value := encodeForDerive(list, 0, valueBuf)
hasher.Update(indexBuf, value)
hasher.UpdateSafe(indexBuf, value)
}
for i := 0x80; i < list.Len(); i++ {
indexBuf = rlp.AppendUint64(indexBuf[:0], uint64(i))
value := encodeForDerive(list, i, valueBuf)
hasher.Update(indexBuf, value)
hasher.UpdateSafe(indexBuf, value)
}
return hasher.Hash()
}

View File

@ -81,26 +81,31 @@ func BenchmarkDeriveSha200(b *testing.B) {
if err != nil {
b.Fatal(err)
}
var exp common.Hash
var got common.Hash
want := types.DeriveSha(txs, trie.NewEmpty(triedb.NewDatabase(rawdb.NewMemoryDatabase(), nil)))
var have common.Hash
b.Run("std_trie", func(b *testing.B) {
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
exp = types.DeriveSha(txs, trie.NewEmpty(triedb.NewDatabase(rawdb.NewMemoryDatabase(), nil)))
have = types.DeriveSha(txs, trie.NewEmpty(triedb.NewDatabase(rawdb.NewMemoryDatabase(), nil)))
}
if have != want {
b.Errorf("have %x want %x", have, want)
}
})
st := trie.NewStackTrie(nil)
b.Run("stack_trie", func(b *testing.B) {
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
got = types.DeriveSha(txs, trie.NewStackTrie(nil))
st.Reset()
have = types.DeriveSha(txs, st)
}
if have != want {
b.Errorf("have %x want %x", have, want)
}
})
if got != exp {
b.Errorf("got %x exp %x", got, exp)
}
}
func TestFuzzDeriveSha(t *testing.T) {
@ -226,6 +231,12 @@ func (d *hashToHumanReadable) Update(i []byte, i2 []byte) error {
return nil
}
// UpdateSafe is identical to Update, except that this method will copy the
// value slice. The caller is free to modify the value bytes after this method returns.
func (d *hashToHumanReadable) UpdateSafe(key, value []byte) error {
return d.Update(key, common.CopyBytes(value))
}
func (d *hashToHumanReadable) Hash() common.Hash {
return common.Hash{}
}

View File

@ -2,6 +2,7 @@ package types
import (
"crypto/ecdsa"
"sync"
"testing"
"github.com/ethereum/go-ethereum/common"
@ -58,12 +59,22 @@ func TestBlobTxSize(t *testing.T) {
}
}
// emptyInit ensures that we init the kzg empties only once
var (
emptyBlob = new(kzg4844.Blob)
emptyBlobCommit, _ = kzg4844.BlobToCommitment(emptyBlob)
emptyBlobProof, _ = kzg4844.ComputeBlobProof(emptyBlob, emptyBlobCommit)
emptyInit sync.Once
emptyBlob *kzg4844.Blob
emptyBlobCommit kzg4844.Commitment
emptyBlobProof kzg4844.Proof
)
func initEmpties() {
emptyInit.Do(func() {
emptyBlob = new(kzg4844.Blob)
emptyBlobCommit, _ = kzg4844.BlobToCommitment(emptyBlob)
emptyBlobProof, _ = kzg4844.ComputeBlobProof(emptyBlob, emptyBlobCommit)
})
}
func createEmptyBlobTx(key *ecdsa.PrivateKey, withSidecar bool) *Transaction {
blobtx := createEmptyBlobTxInner(withSidecar)
signer := NewCancunSigner(blobtx.ChainID.ToBig())
@ -71,6 +82,7 @@ func createEmptyBlobTx(key *ecdsa.PrivateKey, withSidecar bool) *Transaction {
}
func createEmptyBlobTxInner(withSidecar bool) *BlobTx {
initEmpties()
sidecar := &BlobTxSidecar{
Blobs: []kzg4844.Blob{*emptyBlob},
Commitments: []kzg4844.Commitment{emptyBlobCommit},

View File

@ -53,6 +53,12 @@ func (h *testHasher) Update(key, val []byte) error {
return nil
}
// UpdateSafe is identical to Update, except that this method will copy the
// value slice. The caller is free to modify the value bytes after this method returns.
func (h *testHasher) UpdateSafe(key, value []byte) error {
return h.Update(key, common.CopyBytes(value))
}
// Hash returns the hash value.
func (h *testHasher) Hash() common.Hash {
return common.BytesToHash(h.hasher.Sum(nil))

92
trie/bytepool.go Normal file
View File

@ -0,0 +1,92 @@
// Copyright 2024 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package trie
// bytesPool is a pool for byteslices. It is safe for concurrent use.
type bytesPool struct {
c chan []byte
w int
}
// newBytesPool creates a new bytesPool. The sliceCap sets the capacity of
// newly allocated slices, and the nitems determines how many items the pool
// will hold, at maximum.
func newBytesPool(sliceCap, nitems int) *bytesPool {
return &bytesPool{
c: make(chan []byte, nitems),
w: sliceCap,
}
}
// Get returns a slice. Safe for concurrent use.
func (bp *bytesPool) Get() []byte {
select {
case b := <-bp.c:
return b
default:
return make([]byte, 0, bp.w)
}
}
// Put returns a slice to the pool. Safe for concurrent use. This method
// will ignore slices that are too small or too large (>3x the cap)
func (bp *bytesPool) Put(b []byte) {
if c := cap(b); c < bp.w || c > 3*bp.w {
return
}
select {
case bp.c <- b:
default:
}
}
// unsafeBytesPool is a pool for byteslices. It is not safe for concurrent use.
type unsafeBytesPool struct {
items [][]byte
w int
}
// newUnsafeBytesPool creates a new bytesPool. The sliceCap sets the capacity of
// newly allocated slices, and the nitems determines how many items the pool
// will hold, at maximum.
func newUnsafeBytesPool(sliceCap, nitems int) *unsafeBytesPool {
return &unsafeBytesPool{
items: make([][]byte, 0, nitems),
w: sliceCap,
}
}
// Get returns a slice.
func (bp *unsafeBytesPool) Get() []byte {
if len(bp.items) > 0 {
last := bp.items[len(bp.items)-1]
bp.items = bp.items[:len(bp.items)-1]
return last
}
return make([]byte, 0, bp.w)
}
// Put returns a slice to the pool. This method
// will ignore slices that are too small or too large (>3x the cap)
func (bp *unsafeBytesPool) Put(b []byte) {
if c := cap(b); c < bp.w || c > 3*bp.w {
return
}
if len(bp.items) < cap(bp.items) {
bp.items = append(bp.items, b)
}
}

View File

@ -104,6 +104,17 @@ func keybytesToHex(str []byte) []byte {
return nibbles
}
// writeHexKey writes the hexkey into the given slice.
// OBS! This method omits the termination flag.
// OBS! The dst slice must be at least 2x as large as the key
func writeHexKey(dst []byte, key []byte) {
_ = dst[2*len(key)-1]
for i, b := range key {
dst[i*2] = b / 16
dst[i*2+1] = b % 16
}
}
// hexToKeybytes turns hex nibbles into key bytes.
// This can only be used for keys of even length.
func hexToKeybytes(hex []byte) []byte {

View File

@ -188,6 +188,14 @@ func (h *hasher) hashData(data []byte) hashNode {
return n
}
// hashDataTo hashes the provided data to the given destination buffer. The caller
// must ensure that the dst buffer is of appropriate size.
func (h *hasher) hashDataTo(dst, data []byte) {
h.sha.Reset()
h.sha.Write(data)
h.sha.Read(dst)
}
// proofHash is used to construct trie proofs, and returns the 'collapsed'
// node (for later RLP encoding) as well as the hashed node -- unless the
// node is smaller than 32 bytes, in which case it will be returned as is.

View File

@ -45,6 +45,23 @@ type (
}
hashNode []byte
valueNode []byte
//fullnodeEncoder is a type used exclusively for encoding. Briefly instantiating
// a fullnodeEncoder and initializing with existing slices is less memory
// intense than using the fullNode type.
fullnodeEncoder struct {
Children [17][]byte
flags nodeFlag
}
//shortNodeEncoder is a type used exclusively for encoding. Briefly instantiating
// a shortNodeEncoder and initializing with existing slices is less memory
// intense than using the shortNode type.
shortNodeEncoder struct {
Key []byte
Val []byte
flags nodeFlag
}
)
// nilValueNode is used when collapsing internal trie nodes for hashing, since
@ -67,16 +84,20 @@ type nodeFlag struct {
dirty bool // whether the node has changes that must be written to the database
}
func (n *fullNode) cache() (hashNode, bool) { return n.flags.hash, n.flags.dirty }
func (n *shortNode) cache() (hashNode, bool) { return n.flags.hash, n.flags.dirty }
func (n hashNode) cache() (hashNode, bool) { return nil, true }
func (n valueNode) cache() (hashNode, bool) { return nil, true }
func (n *fullNode) cache() (hashNode, bool) { return n.flags.hash, n.flags.dirty }
func (n *fullnodeEncoder) cache() (hashNode, bool) { return n.flags.hash, n.flags.dirty }
func (n *shortNode) cache() (hashNode, bool) { return n.flags.hash, n.flags.dirty }
func (n *shortNodeEncoder) cache() (hashNode, bool) { return n.flags.hash, n.flags.dirty }
func (n hashNode) cache() (hashNode, bool) { return nil, true }
func (n valueNode) cache() (hashNode, bool) { return nil, true }
// Pretty printing.
func (n *fullNode) String() string { return n.fstring("") }
func (n *shortNode) String() string { return n.fstring("") }
func (n hashNode) String() string { return n.fstring("") }
func (n valueNode) String() string { return n.fstring("") }
func (n *fullNode) String() string { return n.fstring("") }
func (n *fullnodeEncoder) String() string { return n.fstring("") }
func (n *shortNode) String() string { return n.fstring("") }
func (n *shortNodeEncoder) String() string { return n.fstring("") }
func (n hashNode) String() string { return n.fstring("") }
func (n valueNode) String() string { return n.fstring("") }
func (n *fullNode) fstring(ind string) string {
resp := fmt.Sprintf("[\n%s ", ind)
@ -89,9 +110,24 @@ func (n *fullNode) fstring(ind string) string {
}
return resp + fmt.Sprintf("\n%s] ", ind)
}
func (n *fullnodeEncoder) fstring(ind string) string {
resp := fmt.Sprintf("[\n%s ", ind)
for i, node := range &n.Children {
if node == nil {
resp += fmt.Sprintf("%s: <nil> ", indices[i])
} else {
resp += fmt.Sprintf("%s: %x", indices[i], node)
}
}
return resp + fmt.Sprintf("\n%s] ", ind)
}
func (n *shortNode) fstring(ind string) string {
return fmt.Sprintf("{%x: %v} ", n.Key, n.Val.fstring(ind+" "))
}
func (n *shortNodeEncoder) fstring(ind string) string {
return fmt.Sprintf("{%x: %x} ", n.Key, n.Val)
}
func (n hashNode) fstring(ind string) string {
return fmt.Sprintf("<%x> ", []byte(n))
}

View File

@ -40,6 +40,20 @@ func (n *fullNode) encode(w rlp.EncoderBuffer) {
w.ListEnd(offset)
}
func (n *fullnodeEncoder) encode(w rlp.EncoderBuffer) {
offset := w.List()
for _, c := range n.Children {
if c == nil {
w.Write(rlp.EmptyString)
} else if len(c) < 32 {
w.Write(c) // rawNode
} else {
w.WriteBytes(c) // hashNode
}
}
w.ListEnd(offset)
}
func (n *shortNode) encode(w rlp.EncoderBuffer) {
offset := w.List()
w.WriteBytes(n.Key)
@ -51,6 +65,20 @@ func (n *shortNode) encode(w rlp.EncoderBuffer) {
w.ListEnd(offset)
}
func (n *shortNodeEncoder) encode(w rlp.EncoderBuffer) {
offset := w.List()
w.WriteBytes(n.Key)
if n.Val == nil {
w.Write(rlp.EmptyString)
} else if len(n.Val) < 32 {
w.Write(n.Val) // rawNode
} else {
w.WriteBytes(n.Val) // hashNode
}
w.ListEnd(offset)
}
func (n hashNode) encode(w rlp.EncoderBuffer) {
w.WriteBytes(n)
}

View File

@ -27,6 +27,7 @@ import (
var (
stPool = sync.Pool{New: func() any { return new(stNode) }}
bPool = newBytesPool(32, 100)
_ = types.TrieHasher((*StackTrie)(nil))
)
@ -47,6 +48,9 @@ type StackTrie struct {
h *hasher
last []byte
onTrieNode OnTrieNode
kBuf []byte // buf space used for hex-key during insertions
pBuf []byte // buf space used for path during insertions
vPool *unsafeBytesPool
}
// NewStackTrie allocates and initializes an empty trie. The committed nodes
@ -56,15 +60,36 @@ func NewStackTrie(onTrieNode OnTrieNode) *StackTrie {
root: stPool.Get().(*stNode),
h: newHasher(false),
onTrieNode: onTrieNode,
kBuf: make([]byte, 0, 32),
pBuf: make([]byte, 0, 32),
vPool: newUnsafeBytesPool(300, 20),
}
}
// UpdateSafe is identical to Update, except that this method will copy the
// value slice. The caller is free to modify the value bytes after this method returns.
func (t *StackTrie) UpdateSafe(key, value []byte) error {
// The stacktrie always copies the value (is already safe).
return t.Update(key, value)
}
// Update inserts a (key, value) pair into the stack trie.
// The value is copied, and the caller is free to modify the value after this
// method returns.
func (t *StackTrie) Update(key, value []byte) error {
if len(value) == 0 {
return errors.New("trying to insert empty (deletion)")
}
k := t.TrieKey(key)
var k []byte
{ // Need to expand the 'key' into hex-form. We use the dedicated buf for that.
if cap(t.kBuf) < 2*len(key) { // realloc to ensure sufficient cap
t.kBuf = make([]byte, 2*len(key), 2*len(key))
}
// resize to ensure correct size
t.kBuf = t.kBuf[:2*len(key)]
writeHexKey(t.kBuf, key)
k = t.kBuf
}
if bytes.Compare(t.last, k) >= 0 {
return errors.New("non-ascending key order")
}
@ -73,7 +98,14 @@ func (t *StackTrie) Update(key, value []byte) error {
} else {
t.last = append(t.last[:0], k...) // reuse key slice
}
t.insert(t.root, k, value, nil)
vBuf := t.vPool.Get()
if cap(vBuf) < len(value) {
vBuf = common.CopyBytes(value)
} else {
vBuf = vBuf[:len(value)]
copy(vBuf, value)
}
t.insert(t.root, k, vBuf, t.pBuf[:0])
return nil
}
@ -81,6 +113,7 @@ func (t *StackTrie) Update(key, value []byte) error {
func (t *StackTrie) Reset() {
t.root = stPool.Get().(*stNode)
t.last = nil
t.onTrieNode = nil
}
// TrieKey returns the internal key representation for the given user key.
@ -129,6 +162,12 @@ const (
)
func (n *stNode) reset() *stNode {
if n.typ == hashedNode {
// On hashnodes, we 'own' the val: it is guaranteed to be not held
// by external caller. Hence, when we arrive here, we can put it back
// into the pool
bPool.Put(n.val)
}
n.key = n.key[:0]
n.val = nil
for i := range n.children {
@ -150,8 +189,11 @@ func (n *stNode) getDiffIndex(key []byte) int {
return len(n.key)
}
// Helper function to that inserts a (key, value) pair into
// the trie.
// Helper function to that inserts a (key, value) pair into the trie.
// - The key is not retained by this method, but always copied if needed.
// - The value is retained by this method, as long as the leaf that it represents
// remains unhashed. However: it is never modified.
// - The path is not retained by this method.
func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) {
switch st.typ {
case branchNode: /* Branch */
@ -283,7 +325,7 @@ func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) {
case emptyNode: /* Empty */
st.typ = leafNode
st.key = key
st.key = append(st.key, key...)
st.val = value
case hashedNode:
@ -318,35 +360,32 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
return
case branchNode:
var nodes fullNode
var nodes fullnodeEncoder
for i, child := range st.children {
if child == nil {
nodes.Children[i] = nilValueNode
continue
}
t.hash(child, append(path, byte(i)))
if len(child.val) < 32 {
nodes.Children[i] = rawNode(child.val)
} else {
nodes.Children[i] = hashNode(child.val)
nodes.Children[i] = child.val
}
nodes.encode(t.h.encbuf)
blob = t.h.encodedBytes()
for i, child := range st.children {
if child == nil {
continue
}
st.children[i] = nil
stPool.Put(child.reset()) // Release child back to pool.
}
nodes.encode(t.h.encbuf)
blob = t.h.encodedBytes()
case extNode:
// recursively hash and commit child as the first step
t.hash(st.children[0], append(path, st.key...))
// encode the extension node
n := shortNode{Key: hexToCompactInPlace(st.key)}
if len(st.children[0].val) < 32 {
n.Val = rawNode(st.children[0].val)
} else {
n.Val = hashNode(st.children[0].val)
n := shortNodeEncoder{
Key: hexToCompactInPlace(st.key),
Val: st.children[0].val,
}
n.encode(t.h.encbuf)
blob = t.h.encodedBytes()
@ -356,9 +395,13 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
case leafNode:
st.key = append(st.key, byte(16))
n := shortNode{Key: hexToCompactInPlace(st.key), Val: valueNode(st.val)}
n.encode(t.h.encbuf)
{
w := t.h.encbuf
offset := w.List()
w.WriteBytes(hexToCompactInPlace(st.key))
w.WriteBytes(st.val)
w.ListEnd(offset)
}
blob = t.h.encodedBytes()
default:
@ -368,15 +411,27 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
st.typ = hashedNode
st.key = st.key[:0]
// Release reference to (potentially externally held) value-slice.
if cap(st.val) > 0 && t.vPool != nil {
t.vPool.Put(st.val)
}
st.val = nil
// Skip committing the non-root node if the size is smaller than 32 bytes
// as tiny nodes are always embedded in their parent except root node.
if len(blob) < 32 && len(path) > 0 {
st.val = common.CopyBytes(blob)
val := bPool.Get()
val = val[:len(blob)]
copy(val, blob)
st.val = val
return
}
// Write the hash to the 'val'. We allocate a new val here to not mutate
// input values.
st.val = t.h.hashData(blob)
val := bPool.Get()
val = val[:32]
t.h.hashDataTo(val, blob)
st.val = val
// Invoke the callback it's provided. Notably, the path and blob slices are
// volatile, please deep-copy the slices in callback if the contents need

View File

@ -18,6 +18,7 @@ package trie
import (
"bytes"
"encoding/binary"
"math/big"
"testing"
@ -398,3 +399,48 @@ func TestStackTrieErrors(t *testing.T) {
assert.NotNil(t, s.Update([]byte{0x10}, []byte{0xb}), "out of order insert")
assert.NotNil(t, s.Update([]byte{0xaa}, []byte{0xb}), "repeat insert same key")
}
func BenchmarkInsert100K(b *testing.B) {
var num = 100_000
var key = make([]byte, 8)
var val = make([]byte, 20)
var hash common.Hash
b.ReportAllocs()
for i := 0; i < b.N; i++ {
s := NewStackTrie(nil)
var k uint64
for j := 0; j < num; j++ {
binary.BigEndian.PutUint64(key, k)
if err := s.Update(key, val); err != nil {
b.Fatal(err)
}
k += 1024
}
if hash == (common.Hash{}) {
hash = s.Hash()
} else {
if hash != s.Hash() && false {
b.Fatalf("hash wrong, have %x want %x", s.Hash(), hash)
}
}
}
}
func TestInsert100K(t *testing.T) {
var num = 100_000
var key = make([]byte, 8)
var val = make([]byte, 20)
s := NewStackTrie(nil)
var k uint64
for j := 0; j < num; j++ {
binary.BigEndian.PutUint64(key, k)
if err := s.Update(key, val); err != nil {
t.Fatal(err)
}
k += 1024
}
want := common.HexToHash("0xb0071bd257342925d9d8a9f002b9d2b646a35437aa8b089628ab56e428d29a1a")
if have := s.Hash(); have != want {
t.Fatalf("hash wrong, have %x want %x", have, want)
}
}

View File

@ -308,6 +308,12 @@ func (t *Trie) Update(key, value []byte) error {
return t.update(key, value)
}
// UpdateSafe is identical to Update, except that this method will copy the
// value slice. The caller is free to modify the value bytes after this method returns.
func (t *Trie) UpdateSafe(key, value []byte) error {
return t.Update(key, common.CopyBytes(value))
}
func (t *Trie) update(key, value []byte) error {
t.unhashed++
t.uncommitted++