core/filtermaps: use fnv-1a for column index calculation

2025-02-16 11:10:17 +01:00 · 2025-02-16 11:10:17 +01:00 · 13944d3ed6
parent e99fd8f1aa
commit 13944d3ed6
3 changed files with 23 additions and 46 deletions
--- a/core/filtermaps/map_renderer.go
+++ b/core/filtermaps/map_renderer.go
@ -334,7 +334,7 @@ func (r *mapRenderer) renderCurrentMap(stopCb func() bool) (bool, error) {
 				lvp.rowIndex = r.f.rowIndex(r.currentMap.mapIndex, lvp.layerIndex, logValue)
 				cached = false
 			}
-			r.currentMap.filterMap[lvp.rowIndex] = append(r.currentMap.filterMap[lvp.rowIndex], r.f.columnIndex(r.iterator.lvIndex, logValue))
+			r.currentMap.filterMap[lvp.rowIndex] = append(r.currentMap.filterMap[lvp.rowIndex], r.f.columnIndex(r.iterator.lvIndex, &logValue))
 			if !cached {
 				rowMappingCache.Add(logValue, lvp)
 			}
--- a/core/filtermaps/math.go
+++ b/core/filtermaps/math.go
@ -19,8 +19,8 @@ package filtermaps
 import (
 	"crypto/sha256"
 	"encoding/binary"
+	"hash/fnv"
 	"math"
-	"math/bits"
 	"sort"

 	"github.com/ethereum/go-ethereum/common"
@ -109,28 +109,20 @@ func (p *Params) rowIndex(mapIndex, layerIndex uint32, logValue common.Hash) uin
 	return binary.LittleEndian.Uint32(hash[:4]) % p.mapHeight
 }

-// columnIndex returns the column index that should be added to the appropriate
-// row in order to place a mark for the next log value.
-func (p *Params) columnIndex(lvIndex uint64, logValue common.Hash) uint32 {
-	x := uint32(lvIndex % p.valuesPerMap) // log value sub-index
-	transformHash := transformHash(uint32(lvIndex/p.valuesPerMap), logValue)
-	// apply column index transformation function
-	return x<<(p.logMapWidth-p.logValuesPerMap) + p.columnSubIndex(x, &transformHash)
-}
-
-func (p *Params) columnSubIndex(x uint32, transformHash *common.Hash) uint32 {
-	x *= binary.LittleEndian.Uint32(transformHash[4:8])*2 + 1
-	x = bits.RotateLeft32(x, int(transformHash[0]&31))
-	x ^= binary.LittleEndian.Uint32(transformHash[8:12])
-	x *= binary.LittleEndian.Uint32(transformHash[12:16])*2 + 1
-	x = bits.RotateLeft32(x, int(transformHash[1]&31))
-	x += binary.LittleEndian.Uint32(transformHash[16:20])
-	x *= binary.LittleEndian.Uint32(transformHash[20:24])*2 + 1
-	x = bits.RotateLeft32(x, int(transformHash[2]&31))
-	x ^= binary.LittleEndian.Uint32(transformHash[24:28])
-	x *= binary.LittleEndian.Uint32(transformHash[28:32])*2 + 1
-	x = bits.RotateLeft32(x, int(transformHash[3]&31))
-	return x & (uint32(1)<<(p.logMapWidth-p.logValuesPerMap) - 1)
+// columnIndex returns the column index at which logValue should be marked when
+// it occurs at log value index lvIndex.
+func (p *Params) columnIndex(lvIndex uint64, logValue *common.Hash) uint32 {
+	var indexEnc [8]byte
+	binary.LittleEndian.PutUint64(indexEnc[:], lvIndex)
+	// Note: reusing the hasher brings practically no performance gain and would
+	// require passing it through the entire matcher logic, because matching is
+	// multi-threaded.
+	hasher := fnv.New64a()
+	hasher.Write(indexEnc[:])
+	hasher.Write(logValue[:])
+	hash := hasher.Sum64()
+	hashBits := p.logMapWidth - p.logValuesPerMap
+	return uint32(lvIndex%p.valuesPerMap)<<hashBits + (uint32(hash>>(64-hashBits)) ^ uint32(hash)>>(32-hashBits))
 }

 // maxRowLength returns the maximum length filter rows are populated up to
@ -164,21 +156,6 @@ func (p *Params) maskedMapIndex(mapIndex, layerIndex uint32) uint32 {
 	return mapIndex & (uint32(math.MaxUint32) << (p.logMapsPerEpoch - logLayerDiff))
 }

-// transformHash calculates a hash specific to a given map and log value hash
-// that defines a bijective function on the uint32 range. This function is used
-// to transform the log value sub-index (distance from the first index of the map)
-// into a 32 bit column index, then applied in reverse when searching for potential
-// matches for a given log value.
-func transformHash(mapIndex uint32, logValue common.Hash) (result common.Hash) {
-	hasher := sha256.New()
-	hasher.Write(logValue[:])
-	var indexEnc [4]byte
-	binary.LittleEndian.PutUint32(indexEnc[:], mapIndex)
-	hasher.Write(indexEnc[:])
-	hasher.Sum(result[:0])
-	return
-}
-
 // potentialMatches returns the list of log value indices potentially matching
 // the given log value hash in the range of the filter map the row belongs to.
 // Note that the list of indices is always sorted and potential duplicates are
@ -191,15 +168,15 @@ func transformHash(mapIndex uint32, logValue common.Hash) (result common.Hash) {
 // here.
 func (p *Params) potentialMatches(rows []FilterRow, mapIndex uint32, logValue common.Hash) potentialMatches {
 	results := make(potentialMatches, 0, 8)
-	transformHash := transformHash(mapIndex, logValue)
+	mapFirst := uint64(mapIndex) << p.logValuesPerMap
 	for i, row := range rows {
 		rowLen, maxLen := len(row), int(p.maxRowLength(uint32(i)))
 		if rowLen > maxLen {
 			rowLen = maxLen // any additional entries are generated by another log value on a higher mapping layer
 		}
 		for i := 0; i < rowLen; i++ {
-			if potentialSubIndex := row[i] >> (p.logMapWidth - p.logValuesPerMap); row[i]&(uint32(1)<<(p.logMapWidth-p.logValuesPerMap)-1) == p.columnSubIndex(potentialSubIndex, &transformHash) {
-				results = append(results, uint64(mapIndex)<<p.logValuesPerMap+uint64(potentialSubIndex))
+			if potentialMatch := mapFirst + uint64(row[i]>>(p.logMapWidth-p.logValuesPerMap)); row[i] == p.columnIndex(potentialMatch, &logValue) {
+				results = append(results, potentialMatch)
 			}
 		}
 		if rowLen < maxLen {
--- a/core/filtermaps/math_test.go
+++ b/core/filtermaps/math_test.go
@ -34,7 +34,7 @@ func TestSingleMatch(t *testing.T) {
 		lvIndex := uint64(mapIndex)<<params.logValuesPerMap + uint64(rand.Intn(int(params.valuesPerMap)))
 		var lvHash common.Hash
 		crand.Read(lvHash[:])
-		row := FilterRow{params.columnIndex(lvIndex, lvHash)}
+		row := FilterRow{params.columnIndex(lvIndex, &lvHash)}
 		matches := params.potentialMatches([]FilterRow{row}, mapIndex, lvHash)
 		// check if it has been reverse transformed correctly
 		if len(matches) != 1 {
@ -49,7 +49,7 @@ func TestSingleMatch(t *testing.T) {
 }

 const (
-	testPmCount = 100
+	testPmCount = 50
 	testPmLen   = 1000
 )

@ -68,12 +68,12 @@ func TestPotentialMatches(t *testing.T) {
 			// add testPmLen single entries with different log value hashes at different indices
 			lvIndices[i] = lvStart + uint64(rand.Intn(int(params.valuesPerMap)))
 			crand.Read(lvHashes[i][:])
-			row = append(row, params.columnIndex(lvIndices[i], lvHashes[i]))
+			row = append(row, params.columnIndex(lvIndices[i], &lvHashes[i]))
 		}
 		// add the same log value hash at the first testPmLen log value indices of the map's range
 		crand.Read(lvHashes[testPmLen][:])
 		for lvIndex := lvStart; lvIndex < lvStart+testPmLen; lvIndex++ {
-			row = append(row, params.columnIndex(lvIndex, lvHashes[testPmLen]))
+			row = append(row, params.columnIndex(lvIndex, &lvHashes[testPmLen]))
 		}
 		// randomly duplicate some entries
 		for i := 0; i < testPmLen; i++ {