eth/protocols/snap: skip retrieval for completed storages (#29378)

* eth/protocols/snap: skip retrieval for completed storages * eth/protocols/snap: address comments from peter * eth/protocols/snap: add comments
2024-04-10 17:02:45 +08:00 · 2024-04-10 17:02:45 +08:00 · 5dcf5032b5
parent 2bd6bd01d2
commit 5dcf5032b5
3 changed files with 281 additions and 20 deletions
--- a/eth/protocols/snap/metrics.go
+++ b/eth/protocols/snap/metrics.go
@ -54,4 +54,9 @@ var (
 	// skipStorageHealingGauge is the metric to track how many storages are retrieved
 	// in multiple requests but healing is not necessary.
 	skipStorageHealingGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/noheal", nil)
+
+	// largeStorageDiscardGauge is the metric to track how many chunked storages are
+	// discarded during the snap sync.
+	largeStorageDiscardGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/chunk/discard", nil)
+	largeStorageResumedGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/chunk/resume", nil)
 )
--- a/eth/protocols/snap/progress_test.go
+++ b/eth/protocols/snap/progress_test.go
@ -0,0 +1,154 @@
+// Copyright 2024 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snap
+
+import (
+	"encoding/json"
+	"testing"
+
+	"github.com/ethereum/go-ethereum/common"
+)
+
+// Legacy sync progress definitions
+type legacyStorageTask struct {
+	Next common.Hash // Next account to sync in this interval
+	Last common.Hash // Last account to sync in this interval
+}
+
+type legacyAccountTask struct {
+	Next     common.Hash                          // Next account to sync in this interval
+	Last     common.Hash                          // Last account to sync in this interval
+	SubTasks map[common.Hash][]*legacyStorageTask // Storage intervals needing fetching for large contracts
+}
+
+type legacyProgress struct {
+	Tasks []*legacyAccountTask // The suspended account tasks (contract tasks within)
+}
+
+func compareProgress(a legacyProgress, b SyncProgress) bool {
+	if len(a.Tasks) != len(b.Tasks) {
+		return false
+	}
+	for i := 0; i < len(a.Tasks); i++ {
+		if a.Tasks[i].Next != b.Tasks[i].Next {
+			return false
+		}
+		if a.Tasks[i].Last != b.Tasks[i].Last {
+			return false
+		}
+		// new fields are not checked here
+
+		if len(a.Tasks[i].SubTasks) != len(b.Tasks[i].SubTasks) {
+			return false
+		}
+		for addrHash, subTasksA := range a.Tasks[i].SubTasks {
+			subTasksB, ok := b.Tasks[i].SubTasks[addrHash]
+			if !ok || len(subTasksB) != len(subTasksA) {
+				return false
+			}
+			for j := 0; j < len(subTasksA); j++ {
+				if subTasksA[j].Next != subTasksB[j].Next {
+					return false
+				}
+				if subTasksA[j].Last != subTasksB[j].Last {
+					return false
+				}
+			}
+		}
+	}
+	return true
+}
+
+func makeLegacyProgress() legacyProgress {
+	return legacyProgress{
+		Tasks: []*legacyAccountTask{
+			{
+				Next: common.Hash{},
+				Last: common.Hash{0x77},
+				SubTasks: map[common.Hash][]*legacyStorageTask{
+					common.Hash{0x1}: {
+						{
+							Next: common.Hash{},
+							Last: common.Hash{0xff},
+						},
+					},
+				},
+			},
+			{
+				Next: common.Hash{0x88},
+				Last: common.Hash{0xff},
+			},
+		},
+	}
+}
+
+func convertLegacy(legacy legacyProgress) SyncProgress {
+	var progress SyncProgress
+	for i, task := range legacy.Tasks {
+		subTasks := make(map[common.Hash][]*storageTask)
+		for owner, list := range task.SubTasks {
+			var cpy []*storageTask
+			for i := 0; i < len(list); i++ {
+				cpy = append(cpy, &storageTask{
+					Next: list[i].Next,
+					Last: list[i].Last,
+				})
+			}
+			subTasks[owner] = cpy
+		}
+		accountTask := &accountTask{
+			Next:     task.Next,
+			Last:     task.Last,
+			SubTasks: subTasks,
+		}
+		if i == 0 {
+			accountTask.StorageCompleted = []common.Hash{{0xaa}, {0xbb}} // fulfill new fields
+		}
+		progress.Tasks = append(progress.Tasks, accountTask)
+	}
+	return progress
+}
+
+func TestSyncProgressCompatibility(t *testing.T) {
+	// Decode serialized bytes of legacy progress, backward compatibility
+	legacy := makeLegacyProgress()
+	blob, err := json.Marshal(legacy)
+	if err != nil {
+		t.Fatalf("Failed to marshal progress %v", err)
+	}
+	var dec SyncProgress
+	if err := json.Unmarshal(blob, &dec); err != nil {
+		t.Fatalf("Failed to unmarshal progress %v", err)
+	}
+	if !compareProgress(legacy, dec) {
+		t.Fatal("sync progress is not backward compatible")
+	}
+
+	// Decode serialized bytes of new format progress
+	progress := convertLegacy(legacy)
+	blob, err = json.Marshal(progress)
+	if err != nil {
+		t.Fatalf("Failed to marshal progress %v", err)
+	}
+	var legacyDec legacyProgress
+	if err := json.Unmarshal(blob, &legacyDec); err != nil {
+		t.Fatalf("Failed to unmarshal progress %v", err)
+	}
+	if !compareProgress(legacyDec, progress) {
+		t.Fatal("sync progress is not forward compatible")
+	}
+}
--- a/eth/protocols/snap/sync.go
+++ b/eth/protocols/snap/sync.go
@ -295,11 +295,19 @@ type bytecodeHealResponse struct {

 // accountTask represents the sync task for a chunk of the account snapshot.
 type accountTask struct {
-	// These fields get serialized to leveldb on shutdown
+	// These fields get serialized to key-value store on shutdown
 	Next     common.Hash                    // Next account to sync in this interval
 	Last     common.Hash                    // Last account to sync in this interval
 	SubTasks map[common.Hash][]*storageTask // Storage intervals needing fetching for large contracts

+	// This is a list of account hashes whose storage are already completed
+	// in this cycle. This field is newly introduced in v1.14 and will be
+	// empty if the task is resolved from legacy progress data. Furthermore,
+	// this additional field will be ignored by legacy Geth. The only side
+	// effect is that these contracts might be resynced in the new cycle,
+	// retaining the legacy behavior.
+	StorageCompleted []common.Hash `json:",omitempty"`
+
 	// These fields are internals used during runtime
 	req  *accountRequest  // Pending request to fill this task
 	res  *accountResponse // Validate response filling this task
@ -311,6 +319,7 @@ type accountTask struct {

 	codeTasks      map[common.Hash]struct{}    // Code hashes that need retrieval
 	stateTasks     map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval
+	stateCompleted map[common.Hash]struct{}    // Account hashes whose storage have been completed

 	genBatch ethdb.Batch     // Batch used by the node generator
 	genTrie  *trie.StackTrie // Node generator from storage slots
@ -318,6 +327,30 @@ type accountTask struct {
 	done bool // Flag whether the task can be removed
 }

+// activeSubTasks returns the set of storage tasks covered by the current account
+// range. Normally this would be the entire subTask set, but on a sync interrupt
+// and later resume it can happen that a shorter account range is retrieved. This
+// method ensures that we only start up the subtasks covered by the latest account
+// response.
+//
+// Nil is returned if the account range is empty.
+func (task *accountTask) activeSubTasks() map[common.Hash][]*storageTask {
+	if len(task.res.hashes) == 0 {
+		return nil
+	}
+	var (
+		tasks = make(map[common.Hash][]*storageTask)
+		last  = task.res.hashes[len(task.res.hashes)-1]
+	)
+	for hash, subTasks := range task.SubTasks {
+		subTasks := subTasks // closure
+		if hash.Cmp(last) <= 0 {
+			tasks[hash] = subTasks
+		}
+	}
+	return tasks
+}
+
 // storageTask represents the sync task for a chunk of the storage snapshot.
 type storageTask struct {
 	Next common.Hash // Next account to sync in this interval
@ -745,6 +778,14 @@ func (s *Syncer) loadSyncStatus() {
 			for _, task := range s.tasks {
 				task := task // closure for task.genBatch in the stacktrie writer callback

+				// Restore the completed storages
+				task.stateCompleted = make(map[common.Hash]struct{})
+				for _, hash := range task.StorageCompleted {
+					task.stateCompleted[hash] = struct{}{}
+				}
+				task.StorageCompleted = nil
+
+				// Allocate batch for account trie generation
 				task.genBatch = ethdb.HookedBatch{
 					Batch: s.db.NewBatch(),
 					OnPut: func(key []byte, value []byte) {
@ -767,6 +808,8 @@ func (s *Syncer) loadSyncStatus() {
 					options = options.WithSkipBoundary(task.Next != (common.Hash{}), task.Last != common.MaxHash, boundaryAccountNodesGauge)
 				}
 				task.genTrie = trie.NewStackTrie(options)
+
+				// Restore leftover storage tasks
 				for accountHash, subtasks := range task.SubTasks {
 					for _, subtask := range subtasks {
 						subtask := subtask // closure for subtask.genBatch in the stacktrie writer callback
@ -865,6 +908,7 @@ func (s *Syncer) loadSyncStatus() {
 			Last:           last,
 			SubTasks:       make(map[common.Hash][]*storageTask),
 			genBatch:       batch,
+			stateCompleted: make(map[common.Hash]struct{}),
 			genTrie:        trie.NewStackTrie(options),
 		})
 		log.Debug("Created account sync task", "from", next, "last", last)
@ -886,6 +930,14 @@ func (s *Syncer) saveSyncStatus() {
 				}
 			}
 		}
+		// Save the account hashes of completed storage.
+		task.StorageCompleted = make([]common.Hash, 0, len(task.stateCompleted))
+		for hash := range task.stateCompleted {
+			task.StorageCompleted = append(task.StorageCompleted, hash)
+		}
+		if len(task.StorageCompleted) > 0 {
+			log.Debug("Leftover completed storages", "number", len(task.StorageCompleted), "next", task.Next, "last", task.Last)
+		}
 	}
 	// Store the actual progress markers
 	progress := &SyncProgress{
@ -970,6 +1022,10 @@ func (s *Syncer) cleanStorageTasks() {
 			delete(task.SubTasks, account)
 			task.pend--

+			// Mark the state as complete to prevent resyncing, regardless
+			// if state healing is necessary.
+			task.stateCompleted[account] = struct{}{}
+
 			// If this was the last pending task, forward the account task
 			if task.pend == 0 {
 				s.forwardAccountTask(task)
@ -1209,7 +1265,8 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st
 			continue
 		}
 		// Skip tasks that are already retrieving (or done with) all small states
-		if len(task.SubTasks) == 0 && len(task.stateTasks) == 0 {
+		storageTasks := task.activeSubTasks()
+		if len(storageTasks) == 0 && len(task.stateTasks) == 0 {
 			continue
 		}
 		// Task pending retrieval, try to find an idle peer. If no such peer
@ -1253,7 +1310,7 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st
 			roots    = make([]common.Hash, 0, storageSets)
 			subtask  *storageTask
 		)
-		for account, subtasks := range task.SubTasks {
+		for account, subtasks := range storageTasks {
 			for _, st := range subtasks {
 				// Skip any subtasks already filling
 				if st.req != nil {
@ -1850,11 +1907,11 @@ func (s *Syncer) processAccountResponse(res *accountResponse) {
 	res.task.res = res

 	// Ensure that the response doesn't overflow into the subsequent task
-	last := res.task.Last.Big()
+	lastBig := res.task.Last.Big()
 	for i, hash := range res.hashes {
 		// Mark the range complete if the last is already included.
 		// Keep iteration to delete the extra states if exists.
-		cmp := hash.Big().Cmp(last)
+		cmp := hash.Big().Cmp(lastBig)
 		if cmp == 0 {
 			res.cont = false
 			continue
@ -1890,7 +1947,21 @@ func (s *Syncer) processAccountResponse(res *accountResponse) {
 		}
 		// Check if the account is a contract with an unknown storage trie
 		if account.Root != types.EmptyRootHash {
+			// If the storage was already retrieved in the last cycle, there's no need
+			// to resync it again, regardless of whether the storage root is consistent
+			// or not.
+			if _, exist := res.task.stateCompleted[res.hashes[i]]; exist {
+				// The leftover storage tasks are not expected, unless system is
+				// very wrong.
+				if _, ok := res.task.SubTasks[res.hashes[i]]; ok {
+					panic(fmt.Errorf("unexpected leftover storage tasks, owner: %x", res.hashes[i]))
+				}
+				// Mark the healing tag if storage root node is inconsistent, or
+				// it's non-existent due to storage chunking.
 				if !rawdb.HasTrieNode(s.db, res.hashes[i], nil, account.Root, s.scheme) {
+					res.task.needHeal[i] = true
+				}
+			} else {
 				// If there was a previous large state retrieval in progress,
 				// don't restart it from scratch. This happens if a sync cycle
 				// is interrupted and resumed later. However, *do* update the
@ -1902,7 +1973,12 @@ func (s *Syncer) processAccountResponse(res *accountResponse) {
 					}
 					res.task.needHeal[i] = true
 					resumed[res.hashes[i]] = struct{}{}
+					largeStorageResumedGauge.Inc(1)
 				} else {
+					// It's possible that in the hash scheme, the storage, along
+					// with the trie nodes of the given root, is already present
+					// in the database. Schedule the storage task anyway to simplify
+					// the logic here.
 					res.task.stateTasks[res.hashes[i]] = account.Root
 				}
 				res.task.needState[i] = true
@ -1910,13 +1986,29 @@ func (s *Syncer) processAccountResponse(res *accountResponse) {
 			}
 		}
 	}
-	// Delete any subtasks that have been aborted but not resumed. This may undo
-	// some progress if a new peer gives us less accounts than an old one, but for
-	// now we have to live with that.
+	// Delete any subtasks that have been aborted but not resumed. It's essential
+	// as the corresponding contract might be self-destructed in this cycle(it's
+	// no longer possible in ethereum as self-destruction is disabled in Cancun
+	// Fork, but the condition is still necessary for other networks).
+	//
+	// Keep the leftover storage tasks if they are not covered by the responded
+	// account range which should be picked up in next account wave.
+	if len(res.hashes) > 0 {
+		// The hash of last delivered account in the response
+		last := res.hashes[len(res.hashes)-1]
 		for hash := range res.task.SubTasks {
+			// TODO(rjl493456442) degrade the log level before merging.
+			if hash.Cmp(last) > 0 {
+				log.Info("Keeping suspended storage retrieval", "account", hash)
+				continue
+			}
+			// TODO(rjl493456442) degrade the log level before merging.
+			// It should never happen in ethereum.
 			if _, ok := resumed[hash]; !ok {
-			log.Debug("Aborting suspended storage retrieval", "account", hash)
+				log.Error("Aborting suspended storage retrieval", "account", hash)
 				delete(res.task.SubTasks, hash)
+				largeStorageDiscardGauge.Inc(1)
+			}
 		}
 	}
 	// If the account range contained no contracts, or all have been fully filled
@ -2014,6 +2106,7 @@ func (s *Syncer) processStorageResponse(res *storageResponse) {
 			if res.subTask == nil && res.mainTask.needState[j] && (i < len(res.hashes)-1 || !res.cont) {
 				res.mainTask.needState[j] = false
 				res.mainTask.pend--
+				res.mainTask.stateCompleted[account] = struct{}{} // mark it as completed
 				smallStorageGauge.Inc(1)
 			}
 			// If the last contract was chunked, mark it as needing healing
@ -2409,10 +2502,19 @@ func (s *Syncer) forwardAccountTask(task *accountTask) {
 			return
 		}
 		task.Next = incHash(hash)
+
+		// Remove the completion flag once the account range is pushed
+		// forward. The leftover accounts will be skipped in the next
+		// cycle.
+		delete(task.stateCompleted, hash)
 	}
 	// All accounts marked as complete, track if the entire task is done
 	task.done = !res.cont

+	// Error out if there is any leftover completion flag.
+	if task.done && len(task.stateCompleted) != 0 {
+		panic(fmt.Errorf("storage completion flags should be emptied, %d left", len(task.stateCompleted)))
+	}
 	// Stack trie could have generated trie nodes, push them to disk (we need to
 	// flush after finalizing task.done. It's fine even if we crash and lose this
 	// write as it will only cause more data to be downloaded during heal.