core: improve chain rewinding mechanism (#29196)

* core: improve chain rewinding mechanism

* core: address comment

* core: periodically print progress log

* core: address comments

* core: fix comment

* core: fix rewinding in path

* core: fix beyondRoot condition

* core: polish code

* core: polish code

* core: extend code comment

* core: stop rewinding if chain is gapped or genesis is reached

* core: fix broken tests
This commit is contained in:
rjl493456442 2024-03-13 19:39:30 +08:00 committed by GitHub
parent b80643b737
commit c170fa277c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 183 additions and 55 deletions

View File

@ -616,6 +616,172 @@ func (bc *BlockChain) SetSafe(header *types.Header) {
}
}
// rewindHashHead implements the logic of rewindHead in the context of hash scheme.
//
// Starting at head, the chain is walked backwards along the parent links until
// a block is found whose state is available (bc.HasState) and, if root is not
// empty, which sits at or below the block carrying that root.
//
// It returns the header to be used as the new chain head together with the
// number of the block whose state root matched root. The number stays zero if
// no root was requested or if the root was never encountered.
//
// The genesis header is returned if the search limit is undershot or if a
// parent header cannot be found (gapped chain). The walk also stops as soon as
// it arrives at block zero.
func (bc *BlockChain) rewindHashHead(head *types.Header, root common.Hash) (*types.Header, uint64) {
	var (
		limit      uint64                             // The oldest block that will be searched for this rewinding
		beyondRoot = root == common.Hash{}            // Flag whether we're beyond the requested root (no root, always true)
		pivot      = rawdb.ReadLastPivotNumber(bc.db) // Associated block number of pivot point state
		rootNumber uint64                             // Associated block number of requested root

		start  = time.Now() // Timestamp the rewinding is restarted
		logged = time.Now() // Timestamp last progress log was printed
	)
	// The oldest block to be searched is determined by the pivot block or a constant
	// searching threshold. The rationale behind this is as follows:
	//
	// - Snap sync is selected if the pivot block is available. The earliest available
	//   state is the pivot block itself, so there is no sense in going further back.
	//
	// - Full sync is selected if the pivot block does not exist. The hash database
	//   periodically flushes the state to disk, and the used searching threshold is
	//   considered sufficient to find a persistent state, even for the testnet. It
	//   might be not enough for a chain that is nearly empty. In the worst case,
	//   the entire chain is reset to genesis, and snap sync is re-enabled on top,
	//   which is still acceptable.
	if pivot != nil {
		limit = *pivot
	} else if head.Number.Uint64() > params.FullImmutabilityThreshold {
		limit = head.Number.Uint64() - params.FullImmutabilityThreshold
	}
	for {
		// Log at trace level by default, but promote the message to info
		// level if more than eight seconds passed since the last promotion.
		logger := log.Trace
		if time.Since(logged) > time.Second*8 {
			logged = time.Now()
			logger = log.Info
		}
		logger("Block state missing, rewinding further", "number", head.Number, "hash", head.Hash(), "elapsed", common.PrettyDuration(time.Since(start)))

		// If a root threshold was requested but not yet crossed, check
		if !beyondRoot && head.Root == root {
			beyondRoot, rootNumber = true, head.Number.Uint64()
		}
		// If search limit is reached, return the genesis block as the
		// new chain head.
		if head.Number.Uint64() < limit {
			log.Info("Rewinding limit reached, resetting to genesis", "number", head.Number, "hash", head.Hash(), "limit", limit)
			return bc.genesisBlock.Header(), rootNumber
		}
		// If the associated state is not reachable, continue searching
		// backwards until an available state is found.
		if !bc.HasState(head.Root) {
			// If the chain is gapped in the middle, return the genesis
			// block as the new chain head.
			parent := bc.GetHeader(head.ParentHash, head.Number.Uint64()-1)
			if parent == nil {
				log.Error("Missing block in the middle, resetting to genesis", "number", head.Number.Uint64()-1, "hash", head.ParentHash)
				return bc.genesisBlock.Header(), rootNumber
			}
			head = parent

			// If the genesis block is reached, stop searching.
			if head.Number.Uint64() == 0 {
				log.Info("Genesis block reached", "number", head.Number, "hash", head.Hash())
				return head, rootNumber
			}
			continue // keep rewinding
		}
		// Once the available state is found, ensure that the requested root
		// has already been crossed. If not, continue rewinding.
		if beyondRoot || head.Number.Uint64() == 0 {
			log.Info("Rewound to block with state", "number", head.Number, "hash", head.Hash())
			return head, rootNumber
		}
		log.Debug("Skipping block with threshold state", "number", head.Number, "hash", head.Hash(), "root", head.Root)

		// Keep rewinding. The block number is non-zero at this point, so the
		// subtraction cannot wrap. A missing parent is treated as a gap in the
		// chain, exactly like in the stateless branch above, instead of being
		// dereferenced in the next iteration.
		parent := bc.GetHeader(head.ParentHash, head.Number.Uint64()-1)
		if parent == nil {
			log.Error("Missing block in the middle, resetting to genesis", "number", head.Number.Uint64()-1, "hash", head.ParentHash)
			return bc.genesisBlock.Header(), rootNumber
		}
		head = parent
	}
}
// rewindPathHead implements the logic of rewindHead in the context of path scheme.
//
// The chain is walked from head towards genesis along the parent links. The
// walk ends at the first block that is at or below the requested root (if one
// was given) and whose state is either present or recoverable; a state that is
// only recoverable is recovered through the trie database before returning.
//
// The second return value is the number of the block whose state root equals
// root, or zero if that block was never encountered. The genesis header is
// returned if the pivot block is reached or a parent header is missing, and
// the walk also stops once it arrives at block zero.
func (bc *BlockChain) rewindPathHead(head *types.Header, root common.Hash) (*types.Header, uint64) {
	// Block number of the pivot block, may be nil.
	pivot := rawdb.ReadLastPivotNumber(bc.db)

	// Block number associated with the requested root.
	var rootNumber uint64

	// crossed tells whether the requested root has been passed already.
	// With an empty root there is nothing to pass, so it starts as true.
	crossed := root == common.Hash{}

	// unattainable tells that the requested state is neither available
	// nor recoverable, so searching for it is pointless.
	unattainable := !bc.HasState(root) && !bc.stateRecoverable(root)

	began := time.Now()   // Time at which this rewinding started
	lastLog := time.Now() // Time at which the last info-level progress log was printed

	// Step the head backwards until a usable state is found.
	for {
		// Progress is printed at trace level, and promoted to info level
		// whenever more than eight seconds passed since the last promotion.
		report := log.Trace
		if time.Since(lastLog) > 8*time.Second {
			lastLog, report = time.Now(), log.Info
		}
		report("Block state missing, rewinding further", "number", head.Number, "hash", head.Hash(), "elapsed", common.PrettyDuration(time.Since(began)))

		number := head.Number.Uint64()

		if !crossed {
			switch {
			case head.Root == root:
				// The block carrying the requested root is reached.
				crossed, rootNumber = true, number

			case unattainable && bc.HasState(head.Root):
				// An available state showed up before the requested root
				// was crossed, and the requested state can't be obtained
				// anyway. Give up searching for it.
				crossed = true
				log.Info("Disable the search for unattainable state", "root", root)
			}
		}
		// With the root out of the way, accept the first block whose state
		// is available or can be recovered.
		if crossed && (bc.HasState(head.Root) || bc.stateRecoverable(head.Root)) {
			// Recover the target state if it is not available yet.
			if !bc.HasState(head.Root) {
				if err := bc.triedb.Recover(head.Root); err != nil {
					log.Crit("Failed to rollback state", "err", err)
				}
			}
			log.Info("Rewound to block with state", "number", head.Number, "hash", head.Hash())
			return head, rootNumber
		}
		// Reaching the pivot block resets the chain to genesis. In theory
		// a persistent state must exist at or before the pivot block; the
		// check merely prevents an endless rewinding towards the genesis.
		if pivot != nil && number <= *pivot {
			log.Info("Pivot block reached, resetting to genesis", "number", head.Number, "hash", head.Hash())
			return bc.genesisBlock.Header(), rootNumber
		}
		// A missing parent means the chain is gapped in the middle, which
		// resets the chain to genesis as well.
		prev := bc.GetHeader(head.ParentHash, number-1)
		if prev == nil {
			log.Error("Missing block in the middle, resetting to genesis", "number", number-1, "hash", head.ParentHash)
			return bc.genesisBlock.Header(), rootNumber
		}
		// Nothing is left to search once block zero is reached.
		if prev.Number.Uint64() == 0 {
			log.Info("Genesis block reached", "number", prev.Number, "hash", prev.Hash())
			return prev, rootNumber
		}
		head = prev
	}
}
// rewindHead looks for an available state in the database and returns the
// block associated with it as the new head block.
//
// A non-empty root asks the rewind to go past the block with that state root,
// in which case the number of that block is returned as well. The root usually
// represents the state of the snapshot disk layer. If it turns out to be
// impassable, block number zero is returned, which signals that snapshot
// recovery is disabled and the whole snapshot should be auto-generated in case
// of head mismatch.
//
// The actual work is delegated to the scheme-specific implementation chosen
// by the scheme of the trie database.
func (bc *BlockChain) rewindHead(head *types.Header, root common.Hash) (*types.Header, uint64) {
	switch bc.triedb.Scheme() {
	case rawdb.PathScheme:
		return bc.rewindPathHead(head, root)
	default:
		return bc.rewindHashHead(head, root)
	}
}
// setHeadBeyondRoot rewinds the local chain to a new head with the extra condition
// that the rewind must pass the specified state root. This method is meant to be
// used when rewinding with snapshots enabled to ensure that we go back further than
@ -634,79 +800,40 @@ func (bc *BlockChain) setHeadBeyondRoot(head uint64, time uint64, root common.Ha
}
defer bc.chainmu.Unlock()
// Track the block number of the requested root hash
var rootNumber uint64 // (no root == always 0)
// Retrieve the last pivot block to short circuit rollbacks beyond it and the
// current freezer limit to start nuking id underflown
pivot := rawdb.ReadLastPivotNumber(bc.db)
frozen, _ := bc.db.Ancients()
var (
// Track the block number of the requested root hash
rootNumber uint64 // (no root == always 0)
// Retrieve the last pivot block to short circuit rollbacks beyond it
// and the current freezer limit to start nuking if it's underflown.
pivot = rawdb.ReadLastPivotNumber(bc.db)
)
updateFn := func(db ethdb.KeyValueWriter, header *types.Header) (*types.Header, bool) {
// Rewind the blockchain, ensuring we don't end up with a stateless head
// block. Note, depth equality is permitted to allow using SetHead as a
// chain reparation mechanism without deleting any data!
if currentBlock := bc.CurrentBlock(); currentBlock != nil && header.Number.Uint64() <= currentBlock.Number.Uint64() {
newHeadBlock := bc.GetBlock(header.Hash(), header.Number.Uint64())
if newHeadBlock == nil {
log.Error("Gap in the chain, rewinding to genesis", "number", header.Number, "hash", header.Hash())
newHeadBlock = bc.genesisBlock
} else {
// Block exists. Keep rewinding until either we find one with state
// or until we exceed the optional threshold root hash
beyondRoot := (root == common.Hash{}) // Flag whether we're beyond the requested root (no root, always true)
for {
// If a root threshold was requested but not yet crossed, check
if root != (common.Hash{}) && !beyondRoot && newHeadBlock.Root() == root {
beyondRoot, rootNumber = true, newHeadBlock.NumberU64()
}
if !bc.HasState(newHeadBlock.Root()) && !bc.stateRecoverable(newHeadBlock.Root()) {
log.Trace("Block state missing, rewinding further", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash())
if pivot == nil || newHeadBlock.NumberU64() > *pivot {
parent := bc.GetBlock(newHeadBlock.ParentHash(), newHeadBlock.NumberU64()-1)
if parent != nil {
newHeadBlock = parent
continue
}
log.Error("Missing block in the middle, aiming genesis", "number", newHeadBlock.NumberU64()-1, "hash", newHeadBlock.ParentHash())
newHeadBlock = bc.genesisBlock
} else {
log.Trace("Rewind passed pivot, aiming genesis", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash(), "pivot", *pivot)
newHeadBlock = bc.genesisBlock
}
}
if beyondRoot || newHeadBlock.NumberU64() == 0 {
if !bc.HasState(newHeadBlock.Root()) && bc.stateRecoverable(newHeadBlock.Root()) {
// Rewind to a block with recoverable state. If the state is
// missing, run the state recovery here.
if err := bc.triedb.Recover(newHeadBlock.Root()); err != nil {
log.Crit("Failed to rollback state", "err", err) // Shouldn't happen
}
log.Debug("Rewound to block with state", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash())
}
break
}
log.Debug("Skipping block with threshold state", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash(), "root", newHeadBlock.Root())
newHeadBlock = bc.GetBlock(newHeadBlock.ParentHash(), newHeadBlock.NumberU64()-1) // Keep rewinding
}
}
var newHeadBlock *types.Header
newHeadBlock, rootNumber = bc.rewindHead(header, root)
rawdb.WriteHeadBlockHash(db, newHeadBlock.Hash())
// Degrade the chain markers if they are explicitly reverted.
// In theory we should update all in-memory markers in the
// last step, however the direction of SetHead is from high
// to low, so it's safe to update in-memory markers directly.
bc.currentBlock.Store(newHeadBlock.Header())
headBlockGauge.Update(int64(newHeadBlock.NumberU64()))
bc.currentBlock.Store(newHeadBlock)
headBlockGauge.Update(int64(newHeadBlock.Number.Uint64()))
// The head state is missing, which is only possible in the path-based
// scheme. This situation occurs when the chain head is rewound below
// the pivot point. In this scenario, there is no possible recovery
// approach except for rerunning a snap sync. Do nothing here until the
// state syncer picks it up.
if !bc.HasState(newHeadBlock.Root()) {
log.Info("Chain is stateless, wait state sync", "number", newHeadBlock.Number(), "hash", newHeadBlock.Hash())
if !bc.HasState(newHeadBlock.Root) {
if newHeadBlock.Number.Uint64() != 0 {
log.Crit("Chain is stateless at a non-genesis block")
}
log.Info("Chain is stateless, wait state sync", "number", newHeadBlock.Number, "hash", newHeadBlock.Hash())
}
}
// Rewind the snap block in a simpleton way to the target head
@ -733,6 +860,7 @@ func (bc *BlockChain) setHeadBeyondRoot(head uint64, time uint64, root common.Ha
// intent afterwards is full block importing, delete the chain segment
// between the stateful-block and the sethead target.
var wipe bool
frozen, _ := bc.db.Ancients()
if headNumber+1 < frozen {
wipe = pivot == nil || headNumber >= *pivot
}