core: fix snapshot missing when recovery from crash (#23496)
The cause is that writing a known block only checks for the block and its state, not its snapshot, which can leave a gap between the newest snapshot and the newest block state. New blocks that would have repaired the snapshot were then ignored because their state was already known. Co-authored-by: Gary Rong <garyrong0905@gmail.com> Co-authored-by: Martin Holst Swende <martin@swende.se>
This commit is contained in:
parent
c2e64db3b1
commit
c576fa153a
|
@ -1436,11 +1436,10 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er
|
|||
|
||||
// Peek the error for the first block to decide the directing import logic
|
||||
it := newInsertIterator(chain, results, bc.validator)
|
||||
|
||||
block, err := it.next()
|
||||
|
||||
// Left-trim all the known blocks
|
||||
if err == ErrKnownBlock {
|
||||
// Left-trim all the known blocks that don't need to build snapshot
|
||||
if bc.skipBlock(err, it) {
|
||||
// First block (and state) is known
|
||||
// 1. We did a roll-back, and should now do a re-import
|
||||
// 2. The block is stored as a sidechain, and is lying about its stateroot, and passes a stateroot
|
||||
|
@ -1451,7 +1450,7 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er
|
|||
localTd = bc.GetTd(current.Hash(), current.NumberU64())
|
||||
externTd = bc.GetTd(block.ParentHash(), block.NumberU64()-1) // The first block can't be nil
|
||||
)
|
||||
for block != nil && err == ErrKnownBlock {
|
||||
for block != nil && bc.skipBlock(err, it) {
|
||||
externTd = new(big.Int).Add(externTd, block.Difficulty())
|
||||
if localTd.Cmp(externTd) < 0 {
|
||||
break
|
||||
|
@ -1469,7 +1468,7 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er
|
|||
// When node runs a fast sync again, it can re-import a batch of known blocks via
|
||||
// `insertChain` while a part of them have higher total difficulty than current
|
||||
// head full block(new pivot point).
|
||||
for block != nil && err == ErrKnownBlock {
|
||||
for block != nil && bc.skipBlock(err, it) {
|
||||
log.Debug("Writing previously known block", "number", block.Number(), "hash", block.Hash())
|
||||
if err := bc.writeKnownBlock(block); err != nil {
|
||||
return it.index, err
|
||||
|
@ -1501,8 +1500,10 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er
|
|||
// If there are any still remaining, mark as ignored
|
||||
return it.index, err
|
||||
|
||||
// Some other error occurred, abort
|
||||
case err != nil:
|
||||
// Some other error(except ErrKnownBlock) occurred, abort.
|
||||
// ErrKnownBlock is allowed here since some known blocks
|
||||
// still need re-execution to generate snapshots that are missing
|
||||
case err != nil && !errors.Is(err, ErrKnownBlock):
|
||||
bc.futureBlocks.Remove(block.Hash())
|
||||
stats.ignored += len(it.chain)
|
||||
bc.reportBlock(block, nil, err)
|
||||
|
@ -1520,7 +1521,7 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er
|
|||
}
|
||||
}()
|
||||
|
||||
for ; block != nil && err == nil || err == ErrKnownBlock; block, err = it.next() {
|
||||
for ; block != nil && err == nil || errors.Is(err, ErrKnownBlock); block, err = it.next() {
|
||||
// If the chain is terminating, stop processing blocks
|
||||
if bc.insertStopped() {
|
||||
log.Debug("Abort during block processing")
|
||||
|
@ -1535,8 +1536,9 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er
|
|||
// Clique blocks where they can share state among each other, so importing an
|
||||
// older block might complete the state of the subsequent one. In this case,
|
||||
// just skip the block (we already validated it once fully (and crashed), since
|
||||
// its header and body was already in the database).
|
||||
if err == ErrKnownBlock {
|
||||
// its header and body was already in the database). But if the corresponding
|
||||
// snapshot layer is missing, forcibly rerun the execution to build it.
|
||||
if bc.skipBlock(err, it) {
|
||||
logger := log.Debug
|
||||
if bc.chainConfig.Clique == nil {
|
||||
logger = log.Warn
|
||||
|
@ -2013,6 +2015,47 @@ func (bc *BlockChain) futureBlocksLoop() {
|
|||
}
|
||||
}
|
||||
|
||||
// skipBlock returns 'true', if the block being imported can be skipped over, meaning
|
||||
// that the block does not need to be processed but can be considered already fully 'done'.
|
||||
func (bc *BlockChain) skipBlock(err error, it *insertIterator) bool {
|
||||
// We can only ever bypass processing if the only error returned by the validator
|
||||
// is ErrKnownBlock, which means all checks passed, but we already have the block
|
||||
// and state.
|
||||
if !errors.Is(err, ErrKnownBlock) {
|
||||
return false
|
||||
}
|
||||
// If we're not using snapshots, we can skip this, since we have both block
|
||||
// and (trie-) state
|
||||
if bc.snaps == nil {
|
||||
return true
|
||||
}
|
||||
var (
|
||||
header = it.current() // header can't be nil
|
||||
parentRoot common.Hash
|
||||
)
|
||||
// If we also have the snapshot-state, we can skip the processing.
|
||||
if bc.snaps.Snapshot(header.Root) != nil {
|
||||
return true
|
||||
}
|
||||
// In this case, we have the trie-state but not snapshot-state. If the parent
|
||||
// snapshot-state exists, we need to process this in order to not get a gap
|
||||
// in the snapshot layers.
|
||||
// Resolve parent block
|
||||
if parent := it.previous(); parent != nil {
|
||||
parentRoot = parent.Root
|
||||
} else if parent = bc.GetHeaderByHash(header.ParentHash); parent != nil {
|
||||
parentRoot = parent.Root
|
||||
}
|
||||
if parentRoot == (common.Hash{}) {
|
||||
return false // Theoretically impossible case
|
||||
}
|
||||
// Parent is also missing snapshot: we can skip this. Otherwise process.
|
||||
if bc.snaps.Snapshot(parentRoot) == nil {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// maintainTxIndex is responsible for the construction and deletion of the
|
||||
// transaction index.
|
||||
//
|
||||
|
|
|
@ -150,6 +150,14 @@ func (it *insertIterator) previous() *types.Header {
|
|||
return it.chain[it.index-1].Header()
|
||||
}
|
||||
|
||||
// current returns the current header that is being processed, or nil.
|
||||
func (it *insertIterator) current() *types.Header {
|
||||
if it.index == -1 || it.index >= len(it.chain) {
|
||||
return nil
|
||||
}
|
||||
return it.chain[it.index].Header()
|
||||
}
|
||||
|
||||
// first returns the first block in the iterator.
|
||||
func (it *insertIterator) first() *types.Block {
|
||||
return it.chain[0]
|
||||
|
|
|
@ -1863,3 +1863,124 @@ func testRepair(t *testing.T, tt *rewindTest, snapshots bool) {
|
|||
t.Errorf("Frozen block count mismatch: have %d, want %d", frozen, tt.expFrozen)
|
||||
}
|
||||
}
|
||||
|
||||
// TestIssue23496 tests scenario described in https://github.com/ethereum/go-ethereum/pull/23496#issuecomment-926393893
|
||||
// Credits to @zzyalbert for finding the issue.
|
||||
//
|
||||
// Local chain owns these blocks:
|
||||
// G B1 B2 B3 B4
|
||||
// B1: state committed
|
||||
// B2: snapshot disk layer
|
||||
// B3: state committed
|
||||
// B4: head block
|
||||
//
|
||||
// Crash happens without fully persisting snapshot and in-memory states,
|
||||
// chain rewinds itself to the B1 (skip B3 in order to recover snapshot)
|
||||
// In this case the snapshot layer of B3 is not created because of existent
|
||||
// state.
|
||||
func TestIssue23496(t *testing.T) {
|
||||
// It's hard to follow the test case, visualize the input
|
||||
//log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true))))
|
||||
|
||||
// Create a temporary persistent database
|
||||
datadir, err := ioutil.TempDir("", "")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create temporary datadir: %v", err)
|
||||
}
|
||||
os.RemoveAll(datadir)
|
||||
|
||||
db, err := rawdb.NewLevelDBDatabaseWithFreezer(datadir, 0, 0, datadir, "", false)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create persistent database: %v", err)
|
||||
}
|
||||
defer db.Close() // Might double close, should be fine
|
||||
|
||||
// Initialize a fresh chain
|
||||
var (
|
||||
genesis = (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db)
|
||||
engine = ethash.NewFullFaker()
|
||||
config = &CacheConfig{
|
||||
TrieCleanLimit: 256,
|
||||
TrieDirtyLimit: 256,
|
||||
TrieTimeLimit: 5 * time.Minute,
|
||||
SnapshotLimit: 256,
|
||||
SnapshotWait: true,
|
||||
}
|
||||
)
|
||||
chain, err := NewBlockChain(db, config, params.AllEthashProtocolChanges, engine, vm.Config{}, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create chain: %v", err)
|
||||
}
|
||||
blocks, _ := GenerateChain(params.TestChainConfig, genesis, engine, rawdb.NewMemoryDatabase(), 4, func(i int, b *BlockGen) {
|
||||
b.SetCoinbase(common.Address{0x02})
|
||||
b.SetDifficulty(big.NewInt(1000000))
|
||||
})
|
||||
|
||||
// Insert block B1 and commit the state into disk
|
||||
if _, err := chain.InsertChain(blocks[:1]); err != nil {
|
||||
t.Fatalf("Failed to import canonical chain start: %v", err)
|
||||
}
|
||||
chain.stateCache.TrieDB().Commit(blocks[0].Root(), true, nil)
|
||||
|
||||
// Insert block B2 and commit the snapshot into disk
|
||||
if _, err := chain.InsertChain(blocks[1:2]); err != nil {
|
||||
t.Fatalf("Failed to import canonical chain start: %v", err)
|
||||
}
|
||||
if err := chain.snaps.Cap(blocks[1].Root(), 0); err != nil {
|
||||
t.Fatalf("Failed to flatten snapshots: %v", err)
|
||||
}
|
||||
|
||||
// Insert block B3 and commit the state into disk
|
||||
if _, err := chain.InsertChain(blocks[2:3]); err != nil {
|
||||
t.Fatalf("Failed to import canonical chain start: %v", err)
|
||||
}
|
||||
chain.stateCache.TrieDB().Commit(blocks[2].Root(), true, nil)
|
||||
|
||||
// Insert the remaining blocks
|
||||
if _, err := chain.InsertChain(blocks[3:]); err != nil {
|
||||
t.Fatalf("Failed to import canonical chain tail: %v", err)
|
||||
}
|
||||
|
||||
// Pull the plug on the database, simulating a hard crash
|
||||
db.Close()
|
||||
|
||||
// Start a new blockchain back up and see where the repair leads us
|
||||
db, err = rawdb.NewLevelDBDatabaseWithFreezer(datadir, 0, 0, datadir, "", false)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to reopen persistent database: %v", err)
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
chain, err = NewBlockChain(db, nil, params.AllEthashProtocolChanges, engine, vm.Config{}, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to recreate chain: %v", err)
|
||||
}
|
||||
defer chain.Stop()
|
||||
|
||||
if head := chain.CurrentHeader(); head.Number.Uint64() != uint64(4) {
|
||||
t.Errorf("Head header mismatch: have %d, want %d", head.Number, 4)
|
||||
}
|
||||
if head := chain.CurrentFastBlock(); head.NumberU64() != uint64(4) {
|
||||
t.Errorf("Head fast block mismatch: have %d, want %d", head.NumberU64(), uint64(4))
|
||||
}
|
||||
if head := chain.CurrentBlock(); head.NumberU64() != uint64(1) {
|
||||
t.Errorf("Head block mismatch: have %d, want %d", head.NumberU64(), uint64(1))
|
||||
}
|
||||
|
||||
// Reinsert B2-B4
|
||||
if _, err := chain.InsertChain(blocks[1:]); err != nil {
|
||||
t.Fatalf("Failed to import canonical chain tail: %v", err)
|
||||
}
|
||||
if head := chain.CurrentHeader(); head.Number.Uint64() != uint64(4) {
|
||||
t.Errorf("Head header mismatch: have %d, want %d", head.Number, 4)
|
||||
}
|
||||
if head := chain.CurrentFastBlock(); head.NumberU64() != uint64(4) {
|
||||
t.Errorf("Head fast block mismatch: have %d, want %d", head.NumberU64(), uint64(4))
|
||||
}
|
||||
if head := chain.CurrentBlock(); head.NumberU64() != uint64(4) {
|
||||
t.Errorf("Head block mismatch: have %d, want %d", head.NumberU64(), uint64(4))
|
||||
}
|
||||
if layer := chain.Snapshots().Snapshot(blocks[2].Root()); layer == nil {
|
||||
t.Error("Failed to regenerate the snapshot of known state")
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue