From 76700ac892f6fe3d7865fbe2f724626c72a04227 Mon Sep 17 00:00:00 2001 From: gary rong Date: Mon, 29 Mar 2021 17:09:29 +0800 Subject: [PATCH] core/state/pruner: move the compaction out of the pruning procedure (#22579) The main idea behind it is: the range compaction is very expensive and can take a few hours to finish. During this long procedure, a lot of exceptions can occur, e.g. - Geth is killed manually - Geth is killed because of a machine crash - etc In order to minimize the effect of the exceptions, the compaction is moved out of the pruning. That way, even if the compaction is not finished, the pruning is regarded as done. --- core/state/pruner/pruner.go | 68 ++++++++++++------------------------- 1 file changed, 22 insertions(+), 46 deletions(-) diff --git a/core/state/pruner/pruner.go b/core/state/pruner/pruner.go index 62cc7b0120..4d6e415511 100644 --- a/core/state/pruner/pruner.go +++ b/core/state/pruner/pruner.go @@ -113,7 +113,7 @@ func NewPruner(db ethdb.Database, datadir, trieCachePath string, bloomSize uint6 }, nil } -func prune(maindb ethdb.Database, stateBloom *stateBloom, middleStateRoots map[common.Hash]struct{}, start time.Time) error { +func prune(snaptree *snapshot.Tree, root common.Hash, maindb ethdb.Database, stateBloom *stateBloom, bloomPath string, middleStateRoots map[common.Hash]struct{}, start time.Time) error { // Delete all stale trie nodes in the disk. With the help of state bloom // the trie nodes(and codes) belong to the active state will be filtered // out. A very small part of stale tries will also be filtered because of @@ -186,6 +186,25 @@ func prune(maindb ethdb.Database, stateBloom *stateBloom, middleStateRoots map[c iter.Release() log.Info("Pruned state data", "nodes", count, "size", size, "elapsed", common.PrettyDuration(time.Since(pstart))) + // Pruning is done, now drop the "useless" layers from the snapshot. + // Firstly, flushing the target layer into the disk. 
After that all + // diff layers below the target will all be merged into the disk. + if err := snaptree.Cap(root, 0); err != nil { + return err + } + // Secondly, flushing the snapshot journal into the disk. All diff + // layers upon are dropped silently. Eventually the entire snapshot + // tree is converted into a single disk layer with the pruning target + // as the root. + if _, err := snaptree.Journal(root); err != nil { + return err + } + // Delete the state bloom, it marks the entire pruning procedure is + // finished. If any crashes or manual exit happens before this, + // `RecoverPruning` will pick it up in the next restarts to redo all + // the things. + os.RemoveAll(bloomPath) + // Start compactions, will remove the deleted data from the disk immediately. // Note for small pruning, the compaction is skipped. if count >= rangeCompactionThreshold { @@ -314,29 +333,7 @@ func (p *Pruner) Prune(root common.Hash) error { return err } log.Info("State bloom filter committed", "name", filterName) - - if err := prune(p.db, p.stateBloom, middleRoots, start); err != nil { - return err - } - // Pruning is done, now drop the "useless" layers from the snapshot. - // Firstly, flushing the target layer into the disk. After that all - // diff layers below the target will all be merged into the disk. - if err := p.snaptree.Cap(root, 0); err != nil { - return err - } - // Secondly, flushing the snapshot journal into the disk. All diff - // layers upon the target layer are dropped silently. Eventually the - // entire snapshot tree is converted into a single disk layer with - // the pruning target as the root. - if _, err := p.snaptree.Journal(root); err != nil { - return err - } - // Delete the state bloom, it marks the entire pruning procedure is - // finished. If any crashes or manual exit happens before this, - // `RecoverPruning` will pick it up in the next restarts to redo all - // the things. 
- os.RemoveAll(filterName) - return nil + return prune(p.snaptree, root, p.db, p.stateBloom, filterName, middleRoots, start) } // RecoverPruning will resume the pruning procedure during the system restart. @@ -400,28 +397,7 @@ func RecoverPruning(datadir string, db ethdb.Database, trieCachePath string) err log.Error("Pruning target state is not existent") return errors.New("non-existent target state") } - if err := prune(db, stateBloom, middleRoots, time.Now()); err != nil { - return err - } - // Pruning is done, now drop the "useless" layers from the snapshot. - // Firstly, flushing the target layer into the disk. After that all - // diff layers below the target will all be merged into the disk. - if err := snaptree.Cap(stateBloomRoot, 0); err != nil { - return err - } - // Secondly, flushing the snapshot journal into the disk. All diff - // layers upon are dropped silently. Eventually the entire snapshot - // tree is converted into a single disk layer with the pruning target - // as the root. - if _, err := snaptree.Journal(stateBloomRoot); err != nil { - return err - } - // Delete the state bloom, it marks the entire pruning procedure is - // finished. If any crashes or manual exit happens before this, - // `RecoverPruning` will pick it up in the next restarts to redo all - // the things. - os.RemoveAll(stateBloomPath) - return nil + return prune(snaptree, stateBloomRoot, db, stateBloom, stateBloomPath, middleRoots, time.Now()) } // extractGenesis loads the genesis state and commits all the state entries