From a8a2b2a488f7433abc09c51b751556875c9107a9 Mon Sep 17 00:00:00 2001 From: obscuren Date: Mon, 13 Apr 2015 16:38:32 +0200 Subject: [PATCH] downloader: added missing blocks catchup functionality When a parent is missing in the block list an attempt should be made to fetch the missing parent and grandparents. --- eth/downloader/downloader.go | 313 ++++++++++++++++++------------ eth/downloader/downloader_test.go | 37 +++- eth/downloader/peer.go | 44 ++++- eth/downloader/queue.go | 44 ++++- 4 files changed, 304 insertions(+), 134 deletions(-) diff --git a/eth/downloader/downloader.go b/eth/downloader/downloader.go index 206c4cc7e2..83e6b8d324 100644 --- a/eth/downloader/downloader.go +++ b/eth/downloader/downloader.go @@ -8,63 +8,44 @@ import ( "time" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/logger" "github.com/ethereum/go-ethereum/logger/glog" "gopkg.in/fatih/set.v0" ) -const maxBlockFetch = 256 +const ( + maxBlockFetch = 256 // Amount of max blocks to be fetched per chunk + minDesiredPeerCount = 3 // Amount of peers desired to start syncing +) -type hashFetcherFn func(common.Hash) error -type blockFetcherFn func([]common.Hash) error type hashCheckFn func(common.Hash) bool type chainInsertFn func(types.Blocks) error type hashIterFn func() (common.Hash, error) - -// XXX make threadsafe!!!! 
-type peers map[string]*peer - -func (p peers) get(state int) []*peer { - var peers []*peer - for _, peer := range p { - peer.mu.RLock() - if peer.state == state { - peers = append(peers, peer) - } - peer.mu.RUnlock() - } - - return peers -} - -func (p peers) setState(id string, state int) { - if peer, exist := p[id]; exist { - peer.mu.Lock() - defer peer.mu.Unlock() - peer.state = state - } -} +type currentTdFn func() *big.Int type Downloader struct { - queue *queue - - hasBlock hashCheckFn - insertChain chainInsertFn - mu sync.RWMutex + queue *queue peers peers - currentPeer *peer + // Callbacks + hasBlock hashCheckFn + insertChain chainInsertFn + currentTd currentTdFn + // Status fetchingHashes int32 downloadingBlocks int32 + processingBlocks int32 - newPeerCh chan *peer - selectPeerCh chan *peer - HashCh chan []common.Hash - blockCh chan blockPack - quit chan struct{} + // Channels + newPeerCh chan *peer + syncCh chan syncPack + HashCh chan []common.Hash + blockCh chan blockPack + quit chan struct{} } type blockPack struct { @@ -72,17 +53,23 @@ type blockPack struct { blocks []*types.Block } -func New(hasBlock hashCheckFn, insertChain chainInsertFn) *Downloader { +type syncPack struct { + peer *peer + hash common.Hash +} + +func New(hasBlock hashCheckFn, insertChain chainInsertFn, currentTd currentTdFn) *Downloader { downloader := &Downloader{ - queue: newqueue(), - peers: make(peers), - hasBlock: hasBlock, - insertChain: insertChain, - newPeerCh: make(chan *peer, 1), - selectPeerCh: make(chan *peer, 1), - HashCh: make(chan []common.Hash, 1), - blockCh: make(chan blockPack, 1), - quit: make(chan struct{}), + queue: newqueue(), + peers: make(peers), + hasBlock: hasBlock, + insertChain: insertChain, + currentTd: currentTd, + newPeerCh: make(chan *peer, 1), + syncCh: make(chan syncPack, 1), + HashCh: make(chan []common.Hash, 1), + blockCh: make(chan blockPack, 1), + quit: make(chan struct{}), } go downloader.peerHandler() go downloader.update() @@ -116,89 +103,88 
@@ func (d *Downloader) UnregisterPeer(id string) { } func (d *Downloader) peerHandler() { - // Fields defined here so we can reduce the amount of locking - // that needs to be done - var highestTd = new(big.Int) + // itimer is used to determine when to start ignoring `minDesiredPeerCount` + itimer := time.NewTicker(5 * time.Second) out: for { select { - case newPeer := <-d.newPeerCh: - // Check if TD of peer is higher than our current - if newPeer.td.Cmp(highestTd) > 0 { - glog.V(logger.Detail).Infoln("New peer with highest TD =", newPeer.td) - - highestTd.Set(newPeer.td) - // select the peer for downloading - d.selectPeerCh <- newPeer + case <-d.newPeerCh: + // Meet the `minDesiredPeerCount` before we select our best peer + if len(d.peers) < minDesiredPeerCount { + break } + d.selectPeer(d.peers.bestPeer()) + case <-itimer.C: + // The timer will make sure that the downloader keeps an active state + // in which it attempts to always check the network for highest td peers + d.selectPeer(d.peers.bestPeer()) case <-d.quit: break out } } } +func (d *Downloader) selectPeer(p *peer) { + // Make sure it's doing neither. Once done we can restart the + // downloading process if the TD is higher. For now just get on + // with whatever is going on. This prevents unnecessary switching. + if !(d.isFetchingHashes() || d.isDownloadingBlocks() || d.isProcessing()) { + // selected peer must exist (bestPeer returns nil without peers) and be better than our own + // XXX we also check the peer's recent hash to make sure we + // don't have it. Some peers report (I think) incorrect TD. + if p == nil || p.td.Cmp(d.currentTd()) <= 0 || d.hasBlock(p.recentHash) { + return + } + + glog.V(logger.Detail).Infoln("New peer with highest TD =", p.td) + d.syncCh <- syncPack{p, p.recentHash} + } +} + func (d *Downloader) update() { out: for { select { - case selectedPeer := <-d.selectPeerCh: - // Make sure it's doing neither. Once done we can restart the - // downloading process if the TD is higher. For now just get on - // with whatever is going on.
This prevents unecessary switching. - if !(d.isFetchingHashes() || d.isDownloadingBlocks()) { - glog.V(logger.Detail).Infoln("Selected new peer", selectedPeer.id) - // Start the fetcher. This will block the update entirely - // interupts need to be send to the appropriate channels - // respectively. - if err := d.startFetchingHashes(selectedPeer); err != nil { - // handle error - glog.V(logger.Debug).Infoln("Error fetching hashes:", err) - // Reset - break - } - - // Start fetching blocks in paralel. The strategy is simple - // take any available peers, seserve a chunk for each peer available, - // let the peer deliver the chunkn and periodically check if a peer - // has timedout. When done downloading, process blocks. - if err := d.startFetchingBlocks(selectedPeer); err != nil { - glog.V(logger.Debug).Infoln("Error downloading blocks:", err) - // reset - break - } - - // XXX this will move when optimised - // Sort the blocks by number. This bit needs much improvement. Right now - // it assumes full honesty form peers (i.e. it's not checked when the blocks - // link). We should at least check whihc queue match. This code could move - // to a seperate goroutine where it periodically checks for linked pieces. - types.BlockBy(types.Number).Sort(d.queue.blocks) - blocks := d.queue.blocks - - glog.V(logger.Debug).Infoln("Inserting chain with", len(blocks), "blocks") - // Loop untill we're out of queue - for len(blocks) != 0 { - max := int(math.Min(float64(len(blocks)), 256)) - // TODO check for parent error. When there's a parent error we should stop - // processing and start requesting the `block.hash` so that it's parent and - // grandparents can be requested and queued. - d.insertChain(blocks[:max]) - blocks = blocks[max:] - } + case sync := <-d.syncCh: + selectedPeer := sync.peer + glog.V(logger.Detail).Infoln("Synchronising with network using:", selectedPeer.id) + // Start the fetcher. 
This will block the update entirely + // interrupts need to be sent to the appropriate channels + // respectively. + if err := d.startFetchingHashes(selectedPeer, sync.hash); err != nil { + // handle error + glog.V(logger.Debug).Infoln("Error fetching hashes:", err) + // XXX Reset + break } + + // Start fetching blocks in parallel. The strategy is simple + // take any available peers, reserve a chunk for each peer available, + // let the peer deliver the chunk and periodically check if a peer + // has timed out. When done downloading, process blocks. + if err := d.startFetchingBlocks(selectedPeer); err != nil { + glog.V(logger.Debug).Infoln("Error downloading blocks:", err) + // XXX reset + break + } + + glog.V(logger.Detail).Infoln("Sync completed") + + d.process() case <-d.quit: break out } } } -func (d *Downloader) startFetchingHashes(p *peer) error { +// XXX Make synchronous +func (d *Downloader) startFetchingHashes(p *peer, hash common.Hash) error { glog.V(logger.Debug).Infoln("Downloading hashes") start := time.Now() // Get the first batch of hashes - p.getHashes(p.recentHash) + p.getHashes(hash) atomic.StoreInt32(&d.fetchingHashes, 1) out: @@ -237,10 +223,6 @@ out: return nil } -func (d *Downloader) DeliverBlocks(id string, block []*types.Block) { - d.blockCh <- blockPack{id, block} -} - func (d *Downloader) startFetchingBlocks(p *peer) error { glog.V(logger.Detail).Infoln("Downloading", d.queue.hashPool.Size(), "blocks") atomic.StoreInt32(&d.downloadingBlocks, 1) @@ -253,8 +235,6 @@ out: for { select { case blockPack := <-d.blockCh: - //fmt.Println("get for", blockPack.peerId) - d.queue.deliver(blockPack.peerId, blockPack.blocks) d.peers.setState(blockPack.peerId, idleState) case <-ticker.C: @@ -266,21 +246,24 @@ out: // Get a possible chunk. If nil is returned no chunk // could be returned due to no hashes available.
chunk := d.queue.get(peer, maxBlockFetch) - if chunk != nil { - //fmt.Println("fetching for", peer.id) - // Fetch the chunk and check for error. If the peer was somehow - // already fetching a chunk due to a bug, it will be returned to - // the queue - if err := peer.fetch(chunk); err != nil { - // log for tracing - glog.V(logger.Debug).Infof("peer %s received double work (state = %v)\n", peer.id, peer.state) - d.queue.put(chunk.hashes) - } + if chunk == nil { + continue + } + + //fmt.Println("fetching for", peer.id) + // XXX make fetch blocking. + // Fetch the chunk and check for error. If the peer was somehow + // already fetching a chunk due to a bug, it will be returned to + // the queue + if err := peer.fetch(chunk); err != nil { + // log for tracing + glog.V(logger.Debug).Infof("peer %s received double work (state = %v)\n", peer.id, peer.state) + d.queue.put(chunk.hashes) } } atomic.StoreInt32(&d.downloadingBlocks, 1) } else if len(d.queue.fetching) == 0 { - // Whene there are no more queue and no more `fetching`. We can + // When there are no more queue and no more `fetching`. We can // safely assume we're done. Another part of the process will check // for parent errors and will re-request anything that's missing atomic.StoreInt32(&d.downloadingBlocks, 0) @@ -325,6 +308,88 @@ out: return nil } +// Add an (unrequested) block to the downloader. This is usually done through the +// NewBlockMsg by the protocol handler. 
+func (d *Downloader) AddBlock(id string, block *types.Block, td *big.Int) { + hash := block.Hash() + + if d.hasBlock(hash) { + return + } + + glog.V(logger.Detail).Infoln("Inserting new block from:", id) + d.queue.addBlock(id, block, td) + + // if the peer is in our healthy list of peers; update the td + // here is a good chance to add the peer back to the list + if peer := d.peers.getPeer(id); peer != nil { + peer.mu.Lock() + peer.td = td + peer.recentHash = block.Hash() + peer.mu.Unlock() + } + + // if neither fetching hashes nor downloading blocks, go ahead and process + if !(d.isFetchingHashes() || d.isDownloadingBlocks()) { + d.process() + } +} + +// DeliverChunk delivers a chunk to the downloader. This is usually done through the BlocksMsg by +// the protocol handler. +func (d *Downloader) DeliverChunk(id string, blocks []*types.Block) { + d.blockCh <- blockPack{id, blocks} +} + +func (d *Downloader) process() error { + atomic.StoreInt32(&d.processingBlocks, 1) + defer atomic.StoreInt32(&d.processingBlocks, 0) + + // XXX this will move when optimised + // Sort the blocks by number. This bit needs much improvement. Right now + // it assumes full honesty from peers (i.e. it's not checked when the blocks + // link). We should at least check which queue match. This code could move + // to a separate goroutine where it periodically checks for linked pieces. + types.BlockBy(types.Number).Sort(d.queue.blocks) + blocks := d.queue.blocks + + glog.V(logger.Debug).Infoln("Inserting chain with", len(blocks), "blocks") + + var err error + // Loop until we're out of blocks + for len(blocks) != 0 { + max := int(math.Min(float64(len(blocks)), 256)) + // TODO check for parent error. When there's a parent error we should stop + // processing and start requesting the `block.hash` so that its parent and + // grandparents can be requested and queued. + err = d.insertChain(blocks[:max]) + if err != nil && core.IsParentErr(err) { + glog.V(logger.Debug).Infoln("Aborting process due to missing parent.
Fetching hashes") + + // TODO change this. This shite + for i, block := range blocks[:max] { + if !d.hasBlock(block.ParentHash()) { + d.syncCh <- syncPack{d.peers.bestPeer(), block.Hash()} + // remove processed blocks + blocks = blocks[i:] + + break + } + } + break + } + blocks = blocks[max:] + } + + // This will allow the GC to remove the in memory blocks + if len(blocks) == 0 { + d.queue.blocks = nil + } else { + d.queue.blocks = blocks + } + return err +} + func (d *Downloader) isFetchingHashes() bool { return atomic.LoadInt32(&d.fetchingHashes) == 1 } @@ -332,3 +397,7 @@ func (d *Downloader) isFetchingHashes() bool { func (d *Downloader) isDownloadingBlocks() bool { return atomic.LoadInt32(&d.downloadingBlocks) == 1 } + +func (d *Downloader) isProcessing() bool { + return atomic.LoadInt32(&d.processingBlocks) == 1 +} diff --git a/eth/downloader/downloader_test.go b/eth/downloader/downloader_test.go index 64bf9b0961..6cf99b678d 100644 --- a/eth/downloader/downloader_test.go +++ b/eth/downloader/downloader_test.go @@ -14,7 +14,7 @@ import ( var knownHash = common.Hash{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} -func createHashes(amount int) (hashes []common.Hash) { +func createHashes(start, amount int) (hashes []common.Hash) { hashes = make([]common.Hash, amount+1) hashes[len(hashes)-1] = knownHash @@ -49,7 +49,7 @@ type downloadTester struct { func newTester(t *testing.T, hashes []common.Hash, blocks map[common.Hash]*types.Block) *downloadTester { tester := &downloadTester{t: t, hashes: hashes, blocks: blocks, done: make(chan bool)} - downloader := New(tester.hasBlock, tester.insertChain) + downloader := New(tester.hasBlock, tester.insertChain, func() *big.Int { return new(big.Int) }) tester.downloader = downloader return tester @@ -84,7 +84,7 @@ func (dl *downloadTester) getBlocks(id string) func([]common.Hash) error { blocks[i] = dl.blocks[hash] } - go dl.downloader.DeliverBlocks(id, blocks) + go 
dl.downloader.DeliverChunk(id, blocks) return nil } @@ -109,11 +109,11 @@ func TestDownload(t *testing.T) { glog.SetV(logger.Detail) glog.SetToStderr(true) - hashes := createHashes(1000) + hashes := createHashes(0, 1000) blocks := createBlocksFromHashes(hashes) tester := newTester(t, hashes, blocks) - tester.newPeer("peer1", big.NewInt(10000), hashes[len(hashes)-1]) + tester.newPeer("peer1", big.NewInt(10000), hashes[0]) tester.newPeer("peer2", big.NewInt(0), common.Hash{}) tester.badBlocksPeer("peer3", big.NewInt(0), common.Hash{}) tester.badBlocksPeer("peer4", big.NewInt(0), common.Hash{}) @@ -126,3 +126,30 @@ success: t.Error("timout") } } + +func TestMissing(t *testing.T) { + t.Skip() + + glog.SetV(logger.Detail) + glog.SetToStderr(true) + + hashes := createHashes(0, 1000) + extraHashes := createHashes(1001, 1003) + blocks := createBlocksFromHashes(append(extraHashes, hashes...)) + tester := newTester(t, hashes, blocks) + + tester.newPeer("peer1", big.NewInt(10000), hashes[len(hashes)-1]) + + hashes = append(extraHashes, hashes[:len(hashes)-1]...) + tester.newPeer("peer2", big.NewInt(0), common.Hash{}) + +success1: + select { + case <-tester.done: + break success1 + case <-time.After(10 * time.Second): // XXX this could actually fail on a slow computer + t.Error("timout") + } + + tester.downloader.AddBlock("peer2", blocks[hashes[len(hashes)-1]], big.NewInt(10001)) +} diff --git a/eth/downloader/peer.go b/eth/downloader/peer.go index 318da59b7c..f66e5afd88 100644 --- a/eth/downloader/peer.go +++ b/eth/downloader/peer.go @@ -13,9 +13,51 @@ const ( idleState = 4 ) +type hashFetcherFn func(common.Hash) error +type blockFetcherFn func([]common.Hash) error + +// XXX make threadsafe!!!! 
+type peers map[string]*peer + +func (p peers) get(state int) []*peer { + var peers []*peer + for _, peer := range p { + peer.mu.RLock() + if peer.state == state { + peers = append(peers, peer) + } + peer.mu.RUnlock() + } + + return peers +} + +func (p peers) setState(id string, state int) { + if peer, exist := p[id]; exist { + peer.mu.Lock() + defer peer.mu.Unlock() + peer.state = state + } +} + +func (p peers) getPeer(id string) *peer { + return p[id] +} + +func (p peers) bestPeer() *peer { + var peer *peer + for _, cp := range p { + if peer == nil || cp.td.Cmp(peer.td) > 0 { + peer = cp + } + } + return peer +} + // peer represents an active peer type peer struct { - state int + state int // Peer state (working, idle) + rep int // TODO peer reputation mu sync.RWMutex id string diff --git a/eth/downloader/queue.go b/eth/downloader/queue.go index b68c5bc822..4d1aa4e934 100644 --- a/eth/downloader/queue.go +++ b/eth/downloader/queue.go @@ -2,16 +2,20 @@ package downloader import ( "math" + "math/big" "sync" "time" + "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" "gopkg.in/fatih/set.v0" ) // queue represents hashes that are either need fetching or are being fetched type queue struct { - hashPool *set.Set + hashPool *set.Set + fetchPool *set.Set + blockHashes *set.Set mu sync.Mutex fetching map[string]*chunk @@ -20,8 +24,10 @@ type queue struct { func newqueue() *queue { return &queue{ - hashPool: set.New(), - fetching: make(map[string]*chunk), + hashPool: set.New(), + fetchPool: set.New(), + blockHashes: set.New(), + fetching: make(map[string]*chunk), } } @@ -50,6 +56,8 @@ func (c *queue) get(p *peer, max int) *chunk { }) // remove the fetchable hashes from hash pool c.hashPool.Separate(hashes) + c.fetchPool.Merge(hashes) + // Create a new chunk for the seperated hashes. 
The time is being used + // to reset the chunk (timeout) chunk := &chunk{hashes, time.Now()} @@ -60,6 +68,23 @@ func (c *queue) get(p *peer, max int) *chunk { return chunk } +func (c *queue) has(hash common.Hash) bool { + return c.hashPool.Has(hash) || c.fetchPool.Has(hash) +} + +func (c *queue) addBlock(id string, block *types.Block, td *big.Int) { + c.mu.Lock() + defer c.mu.Unlock() + + // when adding a block make sure it doesn't already exist; record its hash so a repeated add is rejected + if !c.blockHashes.Has(block.Hash()) { + c.blockHashes.Add(block.Hash()) + c.hashPool.Remove(block.Hash()) + c.blocks = append(c.blocks, block) + } +} + +// deliver delivers a chunk to the queue that was requested of the peer func (c *queue) deliver(id string, blocks []*types.Block) { c.mu.Lock() defer c.mu.Unlock() @@ -70,15 +95,19 @@ func (c *queue) deliver(id string, blocks []*types.Block) { delete(c.fetching, id) // seperate the blocks and the hashes - chunk.seperate(blocks) + blockHashes := chunk.fetchedHashes(blocks) + // merge block hashes + c.blockHashes.Merge(blockHashes) // Add the blocks c.blocks = append(c.blocks, blocks...) // Add back whatever couldn't be delivered c.hashPool.Merge(chunk.hashes) + c.fetchPool.Separate(chunk.hashes) } } +// put puts sets of hashes onto the queue for fetching func (c *queue) put(hashes *set.Set) { c.mu.Lock() defer c.mu.Unlock() @@ -91,8 +120,12 @@ type chunk struct { itime time.Time } -func (ch *chunk) seperate(blocks []*types.Block) { +func (ch *chunk) fetchedHashes(blocks []*types.Block) *set.Set { + fhashes := set.New() for _, block := range blocks { - ch.hashes.Remove(block.Hash()) + fhashes.Add(block.Hash()) } + ch.hashes.Separate(fhashes) + + return fhashes }