go-ethereum/swarm/network/stream/syncer.go

295 lines
8.6 KiB
Go
Raw Normal View History

2018-06-20 07:06:27 -05:00
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package stream
import (
"context"
2018-06-20 07:06:27 -05:00
"math"
"strconv"
"time"
"github.com/ethereum/go-ethereum/metrics"
"github.com/ethereum/go-ethereum/swarm/log"
"github.com/ethereum/go-ethereum/swarm/storage"
)
const (
BatchSize = 128
)
// SwarmSyncerServer implements an Server for history syncing on bins
// offered streams:
// * live request delivery with or without checkback
// * (live/non-live historical) chunk syncing per proximity bin
type SwarmSyncerServer struct {
po uint8
store storage.SyncChunkStore
2018-06-20 07:06:27 -05:00
sessionAt uint64
start uint64
live bool
2018-06-20 07:06:27 -05:00
quit chan struct{}
}
// NewSwarmSyncerServer is contructor for SwarmSyncerServer
func NewSwarmSyncerServer(live bool, po uint8, syncChunkStore storage.SyncChunkStore) (*SwarmSyncerServer, error) {
sessionAt := syncChunkStore.BinIndex(po)
2018-06-20 07:06:27 -05:00
var start uint64
if live {
start = sessionAt
}
return &SwarmSyncerServer{
po: po,
store: syncChunkStore,
2018-06-20 07:06:27 -05:00
sessionAt: sessionAt,
start: start,
live: live,
2018-06-20 07:06:27 -05:00
quit: make(chan struct{}),
}, nil
}
func RegisterSwarmSyncerServer(streamer *Registry, syncChunkStore storage.SyncChunkStore) {
2018-06-20 07:06:27 -05:00
streamer.RegisterServerFunc("SYNC", func(p *Peer, t string, live bool) (Server, error) {
po, err := ParseSyncBinKey(t)
if err != nil {
return nil, err
}
return NewSwarmSyncerServer(live, po, syncChunkStore)
2018-06-20 07:06:27 -05:00
})
// streamer.RegisterServerFunc(stream, func(p *Peer) (Server, error) {
// return NewOutgoingProvableSwarmSyncer(po, db)
// })
}
// Close needs to be called on a stream server
func (s *SwarmSyncerServer) Close() {
close(s.quit)
}
// GetData retrieves the actual chunk from netstore
func (s *SwarmSyncerServer) GetData(ctx context.Context, key []byte) ([]byte, error) {
chunk, err := s.store.Get(ctx, storage.Address(key))
if err != nil {
2018-06-20 07:06:27 -05:00
return nil, err
}
return chunk.Data(), nil
2018-06-20 07:06:27 -05:00
}
// GetBatch retrieves the next batch of hashes from the dbstore
func (s *SwarmSyncerServer) SetNextBatch(from, to uint64) ([]byte, uint64, uint64, *HandoverProof, error) {
var batch []byte
i := 0
if s.live {
if from == 0 {
from = s.start
}
if to <= from || from >= s.sessionAt {
to = math.MaxUint64
}
} else {
if (to < from && to != 0) || from > s.sessionAt {
return nil, 0, 0, nil, nil
}
if to == 0 || to > s.sessionAt {
to = s.sessionAt
}
2018-06-20 07:06:27 -05:00
}
2018-06-20 07:06:27 -05:00
var ticker *time.Ticker
defer func() {
if ticker != nil {
ticker.Stop()
}
}()
var wait bool
for {
if wait {
if ticker == nil {
ticker = time.NewTicker(1000 * time.Millisecond)
}
select {
case <-ticker.C:
case <-s.quit:
return nil, 0, 0, nil, nil
}
}
metrics.GetOrRegisterCounter("syncer.setnextbatch.iterator", nil).Inc(1)
err := s.store.Iterator(from, to, s.po, func(key storage.Address, idx uint64) bool {
batch = append(batch, key[:]...)
2018-06-20 07:06:27 -05:00
i++
to = idx
return i < BatchSize
})
if err != nil {
return nil, 0, 0, nil, err
}
if len(batch) > 0 {
break
}
wait = true
}
log.Trace("Swarm syncer offer batch", "po", s.po, "len", i, "from", from, "to", to, "current store count", s.store.BinIndex(s.po))
2018-06-20 07:06:27 -05:00
return batch, from, to, nil, nil
}
// SwarmSyncerClient
type SwarmSyncerClient struct {
sessionAt uint64
nextC chan struct{}
sessionRoot storage.Address
sessionReader storage.LazySectionReader
retrieveC chan *storage.Chunk
storeC chan *storage.Chunk
store storage.SyncChunkStore
2018-06-20 07:06:27 -05:00
// chunker storage.Chunker
currentRoot storage.Address
requestFunc func(chunk *storage.Chunk)
end, start uint64
peer *Peer
stream Stream
2018-06-20 07:06:27 -05:00
}
// NewSwarmSyncerClient is a contructor for provable data exchange syncer
func NewSwarmSyncerClient(p *Peer, store storage.SyncChunkStore, stream Stream) (*SwarmSyncerClient, error) {
2018-06-20 07:06:27 -05:00
return &SwarmSyncerClient{
store: store,
peer: p,
stream: stream,
2018-06-20 07:06:27 -05:00
}, nil
}
// // NewIncomingProvableSwarmSyncer is a contructor for provable data exchange syncer
// func NewIncomingProvableSwarmSyncer(po int, priority int, index uint64, sessionAt uint64, intervals []uint64, sessionRoot storage.Address, chunker *storage.PyramidChunker, store storage.ChunkStore, p Peer) *SwarmSyncerClient {
2018-06-20 07:06:27 -05:00
// retrieveC := make(storage.Chunk, chunksCap)
// RunChunkRequestor(p, retrieveC)
// storeC := make(storage.Chunk, chunksCap)
// RunChunkStorer(store, storeC)
// s := &SwarmSyncerClient{
// po: po,
// priority: priority,
// sessionAt: sessionAt,
// start: index,
// end: index,
// nextC: make(chan struct{}, 1),
// intervals: intervals,
// sessionRoot: sessionRoot,
// sessionReader: chunker.Join(sessionRoot, retrieveC),
// retrieveC: retrieveC,
// storeC: storeC,
// }
// return s
// }
// // StartSyncing is called on the Peer to start the syncing process
// // the idea is that it is called only after kademlia is close to healthy
all: new p2p node representation (#17643) Package p2p/enode provides a generalized representation of p2p nodes which can contain arbitrary information in key/value pairs. It is also the new home for the node database. The "v4" identity scheme is also moved here from p2p/enr to remove the dependency on Ethereum crypto from that package. Record signature handling is changed significantly. The identity scheme registry is removed and acceptable schemes must be passed to any method that needs identity. This means records must now be validated explicitly after decoding. The enode API is designed to make signature handling easy and safe: most APIs around the codebase work with enode.Node, which is a wrapper around a valid record. Going from enr.Record to enode.Node requires a valid signature. * p2p/discover: port to p2p/enode This ports the discovery code to the new node representation in p2p/enode. The wire protocol is unchanged, this can be considered a refactoring change. The Kademlia table can now deal with nodes using an arbitrary identity scheme. This requires a few incompatible API changes: - Table.Lookup is not available anymore. It used to take a public key as argument because v4 protocol requires one. Its replacement is LookupRandom. - Table.Resolve takes *enode.Node instead of NodeID. This is also for v4 protocol compatibility because nodes cannot be looked up by ID alone. - Types Node and NodeID are gone. Further commits in the series will be fixes all over the the codebase to deal with those removals. * p2p: port to p2p/enode and discovery changes This adapts package p2p to the changes in p2p/discover. All uses of discover.Node and discover.NodeID are replaced by their equivalents from p2p/enode. New API is added to retrieve the enode.Node instance of a peer. The behavior of Server.Self with discovery disabled is improved. It now tries much harder to report a working IP address, falling back to 127.0.0.1 if no suitable address can be determined through other means. These changes were needed for tests of other packages later in the series. * p2p/simulations, p2p/testing: port to p2p/enode No surprises here, mostly replacements of discover.Node, discover.NodeID with their new equivalents. The 'interesting' API changes are: - testing.ProtocolSession tracks complete nodes, not just their IDs. - adapters.NodeConfig has a new method to create a complete node. These changes were needed to make swarm tests work. Note that the NodeID change makes the code incompatible with old simulation snapshots. * whisper/whisperv5, whisper/whisperv6: port to p2p/enode This port was easy because whisper uses []byte for node IDs and URL strings in the API. * eth: port to p2p/enode Again, easy to port because eth uses strings for node IDs and doesn't care about node information in any way. * les: port to p2p/enode Apart from replacing discover.NodeID with enode.ID, most changes are in the server pool code. It now deals with complete nodes instead of (Pubkey, IP, Port) triples. The database format is unchanged for now, but we should probably change it to use the node database later. * node: port to p2p/enode This change simply replaces discover.Node and discover.NodeID with their new equivalents. * swarm/network: port to p2p/enode Swarm has its own node address representation, BzzAddr, containing both an overlay address (the hash of a secp256k1 public key) and an underlay address (enode:// URL). There are no changes to the BzzAddr format in this commit, but certain operations such as creating a BzzAddr from a node ID are now impossible because node IDs aren't public keys anymore. Most swarm-related changes in the series remove uses of NewAddrFromNodeID, replacing it with NewAddr which takes a complete node as argument. ToOverlayAddr is removed because we can just use the node ID directly.
2018-09-24 17:59:00 -05:00
// func StartSyncing(s *Streamer, peerId enode.ID, po uint8, nn bool) {
2018-06-20 07:06:27 -05:00
// lastPO := po
// if nn {
// lastPO = maxPO
// }
//
// for i := po; i <= lastPO; i++ {
// s.Subscribe(peerId, "SYNC", newSyncLabel("LIVE", po), 0, 0, High, true)
// s.Subscribe(peerId, "SYNC", newSyncLabel("HISTORY", po), 0, 0, Mid, false)
// }
// }
// RegisterSwarmSyncerClient registers the client constructor function for
// to handle incoming sync streams
func RegisterSwarmSyncerClient(streamer *Registry, store storage.SyncChunkStore) {
2018-06-20 07:06:27 -05:00
streamer.RegisterClientFunc("SYNC", func(p *Peer, t string, live bool) (Client, error) {
return NewSwarmSyncerClient(p, store, NewStream("SYNC", t, live))
2018-06-20 07:06:27 -05:00
})
}
// NeedData
func (s *SwarmSyncerClient) NeedData(ctx context.Context, key []byte) (wait func(context.Context) error) {
return s.store.FetchFunc(ctx, key)
2018-06-20 07:06:27 -05:00
}
// BatchDone
func (s *SwarmSyncerClient) BatchDone(stream Stream, from uint64, hashes []byte, root []byte) func() (*TakeoverProof, error) {
// TODO: reenable this with putter/getter refactored code
// if s.chunker != nil {
// return func() (*TakeoverProof, error) { return s.TakeoverProof(stream, from, hashes, root) }
// }
return nil
}
func (s *SwarmSyncerClient) TakeoverProof(stream Stream, from uint64, hashes []byte, root storage.Address) (*TakeoverProof, error) {
// for provable syncer currentRoot is non-zero length
// TODO: reenable this with putter/getter
// if s.chunker != nil {
// if from > s.sessionAt { // for live syncing currentRoot is always updated
// //expRoot, err := s.chunker.Append(s.currentRoot, bytes.NewReader(hashes), s.retrieveC, s.storeC)
// expRoot, _, err := s.chunker.Append(s.currentRoot, bytes.NewReader(hashes), s.retrieveC)
// if err != nil {
// return nil, err
// }
// if !bytes.Equal(root, expRoot) {
// return nil, fmt.Errorf("HandoverProof mismatch")
// }
// s.currentRoot = root
// } else {
// expHashes := make([]byte, len(hashes))
// _, err := s.sessionReader.ReadAt(expHashes, int64(s.end*HashSize))
// if err != nil && err != io.EOF {
// return nil, err
// }
// if !bytes.Equal(expHashes, hashes) {
// return nil, errors.New("invalid proof")
// }
// }
// return nil, nil
// }
s.end += uint64(len(hashes)) / HashSize
takeover := &Takeover{
Stream: stream,
Start: s.start,
End: s.end,
Root: root,
}
// serialise and sign
return &TakeoverProof{
Takeover: takeover,
Sig: nil,
}, nil
}
func (s *SwarmSyncerClient) Close() {}
// base for parsing and formating sync bin key
// it must be 2 <= base <= 36
const syncBinKeyBase = 36
// FormatSyncBinKey returns a string representation of
// Kademlia bin number to be used as key for SYNC stream.
func FormatSyncBinKey(bin uint8) string {
return strconv.FormatUint(uint64(bin), syncBinKeyBase)
}
// ParseSyncBinKey parses the string representation
// and returns the Kademlia bin number.
func ParseSyncBinKey(s string) (uint8, error) {
bin, err := strconv.ParseUint(s, syncBinKeyBase, 8)
if err != nil {
return 0, err
}
return uint8(bin), nil
}