swarm/network: WIP consider all nodes for healthy iteration (#19155)
* swarm/network: WIP consider all nodes for healthy iteration * swarm/network/simulation: extend TestWaitTillHealthy to really check kads are healthy * cmd/swarm/swarm-snapshot: fixed bugs in snapshot creation binary * swarm/network/simulation: addressed PR comments * swarm/network/simulation: defer sim.Close() * swarm/network/simulation: fixed wrong sim.Close() * swarm/network/simulation: addressed PR comments * cmd/swarm/swarm-snapshot: reduced default to 8 nodes and the "more nodes" test case to defaultNodes + 4 * cmd/swarm/swarm-snapshot: extended timeout to 3 mins, otherwise the 256-node snapshot times out * swarm/network/simulation: More PR comments
This commit is contained in:
parent
505a49e689
commit
62d9d63858
|
@ -59,13 +59,16 @@ func createSnapshot(filename string, nodes int, services []string) (err error) {
|
||||||
log.Debug("create snapshot", "filename", filename, "nodes", nodes, "services", services)
|
log.Debug("create snapshot", "filename", filename, "nodes", nodes, "services", services)
|
||||||
|
|
||||||
sim := simulation.New(map[string]simulation.ServiceFunc{
|
sim := simulation.New(map[string]simulation.ServiceFunc{
|
||||||
"bzz": func(ctx *adapters.ServiceContext, b *sync.Map) (node.Service, func(), error) {
|
"bzz": func(ctx *adapters.ServiceContext, bucket *sync.Map) (node.Service, func(), error) {
|
||||||
addr := network.NewAddr(ctx.Config.Node())
|
addr := network.NewAddr(ctx.Config.Node())
|
||||||
kad := network.NewKademlia(addr.Over(), network.NewKadParams())
|
kad := network.NewKademlia(addr.Over(), network.NewKadParams())
|
||||||
hp := network.NewHiveParams()
|
hp := network.NewHiveParams()
|
||||||
hp.KeepAliveInterval = time.Duration(200) * time.Millisecond
|
hp.KeepAliveInterval = time.Duration(200) * time.Millisecond
|
||||||
hp.Discovery = true // discovery must be enabled when creating a snapshot
|
hp.Discovery = true // discovery must be enabled when creating a snapshot
|
||||||
|
|
||||||
|
// store the kademlia in the bucket, needed later in the WaitTillHealthy function
|
||||||
|
bucket.Store(simulation.BucketKeyKademlia, kad)
|
||||||
|
|
||||||
config := &network.BzzConfig{
|
config := &network.BzzConfig{
|
||||||
OverlayAddr: addr.Over(),
|
OverlayAddr: addr.Over(),
|
||||||
UnderlayAddr: addr.Under(),
|
UnderlayAddr: addr.Under(),
|
||||||
|
@ -76,17 +79,17 @@ func createSnapshot(filename string, nodes int, services []string) (err error) {
|
||||||
})
|
})
|
||||||
defer sim.Close()
|
defer sim.Close()
|
||||||
|
|
||||||
_, err = sim.AddNodes(nodes)
|
ids, err := sim.AddNodes(nodes)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("add nodes: %v", err)
|
return fmt.Errorf("add nodes: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
err = sim.Net.ConnectNodesRing(nil)
|
err = sim.Net.ConnectNodesRing(ids)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("connect nodes: %v", err)
|
return fmt.Errorf("connect nodes: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx, cancelSimRun := context.WithTimeout(context.Background(), 2*time.Minute)
|
ctx, cancelSimRun := context.WithTimeout(context.Background(), 3*time.Minute)
|
||||||
defer cancelSimRun()
|
defer cancelSimRun()
|
||||||
if _, err := sim.WaitTillHealthy(ctx); err != nil {
|
if _, err := sim.WaitTillHealthy(ctx); err != nil {
|
||||||
return fmt.Errorf("wait for healthy kademlia: %v", err)
|
return fmt.Errorf("wait for healthy kademlia: %v", err)
|
||||||
|
|
|
@ -48,7 +48,7 @@ func TestSnapshotCreate(t *testing.T) {
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "more nodes",
|
name: "more nodes",
|
||||||
nodes: defaultNodes + 5,
|
nodes: defaultNodes + 4,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "services",
|
name: "services",
|
||||||
|
@ -81,7 +81,7 @@ func TestSnapshotCreate(t *testing.T) {
|
||||||
}
|
}
|
||||||
testCmd := runSnapshot(t, append(args, file.Name())...)
|
testCmd := runSnapshot(t, append(args, file.Name())...)
|
||||||
|
|
||||||
testCmd.ExpectExit()
|
testCmd.WaitExit()
|
||||||
if code := testCmd.ExitStatus(); code != 0 {
|
if code := testCmd.ExitStatus(); code != 0 {
|
||||||
t.Fatalf("command exit code %v, expected 0", code)
|
t.Fatalf("command exit code %v, expected 0", code)
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,7 +27,7 @@ import (
|
||||||
var gitCommit string // Git SHA1 commit hash of the release (set via linker flags)
|
var gitCommit string // Git SHA1 commit hash of the release (set via linker flags)
|
||||||
|
|
||||||
// default value for "create" command --nodes flag
|
// default value for "create" command --nodes flag
|
||||||
const defaultNodes = 10
|
const defaultNodes = 8
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
err := newApp().Run(os.Args)
|
err := newApp().Run(os.Args)
|
||||||
|
|
|
@ -58,7 +58,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
|
||||||
for k := range ill {
|
for k := range ill {
|
||||||
delete(ill, k)
|
delete(ill, k)
|
||||||
}
|
}
|
||||||
log.Debug("kademlia health check", "addr count", len(addrs))
|
log.Debug("kademlia health check", "addr count", len(addrs), "kad len", len(kademlias))
|
||||||
for id, k := range kademlias {
|
for id, k := range kademlias {
|
||||||
//PeerPot for this node
|
//PeerPot for this node
|
||||||
addr := common.Bytes2Hex(k.BaseAddr())
|
addr := common.Bytes2Hex(k.BaseAddr())
|
||||||
|
@ -70,7 +70,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
|
||||||
log.Debug("kademlia", "connectNN", h.ConnectNN, "knowNN", h.KnowNN)
|
log.Debug("kademlia", "connectNN", h.ConnectNN, "knowNN", h.KnowNN)
|
||||||
log.Debug("kademlia", "health", h.ConnectNN && h.KnowNN, "addr", hex.EncodeToString(k.BaseAddr()), "node", id)
|
log.Debug("kademlia", "health", h.ConnectNN && h.KnowNN, "addr", hex.EncodeToString(k.BaseAddr()), "node", id)
|
||||||
log.Debug("kademlia", "ill condition", !h.ConnectNN, "addr", hex.EncodeToString(k.BaseAddr()), "node", id)
|
log.Debug("kademlia", "ill condition", !h.ConnectNN, "addr", hex.EncodeToString(k.BaseAddr()), "node", id)
|
||||||
if !h.ConnectNN {
|
if !h.Healthy() {
|
||||||
ill[id] = k
|
ill[id] = k
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -85,6 +85,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
|
||||||
// in simulation bucket.
|
// in simulation bucket.
|
||||||
func (s *Simulation) kademlias() (ks map[enode.ID]*network.Kademlia) {
|
func (s *Simulation) kademlias() (ks map[enode.ID]*network.Kademlia) {
|
||||||
items := s.UpNodesItems(BucketKeyKademlia)
|
items := s.UpNodesItems(BucketKeyKademlia)
|
||||||
|
log.Debug("kademlia len items", "len", len(items))
|
||||||
ks = make(map[enode.ID]*network.Kademlia, len(items))
|
ks = make(map[enode.ID]*network.Kademlia, len(items))
|
||||||
for id, v := range items {
|
for id, v := range items {
|
||||||
k, ok := v.(*network.Kademlia)
|
k, ok := v.(*network.Kademlia)
|
||||||
|
|
|
@ -22,16 +22,115 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/ethereum/go-ethereum/common"
|
||||||
|
"github.com/ethereum/go-ethereum/log"
|
||||||
"github.com/ethereum/go-ethereum/node"
|
"github.com/ethereum/go-ethereum/node"
|
||||||
"github.com/ethereum/go-ethereum/p2p/simulations/adapters"
|
"github.com/ethereum/go-ethereum/p2p/simulations/adapters"
|
||||||
"github.com/ethereum/go-ethereum/swarm/network"
|
"github.com/ethereum/go-ethereum/swarm/network"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
/*
|
||||||
|
TestWaitTillHealthy tests that we indeed get a healthy network after we wait for it.
|
||||||
|
Testing this requires a somewhat circular (snake-biting-its-own-tail) setup:
|
||||||
|
* First we create an initial simulation
|
||||||
|
* Run it as nodes connected in a ring
|
||||||
|
* Wait until the network is healthy
|
||||||
|
* Then we create a snapshot
|
||||||
|
* With this snapshot we create a new simulation
|
||||||
|
* This simulation is expected to have a healthy configuration, as it uses the snapshot
|
||||||
|
* Thus we just iterate all nodes and check that their kademlias are healthy
|
||||||
|
* If all kademlias are healthy, the test succeeded, otherwise it failed
|
||||||
|
*/
|
||||||
func TestWaitTillHealthy(t *testing.T) {
|
func TestWaitTillHealthy(t *testing.T) {
|
||||||
sim := New(map[string]ServiceFunc{
|
|
||||||
|
testNodesNum := 10
|
||||||
|
|
||||||
|
// create the first simulation
|
||||||
|
sim := New(createSimServiceMap(true))
|
||||||
|
|
||||||
|
// connect and...
|
||||||
|
nodeIDs, err := sim.AddNodesAndConnectRing(testNodesNum)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// array of all overlay addresses
|
||||||
|
var addrs [][]byte
|
||||||
|
// iterate once to be able to build the peer map
|
||||||
|
for _, node := range nodeIDs {
|
||||||
|
//get the kademlia overlay address from this ID
|
||||||
|
a := node.Bytes()
|
||||||
|
//append it to the array of all overlay addresses
|
||||||
|
addrs = append(addrs, a)
|
||||||
|
}
|
||||||
|
// build a PeerPot only once
|
||||||
|
pp := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
// ...wait until healthy
|
||||||
|
ill, err := sim.WaitTillHealthy(ctx)
|
||||||
|
if err != nil {
|
||||||
|
for id, kad := range ill {
|
||||||
|
t.Log("Node", id)
|
||||||
|
t.Log(kad.String())
|
||||||
|
}
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// now create a snapshot of this network
|
||||||
|
snap, err := sim.Net.Snapshot()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// close the initial simulation
|
||||||
|
sim.Close()
|
||||||
|
// create a control simulation
|
||||||
|
controlSim := New(createSimServiceMap(false))
|
||||||
|
defer controlSim.Close()
|
||||||
|
|
||||||
|
// load the snapshot into this control simulation
|
||||||
|
err = controlSim.Net.Load(snap)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
_, err = controlSim.WaitTillHealthy(ctx)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, node := range nodeIDs {
|
||||||
|
// ...get its kademlia
|
||||||
|
item, ok := controlSim.NodeItem(node, BucketKeyKademlia)
|
||||||
|
if !ok {
|
||||||
|
t.Fatal("No kademlia bucket item")
|
||||||
|
}
|
||||||
|
kad := item.(*network.Kademlia)
|
||||||
|
// get its base address
|
||||||
|
kid := common.Bytes2Hex(kad.BaseAddr())
|
||||||
|
|
||||||
|
//get the health info
|
||||||
|
info := kad.GetHealthInfo(pp[kid])
|
||||||
|
log.Trace("Health info", "info", info)
|
||||||
|
// check that it is healthy
|
||||||
|
healthy := info.Healthy()
|
||||||
|
if !healthy {
|
||||||
|
t.Fatalf("Expected node %v of control simulation to be healthy, but it is not, unhealthy kademlias: %v", node, kad.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// createSimServiceMap returns the services map
|
||||||
|
// this function will create the sim services with or without discovery enabled
|
||||||
|
// based on the flag passed
|
||||||
|
func createSimServiceMap(discovery bool) map[string]ServiceFunc {
|
||||||
|
return map[string]ServiceFunc{
|
||||||
"bzz": func(ctx *adapters.ServiceContext, b *sync.Map) (node.Service, func(), error) {
|
"bzz": func(ctx *adapters.ServiceContext, b *sync.Map) (node.Service, func(), error) {
|
||||||
addr := network.NewAddr(ctx.Config.Node())
|
addr := network.NewAddr(ctx.Config.Node())
|
||||||
hp := network.NewHiveParams()
|
hp := network.NewHiveParams()
|
||||||
|
hp.Discovery = discovery
|
||||||
config := &network.BzzConfig{
|
config := &network.BzzConfig{
|
||||||
OverlayAddr: addr.Over(),
|
OverlayAddr: addr.Over(),
|
||||||
UnderlayAddr: addr.Under(),
|
UnderlayAddr: addr.Under(),
|
||||||
|
@ -43,24 +142,5 @@ func TestWaitTillHealthy(t *testing.T) {
|
||||||
b.Store(BucketKeyKademlia, kad)
|
b.Store(BucketKeyKademlia, kad)
|
||||||
return network.NewBzz(config, kad, nil, nil, nil), nil, nil
|
return network.NewBzz(config, kad, nil, nil, nil), nil, nil
|
||||||
},
|
},
|
||||||
})
|
|
||||||
defer sim.Close()
|
|
||||||
|
|
||||||
_, err := sim.AddNodesAndConnectRing(10)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
|
|
||||||
defer cancel()
|
|
||||||
ill, err := sim.WaitTillHealthy(ctx)
|
|
||||||
if err != nil {
|
|
||||||
for id, kad := range ill {
|
|
||||||
t.Log("Node", id)
|
|
||||||
t.Log(kad.String())
|
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue