swarm/network: WIP consider all nodes for healthy iteration (#19155)
* swarm/network: WIP consider all nodes for healthy iteration * swarm/network/simulation: extend TestWaitTillHealthy to really check kads are healthy * cmd/swarm/swarm-snapshot: fixed bugs in snapshot creation binary * swarm/network/simulation: addressed PR comments * swarm/network/simulation: defer sim.Close() * swarm/network/simulation: fixed wrong sim.Close() * swarm/network/simulation: addressed PR comments * cmd/swarm/swarm-snapshot: reducing default to 8 nodes, "more nodes" test case to default + 4 * cmd/swarm/swarm-snapshot: extended timeout to 3 mins, otherwise the 256-node snapshot times out * swarm/network/simulation: More PR comments
This commit is contained in:
parent
505a49e689
commit
62d9d63858
|
@ -59,13 +59,16 @@ func createSnapshot(filename string, nodes int, services []string) (err error) {
|
|||
log.Debug("create snapshot", "filename", filename, "nodes", nodes, "services", services)
|
||||
|
||||
sim := simulation.New(map[string]simulation.ServiceFunc{
|
||||
"bzz": func(ctx *adapters.ServiceContext, b *sync.Map) (node.Service, func(), error) {
|
||||
"bzz": func(ctx *adapters.ServiceContext, bucket *sync.Map) (node.Service, func(), error) {
|
||||
addr := network.NewAddr(ctx.Config.Node())
|
||||
kad := network.NewKademlia(addr.Over(), network.NewKadParams())
|
||||
hp := network.NewHiveParams()
|
||||
hp.KeepAliveInterval = time.Duration(200) * time.Millisecond
|
||||
hp.Discovery = true // discovery must be enabled when creating a snapshot
|
||||
|
||||
// store the kademlia in the bucket, needed later in the WaitTillHealthy function
|
||||
bucket.Store(simulation.BucketKeyKademlia, kad)
|
||||
|
||||
config := &network.BzzConfig{
|
||||
OverlayAddr: addr.Over(),
|
||||
UnderlayAddr: addr.Under(),
|
||||
|
@ -76,17 +79,17 @@ func createSnapshot(filename string, nodes int, services []string) (err error) {
|
|||
})
|
||||
defer sim.Close()
|
||||
|
||||
_, err = sim.AddNodes(nodes)
|
||||
ids, err := sim.AddNodes(nodes)
|
||||
if err != nil {
|
||||
return fmt.Errorf("add nodes: %v", err)
|
||||
}
|
||||
|
||||
err = sim.Net.ConnectNodesRing(nil)
|
||||
err = sim.Net.ConnectNodesRing(ids)
|
||||
if err != nil {
|
||||
return fmt.Errorf("connect nodes: %v", err)
|
||||
}
|
||||
|
||||
ctx, cancelSimRun := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||
ctx, cancelSimRun := context.WithTimeout(context.Background(), 3*time.Minute)
|
||||
defer cancelSimRun()
|
||||
if _, err := sim.WaitTillHealthy(ctx); err != nil {
|
||||
return fmt.Errorf("wait for healthy kademlia: %v", err)
|
||||
|
|
|
@ -48,7 +48,7 @@ func TestSnapshotCreate(t *testing.T) {
|
|||
},
|
||||
{
|
||||
name: "more nodes",
|
||||
nodes: defaultNodes + 5,
|
||||
nodes: defaultNodes + 4,
|
||||
},
|
||||
{
|
||||
name: "services",
|
||||
|
@ -81,7 +81,7 @@ func TestSnapshotCreate(t *testing.T) {
|
|||
}
|
||||
testCmd := runSnapshot(t, append(args, file.Name())...)
|
||||
|
||||
testCmd.ExpectExit()
|
||||
testCmd.WaitExit()
|
||||
if code := testCmd.ExitStatus(); code != 0 {
|
||||
t.Fatalf("command exit code %v, expected 0", code)
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@ import (
|
|||
var gitCommit string // Git SHA1 commit hash of the release (set via linker flags)
|
||||
|
||||
// default value for "create" command --nodes flag
|
||||
const defaultNodes = 10
|
||||
const defaultNodes = 8
|
||||
|
||||
func main() {
|
||||
err := newApp().Run(os.Args)
|
||||
|
|
|
@ -58,7 +58,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
|
|||
for k := range ill {
|
||||
delete(ill, k)
|
||||
}
|
||||
log.Debug("kademlia health check", "addr count", len(addrs))
|
||||
log.Debug("kademlia health check", "addr count", len(addrs), "kad len", len(kademlias))
|
||||
for id, k := range kademlias {
|
||||
//PeerPot for this node
|
||||
addr := common.Bytes2Hex(k.BaseAddr())
|
||||
|
@ -70,7 +70,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
|
|||
log.Debug("kademlia", "connectNN", h.ConnectNN, "knowNN", h.KnowNN)
|
||||
log.Debug("kademlia", "health", h.ConnectNN && h.KnowNN, "addr", hex.EncodeToString(k.BaseAddr()), "node", id)
|
||||
log.Debug("kademlia", "ill condition", !h.ConnectNN, "addr", hex.EncodeToString(k.BaseAddr()), "node", id)
|
||||
if !h.ConnectNN {
|
||||
if !h.Healthy() {
|
||||
ill[id] = k
|
||||
}
|
||||
}
|
||||
|
@ -85,6 +85,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
|
|||
// in simulation bucket.
|
||||
func (s *Simulation) kademlias() (ks map[enode.ID]*network.Kademlia) {
|
||||
items := s.UpNodesItems(BucketKeyKademlia)
|
||||
log.Debug("kademlia len items", "len", len(items))
|
||||
ks = make(map[enode.ID]*network.Kademlia, len(items))
|
||||
for id, v := range items {
|
||||
k, ok := v.(*network.Kademlia)
|
||||
|
|
|
@ -22,16 +22,115 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ethereum/go-ethereum/common"
|
||||
"github.com/ethereum/go-ethereum/log"
|
||||
"github.com/ethereum/go-ethereum/node"
|
||||
"github.com/ethereum/go-ethereum/p2p/simulations/adapters"
|
||||
"github.com/ethereum/go-ethereum/swarm/network"
|
||||
)
|
||||
|
||||
/*
|
||||
TestWaitTillHealthy tests that we indeed get a healthy network after we wait for it.
|
||||
For this to be tested, a somewhat circular setup is needed:
|
||||
* First we create an initial simulation
|
||||
* Run it as nodes connected in a ring
|
||||
* Wait until the network is healthy
|
||||
* Then we create a snapshot
|
||||
* With this snapshot we create a new simulation
|
||||
* This simulation is expected to have a healthy configuration, as it uses the snapshot
|
||||
* Thus we just iterate all nodes and check that their kademlias are healthy
|
||||
* If all kademlias are healthy, the test succeeds; otherwise it fails
|
||||
*/
|
||||
func TestWaitTillHealthy(t *testing.T) {
|
||||
sim := New(map[string]ServiceFunc{
|
||||
|
||||
testNodesNum := 10
|
||||
|
||||
// create the first simulation
|
||||
sim := New(createSimServiceMap(true))
|
||||
|
||||
// connect and...
|
||||
nodeIDs, err := sim.AddNodesAndConnectRing(testNodesNum)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// array of all overlay addresses
|
||||
var addrs [][]byte
|
||||
// iterate once to be able to build the peer map
|
||||
for _, node := range nodeIDs {
|
||||
//get the kademlia overlay address from this ID
|
||||
a := node.Bytes()
|
||||
//append it to the array of all overlay addresses
|
||||
addrs = append(addrs, a)
|
||||
}
|
||||
// build a PeerPot only once
|
||||
pp := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// ...wait until healthy
|
||||
ill, err := sim.WaitTillHealthy(ctx)
|
||||
if err != nil {
|
||||
for id, kad := range ill {
|
||||
t.Log("Node", id)
|
||||
t.Log(kad.String())
|
||||
}
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// now create a snapshot of this network
|
||||
snap, err := sim.Net.Snapshot()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// close the initial simulation
|
||||
sim.Close()
|
||||
// create a control simulation
|
||||
controlSim := New(createSimServiceMap(false))
|
||||
defer controlSim.Close()
|
||||
|
||||
// load the snapshot into this control simulation
|
||||
err = controlSim.Net.Load(snap)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
_, err = controlSim.WaitTillHealthy(ctx)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for _, node := range nodeIDs {
|
||||
// ...get its kademlia
|
||||
item, ok := controlSim.NodeItem(node, BucketKeyKademlia)
|
||||
if !ok {
|
||||
t.Fatal("No kademlia bucket item")
|
||||
}
|
||||
kad := item.(*network.Kademlia)
|
||||
// get its base address
|
||||
kid := common.Bytes2Hex(kad.BaseAddr())
|
||||
|
||||
//get the health info
|
||||
info := kad.GetHealthInfo(pp[kid])
|
||||
log.Trace("Health info", "info", info)
|
||||
// check that it is healthy
|
||||
healthy := info.Healthy()
|
||||
if !healthy {
|
||||
t.Fatalf("Expected node %v of control simulation to be healthy, but it is not, unhealthy kademlias: %v", node, kad.String())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// createSimServiceMap returns the services map
|
||||
// this function will create the sim services with or without discovery enabled
|
||||
// based on the flag passed
|
||||
func createSimServiceMap(discovery bool) map[string]ServiceFunc {
|
||||
return map[string]ServiceFunc{
|
||||
"bzz": func(ctx *adapters.ServiceContext, b *sync.Map) (node.Service, func(), error) {
|
||||
addr := network.NewAddr(ctx.Config.Node())
|
||||
hp := network.NewHiveParams()
|
||||
hp.Discovery = discovery
|
||||
config := &network.BzzConfig{
|
||||
OverlayAddr: addr.Over(),
|
||||
UnderlayAddr: addr.Under(),
|
||||
|
@ -43,24 +142,5 @@ func TestWaitTillHealthy(t *testing.T) {
|
|||
b.Store(BucketKeyKademlia, kad)
|
||||
return network.NewBzz(config, kad, nil, nil, nil), nil, nil
|
||||
},
|
||||
})
|
||||
defer sim.Close()
|
||||
|
||||
_, err := sim.AddNodesAndConnectRing(10)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
|
||||
defer cancel()
|
||||
ill, err := sim.WaitTillHealthy(ctx)
|
||||
if err != nil {
|
||||
for id, kad := range ill {
|
||||
t.Log("Node", id)
|
||||
t.Log(kad.String())
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue