swarm/network: Saturation check for healthy networks (#19071)
* swarm/network: new saturation for implementation * swarm/network: re-added saturation func in Kademlia as it is used elsewhere * swarm/network: saturation with higher MinBinSize * swarm/network: PeersPerBin with depth check * swarm/network: edited tests to pass new saturated check * swarm/network: minor fix saturated check * swarm/network/simulations/discovery: fixed renamed RPC call * swarm/network: renamed to isSaturated and returns bool * swarm/network: early depth check
This commit is contained in:
parent
fab8c5a1cd
commit
2af24724dd
|
@ -628,7 +628,8 @@ func (k *Kademlia) string() string {
|
|||
// used for testing only
|
||||
// TODO move to separate testing tools file
|
||||
type PeerPot struct {
|
||||
NNSet [][]byte
|
||||
NNSet [][]byte
|
||||
PeersPerBin []int
|
||||
}
|
||||
|
||||
// NewPeerPotMap creates a map of pot record of *BzzAddr with keys
|
||||
|
@ -654,6 +655,7 @@ func NewPeerPotMap(neighbourhoodSize int, addrs [][]byte) map[string]*PeerPot {
|
|||
|
||||
// all nn-peers
|
||||
var nns [][]byte
|
||||
peersPerBin := make([]int, depth)
|
||||
|
||||
// iterate through the neighbours, going from the deepest to the shallowest
|
||||
np.EachNeighbour(a, Pof, func(val pot.Val, po int) bool {
|
||||
|
@ -667,14 +669,18 @@ func NewPeerPotMap(neighbourhoodSize int, addrs [][]byte) map[string]*PeerPot {
|
|||
// a neighbor is any peer in or deeper than the depth
|
||||
if po >= depth {
|
||||
nns = append(nns, addr)
|
||||
return true
|
||||
} else {
|
||||
// for peers < depth, we just count the number in each bin
|
||||
// the bin is the index of the slice
|
||||
peersPerBin[po]++
|
||||
}
|
||||
return false
|
||||
return true
|
||||
})
|
||||
|
||||
log.Trace(fmt.Sprintf("%x PeerPotMap NNS: %s", addrs[i][:4], LogAddrs(nns)))
|
||||
log.Trace(fmt.Sprintf("%x PeerPotMap NNS: %s, peersPerBin", addrs[i][:4], LogAddrs(nns)))
|
||||
ppmap[common.Bytes2Hex(a)] = &PeerPot{
|
||||
NNSet: nns,
|
||||
NNSet: nns,
|
||||
PeersPerBin: peersPerBin,
|
||||
}
|
||||
}
|
||||
return ppmap
|
||||
|
@ -698,6 +704,39 @@ func (k *Kademlia) saturation() int {
|
|||
return prev
|
||||
}
|
||||
|
||||
// isSaturated returns true if the kademlia is considered saturated, or false if not.
|
||||
// It determines this by collecting a slice of ints called unsaturatedBins; each item in that slice corresponds
|
||||
// to the bin which is unsaturated (number of connections < k.MinBinSize).
|
||||
// The bin is considered unsaturated only if the PeerPot lists more peers for that bin (peersPerBin) than are connected
|
||||
// (if there is no peer for a given bin, then no connection could ever be established;
|
||||
// in a God's view this is relevant as no more peers will ever appear on that bin)
|
||||
func (k *Kademlia) isSaturated(peersPerBin []int, depth int) bool {
|
||||
// depth could be calculated from k but as this is called from `GetHealthInfo()`,
|
||||
// the depth has already been calculated so we can require it as a parameter
|
||||
|
||||
// early check for depth
|
||||
if depth != len(peersPerBin) {
|
||||
return false
|
||||
}
|
||||
unsaturatedBins := make([]int, 0)
|
||||
k.conns.EachBin(k.base, Pof, 0, func(po, size int, f func(func(val pot.Val) bool) bool) bool {
|
||||
|
||||
if po >= depth {
|
||||
return false
|
||||
}
|
||||
log.Trace("peers per bin", "peersPerBin[po]", peersPerBin[po], "po", po)
|
||||
// if there are actually peers in the PeerPot who can fulfill k.MinBinSize
|
||||
if size < k.MinBinSize && size < peersPerBin[po] {
|
||||
log.Trace("connections for po", "po", po, "size", size)
|
||||
unsaturatedBins = append(unsaturatedBins, po)
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
log.Trace("list of unsaturated bins", "unsaturatedBins", unsaturatedBins)
|
||||
return len(unsaturatedBins) == 0
|
||||
}
|
||||
|
||||
// knowNeighbours tests if all neighbours in the peerpot
|
||||
// are found among the peers known to the kademlia
|
||||
// It is used in Healthy function for testing only
|
||||
|
@ -780,11 +819,13 @@ type Health struct {
|
|||
ConnectNN bool // whether node is connected to all its neighbours
|
||||
CountConnectNN int // amount of neighbours connected to
|
||||
MissingConnectNN [][]byte // which neighbours we should have been connected to but we're not
|
||||
Saturated bool // whether we are connected to all the peers we would have liked to
|
||||
Hive string
|
||||
// Saturated: true if, in every bin < depth, the number of connections is >= MinBinSize or,
|
||||
// if the number of connections is < MinBinSize, at least the number of available peers in that bin
|
||||
Saturated bool
|
||||
Hive string
|
||||
}
|
||||
|
||||
// Healthy reports the health state of the kademlia connectivity
|
||||
// GetHealthInfo reports the health state of the kademlia connectivity
|
||||
//
|
||||
// The PeerPot argument provides an all-knowing view of the network
|
||||
// The resulting Health object is a result of comparisons between
|
||||
|
@ -792,7 +833,7 @@ type Health struct {
|
|||
// what SHOULD it have been when we take all we know about the network into consideration.
|
||||
//
|
||||
// used for testing only
|
||||
func (k *Kademlia) Healthy(pp *PeerPot) *Health {
|
||||
func (k *Kademlia) GetHealthInfo(pp *PeerPot) *Health {
|
||||
k.lock.RLock()
|
||||
defer k.lock.RUnlock()
|
||||
if len(pp.NNSet) < k.NeighbourhoodSize {
|
||||
|
@ -801,7 +842,10 @@ func (k *Kademlia) Healthy(pp *PeerPot) *Health {
|
|||
gotnn, countgotnn, culpritsgotnn := k.connectedNeighbours(pp.NNSet)
|
||||
knownn, countknownn, culpritsknownn := k.knowNeighbours(pp.NNSet)
|
||||
depth := depthForPot(k.conns, k.NeighbourhoodSize, k.base)
|
||||
saturated := k.saturation() < depth
|
||||
|
||||
// check saturation
|
||||
saturated := k.isSaturated(pp.PeersPerBin, depth)
|
||||
|
||||
log.Trace(fmt.Sprintf("%08x: healthy: knowNNs: %v, gotNNs: %v, saturated: %v\n", k.base, knownn, gotnn, saturated))
|
||||
return &Health{
|
||||
KnowNN: knownn,
|
||||
|
@ -814,3 +858,13 @@ func (k *Kademlia) Healthy(pp *PeerPot) *Health {
|
|||
Hive: k.string(),
|
||||
}
|
||||
}
|
||||
|
||||
// Healthy returns the strict interpretation of health for a given `Health` struct
|
||||
// definition of strict health: all conditions must be true:
|
||||
// - we at least know one peer
|
||||
// - we know all neighbors
|
||||
// - we are connected to all known neighbors
|
||||
// - it is saturated
|
||||
func (h *Health) Healthy() bool {
|
||||
return h.KnowNN && h.ConnectNN && h.CountKnowNN > 0 && h.Saturated
|
||||
}
|
||||
|
|
|
@ -168,6 +168,46 @@ func TestNeighbourhoodDepth(t *testing.T) {
|
|||
testNum++
|
||||
}
|
||||
|
||||
// TestHighMinBinSize tests that the saturation function also works
|
||||
// if MinBinSize is > 2, the connection count is < k.MinBinSize
|
||||
// and there are more peers available than connected
|
||||
func TestHighMinBinSize(t *testing.T) {
|
||||
// a function to test for different MinBinSize values
|
||||
testKad := func(minBinSize int) {
|
||||
// create a test kademlia
|
||||
tk := newTestKademlia(t, "11111111")
|
||||
// set its MinBinSize to desired value
|
||||
tk.KadParams.MinBinSize = minBinSize
|
||||
|
||||
// add a couple of peers (so we have NN and depth)
|
||||
tk.On("00000000") // bin 0
|
||||
tk.On("11100000") // bin 3
|
||||
tk.On("11110000") // bin 4
|
||||
|
||||
first := "10000000" // add a first peer at bin 1
|
||||
tk.Register(first) // register it
|
||||
// we now have one registered peer at bin 1;
|
||||
// iterate and connect one peer at each iteration;
|
||||
// should be unhealthy until at minBinSize - 1
|
||||
// we connect the unconnected but registered peer
|
||||
for i := 1; i < minBinSize; i++ {
|
||||
peer := fmt.Sprintf("1000%b", 8|i)
|
||||
tk.On(peer)
|
||||
if i == minBinSize-1 {
|
||||
tk.On(first)
|
||||
tk.checkHealth(true)
|
||||
return
|
||||
}
|
||||
tk.checkHealth(false)
|
||||
}
|
||||
}
|
||||
// test MinBinSizes of 3 to 5
|
||||
testMinBinSizes := []int{3, 4, 5}
|
||||
for _, k := range testMinBinSizes {
|
||||
testKad(k)
|
||||
}
|
||||
}
|
||||
|
||||
// TestHealthStrict tests the simplest definition of health
|
||||
// Which means whether we are connected to all neighbors we know of
|
||||
func TestHealthStrict(t *testing.T) {
|
||||
|
@ -176,60 +216,116 @@ func TestHealthStrict(t *testing.T) {
|
|||
// no peers
|
||||
// unhealthy (and lonely)
|
||||
tk := newTestKademlia(t, "11111111")
|
||||
tk.checkHealth(false, false)
|
||||
tk.checkHealth(false)
|
||||
|
||||
// know one peer but not connected
|
||||
// unhealthy
|
||||
tk.Register("11100000")
|
||||
tk.checkHealth(false, false)
|
||||
tk.checkHealth(false)
|
||||
|
||||
// know one peer and connected
|
||||
// healthy
|
||||
// unhealthy: not saturated
|
||||
tk.On("11100000")
|
||||
tk.checkHealth(true, false)
|
||||
tk.checkHealth(true)
|
||||
|
||||
// know two peers, only one connected
|
||||
// unhealthy
|
||||
tk.Register("11111100")
|
||||
tk.checkHealth(false, false)
|
||||
tk.checkHealth(false)
|
||||
|
||||
// know two peers and connected to both
|
||||
// healthy
|
||||
tk.On("11111100")
|
||||
tk.checkHealth(true, false)
|
||||
tk.checkHealth(true)
|
||||
|
||||
// know three peers, connected to the two deepest
|
||||
// unhealthy
|
||||
tk.Register("00000000")
|
||||
tk.checkHealth(true, false)
|
||||
tk.checkHealth(false)
|
||||
|
||||
// know three peers, connected to all three
|
||||
// healthy
|
||||
tk.On("00000000")
|
||||
tk.checkHealth(true, false)
|
||||
tk.checkHealth(true)
|
||||
|
||||
// add fourth peer deeper than current depth
|
||||
// unhealthy
|
||||
tk.Register("11110000")
|
||||
tk.checkHealth(false, false)
|
||||
tk.checkHealth(false)
|
||||
|
||||
// connected to three deepest peers
|
||||
// healthy
|
||||
tk.On("11110000")
|
||||
tk.checkHealth(true, false)
|
||||
tk.checkHealth(true)
|
||||
|
||||
// add additional peer in same bin as deepest peer
|
||||
// unhealthy
|
||||
tk.Register("11111101")
|
||||
tk.checkHealth(false, false)
|
||||
tk.checkHealth(false)
|
||||
|
||||
// four deepest of five peers connected
|
||||
// healthy
|
||||
tk.On("11111101")
|
||||
tk.checkHealth(true, false)
|
||||
tk.checkHealth(true)
|
||||
|
||||
// add additional peer in bin 0
|
||||
// unhealthy: unsaturated bin 0, 2 known but 1 connected
|
||||
tk.Register("00000001")
|
||||
tk.checkHealth(false)
|
||||
|
||||
// Connect second in bin 0
|
||||
// healthy
|
||||
tk.On("00000001")
|
||||
tk.checkHealth(true)
|
||||
|
||||
// add peer in bin 1
|
||||
// unhealthy, as it is known but not connected
|
||||
tk.Register("10000000")
|
||||
tk.checkHealth(false)
|
||||
|
||||
// connect peer in bin 1
|
||||
// depth change, is now 1
|
||||
// healthy, 1 peer in bin 1 known and connected
|
||||
tk.On("10000000")
|
||||
tk.checkHealth(true)
|
||||
|
||||
// add second peer in bin 1
|
||||
// unhealthy, as it is known but not connected
|
||||
tk.Register("10000001")
|
||||
tk.checkHealth(false)
|
||||
|
||||
// connect second peer in bin 1
|
||||
// healthy,
|
||||
tk.On("10000001")
|
||||
tk.checkHealth(true)
|
||||
|
||||
// connect third peer in bin 1
|
||||
// healthy,
|
||||
tk.On("10000011")
|
||||
tk.checkHealth(true)
|
||||
|
||||
// add peer in bin 2
|
||||
// unhealthy, no depth change
|
||||
tk.Register("11000000")
|
||||
tk.checkHealth(false)
|
||||
|
||||
// connect peer in bin 2
|
||||
// depth change - as we already have peers in bin 3 and 4,
|
||||
// we have contiguous bins, no bin < po 5 is empty -> depth 5
|
||||
// healthy, every bin < depth has the max available peers,
|
||||
// even if they are < MinBinSize
|
||||
tk.On("11000000")
|
||||
tk.checkHealth(true)
|
||||
|
||||
// add peer in bin 2
|
||||
// unhealthy, peer bin is below depth 5 but
|
||||
// has more available peers (2) than connected ones (1)
|
||||
// --> unsaturated
|
||||
tk.Register("11000011")
|
||||
tk.checkHealth(false)
|
||||
}
|
||||
|
||||
func (tk *testKademlia) checkHealth(expectHealthy bool, expectSaturation bool) {
|
||||
func (tk *testKademlia) checkHealth(expectHealthy bool) {
|
||||
tk.t.Helper()
|
||||
kid := common.Bytes2Hex(tk.BaseAddr())
|
||||
addrs := [][]byte{tk.BaseAddr()}
|
||||
|
@ -239,13 +335,13 @@ func (tk *testKademlia) checkHealth(expectHealthy bool, expectSaturation bool) {
|
|||
})
|
||||
|
||||
pp := NewPeerPotMap(tk.NeighbourhoodSize, addrs)
|
||||
healthParams := tk.Healthy(pp[kid])
|
||||
healthParams := tk.GetHealthInfo(pp[kid])
|
||||
|
||||
// definition of health, all conditions must be true:
|
||||
// - we at least know one peer
|
||||
// - we know all neighbors
|
||||
// - we are connected to all known neighbors
|
||||
health := healthParams.KnowNN && healthParams.ConnectNN && healthParams.CountKnowNN > 0
|
||||
health := healthParams.Healthy()
|
||||
if expectHealthy != health {
|
||||
tk.t.Fatalf("expected kademlia health %v, is %v\n%v", expectHealthy, health, tk.String())
|
||||
}
|
||||
|
|
|
@ -64,7 +64,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
|
|||
addr := common.Bytes2Hex(k.BaseAddr())
|
||||
pp := ppmap[addr]
|
||||
//call GetHealthInfo
|
||||
h := k.Healthy(pp)
|
||||
h := k.GetHealthInfo(pp)
|
||||
//print info
|
||||
log.Debug(k.String())
|
||||
log.Debug("kademlia", "connectNN", h.ConnectNN, "knowNN", h.KnowNN)
|
||||
|
|
|
@ -267,7 +267,7 @@ func discoverySimulation(nodes, conns int, adapter adapters.NodeAdapter) (*simul
|
|||
}
|
||||
|
||||
healthy := &network.Health{}
|
||||
if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
|
||||
if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
|
||||
return false, fmt.Errorf("error getting node health: %s", err)
|
||||
}
|
||||
log.Debug(fmt.Sprintf("node %4s healthy: connected nearest neighbours: %v, know nearest neighbours: %v,\n\n%v", id, healthy.ConnectNN, healthy.KnowNN, healthy.Hive))
|
||||
|
@ -352,7 +352,7 @@ func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapt
|
|||
healthy := &network.Health{}
|
||||
addr := id.String()
|
||||
ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)
|
||||
if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
|
||||
if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
|
||||
return fmt.Errorf("error getting node health: %s", err)
|
||||
}
|
||||
|
||||
|
@ -422,7 +422,7 @@ func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapt
|
|||
healthy := &network.Health{}
|
||||
ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)
|
||||
|
||||
if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
|
||||
if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
|
||||
return false, fmt.Errorf("error getting node health: %s", err)
|
||||
}
|
||||
log.Info(fmt.Sprintf("node %4s healthy: got nearest neighbours: %v, know nearest neighbours: %v", id, healthy.ConnectNN, healthy.KnowNN))
|
||||
|
|
Loading…
Reference in New Issue