p2p/discover: watch find failures, evacuate on too many, rebond if failed
This commit is contained in:
parent
64174f196f
commit
6078aa08eb
|
@ -27,6 +27,7 @@ const (
|
||||||
nBuckets = hashBits + 1 // Number of buckets
|
nBuckets = hashBits + 1 // Number of buckets
|
||||||
|
|
||||||
maxBondingPingPongs = 16
|
maxBondingPingPongs = 16
|
||||||
|
maxFindnodeFailures = 5
|
||||||
)
|
)
|
||||||
|
|
||||||
type Table struct {
|
type Table struct {
|
||||||
|
@ -198,7 +199,19 @@ func (tab *Table) Lookup(targetID NodeID) []*Node {
|
||||||
asked[n.ID] = true
|
asked[n.ID] = true
|
||||||
pendingQueries++
|
pendingQueries++
|
||||||
go func() {
|
go func() {
|
||||||
r, _ := tab.net.findnode(n.ID, n.addr(), targetID)
|
// Find potential neighbors to bond with
|
||||||
|
r, err := tab.net.findnode(n.ID, n.addr(), targetID)
|
||||||
|
if err != nil {
|
||||||
|
// Bump the failure counter to detect and evacuate non-bonded entries
|
||||||
|
fails := tab.db.findFails(n.ID) + 1
|
||||||
|
tab.db.updateFindFails(n.ID, fails)
|
||||||
|
glog.V(logger.Detail).Infof("Bumping failures for %x: %d", n.ID[:8], fails)
|
||||||
|
|
||||||
|
if fails > maxFindnodeFailures {
|
||||||
|
glog.V(logger.Detail).Infof("Evacuating node %x: %d findnode failures", n.ID[:8], fails)
|
||||||
|
tab.del(n)
|
||||||
|
}
|
||||||
|
}
|
||||||
reply <- tab.bondall(r)
|
reply <- tab.bondall(r)
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
@ -305,8 +318,15 @@ func (tab *Table) bondall(nodes []*Node) (result []*Node) {
|
||||||
// If pinged is true, the remote node has just pinged us and one half
|
// If pinged is true, the remote node has just pinged us and one half
|
||||||
// of the process can be skipped.
|
// of the process can be skipped.
|
||||||
func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) (*Node, error) {
|
func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) (*Node, error) {
|
||||||
var n *Node
|
// Retrieve a previously known node and any recent findnode failures
|
||||||
if n = tab.db.node(id); n == nil {
|
node, fails := tab.db.node(id), 0
|
||||||
|
if node != nil {
|
||||||
|
fails = tab.db.findFails(id)
|
||||||
|
}
|
||||||
|
// If the node is unknown (non-bonded) or failed (remotely unknown), bond from scratch
|
||||||
|
if node == nil || fails > 0 {
|
||||||
|
glog.V(logger.Detail).Infof("Bonding %x: known=%v, fails=%v", id[:8], node != nil, fails)
|
||||||
|
|
||||||
tab.bondmu.Lock()
|
tab.bondmu.Lock()
|
||||||
w := tab.bonding[id]
|
w := tab.bonding[id]
|
||||||
if w != nil {
|
if w != nil {
|
||||||
|
@ -325,18 +345,22 @@ func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16
|
||||||
delete(tab.bonding, id)
|
delete(tab.bonding, id)
|
||||||
tab.bondmu.Unlock()
|
tab.bondmu.Unlock()
|
||||||
}
|
}
|
||||||
n = w.n
|
node = w.n
|
||||||
if w.err != nil {
|
if w.err != nil {
|
||||||
return nil, w.err
|
return nil, w.err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Bonding succeeded, add to the table and reset previous findnode failures
|
||||||
tab.mutex.Lock()
|
tab.mutex.Lock()
|
||||||
defer tab.mutex.Unlock()
|
defer tab.mutex.Unlock()
|
||||||
b := tab.buckets[logdist(tab.self.sha, n.sha)]
|
|
||||||
if !b.bump(n) {
|
b := tab.buckets[logdist(tab.self.sha, node.sha)]
|
||||||
tab.pingreplace(n, b)
|
if !b.bump(node) {
|
||||||
|
tab.pingreplace(node, b)
|
||||||
}
|
}
|
||||||
return n, nil
|
tab.db.updateFindFails(id, 0)
|
||||||
|
|
||||||
|
return node, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tab *Table) pingpong(w *bondproc, pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) {
|
func (tab *Table) pingpong(w *bondproc, pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) {
|
||||||
|
@ -414,6 +438,21 @@ outer:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// del removes an entry from the node table (used to evacuate failed/non-bonded
|
||||||
|
// discovery peers).
|
||||||
|
func (tab *Table) del(node *Node) {
|
||||||
|
tab.mutex.Lock()
|
||||||
|
defer tab.mutex.Unlock()
|
||||||
|
|
||||||
|
bucket := tab.buckets[logdist(tab.self.sha, node.sha)]
|
||||||
|
for i := range bucket.entries {
|
||||||
|
if bucket.entries[i].ID == node.ID {
|
||||||
|
bucket.entries = append(bucket.entries[:i], bucket.entries[i+1:]...)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (b *bucket) bump(n *Node) bool {
|
func (b *bucket) bump(n *Node) bool {
|
||||||
for i := range b.entries {
|
for i := range b.entries {
|
||||||
if b.entries[i].ID == n.ID {
|
if b.entries[i].ID == n.ID {
|
||||||
|
|
Loading…
Reference in New Issue