p2p/dnsdisc: re-check tree root when leaf resolution fails (#20682)
This adds logic to re-resolve the root name of a tree when a couple of leaf requests have failed. The change avoids getting stuck in a failure state where leaf requests keep failing for up to half an hour (the root recheck interval) after the tree has been updated.
commit 57d4898e29 (parent c2117982b8)
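The hunks below first adjust the dnsdisc tests and then the clientTree sync logic. For context, here is a minimal sketch (not part of the commit) of how a caller typically consumes a node tree through the client API the tests exercise (NewClient and NewIterator); the printSomeNodes name, the placeholder enrtree:// URL, and the five-node cutoff are illustrative. The iterator returned here is what could previously stall on failing leaf requests until the next scheduled root recheck.

package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/p2p/dnsdisc"
)

// printSomeNodes reads a few nodes from the DNS node tree published at the
// given enrtree:// URL. The zero Config uses the package defaults (system
// DNS resolver, periodic root recheck).
func printSomeNodes(treeURL string, count int) error {
	client := dnsdisc.NewClient(dnsdisc.Config{})
	it, err := client.NewIterator(treeURL)
	if err != nil {
		return err
	}
	defer it.Close()

	// Next blocks while the client syncs tree entries in the background.
	// With this change, repeated leaf failures trigger a root re-resolution
	// instead of waiting out the full RecheckInterval.
	for i := 0; i < count && it.Next(); i++ {
		fmt.Println(it.Node())
	}
	return nil
}

func main() {
	// Placeholder URL; a real deployment publishes a signed tree under its own domain.
	if err := printSomeNodes("enrtree://<base32-public-key>@nodes.example.org", 5); err != nil {
		fmt.Println("iteration failed:", err)
	}
}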
@@ -19,6 +19,7 @@ package dnsdisc
 import (
 	"context"
 	"crypto/ecdsa"
+	"errors"
 	"math/rand"
 	"reflect"
 	"testing"
@@ -176,11 +177,62 @@ func TestIteratorNodeUpdates(t *testing.T) {
 		t.Fatal(err)
 	}

-	// sync the original tree.
+	// Sync the original tree.
 	resolver.add(tree1.ToTXT("n"))
 	checkIterator(t, it, nodes[:25])

-	// Update some nodes and ensure RandomNode returns the new nodes as well.
+	// Ensure RandomNode returns the new nodes after the tree is updated.
+	updateSomeNodes(nodesSeed1, nodes)
+	tree2, _ := makeTestTree("n", nodes, nil)
+	resolver.clear()
+	resolver.add(tree2.ToTXT("n"))
+	t.Log("tree updated")
+
+	clock.Run(c.cfg.RecheckInterval + 1*time.Second)
+	checkIterator(t, it, nodes)
+}
+
+// This test checks that the tree root is rechecked when a couple of leaf
+// requests have failed. The test is just like TestIteratorNodeUpdates, but
+// without advancing the clock by recheckInterval after the tree update.
+func TestIteratorRootRecheckOnFail(t *testing.T) {
+	var (
+		clock    = new(mclock.Simulated)
+		nodes    = testNodes(nodesSeed1, 30)
+		resolver = newMapResolver()
+		c        = NewClient(Config{
+			Resolver:        resolver,
+			Logger:          testlog.Logger(t, log.LvlTrace),
+			RecheckInterval: 20 * time.Minute,
+			RateLimit:       500,
+			// Disabling the cache is required for this test because the client doesn't
+			// notice leaf failures if all records are cached.
+			CacheLimit: 1,
+		})
+	)
+	c.clock = clock
+	tree1, url := makeTestTree("n", nodes[:25], nil)
+	it, err := c.NewIterator(url)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Sync the original tree.
+	resolver.add(tree1.ToTXT("n"))
+	checkIterator(t, it, nodes[:25])
+
+	// Ensure RandomNode returns the new nodes after the tree is updated.
+	updateSomeNodes(nodesSeed1, nodes)
+	tree2, _ := makeTestTree("n", nodes, nil)
+	resolver.clear()
+	resolver.add(tree2.ToTXT("n"))
+	t.Log("tree updated")
+
+	checkIterator(t, it, nodes)
+}
+
+// updateSomeNodes applies ENR updates to some of the given nodes.
+func updateSomeNodes(keySeed int64, nodes []*enode.Node) {
 	keys := testKeys(nodesSeed1, len(nodes))
 	for i, n := range nodes[:len(nodes)/2] {
 		r := n.Record()
@@ -190,11 +242,6 @@ func TestIteratorNodeUpdates(t *testing.T) {
 		n2, _ := enode.New(enode.ValidSchemes, r)
 		nodes[i] = n2
 	}
-	tree2, _ := makeTestTree("n", nodes, nil)
-	clock.Run(c.cfg.RecheckInterval + 1*time.Second)
-	resolver.clear()
-	resolver.add(tree2.ToTXT("n"))
-	checkIterator(t, it, nodes)
 }

 // This test verifies that randomIterator re-checks the root of the tree to catch
@@ -230,9 +277,10 @@ func TestIteratorLinkUpdates(t *testing.T) {
 	// Add link to tree3, remove link to tree2.
 	tree1, _ = makeTestTree("t1", nodes[:10], []string{url3})
 	resolver.add(tree1.ToTXT("t1"))
-	clock.Run(c.cfg.RecheckInterval + 1*time.Second)
 	t.Log("tree1 updated")

+	clock.Run(c.cfg.RecheckInterval + 1*time.Second)
+
 	var wantNodes []*enode.Node
 	wantNodes = append(wantNodes, tree1.Nodes()...)
 	wantNodes = append(wantNodes, tree3.Nodes()...)
@@ -345,5 +393,5 @@ func (mr mapResolver) LookupTXT(ctx context.Context, name string) ([]string, err
 	if record, ok := mr[name]; ok {
 		return []string{record}, nil
 	}
-	return nil, nil
+	return nil, errors.New("not found")
 }
@@ -25,15 +25,22 @@ import (
 	"github.com/ethereum/go-ethereum/p2p/enode"
 )

+const (
+	rootRecheckFailCount = 5 // update root if this many leaf requests fail
+)
+
 // clientTree is a full tree being synced.
 type clientTree struct {
 	c   *Client
 	loc *linkEntry // link to this tree

 	lastRootCheck mclock.AbsTime // last revalidation of root
-	root          *rootEntry
-	enrs          *subtreeSync
-	links         *subtreeSync
+	leafFailCount int
+	rootFailCount int
+
+	root  *rootEntry
+	enrs  *subtreeSync
+	links *subtreeSync

 	lc       *linkCache          // tracks all links between all trees
 	curLinks map[string]struct{} // links contained in this tree
@@ -46,7 +53,7 @@ func newClientTree(c *Client, lc *linkCache, loc *linkEntry) *clientTree {

 // syncAll retrieves all entries of the tree.
 func (ct *clientTree) syncAll(dest map[string]entry) error {
-	if err := ct.updateRoot(); err != nil {
+	if err := ct.updateRoot(context.Background()); err != nil {
 		return err
 	}
 	if err := ct.links.resolveAll(dest); err != nil {
@@ -60,12 +67,20 @@ func (ct *clientTree) syncAll(dest map[string]entry) error {

 // syncRandom retrieves a single entry of the tree. The Node return value
 // is non-nil if the entry was a node.
-func (ct *clientTree) syncRandom(ctx context.Context) (*enode.Node, error) {
+func (ct *clientTree) syncRandom(ctx context.Context) (n *enode.Node, err error) {
 	if ct.rootUpdateDue() {
-		if err := ct.updateRoot(); err != nil {
+		if err := ct.updateRoot(ctx); err != nil {
 			return nil, err
 		}
 	}
+
+	// Update fail counter for leaf request errors.
+	defer func() {
+		if err != nil {
+			ct.leafFailCount++
+		}
+	}()
+
 	// Link tree sync has priority, run it to completion before syncing ENRs.
 	if !ct.links.done() {
 		err := ct.syncNextLink(ctx)
@@ -138,15 +153,22 @@ func removeHash(h []string, index int) []string {
 }

 // updateRoot ensures that the given tree has an up-to-date root.
-func (ct *clientTree) updateRoot() error {
+func (ct *clientTree) updateRoot(ctx context.Context) error {
+	if !ct.slowdownRootUpdate(ctx) {
+		return ctx.Err()
+	}
+
 	ct.lastRootCheck = ct.c.clock.Now()
-	ctx, cancel := context.WithTimeout(context.Background(), ct.c.cfg.Timeout)
+	ctx, cancel := context.WithTimeout(ctx, ct.c.cfg.Timeout)
 	defer cancel()
 	root, err := ct.c.resolveRoot(ctx, ct.loc)
 	if err != nil {
+		ct.rootFailCount++
 		return err
 	}
 	ct.root = &root
+	ct.rootFailCount = 0
+	ct.leafFailCount = 0

 	// Invalidate subtrees if changed.
 	if ct.links == nil || root.lroot != ct.links.root {
@@ -161,7 +183,32 @@ func (ct *clientTree) updateRoot() error {

 // rootUpdateDue returns true when a root update is needed.
 func (ct *clientTree) rootUpdateDue() bool {
-	return ct.root == nil || time.Duration(ct.c.clock.Now()-ct.lastRootCheck) > ct.c.cfg.RecheckInterval
+	tooManyFailures := ct.leafFailCount > rootRecheckFailCount
+	scheduledCheck := ct.c.clock.Now().Sub(ct.lastRootCheck) > ct.c.cfg.RecheckInterval
+	return ct.root == nil || tooManyFailures || scheduledCheck
+}
+
+// slowdownRootUpdate applies a delay to root resolution if is tried
+// too frequently. This avoids busy polling when the client is offline.
+// Returns true if the timeout passed, false if sync was canceled.
+func (ct *clientTree) slowdownRootUpdate(ctx context.Context) bool {
+	var delay time.Duration
+	switch {
+	case ct.rootFailCount > 20:
+		delay = 10 * time.Second
+	case ct.rootFailCount > 5:
+		delay = 5 * time.Second
+	default:
+		return true
+	}
+	timeout := ct.c.clock.NewTimer(delay)
+	defer timeout.Stop()
+	select {
+	case <-timeout.C():
+		return true
+	case <-ctx.Done():
+		return false
+	}
 }

 // subtreeSync is the sync of an ENR or link subtree.