2019-04-30 06:13:22 -05:00
|
|
|
// Copyright 2019 The go-ethereum Authors
|
|
|
|
// This file is part of the go-ethereum library.
|
|
|
|
//
|
|
|
|
// The go-ethereum library is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU Lesser General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// The go-ethereum library is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Lesser General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
package discover
|
|
|
|
|
|
|
|
import (
|
|
|
|
"crypto/ecdsa"
|
p2p/discover: improved node revalidation (#29572)
Node discovery periodically revalidates the nodes in its table by sending PING, checking
if they are still alive. I recently noticed some issues with the implementation of this
process, which can cause strange results such as nodes dropping unexpectedly, certain
nodes not getting revalidated often enough, and bad results being returned to incoming
FINDNODE queries.
In this change, the revalidation process is improved with the following logic:
- We maintain two 'revalidation lists' containing the table nodes, named 'fast' and 'slow'.
- The process chooses random nodes from each list on a randomized interval, the interval being
faster for the 'fast' list, and performs revalidation for the chosen node.
- Whenever a node is newly inserted into the table, it goes into the 'fast' list.
Once validation passes, it transfers to the 'slow' list. If a request fails, or the
node changes endpoint, it transfers back into 'fast'.
- livenessChecks is incremented by one for successful checks. Unlike the old implementation,
we will not drop the node on the first failing check. We instead quickly decay the
livenessChecks to give it another chance.
- Order of nodes in bucket doesn't matter anymore.
I am also adding a debug API endpoint to dump the node table content.
Co-authored-by: Martin HS <martin@swende.se>
2024-05-23 07:26:09 -05:00
|
|
|
crand "crypto/rand"
|
|
|
|
"encoding/binary"
|
|
|
|
"math/rand"
|
2019-04-30 06:13:22 -05:00
|
|
|
"net"
|
2024-05-29 08:02:26 -05:00
|
|
|
"net/netip"
|
p2p/discover: improved node revalidation (#29572)
Node discovery periodically revalidates the nodes in its table by sending PING, checking
if they are still alive. I recently noticed some issues with the implementation of this
process, which can cause strange results such as nodes dropping unexpectedly, certain
nodes not getting revalidated often enough, and bad results being returned to incoming
FINDNODE queries.
In this change, the revalidation process is improved with the following logic:
- We maintain two 'revalidation lists' containing the table nodes, named 'fast' and 'slow'.
- The process chooses random nodes from each list on a randomized interval, the interval being
faster for the 'fast' list, and performs revalidation for the chosen node.
- Whenever a node is newly inserted into the table, it goes into the 'fast' list.
Once validation passes, it transfers to the 'slow' list. If a request fails, or the
node changes endpoint, it transfers back into 'fast'.
- livenessChecks is incremented by one for successful checks. Unlike the old implementation,
we will not drop the node on the first failing check. We instead quickly decay the
livenessChecks to give it another chance.
- Order of nodes in bucket doesn't matter anymore.
I am also adding a debug API endpoint to dump the node table content.
Co-authored-by: Martin HS <martin@swende.se>
2024-05-23 07:26:09 -05:00
|
|
|
"sync"
|
2023-05-31 06:37:10 -05:00
|
|
|
"time"
|
2019-04-30 06:13:22 -05:00
|
|
|
|
2020-04-08 02:57:23 -05:00
|
|
|
"github.com/ethereum/go-ethereum/common/mclock"
|
2019-04-30 06:13:22 -05:00
|
|
|
"github.com/ethereum/go-ethereum/log"
|
|
|
|
"github.com/ethereum/go-ethereum/p2p/enode"
|
2020-04-08 02:57:23 -05:00
|
|
|
"github.com/ethereum/go-ethereum/p2p/enr"
|
2019-04-30 06:13:22 -05:00
|
|
|
"github.com/ethereum/go-ethereum/p2p/netutil"
|
|
|
|
)
|
|
|
|
|
2019-10-29 10:08:57 -05:00
|
|
|
// UDPConn describes the packet socket that node discovery operates on.
// Its method set is a subset of the methods of *net.UDPConn, so a
// *net.UDPConn can be used wherever a UDPConn is required.
type UDPConn interface {
	// ReadFromUDPAddrPort reads incoming data into b and reports the number
	// of bytes read, the address associated with the data, and any error.
	ReadFromUDPAddrPort(b []byte) (n int, addr netip.AddrPort, err error)

	// WriteToUDPAddrPort sends b to addr and reports the number of bytes
	// written and any error.
	WriteToUDPAddrPort(b []byte, addr netip.AddrPort) (n int, err error)

	// Close shuts the connection down.
	Close() error

	// LocalAddr reports the local address of the connection.
	LocalAddr() net.Addr
}
|
|
|
|
|
2019-10-29 10:08:57 -05:00
|
|
|
// Config holds settings for the discovery listener.
|
2019-04-30 06:13:22 -05:00
|
|
|
type Config struct {
|
|
|
|
// These settings are required and configure the UDP listener:
|
|
|
|
PrivateKey *ecdsa.PrivateKey
|
|
|
|
|
2023-05-31 06:37:10 -05:00
|
|
|
// All remaining settings are optional.
|
|
|
|
|
|
|
|
// Packet handling configuration:
|
2022-11-30 15:03:34 -06:00
|
|
|
NetRestrict *netutil.Netlist // list of allowed IP networks
|
|
|
|
Unhandled chan<- ReadPacket // unhandled packets are sent on this channel
|
|
|
|
|
2023-05-31 06:37:10 -05:00
|
|
|
// Node table configuration:
|
2024-09-30 03:56:14 -05:00
|
|
|
Bootnodes []*enode.Node // list of bootstrap nodes
|
|
|
|
PingInterval time.Duration // speed of node liveness check
|
|
|
|
RefreshInterval time.Duration // used in bucket refresh
|
|
|
|
NoFindnodeLivenessCheck bool // turns off validation of table nodes in FINDNODE handler
|
2022-11-30 15:03:34 -06:00
|
|
|
|
2023-05-31 06:37:10 -05:00
|
|
|
// The options below are useful in very specific cases, like in unit tests.
|
|
|
|
V5ProtocolID *[6]byte
|
|
|
|
Log log.Logger // if set, log messages go here
|
2020-04-08 02:57:23 -05:00
|
|
|
ValidSchemes enr.IdentityScheme // allowed identity schemes
|
|
|
|
Clock mclock.Clock
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cfg Config) withDefaults() Config {
|
2023-05-31 06:37:10 -05:00
|
|
|
// Node table configuration:
|
|
|
|
if cfg.PingInterval == 0 {
|
p2p/discover: improved node revalidation (#29572)
Node discovery periodically revalidates the nodes in its table by sending PING, checking
if they are still alive. I recently noticed some issues with the implementation of this
process, which can cause strange results such as nodes dropping unexpectedly, certain
nodes not getting revalidated often enough, and bad results being returned to incoming
FINDNODE queries.
In this change, the revalidation process is improved with the following logic:
- We maintain two 'revalidation lists' containing the table nodes, named 'fast' and 'slow'.
- The process chooses random nodes from each list on a randomized interval, the interval being
faster for the 'fast' list, and performs revalidation for the chosen node.
- Whenever a node is newly inserted into the table, it goes into the 'fast' list.
Once validation passes, it transfers to the 'slow' list. If a request fails, or the
node changes endpoint, it transfers back into 'fast'.
- livenessChecks is incremented by one for successful checks. Unlike the old implementation,
we will not drop the node on the first failing check. We instead quickly decay the
livenessChecks give it another chance.
- Order of nodes in bucket doesn't matter anymore.
I am also adding a debug API endpoint to dump the node table content.
Co-authored-by: Martin HS <martin@swende.se>
2024-05-23 07:26:09 -05:00
|
|
|
cfg.PingInterval = 3 * time.Second
|
2023-05-31 06:37:10 -05:00
|
|
|
}
|
|
|
|
if cfg.RefreshInterval == 0 {
|
|
|
|
cfg.RefreshInterval = 30 * time.Minute
|
|
|
|
}
|
|
|
|
|
|
|
|
// Debug/test settings:
|
2020-04-08 02:57:23 -05:00
|
|
|
if cfg.Log == nil {
|
|
|
|
cfg.Log = log.Root()
|
|
|
|
}
|
|
|
|
if cfg.ValidSchemes == nil {
|
|
|
|
cfg.ValidSchemes = enode.ValidSchemes
|
|
|
|
}
|
|
|
|
if cfg.Clock == nil {
|
|
|
|
cfg.Clock = mclock.System{}
|
|
|
|
}
|
|
|
|
return cfg
|
2019-04-30 06:13:22 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
// ListenUDP starts listening for discovery packets on the given UDP socket.
|
|
|
|
func ListenUDP(c UDPConn, ln *enode.LocalNode, cfg Config) (*UDPv4, error) {
|
|
|
|
return ListenV4(c, ln, cfg)
|
|
|
|
}
|
|
|
|
|
|
|
|
// ReadPacket describes a packet that couldn't be handled. When an unhandled
// channel is configured, such packets are sent on it.
type ReadPacket struct {
	// Data holds the bytes of the packet.
	Data []byte
	// Addr is the UDP address (IP and port) associated with the packet.
	// NOTE(review): presumably the sender's address; confirm at the call site.
	Addr netip.AddrPort
}
|
p2p/discover: improved node revalidation (#29572)
Node discovery periodically revalidates the nodes in its table by sending PING, checking
if they are still alive. I recently noticed some issues with the implementation of this
process, which can cause strange results such as nodes dropping unexpectedly, certain
nodes not getting revalidated often enough, and bad results being returned to incoming
FINDNODE queries.
In this change, the revalidation process is improved with the following logic:
- We maintain two 'revalidation lists' containing the table nodes, named 'fast' and 'slow'.
- The process chooses random nodes from each list on a randomized interval, the interval being
faster for the 'fast' list, and performs revalidation for the chosen node.
- Whenever a node is newly inserted into the table, it goes into the 'fast' list.
Once validation passes, it transfers to the 'slow' list. If a request fails, or the
node changes endpoint, it transfers back into 'fast'.
- livenessChecks is incremented by one for successful checks. Unlike the old implementation,
we will not drop the node on the first failing check. We instead quickly decay the
livenessChecks to give it another chance.
- Order of nodes in bucket doesn't matter anymore.
I am also adding a debug API endpoint to dump the node table content.
Co-authored-by: Martin HS <martin@swende.se>
2024-05-23 07:26:09 -05:00
|
|
|
|
|
|
|
// randomSource is the set of random-number operations required from a
// generator. The method signatures match the equally named methods of
// *rand.Rand from math/rand.
type randomSource interface {
	// Intn returns an int derived from the bound n.
	Intn(n int) int
	// Int63n returns an int64 derived from the bound n.
	Int63n(n int64) int64
	// Shuffle permutes n elements by calling swap with pairs of indexes.
	Shuffle(n int, swap func(i, j int))
}
|
|
|
|
|
|
|
|
// reseedingRandom is a mutex-protected pseudo-random number generator whose
// underlying math/rand generator can be swapped for a freshly seeded one at
// any time by calling seed. All methods are safe for concurrent use.
//
// The zero value is ready to use: if seed has not been called yet, the
// generator is seeded on first use.
type reseedingRandom struct {
	mu  sync.Mutex
	cur *rand.Rand // current generator, guarded by mu; nil until first seeded
}

// newGenerator returns a math/rand generator seeded with 8 bytes read from
// crypto/rand. It does not access any field of the receiver, so it may be
// called with or without mu held.
func (*reseedingRandom) newGenerator() *rand.Rand {
	var b [8]byte
	if _, err := crand.Read(b[:]); err != nil {
		// Without this fallback a failing entropy source would leave b zeroed
		// and every generator would start from the same fixed seed. Use the
		// wall clock so that the seed at least varies between calls.
		binary.BigEndian.PutUint64(b[:], uint64(time.Now().UnixNano()))
	}
	return rand.New(rand.NewSource(int64(binary.BigEndian.Uint64(b[:]))))
}

// generator returns the current generator, creating one first if the type is
// still in its zero state. The caller must hold r.mu.
func (r *reseedingRandom) generator() *rand.Rand {
	if r.cur == nil {
		r.cur = r.newGenerator()
	}
	return r.cur
}

// seed replaces the current generator with a newly seeded one.
func (r *reseedingRandom) seed() {
	// Build the replacement before taking the lock so that concurrent users
	// are only blocked for the pointer swap.
	fresh := r.newGenerator()

	r.mu.Lock()
	r.cur = fresh
	r.mu.Unlock()
}

// Intn returns the result of calling Intn(n) on the current generator.
func (r *reseedingRandom) Intn(n int) int {
	r.mu.Lock()
	defer r.mu.Unlock()
	return r.generator().Intn(n)
}

// Int63n returns the result of calling Int63n(n) on the current generator.
func (r *reseedingRandom) Int63n(n int64) int64 {
	r.mu.Lock()
	defer r.mu.Unlock()
	return r.generator().Int63n(n)
}

// Shuffle calls Shuffle(n, swap) on the current generator. The lock is held
// for the whole shuffle, so swap must not call back into r.
func (r *reseedingRandom) Shuffle(n int, swap func(i, j int)) {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.generator().Shuffle(n, swap)
}
|