// Copyright 2021 The go-ethereum Authors // This file is part of the go-ethereum library. // // The go-ethereum library is free software: you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // The go-ethereum library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License // along with the go-ethereum library. If not, see . package tracker import ( "container/list" "fmt" "sync" "time" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/metrics" ) const ( // trackedGaugeName is the prefix of the per-packet request tracking. trackedGaugeName = "p2p/tracked" // lostMeterName is the prefix of the per-packet request expirations. lostMeterName = "p2p/lost" // staleMeterName is the prefix of the per-packet stale responses. staleMeterName = "p2p/stale" // waitHistName is the prefix of the per-packet (req only) waiting time histograms. waitHistName = "p2p/wait" // maxTrackedPackets is a huge number to act as a failsafe on the number of // pending requests the node will track. It should never be hit unless an // attacker figures out a way to spin requests. maxTrackedPackets = 100000 ) // request tracks sent network requests which have not yet received a response. type request struct { peer string version uint // Protocol version reqCode uint64 // Protocol message code of the request resCode uint64 // Protocol message code of the expected response time time.Time // Timestamp when the request was made expire *list.Element // Expiration marker to untrack it } // Tracker is a pending network request tracker to measure how much time it takes // a remote peer to respond. type Tracker struct { protocol string // Protocol capability identifier for the metrics timeout time.Duration // Global timeout after which to drop a tracked packet pending map[uint64]*request // Currently pending requests expire *list.List // Linked list tracking the expiration order wake *time.Timer // Timer tracking the expiration of the next item lock sync.Mutex // Lock protecting from concurrent updates } // New creates a new network request tracker to monitor how much time it takes to // fill certain requests and how individual peers perform. func New(protocol string, timeout time.Duration) *Tracker { return &Tracker{ protocol: protocol, timeout: timeout, pending: make(map[uint64]*request), expire: list.New(), } } // Track adds a network request to the tracker to wait for a response to arrive // or until the request it cancelled or times out. func (t *Tracker) Track(peer string, version uint, reqCode uint64, resCode uint64, id uint64) { if !metrics.Enabled() { return } t.lock.Lock() defer t.lock.Unlock() // If there's a duplicate request, we've just random-collided (or more probably, // we have a bug), report it. We could also add a metric, but we're not really // expecting ourselves to be buggy, so a noisy warning should be enough. if _, ok := t.pending[id]; ok { log.Error("Network request id collision", "protocol", t.protocol, "version", version, "code", reqCode, "id", id) return } // If we have too many pending requests, bail out instead of leaking memory if pending := len(t.pending); pending >= maxTrackedPackets { log.Error("Request tracker exceeded allowance", "pending", pending, "peer", peer, "protocol", t.protocol, "version", version, "code", reqCode) return } // Id doesn't exist yet, start tracking it t.pending[id] = &request{ peer: peer, version: version, reqCode: reqCode, resCode: resCode, time: time.Now(), expire: t.expire.PushBack(id), } g := fmt.Sprintf("%s/%s/%d/%#02x", trackedGaugeName, t.protocol, version, reqCode) metrics.GetOrRegisterGauge(g, nil).Inc(1) // If we've just inserted the first item, start the expiration timer if t.wake == nil { t.wake = time.AfterFunc(t.timeout, t.clean) } } // clean is called automatically when a preset time passes without a response // being delivered for the first network request. func (t *Tracker) clean() { t.lock.Lock() defer t.lock.Unlock() // Expire anything within a certain threshold (might be no items at all if // we raced with the delivery) for t.expire.Len() > 0 { // Stop iterating if the next pending request is still alive var ( head = t.expire.Front() id = head.Value.(uint64) req = t.pending[id] ) if time.Since(req.time) < t.timeout+5*time.Millisecond { break } // Nope, dead, drop it t.expire.Remove(head) delete(t.pending, id) g := fmt.Sprintf("%s/%s/%d/%#02x", trackedGaugeName, t.protocol, req.version, req.reqCode) metrics.GetOrRegisterGauge(g, nil).Dec(1) m := fmt.Sprintf("%s/%s/%d/%#02x", lostMeterName, t.protocol, req.version, req.reqCode) metrics.GetOrRegisterMeter(m, nil).Mark(1) } t.schedule() } // schedule starts a timer to trigger on the expiration of the first network // packet. func (t *Tracker) schedule() { if t.expire.Len() == 0 { t.wake = nil return } t.wake = time.AfterFunc(time.Until(t.pending[t.expire.Front().Value.(uint64)].time.Add(t.timeout)), t.clean) } // Fulfil fills a pending request, if any is available, reporting on various metrics. func (t *Tracker) Fulfil(peer string, version uint, code uint64, id uint64) { if !metrics.Enabled() { return } t.lock.Lock() defer t.lock.Unlock() // If it's a non existing request, track as stale response req, ok := t.pending[id] if !ok { m := fmt.Sprintf("%s/%s/%d/%#02x", staleMeterName, t.protocol, version, code) metrics.GetOrRegisterMeter(m, nil).Mark(1) return } // If the response is funky, it might be some active attack if req.peer != peer || req.version != version || req.resCode != code { log.Warn("Network response id collision", "have", fmt.Sprintf("%s:%s/%d:%d", peer, t.protocol, version, code), "want", fmt.Sprintf("%s:%s/%d:%d", peer, t.protocol, req.version, req.resCode), ) return } // Everything matches, mark the request serviced and meter it t.expire.Remove(req.expire) delete(t.pending, id) if req.expire.Prev() == nil { if t.wake.Stop() { t.schedule() } } g := fmt.Sprintf("%s/%s/%d/%#02x", trackedGaugeName, t.protocol, req.version, req.reqCode) metrics.GetOrRegisterGauge(g, nil).Dec(1) h := fmt.Sprintf("%s/%s/%d/%#02x", waitHistName, t.protocol, req.version, req.reqCode) sampler := func() metrics.Sample { return metrics.ResettingSample( metrics.NewExpDecaySample(1028, 0.015), ) } metrics.GetOrRegisterHistogramLazy(h, nil, sampler).Update(time.Since(req.time).Microseconds()) }