2014-04-22 18:28:10 -05:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
2014-05-17 21:44:33 -05:00
|
|
|
"sync"
|
2014-04-22 18:28:10 -05:00
|
|
|
"time"
|
2014-04-25 17:13:45 -05:00
|
|
|
|
|
|
|
"github.com/conformal/btcwire"
|
2014-04-22 18:28:10 -05:00
|
|
|
)
|
|
|
|
|
|
|
|
// TODO: Break Client/Peer/Crawler into separate modules.
|
|
|
|
type Crawler struct {
|
2014-05-15 19:04:09 -05:00
|
|
|
client *Client
|
2014-05-19 21:09:50 -05:00
|
|
|
queue *Queue
|
2014-05-15 19:04:09 -05:00
|
|
|
numSeen int
|
|
|
|
numUnique int
|
|
|
|
numConnected int
|
|
|
|
numAttempted int
|
|
|
|
seenFilter map[string]bool // TODO: Replace with bloom filter?
|
|
|
|
PeerAge time.Duration
|
|
|
|
ConnectTimeout time.Duration
|
|
|
|
shutdown chan struct{}
|
2014-05-18 00:16:05 -05:00
|
|
|
waitGroup sync.WaitGroup
|
2014-04-22 18:28:10 -05:00
|
|
|
}
|
|
|
|
|
2014-04-25 18:24:27 -05:00
|
|
|
type Result struct {
|
|
|
|
Node *Peer
|
|
|
|
Peers []*btcwire.NetAddress
|
|
|
|
}
|
|
|
|
|
2014-05-15 19:04:09 -05:00
|
|
|
func NewCrawler(client *Client, seeds []string) *Crawler {
|
2014-04-22 18:28:10 -05:00
|
|
|
c := Crawler{
|
2014-04-23 19:44:22 -05:00
|
|
|
client: client,
|
|
|
|
seenFilter: map[string]bool{},
|
2014-05-15 17:33:09 -05:00
|
|
|
shutdown: make(chan struct{}, 1),
|
2014-05-18 00:16:05 -05:00
|
|
|
waitGroup: sync.WaitGroup{},
|
2014-04-22 18:28:10 -05:00
|
|
|
}
|
2014-05-10 20:48:33 -05:00
|
|
|
filter := func(address string) *string {
|
|
|
|
return c.filter(address)
|
2014-04-22 18:28:10 -05:00
|
|
|
}
|
2014-05-18 00:16:05 -05:00
|
|
|
|
|
|
|
done := make(chan struct{})
|
2014-05-19 21:09:50 -05:00
|
|
|
c.queue = NewQueue(filter, done)
|
2014-05-10 20:48:33 -05:00
|
|
|
|
2014-05-17 21:44:33 -05:00
|
|
|
// Prefill the queue
|
|
|
|
for _, address := range seeds {
|
2014-05-18 00:16:05 -05:00
|
|
|
c.addSeed(address)
|
2014-05-17 21:44:33 -05:00
|
|
|
}
|
2014-04-22 18:28:10 -05:00
|
|
|
|
2014-05-18 00:16:05 -05:00
|
|
|
go func() {
|
|
|
|
c.waitGroup.Wait()
|
|
|
|
done <- struct{}{}
|
|
|
|
}()
|
|
|
|
|
2014-04-22 18:28:10 -05:00
|
|
|
return &c
|
|
|
|
}
|
|
|
|
|
2014-05-14 17:47:07 -05:00
|
|
|
func (c *Crawler) Shutdown() {
|
|
|
|
c.shutdown <- struct{}{}
|
|
|
|
}
|
|
|
|
|
2014-04-25 18:24:27 -05:00
|
|
|
func (c *Crawler) handleAddress(address string) *Result {
|
2014-05-10 20:48:33 -05:00
|
|
|
c.numAttempted++
|
|
|
|
|
2014-04-22 18:28:10 -05:00
|
|
|
client := c.client
|
|
|
|
peer := NewPeer(client, address)
|
2014-05-15 19:04:09 -05:00
|
|
|
peer.ConnectTimeout = c.ConnectTimeout
|
2014-04-25 18:24:27 -05:00
|
|
|
r := Result{Node: peer}
|
2014-04-22 18:28:10 -05:00
|
|
|
|
|
|
|
err := peer.Connect()
|
|
|
|
if err != nil {
|
2014-04-24 21:13:33 -05:00
|
|
|
logger.Debugf("[%s] Connection failed: %v", address, err)
|
2014-04-22 18:28:10 -05:00
|
|
|
return &r
|
|
|
|
}
|
|
|
|
defer peer.Disconnect()
|
|
|
|
|
|
|
|
err = peer.Handshake()
|
|
|
|
if err != nil {
|
2014-04-24 21:13:33 -05:00
|
|
|
logger.Debugf("[%s] Handsake failed: %v", address, err)
|
2014-04-22 18:28:10 -05:00
|
|
|
return &r
|
|
|
|
}
|
|
|
|
|
|
|
|
// Send getaddr.
|
2014-04-25 17:13:45 -05:00
|
|
|
err = peer.WriteMessage(btcwire.NewMsgGetAddr())
|
|
|
|
if err != nil {
|
2014-04-24 21:13:33 -05:00
|
|
|
logger.Warningf("[%s] GetAddr failed: %v", address, err)
|
2014-04-22 18:28:10 -05:00
|
|
|
return &r
|
|
|
|
}
|
|
|
|
|
2014-05-10 20:48:33 -05:00
|
|
|
c.numConnected++
|
|
|
|
|
2014-04-22 18:28:10 -05:00
|
|
|
// Listen for tx inv messages.
|
|
|
|
firstReceived := -1
|
|
|
|
tolerateMessages := 3
|
|
|
|
otherMessages := []string{}
|
|
|
|
|
|
|
|
for {
|
|
|
|
// We can't really tell when we're done receiving peers, so we stop either
|
|
|
|
// when we get a smaller-than-normal set size or when we've received too
|
|
|
|
// many unrelated messages.
|
2014-05-19 17:26:00 -05:00
|
|
|
if len(otherMessages) > tolerateMessages {
|
|
|
|
logger.Debugf("[%s] Giving up with %d results after tolerating messages: %v.", address, len(r.Peers), otherMessages)
|
|
|
|
return &r
|
|
|
|
}
|
|
|
|
|
2014-04-25 17:13:45 -05:00
|
|
|
msg, _, err := peer.ReadMessage()
|
2014-04-22 18:28:10 -05:00
|
|
|
if err != nil {
|
2014-05-19 17:19:45 -05:00
|
|
|
otherMessages = append(otherMessages, err.Error())
|
2014-04-24 21:13:33 -05:00
|
|
|
logger.Warningf("[%s] Failed to read message: %v", address, err)
|
2014-04-22 18:28:10 -05:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
switch tmsg := msg.(type) {
|
|
|
|
case *btcwire.MsgAddr:
|
2014-04-25 18:24:27 -05:00
|
|
|
r.Peers = append(r.Peers, tmsg.AddrList...)
|
2014-04-22 18:28:10 -05:00
|
|
|
|
|
|
|
if firstReceived == -1 {
|
|
|
|
firstReceived = len(tmsg.AddrList)
|
|
|
|
} else if firstReceived > len(tmsg.AddrList) || firstReceived == 0 {
|
|
|
|
// Probably done.
|
|
|
|
return &r
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
otherMessages = append(otherMessages, tmsg.Command())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-05-18 00:16:05 -05:00
|
|
|
func (c *Crawler) addSeed(address string) {
|
|
|
|
c.waitGroup.Add(1)
|
|
|
|
if c.queue.Add(address) == false {
|
|
|
|
c.waitGroup.Done()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-05-10 20:48:33 -05:00
|
|
|
func (c *Crawler) filter(address string) *string {
|
2014-04-22 18:28:10 -05:00
|
|
|
// Returns true if not seen before, otherwise false
|
2014-05-10 20:48:33 -05:00
|
|
|
c.numSeen++
|
|
|
|
|
2014-04-22 18:28:10 -05:00
|
|
|
state, ok := c.seenFilter[address]
|
|
|
|
if ok == true && state == true {
|
2014-05-10 20:48:33 -05:00
|
|
|
return nil
|
2014-04-22 18:28:10 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
c.seenFilter[address] = true
|
2014-05-10 20:48:33 -05:00
|
|
|
c.numUnique++
|
|
|
|
return &address
|
|
|
|
}
|
2014-04-22 18:28:10 -05:00
|
|
|
|
2014-05-14 17:47:07 -05:00
|
|
|
func (c *Crawler) process(r *Result) *Result {
|
2014-05-15 19:04:09 -05:00
|
|
|
timestampSince := time.Now().Add(-c.PeerAge)
|
2014-05-19 17:14:24 -05:00
|
|
|
numStalePeers := 0
|
2014-05-10 20:48:33 -05:00
|
|
|
|
2014-05-14 17:47:07 -05:00
|
|
|
for _, addr := range r.Peers {
|
|
|
|
if !addr.Timestamp.After(timestampSince) {
|
2014-05-19 17:14:24 -05:00
|
|
|
numStalePeers++
|
2014-05-14 17:47:07 -05:00
|
|
|
continue
|
2014-05-10 20:48:33 -05:00
|
|
|
}
|
|
|
|
|
2014-05-18 00:16:05 -05:00
|
|
|
c.addSeed(NetAddressKey(addr))
|
2014-05-14 17:47:07 -05:00
|
|
|
}
|
2014-05-10 20:48:33 -05:00
|
|
|
|
2014-05-14 17:47:07 -05:00
|
|
|
if len(r.Peers) > 0 {
|
2014-05-19 17:14:24 -05:00
|
|
|
logger.Infof("[%s] Returned %d peers (including %d stale). Total %d unique peers via %d connected (of %d attempted).", r.Node.Address, len(r.Peers), numStalePeers, c.numUnique, c.numConnected, c.numAttempted)
|
2014-05-14 17:47:07 -05:00
|
|
|
return r
|
2014-05-10 20:48:33 -05:00
|
|
|
}
|
2014-04-22 18:28:10 -05:00
|
|
|
|
2014-05-14 17:47:07 -05:00
|
|
|
return nil
|
|
|
|
}
|
2014-04-22 18:28:10 -05:00
|
|
|
|
2014-05-17 21:44:33 -05:00
|
|
|
func (c *Crawler) Run(numWorkers int) <-chan Result {
|
|
|
|
result := make(chan Result, 100)
|
2014-04-25 18:24:27 -05:00
|
|
|
workerChan := make(chan struct{}, numWorkers)
|
2014-05-17 21:44:33 -05:00
|
|
|
isDone := false
|
2014-05-15 19:04:09 -05:00
|
|
|
|
2014-05-17 21:44:33 -05:00
|
|
|
go func() {
|
|
|
|
// Queue handler
|
|
|
|
for address := range c.queue.Iter() {
|
|
|
|
// Reserve worker slot (block)
|
|
|
|
workerChan <- struct{}{}
|
2014-05-15 19:59:01 -05:00
|
|
|
|
2014-05-17 21:44:33 -05:00
|
|
|
if isDone {
|
2014-05-15 19:59:01 -05:00
|
|
|
break
|
2014-05-14 17:47:07 -05:00
|
|
|
}
|
|
|
|
|
2014-05-17 21:44:33 -05:00
|
|
|
// Start worker
|
2014-04-22 18:28:10 -05:00
|
|
|
go func() {
|
2014-05-17 21:44:33 -05:00
|
|
|
r := c.handleAddress(address)
|
2014-04-22 18:28:10 -05:00
|
|
|
|
2014-05-17 21:44:33 -05:00
|
|
|
// Process the result
|
|
|
|
if c.process(r) != nil {
|
|
|
|
result <- *r
|
|
|
|
}
|
2014-05-14 17:47:07 -05:00
|
|
|
|
2014-05-17 21:44:33 -05:00
|
|
|
// Clear worker slot
|
|
|
|
<-workerChan
|
2014-05-17 23:53:15 -05:00
|
|
|
c.waitGroup.Done()
|
2014-05-17 21:44:33 -05:00
|
|
|
}()
|
2014-04-22 18:28:10 -05:00
|
|
|
}
|
2014-05-17 21:44:33 -05:00
|
|
|
|
2014-05-17 23:53:15 -05:00
|
|
|
logger.Infof("Stopping queue after %d added items.", c.queue.Count())
|
|
|
|
close(result)
|
2014-05-17 21:44:33 -05:00
|
|
|
}()
|
|
|
|
|
|
|
|
go func() {
|
|
|
|
<-c.shutdown
|
|
|
|
isDone = true
|
|
|
|
}()
|
|
|
|
|
|
|
|
return result
|
2014-04-22 18:28:10 -05:00
|
|
|
}
|