2014-04-22 18:28:10 -05:00
package main
import (
"time"
2014-04-25 17:13:45 -05:00
2014-05-10 20:48:33 -05:00
"./queue"
2014-04-25 17:13:45 -05:00
"github.com/conformal/btcwire"
2014-04-22 18:28:10 -05:00
)
// TODO: Break Client/Peer/Crawler into separate modules.
type Crawler struct {
2014-05-15 19:04:09 -05:00
client * Client
queue * queue . Queue
numSeen int
numUnique int
numConnected int
numAttempted int
seenFilter map [ string ] bool // TODO: Replace with bloom filter?
PeerAge time . Duration
ConnectTimeout time . Duration
shutdown chan struct { }
2014-04-22 18:28:10 -05:00
}
2014-04-25 18:24:27 -05:00
// Result is the outcome of visiting one address.
type Result struct {
	// Node is the peer that was contacted.
	Node *Peer
	// Peers lists the addresses collected from the node's addr messages; it
	// stays empty when the node could not be reached or reported nothing.
	Peers []*btcwire.NetAddress
}
2014-05-15 19:04:09 -05:00
func NewCrawler ( client * Client , seeds [ ] string ) * Crawler {
2014-04-22 18:28:10 -05:00
c := Crawler {
2014-04-23 19:44:22 -05:00
client : client ,
seenFilter : map [ string ] bool { } ,
2014-05-15 17:33:09 -05:00
shutdown : make ( chan struct { } , 1 ) ,
2014-04-22 18:28:10 -05:00
}
2014-05-10 20:48:33 -05:00
filter := func ( address string ) * string {
return c . filter ( address )
2014-04-22 18:28:10 -05:00
}
2014-05-10 20:48:33 -05:00
c . queue = queue . NewQueue ( filter , 10 )
go func ( ) {
// Prefill the queue
for _ , address := range seeds {
c . queue . Input <- address
}
} ( )
2014-04-22 18:28:10 -05:00
return & c
}
2014-05-14 17:47:07 -05:00
// Shutdown signals the crawler's shutdown channel, which Run reads to stop
// starting new workers.
//
// The call never blocks: the channel created by NewCrawler has room for one
// pending signal, and when a signal is already pending (Shutdown was called
// more than once, or Run is no longer reading) the extra request is dropped
// instead of blocking the caller forever.
func (c *Crawler) Shutdown() {
	select {
	case c.shutdown <- struct{}{}:
	default:
		// A shutdown request is already pending; nothing more to signal.
	}
}
2014-04-25 18:24:27 -05:00
func ( c * Crawler ) handleAddress ( address string ) * Result {
2014-05-10 20:48:33 -05:00
c . numAttempted ++
2014-04-22 18:28:10 -05:00
client := c . client
peer := NewPeer ( client , address )
2014-05-15 19:04:09 -05:00
peer . ConnectTimeout = c . ConnectTimeout
2014-04-25 18:24:27 -05:00
r := Result { Node : peer }
2014-04-22 18:28:10 -05:00
err := peer . Connect ( )
if err != nil {
2014-04-24 21:13:33 -05:00
logger . Debugf ( "[%s] Connection failed: %v" , address , err )
2014-04-22 18:28:10 -05:00
return & r
}
defer peer . Disconnect ( )
err = peer . Handshake ( )
if err != nil {
2014-04-24 21:13:33 -05:00
logger . Debugf ( "[%s] Handsake failed: %v" , address , err )
2014-04-22 18:28:10 -05:00
return & r
}
// Send getaddr.
2014-04-25 17:13:45 -05:00
err = peer . WriteMessage ( btcwire . NewMsgGetAddr ( ) )
if err != nil {
2014-04-24 21:13:33 -05:00
logger . Warningf ( "[%s] GetAddr failed: %v" , address , err )
2014-04-22 18:28:10 -05:00
return & r
}
2014-05-10 20:48:33 -05:00
c . numConnected ++
2014-04-22 18:28:10 -05:00
// Listen for tx inv messages.
firstReceived := - 1
tolerateMessages := 3
otherMessages := [ ] string { }
for {
// We can't really tell when we're done receiving peers, so we stop either
// when we get a smaller-than-normal set size or when we've received too
// many unrelated messages.
2014-04-25 17:13:45 -05:00
msg , _ , err := peer . ReadMessage ( )
2014-04-22 18:28:10 -05:00
if err != nil {
2014-04-24 21:13:33 -05:00
logger . Warningf ( "[%s] Failed to read message: %v" , address , err )
2014-04-22 18:28:10 -05:00
continue
}
switch tmsg := msg . ( type ) {
case * btcwire . MsgAddr :
2014-04-25 18:24:27 -05:00
r . Peers = append ( r . Peers , tmsg . AddrList ... )
2014-04-22 18:28:10 -05:00
if firstReceived == - 1 {
firstReceived = len ( tmsg . AddrList )
} else if firstReceived > len ( tmsg . AddrList ) || firstReceived == 0 {
// Probably done.
return & r
}
default :
otherMessages = append ( otherMessages , tmsg . Command ( ) )
if len ( otherMessages ) > tolerateMessages {
2014-04-25 18:24:27 -05:00
logger . Debugf ( "[%s] Giving up with %d results after tolerating messages: %v." , address , len ( r . Peers ) , otherMessages )
2014-04-22 18:28:10 -05:00
return & r
}
}
}
}
2014-05-10 20:48:33 -05:00
func ( c * Crawler ) filter ( address string ) * string {
2014-04-22 18:28:10 -05:00
// Returns true if not seen before, otherwise false
2014-05-10 20:48:33 -05:00
c . numSeen ++
2014-04-22 18:28:10 -05:00
state , ok := c . seenFilter [ address ]
if ok == true && state == true {
2014-05-10 20:48:33 -05:00
return nil
2014-04-22 18:28:10 -05:00
}
c . seenFilter [ address ] = true
2014-05-10 20:48:33 -05:00
c . numUnique ++
return & address
}
2014-04-22 18:28:10 -05:00
2014-05-14 17:47:07 -05:00
func ( c * Crawler ) process ( r * Result ) * Result {
2014-05-15 19:04:09 -05:00
timestampSince := time . Now ( ) . Add ( - c . PeerAge )
2014-05-10 20:48:33 -05:00
2014-05-14 17:47:07 -05:00
for _ , addr := range r . Peers {
if ! addr . Timestamp . After ( timestampSince ) {
continue
2014-05-10 20:48:33 -05:00
}
2014-05-14 17:47:07 -05:00
c . queue . Input <- NetAddressKey ( addr )
}
2014-05-10 20:48:33 -05:00
2014-05-14 17:47:07 -05:00
if len ( r . Peers ) > 0 {
logger . Infof ( "[%s] Returned %d peers. Total %d unique peers via %d connected (of %d attempted)." , r . Node . Address , len ( r . Peers ) , c . numUnique , c . numConnected , c . numAttempted )
return r
2014-05-10 20:48:33 -05:00
}
2014-04-22 18:28:10 -05:00
2014-05-14 17:47:07 -05:00
return nil
}
2014-04-22 18:28:10 -05:00
2014-05-14 17:47:07 -05:00
func ( c * Crawler ) Run ( resultChan chan <- Result , numWorkers int ) {
2014-04-25 18:24:27 -05:00
workerChan := make ( chan struct { } , numWorkers )
2014-05-14 17:47:07 -05:00
tempResult := make ( chan Result )
numActive := 0
isActive := true
2014-04-25 18:24:27 -05:00
2014-05-15 19:04:09 -05:00
// Block until we get the first item
c . queue . Wait ( )
2014-05-14 17:47:07 -05:00
// This is the main "event loop".
// FIXME: Feels like there should be a better way to manage the number of
// concurrent workers without limiting slots with workerChan and without
// using a numActive counter.
2014-04-22 18:28:10 -05:00
for {
select {
2014-04-25 18:24:27 -05:00
case workerChan <- struct { } { } :
2014-05-14 17:47:07 -05:00
if ! isActive {
// Don't start any new workers, leave the slot filled.
break
2014-05-15 19:04:09 -05:00
} else if c . queue . IsEmpty ( ) {
if numActive == 0 {
2014-05-15 19:59:01 -05:00
logger . Infof ( "Done after %d queued items." , c . queue . Count ( ) )
2014-05-15 19:04:09 -05:00
close ( resultChan )
return
}
2014-05-15 19:59:01 -05:00
<- workerChan
break
2014-05-14 17:47:07 -05:00
}
numActive ++
2014-04-22 18:28:10 -05:00
go func ( ) {
2014-05-10 20:48:33 -05:00
address := <- c . queue . Output
2014-04-24 21:13:33 -05:00
logger . Debugf ( "[%s] Worker started." , address )
2014-05-14 17:47:07 -05:00
tempResult <- * c . handleAddress ( address )
2014-04-22 18:28:10 -05:00
} ( )
2014-05-14 17:47:07 -05:00
case r := <- tempResult :
if c . process ( & r ) != nil {
resultChan <- r
2014-04-22 18:28:10 -05:00
}
2014-05-10 20:48:33 -05:00
numActive --
2014-04-25 18:24:27 -05:00
<- workerChan
2014-05-14 17:47:07 -05:00
case <- c . shutdown :
2014-05-15 19:59:01 -05:00
logger . Infof ( "Shutting down after %d workers finish." , numActive )
2014-05-14 17:47:07 -05:00
isActive = false
2014-04-22 18:28:10 -05:00
}
}
}