2014-04-22 18:28:10 -05:00
package main
import (
2014-05-17 21:44:33 -05:00
"sync"
2014-04-22 18:28:10 -05:00
"time"
2014-04-25 17:13:45 -05:00
2014-05-10 20:48:33 -05:00
"./queue"
2014-04-25 17:13:45 -05:00
"github.com/conformal/btcwire"
2014-04-22 18:28:10 -05:00
)
// TODO: Break Client/Peer/Crawler into separate modules.
type Crawler struct {
2014-05-15 19:04:09 -05:00
client * Client
queue * queue . Queue
numSeen int
numUnique int
numConnected int
numAttempted int
seenFilter map [ string ] bool // TODO: Replace with bloom filter?
PeerAge time . Duration
ConnectTimeout time . Duration
shutdown chan struct { }
2014-05-17 23:53:15 -05:00
waitGroup * sync . WaitGroup
2014-04-22 18:28:10 -05:00
}
2014-04-25 18:24:27 -05:00
// Result is the outcome of crawling a single node.
type Result struct {
	// Node is the peer that was contacted.
	Node *Peer
	// Peers holds the addresses collected from the node's addr messages.
	Peers []*btcwire.NetAddress
}
2014-05-15 19:04:09 -05:00
func NewCrawler ( client * Client , seeds [ ] string ) * Crawler {
2014-05-17 23:53:15 -05:00
var wg sync . WaitGroup
2014-04-22 18:28:10 -05:00
c := Crawler {
2014-04-23 19:44:22 -05:00
client : client ,
seenFilter : map [ string ] bool { } ,
2014-05-15 17:33:09 -05:00
shutdown : make ( chan struct { } , 1 ) ,
2014-05-17 23:53:15 -05:00
waitGroup : & wg ,
2014-04-22 18:28:10 -05:00
}
2014-05-10 20:48:33 -05:00
filter := func ( address string ) * string {
return c . filter ( address )
2014-04-22 18:28:10 -05:00
}
2014-05-17 23:53:15 -05:00
c . queue = queue . NewQueue ( filter , & wg )
2014-05-10 20:48:33 -05:00
2014-05-17 21:44:33 -05:00
// Prefill the queue
for _ , address := range seeds {
c . queue . Add ( address )
}
2014-04-22 18:28:10 -05:00
return & c
}
2014-05-14 17:47:07 -05:00
// Shutdown asks a running crawl to stop dispatching new addresses.
//
// It never blocks: if a shutdown request is already pending on the channel
// (or the channel cannot accept a value right now), the call is a no-op
// instead of hanging the caller.
func (c *Crawler) Shutdown() {
	select {
	case c.shutdown <- struct{}{}:
	default:
		// A shutdown request is already pending; nothing more to signal.
	}
}
2014-04-25 18:24:27 -05:00
func ( c * Crawler ) handleAddress ( address string ) * Result {
2014-05-10 20:48:33 -05:00
c . numAttempted ++
2014-04-22 18:28:10 -05:00
client := c . client
peer := NewPeer ( client , address )
2014-05-15 19:04:09 -05:00
peer . ConnectTimeout = c . ConnectTimeout
2014-04-25 18:24:27 -05:00
r := Result { Node : peer }
2014-04-22 18:28:10 -05:00
err := peer . Connect ( )
if err != nil {
2014-04-24 21:13:33 -05:00
logger . Debugf ( "[%s] Connection failed: %v" , address , err )
2014-04-22 18:28:10 -05:00
return & r
}
defer peer . Disconnect ( )
err = peer . Handshake ( )
if err != nil {
2014-04-24 21:13:33 -05:00
logger . Debugf ( "[%s] Handsake failed: %v" , address , err )
2014-04-22 18:28:10 -05:00
return & r
}
// Send getaddr.
2014-04-25 17:13:45 -05:00
err = peer . WriteMessage ( btcwire . NewMsgGetAddr ( ) )
if err != nil {
2014-04-24 21:13:33 -05:00
logger . Warningf ( "[%s] GetAddr failed: %v" , address , err )
2014-04-22 18:28:10 -05:00
return & r
}
2014-05-10 20:48:33 -05:00
c . numConnected ++
2014-04-22 18:28:10 -05:00
// Listen for tx inv messages.
firstReceived := - 1
tolerateMessages := 3
otherMessages := [ ] string { }
for {
// We can't really tell when we're done receiving peers, so we stop either
// when we get a smaller-than-normal set size or when we've received too
// many unrelated messages.
2014-04-25 17:13:45 -05:00
msg , _ , err := peer . ReadMessage ( )
2014-04-22 18:28:10 -05:00
if err != nil {
2014-04-24 21:13:33 -05:00
logger . Warningf ( "[%s] Failed to read message: %v" , address , err )
2014-04-22 18:28:10 -05:00
continue
}
switch tmsg := msg . ( type ) {
case * btcwire . MsgAddr :
2014-04-25 18:24:27 -05:00
r . Peers = append ( r . Peers , tmsg . AddrList ... )
2014-04-22 18:28:10 -05:00
if firstReceived == - 1 {
firstReceived = len ( tmsg . AddrList )
} else if firstReceived > len ( tmsg . AddrList ) || firstReceived == 0 {
// Probably done.
return & r
}
default :
otherMessages = append ( otherMessages , tmsg . Command ( ) )
if len ( otherMessages ) > tolerateMessages {
2014-04-25 18:24:27 -05:00
logger . Debugf ( "[%s] Giving up with %d results after tolerating messages: %v." , address , len ( r . Peers ) , otherMessages )
2014-04-22 18:28:10 -05:00
return & r
}
}
}
}
2014-05-10 20:48:33 -05:00
func ( c * Crawler ) filter ( address string ) * string {
2014-04-22 18:28:10 -05:00
// Returns true if not seen before, otherwise false
2014-05-10 20:48:33 -05:00
c . numSeen ++
2014-04-22 18:28:10 -05:00
state , ok := c . seenFilter [ address ]
if ok == true && state == true {
2014-05-10 20:48:33 -05:00
return nil
2014-04-22 18:28:10 -05:00
}
c . seenFilter [ address ] = true
2014-05-10 20:48:33 -05:00
c . numUnique ++
return & address
}
2014-04-22 18:28:10 -05:00
2014-05-14 17:47:07 -05:00
func ( c * Crawler ) process ( r * Result ) * Result {
2014-05-15 19:04:09 -05:00
timestampSince := time . Now ( ) . Add ( - c . PeerAge )
2014-05-10 20:48:33 -05:00
2014-05-14 17:47:07 -05:00
for _ , addr := range r . Peers {
if ! addr . Timestamp . After ( timestampSince ) {
continue
2014-05-10 20:48:33 -05:00
}
2014-05-17 23:53:15 -05:00
c . waitGroup . Add ( 1 )
if ! c . queue . Add ( NetAddressKey ( addr ) ) {
c . waitGroup . Done ( )
}
2014-05-14 17:47:07 -05:00
}
2014-05-10 20:48:33 -05:00
2014-05-14 17:47:07 -05:00
if len ( r . Peers ) > 0 {
logger . Infof ( "[%s] Returned %d peers. Total %d unique peers via %d connected (of %d attempted)." , r . Node . Address , len ( r . Peers ) , c . numUnique , c . numConnected , c . numAttempted )
return r
2014-05-10 20:48:33 -05:00
}
2014-04-22 18:28:10 -05:00
2014-05-14 17:47:07 -05:00
return nil
}
2014-04-22 18:28:10 -05:00
2014-05-17 21:44:33 -05:00
func ( c * Crawler ) Run ( numWorkers int ) <- chan Result {
result := make ( chan Result , 100 )
2014-04-25 18:24:27 -05:00
workerChan := make ( chan struct { } , numWorkers )
2014-05-17 21:44:33 -05:00
isDone := false
2014-05-15 19:04:09 -05:00
2014-05-17 21:44:33 -05:00
go func ( ) {
// Queue handler
for address := range c . queue . Iter ( ) {
// Reserve worker slot (block)
workerChan <- struct { } { }
2014-05-15 19:59:01 -05:00
2014-05-17 21:44:33 -05:00
if isDone {
2014-05-15 19:59:01 -05:00
break
2014-05-14 17:47:07 -05:00
}
2014-05-17 21:44:33 -05:00
// Start worker
2014-04-22 18:28:10 -05:00
go func ( ) {
2014-05-17 21:44:33 -05:00
r := c . handleAddress ( address )
2014-04-22 18:28:10 -05:00
2014-05-17 21:44:33 -05:00
// Process the result
if c . process ( r ) != nil {
result <- * r
}
2014-05-14 17:47:07 -05:00
2014-05-17 21:44:33 -05:00
// Clear worker slot
<- workerChan
2014-05-17 23:53:15 -05:00
c . waitGroup . Done ( )
2014-05-17 21:44:33 -05:00
} ( )
2014-04-22 18:28:10 -05:00
}
2014-05-17 21:44:33 -05:00
2014-05-17 23:53:15 -05:00
logger . Infof ( "Stopping queue after %d added items." , c . queue . Count ( ) )
close ( result )
2014-05-17 21:44:33 -05:00
} ( )
go func ( ) {
<- c . shutdown
isDone = true
} ( )
return result
2014-04-22 18:28:10 -05:00
}