2014-04-22 18:28:10 -05:00
package main
import (
"time"
2014-04-25 17:13:45 -05:00
2014-05-10 20:48:33 -05:00
"./queue"
2014-04-25 17:13:45 -05:00
"github.com/conformal/btcwire"
2014-04-22 18:28:10 -05:00
)
// TODO: Break Client/Peer/Crawler into separate modules.
type Crawler struct {
2014-05-10 20:48:33 -05:00
client * Client
queue * queue . Queue
numSeen int
numUnique int
numConnected int
numAttempted int
seenFilter map [ string ] bool // TODO: Replace with bloom filter?
peerAge time . Duration
2014-04-22 18:28:10 -05:00
}
2014-04-25 18:24:27 -05:00
// Result is what handleAddress produces for a single address.
type Result struct {
	// Node is the peer that was contacted.
	Node *Peer

	// Peers collects the address lists of every addr message received from
	// Node. It stays nil when connecting, the handshake or the getaddr
	// request failed.
	Peers []*btcwire.NetAddress
}
2014-05-10 20:48:33 -05:00
func NewCrawler ( client * Client , seeds [ ] string , peerAge time . Duration ) * Crawler {
2014-04-22 18:28:10 -05:00
c := Crawler {
2014-04-23 19:44:22 -05:00
client : client ,
seenFilter : map [ string ] bool { } ,
peerAge : peerAge ,
2014-04-22 18:28:10 -05:00
}
2014-05-10 20:48:33 -05:00
filter := func ( address string ) * string {
return c . filter ( address )
2014-04-22 18:28:10 -05:00
}
2014-05-10 20:48:33 -05:00
c . queue = queue . NewQueue ( filter , 10 )
go func ( ) {
// Prefill the queue
for _ , address := range seeds {
c . queue . Input <- address
}
} ( )
2014-04-22 18:28:10 -05:00
return & c
}
2014-04-25 18:24:27 -05:00
func ( c * Crawler ) handleAddress ( address string ) * Result {
2014-05-10 20:48:33 -05:00
c . numAttempted ++
2014-04-22 18:28:10 -05:00
client := c . client
peer := NewPeer ( client , address )
2014-04-25 18:24:27 -05:00
r := Result { Node : peer }
2014-04-22 18:28:10 -05:00
err := peer . Connect ( )
if err != nil {
2014-04-24 21:13:33 -05:00
logger . Debugf ( "[%s] Connection failed: %v" , address , err )
2014-04-22 18:28:10 -05:00
return & r
}
defer peer . Disconnect ( )
err = peer . Handshake ( )
if err != nil {
2014-04-24 21:13:33 -05:00
logger . Debugf ( "[%s] Handsake failed: %v" , address , err )
2014-04-22 18:28:10 -05:00
return & r
}
// Send getaddr.
2014-04-25 17:13:45 -05:00
err = peer . WriteMessage ( btcwire . NewMsgGetAddr ( ) )
if err != nil {
2014-04-24 21:13:33 -05:00
logger . Warningf ( "[%s] GetAddr failed: %v" , address , err )
2014-04-22 18:28:10 -05:00
return & r
}
2014-05-10 20:48:33 -05:00
c . numConnected ++
2014-04-22 18:28:10 -05:00
// Listen for tx inv messages.
firstReceived := - 1
tolerateMessages := 3
otherMessages := [ ] string { }
for {
// We can't really tell when we're done receiving peers, so we stop either
// when we get a smaller-than-normal set size or when we've received too
// many unrelated messages.
2014-04-25 17:13:45 -05:00
msg , _ , err := peer . ReadMessage ( )
2014-04-22 18:28:10 -05:00
if err != nil {
2014-04-24 21:13:33 -05:00
logger . Warningf ( "[%s] Failed to read message: %v" , address , err )
2014-04-22 18:28:10 -05:00
continue
}
switch tmsg := msg . ( type ) {
case * btcwire . MsgAddr :
2014-04-25 18:24:27 -05:00
r . Peers = append ( r . Peers , tmsg . AddrList ... )
2014-04-22 18:28:10 -05:00
if firstReceived == - 1 {
firstReceived = len ( tmsg . AddrList )
} else if firstReceived > len ( tmsg . AddrList ) || firstReceived == 0 {
// Probably done.
return & r
}
default :
otherMessages = append ( otherMessages , tmsg . Command ( ) )
if len ( otherMessages ) > tolerateMessages {
2014-04-25 18:24:27 -05:00
logger . Debugf ( "[%s] Giving up with %d results after tolerating messages: %v." , address , len ( r . Peers ) , otherMessages )
2014-04-22 18:28:10 -05:00
return & r
}
}
}
}
2014-05-10 20:48:33 -05:00
func ( c * Crawler ) filter ( address string ) * string {
2014-04-22 18:28:10 -05:00
// Returns true if not seen before, otherwise false
2014-05-10 20:48:33 -05:00
c . numSeen ++
2014-04-22 18:28:10 -05:00
state , ok := c . seenFilter [ address ]
if ok == true && state == true {
2014-05-10 20:48:33 -05:00
return nil
2014-04-22 18:28:10 -05:00
}
c . seenFilter [ address ] = true
2014-05-10 20:48:33 -05:00
c . numUnique ++
return & address
}
2014-04-22 18:28:10 -05:00
2014-05-10 20:48:33 -05:00
/ *
func ( c * Crawler ) Run ( resultChan chan <- Result , numWorkers int ) {
workChan := make ( chan string , numWorkers )
queueChan := make ( chan string )
tempResult := make ( chan Result )
go func ( queueChan <- chan string ) {
// Single thread to safely manage the queue
c . addAddress ( <- queueChan )
nextAddress , _ := c . popAddress ( )
for {
select {
case address := <- queueChan :
// Enque address
c . addAddress ( address )
case workChan <- nextAddress :
nextAddress , err := c . popAddress ( )
if err != nil {
// Block until we get more work
c . addAddress ( <- queueChan )
nextAddress , _ = c . popAddress ( )
}
}
}
} ( queueChan )
go func ( tempResult <- chan Result , workChan chan <- string ) {
// Convert from result to queue.
for {
select {
case r := <- tempResult :
}
}
} ( tempResult , workChan )
for address := range workChan {
// Spawn more workers as we get buffered work
go func ( ) {
logger . Debugf ( "[%s] Worker started." , address )
tempResult <- * c . handleAddress ( address )
} ( )
}
2014-04-22 18:28:10 -05:00
}
2014-05-10 20:48:33 -05:00
* /
2014-04-22 18:28:10 -05:00
2014-04-25 18:24:27 -05:00
func ( c * Crawler ) Run ( numWorkers int , stopAfter int ) * [ ] Result {
numActive := 0
2014-04-22 18:28:10 -05:00
2014-04-25 18:24:27 -05:00
resultChan := make ( chan Result )
workerChan := make ( chan struct { } , numWorkers )
results := [ ] Result { }
2014-04-25 18:31:03 -05:00
if stopAfter == 0 {
// No stopping.
stopAfter = - 1
}
2014-04-22 18:28:10 -05:00
// This is the main "event loop". Feels like there may be a better way to
// manage the number of concurrent workers but I can't think of it right now.
for {
select {
2014-04-25 18:24:27 -05:00
case workerChan <- struct { } { } :
2014-04-22 18:28:10 -05:00
go func ( ) {
2014-05-10 20:48:33 -05:00
address := <- c . queue . Output
2014-04-24 21:13:33 -05:00
logger . Debugf ( "[%s] Worker started." , address )
2014-04-25 18:24:27 -05:00
resultChan <- * c . handleAddress ( address )
2014-04-22 18:28:10 -05:00
} ( )
2014-04-25 18:24:27 -05:00
case r := <- resultChan :
timestampSince := time . Now ( ) . Add ( - c . peerAge )
for _ , addr := range r . Peers {
if ! addr . Timestamp . After ( timestampSince ) {
continue
}
2014-05-10 20:48:33 -05:00
c . queue . Input <- NetAddressKey ( addr )
2014-04-22 18:28:10 -05:00
}
2014-05-10 20:48:33 -05:00
numActive --
2014-04-25 18:24:27 -05:00
if len ( r . Peers ) > 0 {
stopAfter --
results = append ( results , r )
2014-04-22 18:28:10 -05:00
2014-05-10 20:48:33 -05:00
logger . Infof ( "[%s] Returned %d peers. Total %d unique peers via %d connected (of %d attempted)." , r . Node . Address , len ( r . Peers ) , c . numUnique , c . numConnected , c . numAttempted )
2014-04-23 19:44:22 -05:00
}
2014-04-22 18:28:10 -05:00
2014-05-10 20:48:33 -05:00
if stopAfter == 0 || ( c . queue . IsEmpty ( ) && numActive == 0 ) {
2014-04-24 21:13:33 -05:00
logger . Infof ( "Done." )
2014-04-25 18:24:27 -05:00
return & results
2014-04-22 18:28:10 -05:00
}
2014-04-25 18:24:27 -05:00
<- workerChan
2014-04-22 18:28:10 -05:00
}
}
}