New queue.
This commit is contained in:
parent
fc1642a06b
commit
f24223dbda
|
@ -0,0 +1,10 @@
|
||||||
|
# build, clean and run name actions, not files; declaring them phony keeps a
# stray file with one of those names from masking the target.
.PHONY: build clean run

# Rebuild the binary whenever any Go source in this directory changes.
btc-crawl: *.go
	go build .

build: btc-crawl

# -f: succeed even when the binary has not been built yet.
clean:
	rm -f btc-crawl

# Build if needed, then start a crawl with verbose logging.
run: btc-crawl
	./btc-crawl -v
|
2
cmd.go
2
cmd.go
|
@ -29,7 +29,7 @@ type Options struct {
|
||||||
Concurrency int `short:"c" long:"concurrency" description:"Maximum number of concurrent connections to open." default:"10"`
|
Concurrency int `short:"c" long:"concurrency" description:"Maximum number of concurrent connections to open." default:"10"`
|
||||||
UserAgent string `short:"A" long:"user-agent" description:"Client name to advertise while crawling. Should be in format of '/name:x.y.z/'." default:"/btc-crawl:0.1.1/"`
|
UserAgent string `short:"A" long:"user-agent" description:"Client name to advertise while crawling. Should be in format of '/name:x.y.z/'." default:"/btc-crawl:0.1.1/"`
|
||||||
PeerAge time.Duration `long:"peer-age" description:"Ignore discovered peers older than this." default:"24h"`
|
PeerAge time.Duration `long:"peer-age" description:"Ignore discovered peers older than this." default:"24h"`
|
||||||
StopAfter int `long:"stop-after" description:"Stop crawling after this many results." default:"0"`
|
StopAfter int `long:"stop-after" description:"Stop crawling after this many results." default:"-1"`
|
||||||
}
|
}
|
||||||
|
|
||||||
var logLevels = []log.Level{
|
var logLevels = []log.Level{
|
||||||
|
|
117
crawler.go
117
crawler.go
|
@ -3,15 +3,20 @@ package main
|
||||||
import (
|
import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"./queue"
|
||||||
|
|
||||||
"github.com/conformal/btcwire"
|
"github.com/conformal/btcwire"
|
||||||
)
|
)
|
||||||
|
|
||||||
// TODO: Break Client/Peer/Crawler into separate modules.
|
// TODO: Break Client/Peer/Crawler into separate modules.
|
||||||
type Crawler struct {
|
type Crawler struct {
|
||||||
client *Client
|
client *Client
|
||||||
count int
|
queue *queue.Queue
|
||||||
|
numSeen int
|
||||||
|
numUnique int
|
||||||
|
numConnected int
|
||||||
|
numAttempted int
|
||||||
seenFilter map[string]bool // TODO: Replace with bloom filter?
|
seenFilter map[string]bool // TODO: Replace with bloom filter?
|
||||||
queue []string
|
|
||||||
peerAge time.Duration
|
peerAge time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -20,24 +25,30 @@ type Result struct {
|
||||||
Peers []*btcwire.NetAddress
|
Peers []*btcwire.NetAddress
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewCrawler(client *Client, queue []string, peerAge time.Duration) *Crawler {
|
func NewCrawler(client *Client, seeds []string, peerAge time.Duration) *Crawler {
|
||||||
c := Crawler{
|
c := Crawler{
|
||||||
client: client,
|
client: client,
|
||||||
count: 0,
|
|
||||||
seenFilter: map[string]bool{},
|
seenFilter: map[string]bool{},
|
||||||
queue: []string{},
|
|
||||||
peerAge: peerAge,
|
peerAge: peerAge,
|
||||||
}
|
}
|
||||||
|
filter := func(address string) *string {
|
||||||
// Prefill the queue
|
return c.filter(address)
|
||||||
for _, address := range queue {
|
|
||||||
c.addAddress(address)
|
|
||||||
}
|
}
|
||||||
|
c.queue = queue.NewQueue(filter, 10)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
// Prefill the queue
|
||||||
|
for _, address := range seeds {
|
||||||
|
c.queue.Input <- address
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
return &c
|
return &c
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Crawler) handleAddress(address string) *Result {
|
func (c *Crawler) handleAddress(address string) *Result {
|
||||||
|
c.numAttempted++
|
||||||
|
|
||||||
client := c.client
|
client := c.client
|
||||||
peer := NewPeer(client, address)
|
peer := NewPeer(client, address)
|
||||||
r := Result{Node: peer}
|
r := Result{Node: peer}
|
||||||
|
@ -62,6 +73,8 @@ func (c *Crawler) handleAddress(address string) *Result {
|
||||||
return &r
|
return &r
|
||||||
}
|
}
|
||||||
|
|
||||||
|
c.numConnected++
|
||||||
|
|
||||||
// Listen for tx inv messages.
|
// Listen for tx inv messages.
|
||||||
firstReceived := -1
|
firstReceived := -1
|
||||||
tolerateMessages := 3
|
tolerateMessages := 3
|
||||||
|
@ -97,23 +110,69 @@ func (c *Crawler) handleAddress(address string) *Result {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Crawler) addAddress(address string) bool {
|
func (c *Crawler) filter(address string) *string {
|
||||||
// Returns true if not seen before, otherwise false
|
// Returns true if not seen before, otherwise false
|
||||||
|
c.numSeen++
|
||||||
|
|
||||||
state, ok := c.seenFilter[address]
|
state, ok := c.seenFilter[address]
|
||||||
if ok == true && state == true {
|
if ok == true && state == true {
|
||||||
return false
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
c.seenFilter[address] = true
|
c.seenFilter[address] = true
|
||||||
c.count += 1
|
c.numUnique++
|
||||||
c.queue = append(c.queue, address)
|
return &address
|
||||||
|
|
||||||
return true
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
func (c *Crawler) Run(resultChan chan<- Result, numWorkers int) {
|
||||||
|
workChan := make(chan string, numWorkers)
|
||||||
|
queueChan := make(chan string)
|
||||||
|
tempResult := make(chan Result)
|
||||||
|
|
||||||
|
go func(queueChan <-chan string) {
|
||||||
|
// Single thread to safely manage the queue
|
||||||
|
c.addAddress(<-queueChan)
|
||||||
|
nextAddress, _ := c.popAddress()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case address := <-queueChan:
|
||||||
|
// Enqueue address
|
||||||
|
c.addAddress(address)
|
||||||
|
case workChan <- nextAddress:
|
||||||
|
nextAddress, err := c.popAddress()
|
||||||
|
if err != nil {
|
||||||
|
// Block until we get more work
|
||||||
|
c.addAddress(<-queueChan)
|
||||||
|
nextAddress, _ = c.popAddress()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}(queueChan)
|
||||||
|
|
||||||
|
go func(tempResult <-chan Result, workChan chan<- string) {
|
||||||
|
// Convert from result to queue.
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case r := <-tempResult:
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}(tempResult, workChan)
|
||||||
|
|
||||||
|
for address := range workChan {
|
||||||
|
// Spawn more workers as we get buffered work
|
||||||
|
go func() {
|
||||||
|
logger.Debugf("[%s] Worker started.", address)
|
||||||
|
tempResult <- *c.handleAddress(address)
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
func (c *Crawler) Run(numWorkers int, stopAfter int) *[]Result {
|
func (c *Crawler) Run(numWorkers int, stopAfter int) *[]Result {
|
||||||
numActive := 0
|
numActive := 0
|
||||||
numGood := 0
|
|
||||||
|
|
||||||
resultChan := make(chan Result)
|
resultChan := make(chan Result)
|
||||||
workerChan := make(chan struct{}, numWorkers)
|
workerChan := make(chan struct{}, numWorkers)
|
||||||
|
@ -130,24 +189,13 @@ func (c *Crawler) Run(numWorkers int, stopAfter int) *[]Result {
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case workerChan <- struct{}{}:
|
case workerChan <- struct{}{}:
|
||||||
if len(c.queue) == 0 {
|
|
||||||
// No work yet.
|
|
||||||
<-workerChan
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Pop from the queue
|
|
||||||
address := c.queue[0]
|
|
||||||
c.queue = c.queue[1:]
|
|
||||||
numActive += 1
|
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
|
address := <-c.queue.Output
|
||||||
logger.Debugf("[%s] Worker started.", address)
|
logger.Debugf("[%s] Worker started.", address)
|
||||||
resultChan <- *c.handleAddress(address)
|
resultChan <- *c.handleAddress(address)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
case r := <-resultChan:
|
case r := <-resultChan:
|
||||||
newAdded := 0
|
|
||||||
timestampSince := time.Now().Add(-c.peerAge)
|
timestampSince := time.Now().Add(-c.peerAge)
|
||||||
|
|
||||||
for _, addr := range r.Peers {
|
for _, addr := range r.Peers {
|
||||||
|
@ -155,24 +203,19 @@ func (c *Crawler) Run(numWorkers int, stopAfter int) *[]Result {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if c.addAddress(NetAddressKey(addr)) {
|
c.queue.Input <- NetAddressKey(addr)
|
||||||
newAdded += 1
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if newAdded > 0 {
|
numActive--
|
||||||
numGood += 1
|
|
||||||
}
|
|
||||||
numActive -= 1
|
|
||||||
|
|
||||||
if len(r.Peers) > 0 {
|
if len(r.Peers) > 0 {
|
||||||
stopAfter--
|
stopAfter--
|
||||||
results = append(results, r)
|
results = append(results, r)
|
||||||
|
|
||||||
logger.Infof("Added %d new peers of %d returned. Total %d known peers via %d connected.", newAdded, len(r.Peers), c.count, numGood)
|
logger.Infof("[%s] Returned %d peers. Total %d unique peers via %d connected (of %d attempted).", r.Node.Address, len(r.Peers), c.numUnique, c.numConnected, c.numAttempted)
|
||||||
}
|
}
|
||||||
|
|
||||||
if stopAfter == 0 || (len(c.queue) == 0 && numActive == 0) {
|
if stopAfter == 0 || (c.queue.IsEmpty() && numActive == 0) {
|
||||||
logger.Infof("Done.")
|
logger.Infof("Done.")
|
||||||
return &results
|
return &results
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,65 @@
|
||||||
|
package queue
|
||||||
|
|
||||||
|
// Queue is an unbounded FIFO of strings fed through Input and drained through
// Output. A single goroutine, started by NewQueue, owns the overflow slice and
// funnels data between the two channels through the filter, so the slice is
// never touched from more than one goroutine.
type Queue struct {
	Input  chan string // producers send candidate items here
	Output chan string // consumers receive accepted items here, in FIFO order

	overflow []string             // accepted items not yet handed to Output
	filter   func(string) *string // returns nil to drop an item, else the value to enqueue
	emptyReq chan chan bool       // IsEmpty queries, answered by the managing goroutine
}

// NewQueue creates a Queue whose Input and Output channels are each buffered
// to bufferSize, and starts the goroutine that moves items between them.
// Every value received on Input is passed to filter; a nil result drops the
// value, otherwise the pointed-to string is queued for Output.
//
// The managing goroutine has no stop mechanism and runs until the process
// exits.
func NewQueue(filter func(string) *string, bufferSize int) *Queue {
	q := &Queue{
		Input:    make(chan string, bufferSize),
		Output:   make(chan string, bufferSize),
		filter:   filter,
		emptyReq: make(chan chan bool),
	}
	go q.run()
	return q
}

// run is the managing goroutine. It is the only code that reads or writes
// q.overflow, and it never blocks outside the select, so it can always accept
// new input and answer IsEmpty queries even while no consumer is reading.
func (q *Queue) run() {
	for {
		// A nil channel is never ready in a select, so the send case is
		// only armed while there is something to send.
		var (
			out  chan<- string
			head string
		)
		if len(q.overflow) > 0 {
			out, head = q.Output, q.overflow[0]
		}

		select {
		case item := <-q.Input:
			if r := q.filter(item); r != nil {
				q.overflow = append(q.overflow, *r)
			}
		case out <- head:
			q.overflow[0] = "" // drop the reference so the string can be collected
			q.overflow = q.overflow[1:]
		case reply := <-q.emptyReq:
			reply <- len(q.overflow) == 0 && len(q.Input) == 0 && len(q.Output) == 0
		}
	}
}

// IsEmpty reports whether the queue holds no items at all: nothing waiting in
// the Input buffer, nothing in the overflow, and nothing in the Output buffer.
// The answer is computed by the managing goroutine, so it does not race with
// queue updates. It is a snapshot: producers and consumers may change the
// state immediately afterwards.
//
// IsEmpty must not be called from inside the filter function; the managing
// goroutine would then be waiting on itself.
func (q *Queue) IsEmpty() bool {
	if q.emptyReq == nil {
		// Not built by NewQueue: there is no managing goroutine to ask.
		return len(q.overflow) == 0
	}
	// Buffered so the managing goroutine never blocks while replying.
	reply := make(chan bool, 1)
	q.emptyReq <- reply
	return <-reply
}
|
Loading…
Reference in New Issue