2014-04-18 23:26:47 -05:00
package main
import (
2014-05-14 17:47:07 -05:00
"bufio"
2014-04-25 18:24:27 -05:00
"encoding/json"
"fmt"
2014-04-24 21:13:33 -05:00
"os"
2014-05-14 17:47:07 -05:00
"os/signal"
2014-05-15 17:33:09 -05:00
"syscall"
2014-04-23 19:44:22 -05:00
"time"
2014-04-24 21:13:33 -05:00
"github.com/alexcesaro/log"
"github.com/alexcesaro/log/golog"
"github.com/jessevdk/go-flags"
2014-04-18 23:26:47 -05:00
)
2014-04-22 20:53:40 -05:00
// Taken from: https://github.com/bitcoin/bitcoin/blob/89d72f3d9b6e7ef051ad1439f266b809f348229b/src/chainparams.cpp#L143
//
// defaultDnsSeeds are the bitcoin-core DNS seed hostnames used to discover
// initial peers when no --seed flag is supplied (see main).
var defaultDnsSeeds = []string{
	"seed.bitcoin.sipa.be",
	"dnsseed.bluematt.me",
	"dnsseed.bitcoin.dashjr.org",
	"seed.bitcoinstats.com",
	"seed.bitnodes.io",
	"bitseed.xf2.org",
}
// Options holds the command-line flags parsed by go-flags in main.
// The struct tags define each flag's short/long names, help text, and default.
type Options struct {
	// Verbose counts repeated -v flags; its length indexes into logLevels.
	Verbose []bool `short:"v" long:"verbose" description:"Show verbose logging."`
	// Output is the result file path; "-" or "" means stdout (see main).
	Output string `short:"o" long:"output" description:"File to write result to." default:"btc-crawl.json"`
	// Seed overrides the default bitcoin-core DNS seeds when non-empty.
	Seed []string `short:"s" long:"seed" description:"Override which seeds to use." default-mask:"<bitcoin-core DNS seeds>"`
	// Concurrency bounds the number of simultaneous crawler connections.
	Concurrency int `short:"c" long:"concurrency" description:"Maximum number of concurrent connections to open." default:"10"`
	// UserAgent is advertised to peers during the version handshake.
	UserAgent string `short:"A" long:"user-agent" description:"Client name to advertise while crawling. Should be in format of '/name:x.y.z/'." default:"/btc-crawl:0.1.1/"`
	// PeerAge filters out discovered peers last seen longer ago than this.
	PeerAge time.Duration `long:"peer-age" description:"Ignore discovered peers older than this." default:"24h"`
	// StopAfter triggers a graceful shutdown after this many results; 0 disables.
	StopAfter int `long:"stop-after" description:"Stop crawling after this many results." default:"0"`
}
2014-04-24 21:13:33 -05:00
// logLevels maps the count of -v flags to a verbosity level: no -v yields
// Warning, -v yields Info, -vv yields Debug. Indexed in main.
var logLevels = []log.Level{
	log.Warning,
	log.Info,
	log.Debug,
}
2014-04-18 23:26:47 -05:00
func main ( ) {
2014-04-25 18:27:10 -05:00
now := time . Now ( )
2014-04-22 20:53:40 -05:00
options := Options { }
2014-04-23 18:03:34 -05:00
parser := flags . NewParser ( & options , flags . Default )
2014-04-25 18:24:27 -05:00
p , err := parser . Parse ( )
2014-04-18 23:26:47 -05:00
if err != nil {
2014-04-25 18:24:27 -05:00
if p == nil {
fmt . Print ( err )
}
2014-04-23 18:03:34 -05:00
return
2014-04-18 23:26:47 -05:00
}
2014-04-24 21:13:33 -05:00
// Figure out the log level
numVerbose := len ( options . Verbose )
if numVerbose > len ( logLevels ) { // lol math.Min, you floaty bugger.
numVerbose = len ( logLevels )
}
2014-04-22 20:53:40 -05:00
2014-04-24 21:13:33 -05:00
logLevel := logLevels [ numVerbose ]
logger = golog . New ( os . Stderr , logLevel )
seedNodes := options . Seed
2014-04-22 20:53:40 -05:00
if len ( seedNodes ) == 0 {
seedNodes = GetSeedsFromDNS ( defaultDnsSeeds )
}
2014-05-14 17:47:07 -05:00
// Create client and crawler
2014-04-23 18:03:34 -05:00
client := NewClient ( options . UserAgent )
2014-04-25 18:24:27 -05:00
crawler := NewCrawler ( client , seedNodes , options . PeerAge )
2014-05-14 17:47:07 -05:00
// Configure output
var w * bufio . Writer
if options . Output == "-" || options . Output == "" {
w = bufio . NewWriter ( os . Stdout )
defer w . Flush ( )
} else {
fp , err := os . Create ( options . Output )
if err != nil {
logger . Errorf ( "Failed to create file: %v" , err )
return
}
2014-04-25 18:24:27 -05:00
2014-05-14 17:47:07 -05:00
w = bufio . NewWriter ( fp )
defer w . Flush ( )
defer fp . Close ( )
2014-04-25 18:24:27 -05:00
}
2014-05-14 17:47:07 -05:00
// Make the first write, make sure everything is cool
_ , err = w . Write ( [ ] byte ( "[" ) )
2014-04-25 18:24:27 -05:00
if err != nil {
2014-05-14 17:47:07 -05:00
logger . Errorf ( "Failed to write result, aborting immediately: %v" , err )
2014-04-25 18:24:27 -05:00
return
}
2014-05-15 17:33:09 -05:00
resultChan := make ( chan Result )
2014-05-14 17:47:07 -05:00
// Construct interrupt handler
sig := make ( chan os . Signal , 1 )
2014-05-15 17:33:09 -05:00
signal . Notify ( sig , os . Interrupt , syscall . SIGTERM )
2014-05-14 17:47:07 -05:00
go func ( ) {
<- sig // Wait for ^C signal
logger . Warningf ( "Interrupt signal detected, shutting down gracefully by waiting for active workers to finish." )
crawler . Shutdown ( )
<- sig // Hurry up?
2014-05-15 17:33:09 -05:00
logger . Warningf ( "Urgent interrupt. Abandoning in-progress workers." )
close ( resultChan ) // FIXME: Could this cause stuff to asplode?
2014-05-14 17:47:07 -05:00
} ( )
// Launch crawler
go crawler . Run ( resultChan , options . Concurrency )
logger . Infof ( "Crawler started with %d concurrency limit." , options . Concurrency )
// Start processing results
count := 0
for result := range resultChan {
b , err := json . Marshal ( result )
if err != nil {
logger . Warningf ( "Failed to export JSON, skipping: %v" , err )
}
if count > 0 {
b = append ( [ ] byte ( "," ) , b ... )
}
_ , err = w . Write ( b )
if err != nil {
logger . Errorf ( "Failed to write result, aborting gracefully: %v" , err )
crawler . Shutdown ( )
break
}
count ++
if options . StopAfter > 0 && count > options . StopAfter {
logger . Infof ( "StopAfter count reached, shutting down gracefully." )
crawler . Shutdown ( )
}
}
w . Write ( [ ] byte ( "]" ) ) // No error checking here because it's too late to care.
logger . Infof ( "Written %d results after %s: %s" , count , time . Now ( ) . Sub ( now ) , options . Output )
2014-04-18 23:26:47 -05:00
}