From 3a6b6ec8fb83f425360dc3125508a0f55983935e Mon Sep 17 00:00:00 2001
From: Andrey Petrov
Date: Fri, 18 Apr 2014 21:26:47 -0700
Subject: [PATCH] Importing code.

---
 .gitignore   |   3 +
 README.md    |   5 +
 btc-crawl.go | 354 +++++++++++++++++++++++++++++++++++++++++++++++++++
 ext_btcd.go  |  49 +++++++++
 4 files changed, 411 insertions(+)
 create mode 100644 btc-crawl.go
 create mode 100644 ext_btcd.go

diff --git a/.gitignore b/.gitignore
index 8365624..0f7ca52 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,6 @@ _testmain.go
 
 *.exe
 *.test
+
+# Binary
+/btc-crawl
diff --git a/README.md b/README.md
index 1b8e6db..d33eb01 100644
--- a/README.md
+++ b/README.md
@@ -2,3 +2,8 @@ btc-crawl
 =========
 
 Bitcoin node network crawler (written in golang).
+
+This is a for-fun project to explore the Bitcoin protocol and network.
+
+Current status: It crawls from hard-coded values and spits a bunch of stuff to
+stdout.
diff --git a/btc-crawl.go b/btc-crawl.go
new file mode 100644
index 0000000..6df56a2
--- /dev/null
+++ b/btc-crawl.go
@@ -0,0 +1,354 @@
+package main
+
+import (
+	"fmt"
+	"log"
+	"net"
+	"time"
+
+	"github.com/conformal/btcwire"
+)
+
+// TODO: Unhardcode these:
+var seedNodes []string = []string{"85.214.251.25:8333", "62.75.216.13:8333"}
+var userAgent string = "/btc-crawl:0.0.1"
+var lastBlock int32 = 0
+
+// Network timeouts. Without them an unresponsive peer would keep its worker
+// busy indefinitely and the crawl would never report completion.
+const (
+	dialTimeout    = 10 * time.Second // Limit for establishing the TCP connection
+	sessionTimeout = 60 * time.Second // Limit for the whole exchange with one peer
+)
+
+// TODO: Break Client/Peer/Crawler into separate modules.
+
+// Client holds the protocol parameters shared by all peer connections.
+type Client struct {
+	btcnet    btcwire.BitcoinNet // Bitcoin Network
+	pver      uint32             // Protocol Version
+	userAgent string             // User Agent
+	lastBlock int32
+}
+
+// NewDefaultClient returns a Client for the main network that uses btcwire's
+// protocol version and the package-level userAgent and lastBlock values.
+func NewDefaultClient() *Client {
+	return &Client{
+		btcnet:    btcwire.MainNet,
+		pver:      btcwire.ProtocolVersion,
+		userAgent: userAgent,
+		lastBlock: lastBlock,
+	}
+}
+
+// Peer is one outbound connection to a remote node.
+type Peer struct {
+	client  *Client
+	address string
+	conn    net.Conn
+	nonce   uint64 // Nonce we're sending to the peer
+}
+
+// NewPeer returns a Peer for address that is not connected yet.
+func NewPeer(client *Client, address string) *Peer {
+	p := Peer{
+		client:  client,
+		address: address,
+	}
+	return &p
+}
+
+// Connect dials the peer over TCP, giving up after dialTimeout. It returns an
+// error if the peer is already connected or if the dial fails.
+func (p *Peer) Connect() error {
+	if p.conn != nil {
+		return fmt.Errorf("Peer already connected, can't connect again.")
+	}
+	conn, err := net.DialTimeout("tcp", p.address, dialTimeout)
+	if err != nil {
+		return err
+	}
+
+	p.conn = conn
+	return nil
+}
+
+// Disconnect closes the connection and clears it so that Connect can be used
+// again. It does nothing when the peer is not connected.
+func (p *Peer) Disconnect() {
+	if p.conn == nil {
+		return
+	}
+	// The connection is being abandoned, so a failure to close it cleanly is
+	// not actionable.
+	p.conn.Close()
+	p.conn = nil
+}
+
+// Handshake sends our version message, expects the peer's version message as
+// the first reply and answers it with a verack. It returns an error if the
+// peer is not connected, if a read or write fails, or if the first reply is
+// not a version message.
+func (p *Peer) Handshake() error {
+	if p.conn == nil {
+		return fmt.Errorf("Peer is not connected, can't handshake.")
+	}
+
+	log.Printf("[%s] Starting handshake.", p.address)
+
+	nonce, err := btcwire.RandomUint64()
+	if err != nil {
+		return err
+	}
+	p.nonce = nonce
+
+	pver, btcnet := p.client.pver, p.client.btcnet
+
+	msgVersion, err := btcwire.NewMsgVersionFromConn(p.conn, p.nonce, p.client.userAgent, 0)
+	if err != nil {
+		// Do not touch msgVersion when it could not be built.
+		return err
+	}
+	msgVersion.DisableRelayTx = true
+	if err := btcwire.WriteMessage(p.conn, msgVersion, pver, btcnet); err != nil {
+		return err
+	}
+
+	// Read the response version.
+	msg, _, err := btcwire.ReadMessage(p.conn, pver, btcnet)
+	if err != nil {
+		return err
+	}
+	vmsg, ok := msg.(*btcwire.MsgVersion)
+	if !ok {
+		// Report what actually arrived; vmsg is a nil *MsgVersion here.
+		return fmt.Errorf("Did not receive version message: %T", msg)
+	}
+	// Negotiate protocol version.
+	if uint32(vmsg.ProtocolVersion) < pver {
+		pver = uint32(vmsg.ProtocolVersion)
+	}
+	log.Printf("[%s] -> Version: %s", p.address, vmsg.UserAgent)
+
+	// Normally we'd check if vmsg.Nonce == p.nonce but the crawler does not
+	// accept external connections so we skip it.
+
+	// Send verack.
+	if err := btcwire.WriteMessage(p.conn, btcwire.NewMsgVerAck(), pver, btcnet); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// Crawler discovers peers by asking every known address for the addresses it
+// knows. count, seenFilter and queue are only modified by NewCrawler and by
+// the goroutine running Start; workers report back over results.
+type Crawler struct {
+	client      *Client         // Protocol parameters used for every peer
+	count       int             // Number of distinct addresses queued so far
+	seenFilter  map[string]bool // TODO: Replace with bloom filter?
+	results     chan []string   // Addresses reported by finished workers
+	workers     chan struct{}   // Semaphore bounding concurrent workers
+	queue       []string        // Addresses waiting to be crawled
+	activeSince time.Duration   // Offset from now; addresses older than that are dropped
+}
+
+// NewCrawler returns a Crawler that starts from the addresses in queue and
+// talks to at most numWorkers peers at a time (at least one). Addresses whose
+// timestamp is more than 24 hours old are ignored.
+func NewCrawler(client *Client, queue []string, numWorkers int) *Crawler {
+	if numWorkers < 1 {
+		// A semaphore without capacity could never be acquired.
+		numWorkers = 1
+	}
+
+	c := Crawler{
+		client:      client,
+		count:       0,
+		seenFilter:  map[string]bool{},
+		results:     make(chan []string),
+		workers:     make(chan struct{}, numWorkers),
+		queue:       []string{},
+		activeSince: time.Hour * -24,
+	}
+
+	// Prefill the queue
+	for _, address := range queue {
+		c.addAddress(address)
+	}
+
+	return &c
+}
+
+// handleAddress connects to address, completes the handshake, sends getaddr
+// and collects the returned addresses whose timestamp is recent enough for
+// c.activeSince. It always returns a non-nil pointer; the slice is empty when
+// the peer could not be reached or returned nothing recent.
+func (c *Crawler) handleAddress(address string) *[]string {
+	r := []string{}
+
+	client := c.client
+	peer := NewPeer(client, address)
+
+	err := peer.Connect()
+	if err != nil {
+		log.Printf("[%s] Connection failed: %v", address, err)
+		return &r
+	}
+	defer peer.Disconnect()
+
+	// Bound the whole exchange so that a silent peer cannot block this
+	// worker forever.
+	if err := peer.conn.SetDeadline(time.Now().Add(sessionTimeout)); err != nil {
+		log.Printf("[%s] Setting deadline failed: %v", address, err)
+		return &r
+	}
+
+	err = peer.Handshake()
+	if err != nil {
+		log.Printf("[%s] Handshake failed: %v", address, err)
+		return &r
+	}
+
+	// Send getaddr.
+	if err := btcwire.WriteMessage(peer.conn, btcwire.NewMsgGetAddr(), client.pver, client.btcnet); err != nil {
+		log.Printf("[%s] GetAddr failed: %v", address, err)
+		return &r
+	}
+
+	// Listen for addr messages.
+	firstReceived := -1
+	tolerateMessages := 3
+	tolerateReadErrors := 3
+	otherMessages := []string{}
+	readErrors := 0
+	timestampSince := time.Now().Add(c.activeSince)
+
+	for {
+		// We can't really tell when we're done receiving peers, so we stop either
+		// when we get a smaller-than-normal set size or when we've received too
+		// many unrelated messages.
+		msg, _, err := btcwire.ReadMessage(peer.conn, client.pver, client.btcnet)
+		if err != nil {
+			log.Printf("[%s] Failed to read message: %v", address, err)
+			// Once the connection is closed or the deadline has passed
+			// every further read fails too, so retrying without a limit
+			// would spin forever.
+			readErrors++
+			if readErrors > tolerateReadErrors {
+				return &r
+			}
+			continue
+		}
+
+		switch tmsg := msg.(type) {
+		case *btcwire.MsgAddr:
+			for _, addr := range tmsg.AddrList {
+				if addr.Timestamp.After(timestampSince) {
+					r = append(r, NetAddressKey(addr))
+				}
+			}
+
+			if firstReceived == -1 {
+				firstReceived = len(tmsg.AddrList)
+			} else if firstReceived > len(tmsg.AddrList) || firstReceived == 0 {
+				// Probably done.
+				return &r
+			}
+		default:
+			otherMessages = append(otherMessages, tmsg.Command())
+			if len(otherMessages) > tolerateMessages {
+				log.Printf("[%s] Giving up with %d results after tolerating messages: %v.", address, len(r), otherMessages)
+				return &r
+			}
+		}
+	}
+}
+
+// addAddress queues address for crawling unless it has been seen before. It
+// returns true if the address was new, otherwise false.
+func (c *Crawler) addAddress(address string) bool {
+	if c.seenFilter[address] {
+		return false
+	}
+
+	c.seenFilter[address] = true
+	c.count++
+	c.queue = append(c.queue, address)
+
+	return true
+}
+
+// Start crawls until the queue is empty and every worker has reported back.
+// It blocks for the whole crawl; the returned channel already holds the
+// completion signal when Start returns. The error is always nil.
+func (c *Crawler) Start() (chan struct{}, error) {
+	done := make(chan struct{}, 1)
+	numWorkers := 0
+	numGood := 0
+
+	for {
+		if len(c.queue) == 0 && numWorkers == 0 {
+			// Nothing queued and no worker left that could queue more.
+			log.Printf("Done.")
+			done <- struct{}{}
+			return done, nil
+		}
+
+		// A nil channel is never ready in a select, so a worker slot is only
+		// requested while there is an address to hand out. This avoids
+		// spinning on the semaphore while waiting for results.
+		var slot chan struct{}
+		if len(c.queue) > 0 {
+			slot = c.workers
+		}
+
+		select {
+		case slot <- struct{}{}:
+			// Pop from the queue
+			address := c.queue[0]
+			c.queue = c.queue[1:]
+			numWorkers++
+
+			go func() {
+				log.Printf("[%s] Worker started.", address)
+				c.results <- *c.handleAddress(address)
+			}()
+
+		case r := <-c.results:
+			newAdded := 0
+			for _, address := range r {
+				if c.addAddress(address) {
+					newAdded++
+				}
+			}
+
+			if newAdded > 0 {
+				numGood++
+			}
+			numWorkers--
+
+			log.Printf("Added %d new peers of %d returned. Total %d known peers via %d connected.", newAdded, len(r), c.count, numGood)
+
+			// Release the slot of the worker that just reported.
+			<-c.workers
+		}
+	}
+}
+
+func main() {
+	// TODO: Parse args.
+	// TODO: Export to a reasonable format.
+	// TODO: Use proper logger for logging.
+	client := NewDefaultClient()
+	crawler := NewCrawler(client, seedNodes, 10)
+
+	done, err := crawler.Start()
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	<-done
+}
diff --git a/ext_btcd.go b/ext_btcd.go
new file mode 100644
index 0000000..fac6262
--- /dev/null
+++ b/ext_btcd.go
@@ -0,0 +1,49 @@
+// Things borrowed from https://github.com/conformal/btcd/blob/master/addrmanager.go
+// because "github.com/conformal/btcd" wouldn't import for some reason.
+
+package main
+
+import (
+	"encoding/base32"
+	"net"
+	"strconv"
+	"strings"
+
+	"github.com/conformal/btcwire"
+)
+
+var onioncatrange = net.IPNet{IP: net.ParseIP("FD87:d87e:eb43::"),
+	Mask: net.CIDRMask(48, 128)}
+
+// Tor returns whether the address lies in the IPv6 range that is used to
+// encode .onion addresses.
+func Tor(na *btcwire.NetAddress) bool {
+	// bitcoind encodes a .onion address as a 16 byte number by decoding the
+	// address prior to the .onion (i.e. the key hash) base32 into a ten
+	// byte number. it then stores the first 6 bytes of the address as
+	// 0xfD, 0x87, 0xD8, 0x7e, 0xeb, 0x43
+	// this is the same range used by onioncat, part of the
+	// RFC4193 Unique local IPv6 range.
+	// In summary the format is:
+	// { magic 6 bytes, 10 bytes base32 decode of key hash }
+	return onioncatrange.Contains(na.IP)
+}
+
+// ipString returns a string for the ip from the provided NetAddress. If the
+// ip is in the range used for tor addresses then it will be transformed into
+// the relevant .onion address.
+func ipString(na *btcwire.NetAddress) string {
+	if !Tor(na) {
+		return na.IP.String()
+	}
+	// We know now that na.IP is long enough.
+	key := base32.StdEncoding.EncodeToString(na.IP[6:])
+	return strings.ToLower(key) + ".onion"
+}
+
+// NetAddressKey returns a string key in the form of ip:port for IPv4 addresses
+// or [ip]:port for IPv6 addresses.
+func NetAddressKey(na *btcwire.NetAddress) string {
+	port := strconv.FormatUint(uint64(na.Port), 10)
+	return net.JoinHostPort(ipString(na), port)
+}