package main

import (
	"fmt"
	"strings"
	"time"

	"go.wit.com/lib/gui/shell"
	"go.wit.com/log"
)

// pollHypervisor fetches the VM list from the hypervisor at
// http://<h.Hostname>:2520/vms and updates the droplet records in me.droplets.
//
// Each line of the response is split on whitespace: the first field is the
// state and the second is the droplet name. Lines with fewer than two fields
// (including blank lines) and lines whose state is not "ON" are ignored.
//
// If the fetch returns nil the function returns early and leaves h.lastpoll
// and h.killcount untouched; otherwise both are reset after the response has
// been processed.
func (h *HyperT) pollHypervisor() {
	url := "http://" + h.Hostname + ":2520/vms"
	log.Log(POLL, "wget url =", url)
	s := shell.Wget(url)
	if s == nil {
		return
	}

	for _, line := range strings.Split(string(s.Bytes()), "\n") {
		fields := strings.Fields(line)
		if len(fields) < 2 {
			continue
		}
		state, name := fields[0], fields[1]
		if state != "ON" {
			continue
		}
		log.Log(POLL, h.Hostname, "STATE:", state, "HOST:", name, "rest:", fields[2:])

		d := findDroplet(name)
		if d == nil {
			// this is a new unknown droplet (not in the config file)
			d = new(DropletT)
			d.Hostname = name
			d.hname = h.Hostname
			d.lastpoll = time.Now()
			d.CurrentState = "ON"
			me.droplets = append(me.droplets, d)
			log.Log(EVENT, name, "IS NEW. ADDED ON", h.Hostname)
			// The record is already complete and already points at this
			// hypervisor, so skip the "already recorded" handling below.
			continue
		}
		log.Log(INFO, "ALREADY RECORDED", d.Hostname)

		// update the status to ON and the last polled value
		d.CurrentState = "ON"
		d.lastpoll = time.Now()

		// this means the droplet is still where it was before
		if d.hname == h.Hostname {
			continue
		}

		if d.hname == "" {
			// the droplet was in the config file but this is the first time
			// it has shown up as running
			if d.hyperPreferred == h.Hostname {
				// it is running where the config file says it should be
				log.Log(EVENT, "new droplet", d.Hostname, "(matches config hypervisor", h.Hostname+")")
			} else {
				log.Log(EVENT, "new droplet", d.Hostname, "on", h.Hostname, "(in config file without preferred hypervisor)")
			}
		}

		// record the hypervisor the droplet was seen on (first sighting, or
		// it was previously recorded on a different hypervisor)
		d.hname = h.Hostname
	}

	h.lastpoll = time.Now()
	h.killcount = 0 // poll worked. reset killcount
}

// findDroplet returns the droplet in me.droplets whose Hostname equals name,
// or nil if there is none.
func findDroplet(name string) *DropletT {
	for _, d := range me.droplets {
		if d.Hostname == name {
			return d
		}
	}
	return nil
}

// findHypervisor returns the hypervisor in me.hypers whose Hostname equals
// name, or nil if there is none.
func findHypervisor(name string) *HyperT {
	for _, h := range me.hypers {
		if h.Hostname == name {
			return h
		}
	}
	return nil
}

// clusterHealthy checks the state of the cluster and returns whether it is
// healthy together with a summary string that is intended to be sent to an
// uptime monitor like Kuma.
//
// Only droplets whose ConfigState is "ON" are evaluated:
//   - an empty CurrentState counts as unknown (not yet polled) and does not
//     make the cluster unhealthy;
//   - a CurrentState other than "ON" counts as failed;
//   - an "ON" droplet that has not been polled for more than a minute counts
//     as failed and its CurrentState is changed to "MISSING".
//
// Side effects: besides marking droplets "MISSING", an unhealthy result
// resets me.unstable to the current time.
func clusterHealthy() (bool, string) {
	good := true
	var total, working, failed, missing, unknown int
	var unknownList []string

	for _, d := range me.droplets {
		total++
		if d.ConfigState != "ON" {
			continue
		}
		dur := time.Since(d.lastpoll) // elapsed time since the last successful poll

		switch {
		case d.CurrentState == "":
			// hostname has not been polled yet
			unknown++
			unknownList = append(unknownList, d.Hostname)
		case d.CurrentState != "ON":
			log.Info("BAD STATE", d.ConfigState, d.Hostname, d.hname, "CurrentState =", d.CurrentState, shell.FormatDuration(dur))
			good = false
			failed++
		case dur > time.Minute:
			log.Info("GOOD STATE MISSING", d.Hostname, d.hname, shell.FormatDuration(dur))
			good = false
			d.CurrentState = "MISSING"
			failed++
		case shell.FormatDuration(dur) == "":
			log.Info("DUR IS EMPTY", dur)
			missing++
		default:
			working++
		}
	}

	var b strings.Builder
	b.WriteString("(")
	fmt.Fprintf(&b, "total = %d ", total)
	fmt.Fprintf(&b, "working = %d ", working)
	if missing > 0 {
		fmt.Fprintf(&b, "missing = %d ", missing)
	}
	if unknown > 0 {
		// include the hostnames so the monitor shows which droplets are unknown
		fmt.Fprintf(&b, "unknown = %d %v ", unknown, unknownList)
	}
	if failed > 0 {
		fmt.Fprintf(&b, "failed = %d ", failed)
	}
	summary := strings.TrimSpace(b.String()) + ")"

	if me.killcount > 0 {
		summary += fmt.Sprintf("(killcount=%d)", me.killcount)
	}

	// Report how long the cluster has been stable, but only once more than
	// 133 seconds have passed since it was last marked unstable.
	last := time.Since(me.unstable)
	if last > 133*time.Second {
		s := strings.TrimSpace(shell.FormatDuration(last))
		summary += "(stable=" + s + ")"
	}

	if good {
		return good, "GOOD=true " + summary
	}
	me.unstable = time.Now()
	return good, "GOOD=false " + summary
}