170 lines
4.2 KiB
170 lines
4.2 KiB
package main
import (
// pollHypervisor fetches the VM listing for this hypervisor from
// http://<hostname>:2520/vms using shell.Wget and walks the response one
// line at a time. Each line is split on whitespace; the first field is read
// as the VM state and the second as the droplet hostname. For lines whose
// state is "ON" the droplet is looked up with findDroplet: names that are
// not known yet are appended to me.droplets, and CurrentState, lastpoll and
// hname are updated from this poll. The last two statements stamp
// h.lastpoll and reset h.killcount.
//
// NOTE(review): the guards below (s == nil, empty line, fewer than two
// fields, d.hname == h.pb.Hostname) presumably exit or skip to the next
// line — confirm the exact flow before relying on this description.
func (h *HyperT) pollHypervisor() {
// the hypervisor's hostname is taken from h.pb.Hostname; the port and path are fixed
url := "http://" + h.pb.Hostname + ":2520/vms"
log.Log(POLL, "wget url =", url)
s := shell.Wget(url)
// NOTE(review): a nil result presumably means the fetch failed — confirm against shell.Wget
if s == nil {
var bytesSplice []byte
bytesSplice = s.Bytes()
// fmt.Fprintln(w, string(bytesSplice))
// the response is handled as text, one record per "\n"-separated line
for _, line := range strings.Split(string(bytesSplice), "\n") {
// blank lines carry no record
if line == "" {
fields := strings.Fields(line)
// fields[0] and fields[1] are both read below, so at least two fields are needed
if len(fields) < 2 {
state := fields[0]
name := fields[1]
// only lines reporting the state "ON" are processed here
if state == "ON" {
// any fields after the hostname are only logged
log.Log(POLL, h.pb.Hostname, "STATE:", state, "HOST:", name, "rest:", fields[2:])
d := findDroplet(name)
if d == nil {
// this is a new unknown droplet (not in the config file)
d = new(DropletT)
d.pb.Hostname = name
d.hname = h.pb.Hostname
d.lastpoll = time.Now()
d.CurrentState = "ON"
me.droplets = append(me.droplets, d)
log.Log(EVENT, name, "IS NEW. ADDED ON", h.pb.Hostname)
log.Log(INFO, "ALREADY RECORDED", d.pb.Hostname)
// update the status to ON and the last polled value
d.CurrentState = "ON"
d.lastpoll = time.Now()
// this means the droplet is still where it was before
if d.hname == h.pb.Hostname {
// an empty hname means no hypervisor has been recorded for this droplet yet
if d.hname == "" {
// this means the droplet was in the config file
// but this is the first time it's shown up as running
// this should mean a droplet is running where the config file says it probably should be running
if d.hyperPreferred == h.pb.Hostname {
log.Log(EVENT, "new droplet", d.pb.Hostname, "(matches config hypervisor", h.pb.Hostname+")")
d.hname = h.pb.Hostname
// the droplet is recorded on this hypervisor even though hyperPreferred did not match it
log.Log(EVENT, "new droplet", d.pb.Hostname, "on", h.pb.Hostname, "(in config file without preferred hypervisor)")
d.hname = h.pb.Hostname
// record when this hypervisor was last polled
h.lastpoll = time.Now()
h.killcount = 0 // poll worked. reset killcount
func findDroplet(name string) *DropletT {
for _, d := range me.droplets {
if d.pb.Hostname == name {
return d
return nil
func findHypervisor(name string) *HyperT {
for _, h := range me.hypers {
if h.pb.Hostname == name {
return h
return nil
// check the state of the cluster and return a string
// that is intended to be sent to an uptime monitor like Kuma
func clusterHealthy() (bool, string) {
var good bool = true
var total int
var working int
var failed int
var missing int
var unknown int
var unknownList []string
for _, d := range me.droplets {
total += 1
if d.ConfigState != "ON" {
dur := time.Since(d.lastpoll) // Calculate the elapsed time
if d.CurrentState == "" {
// log.Info("SKIP. hostname has not been polled yet", d.pb.Hostname, d.hname)
unknown += 1
unknownList = append(unknownList, d.pb.Hostname)
if d.CurrentState != "ON" {
log.Info("BAD STATE", d.ConfigState, d.pb.Hostname, d.hname, "CurrentState =", d.CurrentState, shell.FormatDuration(dur))
good = false
failed += 1
} else {
dur := time.Since(d.lastpoll) // Calculate the elapsed time
if dur > time.Minute {
log.Info("GOOD STATE MISSING", d.pb.Hostname, d.hname, shell.FormatDuration(dur))
good = false
d.CurrentState = "MISSING"
failed += 1
l := shell.FormatDuration(dur)
if l == "" {
log.Info("DUR IS EMPTY", dur)
missing += 1
working += 1
// log.Info("GOOD STATE ON", d.pb.Hostname, d.hname, "dur =", l)
var summary string = "("
summary += fmt.Sprintf("total = %d ", total)
summary += fmt.Sprintf("working = %d ", working)
if missing > 0 {
summary += fmt.Sprintf("missing = %d ", missing)
if unknown > 0 {
summary += fmt.Sprintf("unknown = %d ", unknown, unknownList)
if failed > 0 {
summary += fmt.Sprintf("failed = %d ", failed)
summary = strings.TrimSpace(summary)
summary += ")"
if me.killcount > 0 {
summary += "(killcount=" + fmt.Sprintf("%d", me.killcount) + ")"
last := time.Since(me.unstable)
if last > 133*time.Second {
// the cluster has not been stable for 10 seconds
s := strings.TrimSpace(shell.FormatDuration(last))
summary += "(stable=" + s + ")"
if good {
return good, "GOOD=true " + summary
me.unstable = time.Now()
return good, "GOOD=false " + summary