2024-10-12 10:59:11 -05:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
2024-10-13 01:13:19 -05:00
|
|
|
"fmt"
|
2024-10-12 10:59:11 -05:00
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"go.wit.com/lib/gui/shell"
|
|
|
|
"go.wit.com/log"
|
|
|
|
)
|
|
|
|
|
2024-10-13 00:40:22 -05:00
|
|
|
func (h *HyperT) pollHypervisor() {
|
2024-10-12 10:59:11 -05:00
|
|
|
url := "http://" + h.Hostname + ":2520/vms"
|
2024-10-12 12:45:43 -05:00
|
|
|
log.Log(POLL, "wget url =", url)
|
2024-10-12 10:59:11 -05:00
|
|
|
s := shell.Wget(url)
|
|
|
|
if s == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
var bytesSplice []byte
|
|
|
|
bytesSplice = s.Bytes()
|
|
|
|
// fmt.Fprintln(w, string(bytesSplice))
|
|
|
|
for _, line := range strings.Split(string(bytesSplice), "\n") {
|
|
|
|
if line == "" {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
fields := strings.Fields(line)
|
|
|
|
if len(fields) < 2 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
state := fields[0]
|
|
|
|
name := fields[1]
|
|
|
|
if state == "ON" {
|
2024-10-12 11:54:01 -05:00
|
|
|
log.Log(POLL, h.Hostname, "STATE:", state, "HOST:", name, "rest:", fields[2:])
|
2024-10-12 12:45:43 -05:00
|
|
|
d := findDroplet(name)
|
2024-10-17 15:29:47 -05:00
|
|
|
if d == nil {
|
|
|
|
// this is a new unknown droplet (not in the config file)
|
|
|
|
d = new(DropletT)
|
|
|
|
d.Hostname = name
|
|
|
|
d.hname = h.Hostname
|
2024-10-12 12:45:43 -05:00
|
|
|
d.lastpoll = time.Now()
|
|
|
|
d.CurrentState = "ON"
|
2024-10-17 15:29:47 -05:00
|
|
|
me.droplets = append(me.droplets, d)
|
|
|
|
log.Log(EVENT, name, "IS NEW. ADDED ON", h.Hostname)
|
|
|
|
}
|
|
|
|
log.Log(INFO, "ALREADY RECORDED", d.Hostname)
|
|
|
|
|
|
|
|
// update the status to ON and the last polled value
|
|
|
|
d.CurrentState = "ON"
|
|
|
|
d.lastpoll = time.Now()
|
|
|
|
|
|
|
|
// this means the droplet is still where it was before
|
|
|
|
if d.hname == h.Hostname {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if d.hname == "" {
|
|
|
|
// this means the droplet was in the config file
|
|
|
|
// but this is the first time it's shown up as running
|
|
|
|
|
|
|
|
// this should mean a droplet is running where the config file says it probably should be running
|
|
|
|
if d.hyperPreferred == h.Hostname {
|
2024-10-18 07:40:06 -05:00
|
|
|
log.Log(EVENT, "new droplet", d.Hostname, "(matches config hypervisor", h.Hostname+")")
|
2024-10-12 10:59:11 -05:00
|
|
|
d.hname = h.Hostname
|
2024-10-17 15:29:47 -05:00
|
|
|
continue
|
2024-10-12 10:59:11 -05:00
|
|
|
}
|
2024-10-17 15:29:47 -05:00
|
|
|
|
2024-10-18 07:40:06 -05:00
|
|
|
log.Log(EVENT, "new droplet", d.Hostname, "on", h.Hostname, "(in config file without preferred hypervisor)")
|
2024-10-12 10:59:11 -05:00
|
|
|
}
|
|
|
|
d.hname = h.Hostname
|
|
|
|
}
|
2024-10-17 15:29:47 -05:00
|
|
|
continue
|
2024-10-12 10:59:11 -05:00
|
|
|
}
|
2024-10-13 00:40:22 -05:00
|
|
|
h.lastpoll = time.Now()
|
2024-10-13 00:57:29 -05:00
|
|
|
h.killcount = 0 // poll worked. reset killcount
|
2024-10-12 10:59:11 -05:00
|
|
|
}
|
2024-10-12 12:45:43 -05:00
|
|
|
|
|
|
|
func findDroplet(name string) *DropletT {
|
|
|
|
for _, d := range me.droplets {
|
|
|
|
if d.Hostname == name {
|
|
|
|
return d
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
2024-10-12 13:01:31 -05:00
|
|
|
|
2024-10-13 03:04:46 -05:00
|
|
|
func findHypervisor(name string) *HyperT {
|
|
|
|
for _, h := range me.hypers {
|
|
|
|
if h.Hostname == name {
|
|
|
|
return h
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2024-10-13 01:13:19 -05:00
|
|
|
// check the state of the cluster and return a string
|
|
|
|
// that is intended to be sent to an uptime monitor like Kuma
|
|
|
|
func clusterHealthy() (bool, string) {
|
|
|
|
var good bool = true
|
2024-10-13 01:33:32 -05:00
|
|
|
var total int
|
2024-10-13 01:13:19 -05:00
|
|
|
var working int
|
|
|
|
var failed int
|
2024-10-13 01:38:35 -05:00
|
|
|
var missing int
|
|
|
|
var unknown int
|
2024-10-15 11:02:34 -05:00
|
|
|
var unknownList []string
|
2024-10-13 01:38:35 -05:00
|
|
|
|
2024-10-12 13:01:31 -05:00
|
|
|
for _, d := range me.droplets {
|
2024-10-13 01:33:32 -05:00
|
|
|
total += 1
|
2024-10-17 15:54:39 -05:00
|
|
|
if d.ConfigState != "ON" {
|
2024-10-12 13:01:31 -05:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
dur := time.Since(d.lastpoll) // Calculate the elapsed time
|
2024-10-13 00:40:22 -05:00
|
|
|
if d.CurrentState == "" {
|
|
|
|
// log.Info("SKIP. hostname has not been polled yet", d.Hostname, d.hname)
|
2024-10-13 01:38:35 -05:00
|
|
|
unknown += 1
|
2024-10-15 11:02:34 -05:00
|
|
|
unknownList = append(unknownList, d.Hostname)
|
2024-10-13 00:40:22 -05:00
|
|
|
continue
|
|
|
|
}
|
2024-10-12 13:01:31 -05:00
|
|
|
if d.CurrentState != "ON" {
|
2024-10-17 15:54:39 -05:00
|
|
|
log.Info("BAD STATE", d.ConfigState, d.Hostname, d.hname, "CurrentState =", d.CurrentState, shell.FormatDuration(dur))
|
2024-10-12 13:01:31 -05:00
|
|
|
good = false
|
2024-10-13 01:33:32 -05:00
|
|
|
failed += 1
|
2024-10-12 13:01:31 -05:00
|
|
|
} else {
|
|
|
|
dur := time.Since(d.lastpoll) // Calculate the elapsed time
|
|
|
|
if dur > time.Minute {
|
|
|
|
log.Info("GOOD STATE MISSING", d.Hostname, d.hname, shell.FormatDuration(dur))
|
|
|
|
good = false
|
|
|
|
d.CurrentState = "MISSING"
|
2024-10-13 01:13:19 -05:00
|
|
|
failed += 1
|
|
|
|
continue
|
2024-10-12 13:01:31 -05:00
|
|
|
}
|
2024-10-13 00:57:29 -05:00
|
|
|
l := shell.FormatDuration(dur)
|
2024-10-13 00:40:22 -05:00
|
|
|
if l == "" {
|
|
|
|
log.Info("DUR IS EMPTY", dur)
|
2024-10-13 01:38:35 -05:00
|
|
|
missing += 1
|
2024-10-13 01:13:19 -05:00
|
|
|
continue
|
2024-10-13 00:40:22 -05:00
|
|
|
}
|
2024-10-13 01:13:19 -05:00
|
|
|
working += 1
|
2024-10-13 00:40:22 -05:00
|
|
|
// log.Info("GOOD STATE ON", d.Hostname, d.hname, "dur =", l)
|
2024-10-12 13:01:31 -05:00
|
|
|
}
|
|
|
|
}
|
2024-10-13 01:13:19 -05:00
|
|
|
var summary string = "("
|
2024-10-13 01:33:32 -05:00
|
|
|
summary += fmt.Sprintf("total = %d ", total)
|
2024-10-13 01:38:35 -05:00
|
|
|
summary += fmt.Sprintf("working = %d ", working)
|
|
|
|
if missing > 0 {
|
|
|
|
summary += fmt.Sprintf("missing = %d ", missing)
|
|
|
|
}
|
|
|
|
if unknown > 0 {
|
2024-10-15 11:02:34 -05:00
|
|
|
summary += fmt.Sprintf("unknown = %d ", unknown, unknownList)
|
2024-10-13 01:13:19 -05:00
|
|
|
}
|
|
|
|
if failed > 0 {
|
2024-10-13 01:33:32 -05:00
|
|
|
summary += fmt.Sprintf("failed = %d ", failed)
|
2024-10-13 01:13:19 -05:00
|
|
|
}
|
2024-10-13 01:33:32 -05:00
|
|
|
summary = strings.TrimSpace(summary)
|
2024-10-13 01:13:19 -05:00
|
|
|
summary += ")"
|
2024-10-13 03:49:54 -05:00
|
|
|
if me.killcount > 0 {
|
|
|
|
summary += "(killcount=" + fmt.Sprintf("%d", me.killcount) + ")"
|
|
|
|
}
|
2024-10-13 04:34:55 -05:00
|
|
|
last := time.Since(me.unstable)
|
2024-10-13 03:49:54 -05:00
|
|
|
if last > 133*time.Second {
|
|
|
|
// the cluster has not been stable for 10 seconds
|
2024-10-13 04:34:55 -05:00
|
|
|
s := strings.TrimSpace(shell.FormatDuration(last))
|
|
|
|
summary += "(stable=" + s + ")"
|
2024-10-13 03:49:54 -05:00
|
|
|
}
|
2024-10-13 01:13:19 -05:00
|
|
|
if good {
|
|
|
|
return good, "GOOD=true " + summary
|
|
|
|
}
|
2024-10-13 04:34:55 -05:00
|
|
|
me.unstable = time.Now()
|
2024-10-13 01:13:19 -05:00
|
|
|
return good, "GOOD=false " + summary
|
2024-10-12 13:01:31 -05:00
|
|
|
}
|