track hypervisor daemon kill count

Signed-off-by: Jeff Carr <jcarr@wit.com>
This commit is contained in:
Jeff Carr 2024-10-13 00:57:29 -05:00
parent 62e9d8cfb1
commit 7a4bc0b5d6
3 changed files with 21 additions and 9 deletions

14
http.go
View File

@ -67,10 +67,20 @@ func okHandler(w http.ResponseWriter, r *http.Request) {
fmt.Fprintln(w, "GOOD=false")
}
for _, h := range me.hypers {
url := "http://" + h.Hostname + ":2520/kill"
dur := time.Since(h.lastpoll) // Calculate the elapsed time
if dur > 2 * time.Minute {
url := "http://" + h.Hostname + ":2520/kill"
if dur > 90 * time.Second {
log.Info("KILL DAEMON ON", h.Hostname, shell.FormatDuration(dur), "curl", url)
// s := shell.Wget(url)
// log.Info("curl got:", s)
h.lastpoll = time.Now()
h.killcount += 1
}
if h.killcount != 0 {
log.Info("KILL count =", h.killcount, "FOR", h.Hostname, dur, "curl", url)
}
if h.killcount > 10 {
log.Info("KILL count is greater than 10 for host", h.Hostname, dur, "curl", url)
}
// l := shell.FormatDuration(dur)
// log.Warn("HOST =", h.Hostname, "Last poll =", l)

View File

@ -56,6 +56,7 @@ func (h *HyperT) pollHypervisor() {
}
}
h.lastpoll = time.Now()
h.killcount = 0 // poll worked. reset killcount
}
func findDroplet(name string) *DropletT {
@ -88,7 +89,7 @@ func clusterHealthy() bool {
good = false
d.CurrentState = "MISSING"
}
l := shell.FormatDuration(dur)
l := shell.FormatDuration(dur)
if l == "" {
log.Info("DUR IS EMPTY", dur)
}

View File

@ -23,12 +23,13 @@ type virtigoT struct {
// the stuff that is needed for a hypervisor
type HyperT struct {
Hostname string // the hypervisor hostname
Scan func() // the function to run to scan the hypervisor
Autoscan bool // to scan or not to scan
Delay time.Duration // how often to poll the hypervisor
Dog *time.Ticker // the watchdog timer itself
lastpoll time.Time // the last time the hypervisor polled
Hostname string // the hypervisor hostname
Scan func() // the function to run to scan the hypervisor
Autoscan bool // to scan or not to scan
Delay time.Duration // how often to poll the hypervisor
Dog *time.Ticker // the watchdog timer itself
lastpoll time.Time // the last time the hypervisor polled
killcount int // how many times the stale-poll kill path has fired since the last successful poll; reset to 0 when a poll works
}
// the stuff that is needed for a hypervisor