track droplets reported from each hypervisor

Signed-off-by: Jeff Carr <jcarr@wit.com>
This commit is contained in:
Jeff Carr 2024-10-27 11:02:50 -05:00
parent d38865a6cf
commit 8fc2fbd9c9
6 changed files with 86 additions and 33 deletions

22
dump.go
View File

@ -4,7 +4,9 @@ import (
"fmt"
"net/http"
"strings"
"time"
"go.wit.com/lib/gui/shell"
pb "go.wit.com/lib/protobuf/virtbuf"
)
@ -44,3 +46,23 @@ func dumpDroplets(w http.ResponseWriter) {
}
}
}
// dumpHypervisors writes a plain-text status report to w.
//
// For every hypervisor in me.hypers it prints one summary line with the
// hostname, the killcount and the time elapsed since the last poll. Under
// that it prints one tab-prefixed line per entry in the hypervisor's
// lastDroplets map, showing how long ago that droplet was last seen. If
// findDroplet knows the droplet, its CurrentState is appended to the line.
//
// Droplet lines come out in map iteration order, which Go leaves unspecified.
func dumpHypervisors(w http.ResponseWriter) {
	for _, hyper := range me.hypers {
		sincePoll := shell.FormatDuration(time.Since(hyper.lastpoll))
		host := hyper.pb.Hostname
		fmt.Fprintln(w, host, "killcount =", hyper.killcount, "lastpoll:", sincePoll)

		for dropletName, seen := range hyper.lastDroplets {
			age := shell.FormatDuration(time.Since(seen))

			// known droplets also report their current state
			if droplet := findDroplet(dropletName); droplet != nil {
				fmt.Fprintln(w, "\t", host, "name =", dropletName, "lastpoll:", age, droplet.CurrentState)
				continue
			}
			fmt.Fprintln(w, "\t", host, "name =", dropletName, "lastpoll:", age)
		}
	}
}

19
http.go
View File

@ -77,7 +77,7 @@ func okHandler(w http.ResponseWriter, r *http.Request) {
}
if route == "/uptime" {
ok, s := clusterHealthy()
ok, s := uptimeCheck()
if ok {
log.Info("Handling URL:", route, "cluster is ok", s)
fmt.Fprintln(w, s)
@ -113,6 +113,18 @@ func okHandler(w http.ResponseWriter, r *http.Request) {
return
}
// toggle poll logging
if route == "/poll" {
if POLL.Get() {
fmt.Fprintln(w, "POLL is true")
POLL.SetBool(false)
} else {
fmt.Fprintln(w, "POLL is false")
POLL.SetBool(true)
}
return
}
if route == "/dumpcluster" {
dumpCluster(w)
return
@ -123,6 +135,11 @@ func okHandler(w http.ResponseWriter, r *http.Request) {
return
}
if route == "/dumphypervisors" {
dumpHypervisors(w)
return
}
if route == "/dumplibvirtxml" {
virtigoxml.DumpLibvirtxmlDomainNames()
return

13
main.go
View File

@ -38,16 +38,17 @@ func main() {
}
// set defaults
me.unstable = time.Now() // initialize the grid as unstable
me.delay = 5 * time.Second // how often to poll the hypervisors
me.unstable = time.Now() // initialize the grid as unstable
me.changed = false
// me.dmap = make(map[*pb.Droplet]*DropletT)
me.hmap = make(map[*pb.Hypervisor]*HyperT)
// how long a droplet can be missing until it's declared dead
me.unstableTimeout = 17 * time.Second
me.missingDropletTimeout = time.Minute // not sure the difference between these values
// how often to poll the hypervisors
me.hyperPollDelay = 5 * time.Second
// how long the cluster must be stable before new droplets can be started
me.clusterStableDuration = 37 * time.Second
@ -141,12 +142,14 @@ func main() {
log.Println("result:", result)
os.Exit(0)
}
// initialize each hypervisor
for _, pbh := range me.cluster.Hypervisors {
// this is a new unknown droplet (not in the config file)
h := new(HyperT)
var h *HyperT
h = new(HyperT)
h.pb = pbh
h.lastDroplets = make(map[string]time.Time)
h.lastpoll = time.Now()
me.hmap[pbh] = h

52
poll.go
View File

@ -18,6 +18,7 @@ func (h *HyperT) pollHypervisor() {
if s == nil {
return
}
var bytesSplice []byte
bytesSplice = s.Bytes()
// fmt.Fprintln(w, string(bytesSplice))
@ -31,12 +32,26 @@ func (h *HyperT) pollHypervisor() {
}
state := fields[0]
name := fields[1]
if state == "OFF" {
// skip locally defined libvirt vms
continue
}
h.lastDroplets[name] = time.Now()
// if _, ok := h.lastDroplets[name]; ok {
// h.lastDroplets[name] = time.Now()
// }
// try the protobuf
d := findDroplet(name)
if d == nil {
// not sure what now?
log.Log(WARN, name, "is unknown on", h.pb.Hostname, "state =", state)
log.Log(WARN, name, "this vm was probably started by hand using virtsh")
log.Log(WARN, name, "todo: import vm from libvrit")
continue
}
if state == "ON" {
log.Log(POLL, h.pb.Hostname, "STATE:", state, "HOST:", name, "rest:", fields[2:])
d := findDroplet(name)
if d == nil {
// not sure what now?
}
log.Log(INFO, "ALREADY RECORDED", d.Hostname)
// update the status to ON
@ -69,30 +84,25 @@ func (h *HyperT) pollHypervisor() {
}
d.CurrentHypervisor = h.pb.Hostname
}
continue
}
for name, t := range h.lastDroplets {
dur := time.Since(t)
if dur > me.hyperPollDelay {
log.Info("droplet has probably powered down", name)
d := findDroplet(name)
if d != nil {
d.CurrentState = pb.DropletState_UNKNOWN
log.Info("set state UNKNOWN here", name)
}
}
}
h.lastpoll = time.Now()
h.killcount = 0 // poll worked. reset killcount
}
/*
func findHypervisor(name string) *HyperT {
if h, ok := me.hmap[name]; ok {
return h
}
return nil
for _, h := range me.hypers {
if h.pb.Hostname == name {
return h
}
}
return nil
}
*/
// check the state of the cluster and return a string
// that is intended to be sent to an uptime monitor like Kuma
func clusterHealthy() (bool, string) {
func uptimeCheck() (bool, string) {
var good bool = true
var total int
var working int

View File

@ -21,13 +21,13 @@ func (b *virtigoT) Enable() {
// this app's variables
type virtigoT struct {
cluster *pb.Cluster // basic cluster settings
delay time.Duration // how often to poll the hypervisors
hmap map[*pb.Hypervisor]*HyperT // map to the local struct
names []string
hypers []*HyperT
killcount int
unstable time.Time // the last time the cluster was incorrect
changed bool
hyperPollDelay time.Duration // how often to poll the hypervisors
unstableTimeout time.Duration // how long a droplet can be unstable until it's declared dead
clusterStableDuration time.Duration // how long the cluster must be stable before new droplets can be started
missingDropletTimeout time.Duration // how long a droplet can be missing for
@ -35,8 +35,9 @@ type virtigoT struct {
// HyperT is the stuff that is needed for a hypervisor
type HyperT struct {
pb *pb.Hypervisor // the Hypervisor protobuf
dog *time.Ticker // the watchdog timer itself
lastpoll time.Time // the last time the hypervisor polled
killcount int
pb *pb.Hypervisor // the Hypervisor protobuf
dog *time.Ticker // the watchdog timer itself
lastpoll time.Time // the last time the hypervisor polled
lastDroplets map[string]time.Time // the vm's in the last poll: droplet name -> time it was last reported by this hypervisor
killcount int // how many times the daemon has been forcibly killed
}

View File

@ -39,7 +39,7 @@ func (h *HyperT) sendDirs() {
}
func (h *HyperT) NewWatchdog() {
h.dog = time.NewTicker(me.delay)
h.dog = time.NewTicker(me.hyperPollDelay)
defer h.dog.Stop()
done := make(chan bool)
/*