diff --git a/dump.go b/dump.go index 57a3404..9291bb1 100644 --- a/dump.go +++ b/dump.go @@ -4,7 +4,9 @@ import ( "fmt" "net/http" "strings" + "time" + "go.wit.com/lib/gui/shell" pb "go.wit.com/lib/protobuf/virtbuf" ) @@ -44,3 +46,23 @@ func dumpDroplets(w http.ResponseWriter) { } } } + +// status of the hypervisors +func dumpHypervisors(w http.ResponseWriter) { + for _, h := range me.hypers { + // lastpoll time.Time // the last time the hypervisor polled + dur := time.Since(h.lastpoll) + tmp := shell.FormatDuration(dur) + fmt.Fprintln(w, h.pb.Hostname, "killcount =", h.killcount, "lastpoll:", tmp) + for name, t := range h.lastDroplets { + dur := time.Since(t) + tmp := shell.FormatDuration(dur) + d := findDroplet(name) + if d == nil { + fmt.Fprintln(w, "\t", h.pb.Hostname, "name =", name, "lastpoll:", tmp) + } else { + fmt.Fprintln(w, "\t", h.pb.Hostname, "name =", name, "lastpoll:", tmp, d.CurrentState) + } + } + } +} diff --git a/http.go b/http.go index 937f9a7..2dddeb4 100644 --- a/http.go +++ b/http.go @@ -77,7 +77,7 @@ func okHandler(w http.ResponseWriter, r *http.Request) { } if route == "/uptime" { - ok, s := clusterHealthy() + ok, s := uptimeCheck() if ok { log.Info("Handling URL:", route, "cluster is ok", s) fmt.Fprintln(w, s) @@ -113,6 +113,18 @@ func okHandler(w http.ResponseWriter, r *http.Request) { return } + // toggle poll logging + if route == "/poll" { + if POLL.Get() { + fmt.Fprintln(w, "POLL is true") + POLL.SetBool(false) + } else { + fmt.Fprintln(w, "POLL is false") + POLL.SetBool(true) + } + return + } + if route == "/dumpcluster" { dumpCluster(w) return @@ -123,6 +135,11 @@ func okHandler(w http.ResponseWriter, r *http.Request) { return } + if route == "/dumphypervisors" { + dumpHypervisors(w) + return + } + if route == "/dumplibvirtxml" { virtigoxml.DumpLibvirtxmlDomainNames() return diff --git a/main.go b/main.go index 1168355..fe827eb 100644 --- a/main.go +++ b/main.go @@ -38,16 +38,17 @@ func main() { } // set defaults - me.unstable = 
time.Now() // initialize the grid as unstable - me.delay = 5 * time.Second // how often to poll the hypervisors + me.unstable = time.Now() // initialize the grid as unstable me.changed = false - // me.dmap = make(map[*pb.Droplet]*DropletT) me.hmap = make(map[*pb.Hypervisor]*HyperT) // how long a droplet can be missing until it's declared dead me.unstableTimeout = 17 * time.Second me.missingDropletTimeout = time.Minute // not sure the difference between these values + // how often to poll the hypervisors + me.hyperPollDelay = 5 * time.Second + // how long the cluster must be stable before new droplets can be started me.clusterStableDuration = 37 * time.Second @@ -141,12 +142,14 @@ func main() { log.Println("result:", result) os.Exit(0) } + // initialize each hypervisor for _, pbh := range me.cluster.Hypervisors { // this is a new unknown droplet (not in the config file) - h := new(HyperT) + var h *HyperT + h = new(HyperT) h.pb = pbh - + h.lastDroplets = make(map[string]time.Time) h.lastpoll = time.Now() me.hmap[pbh] = h diff --git a/poll.go b/poll.go index 3e9caa7..73cbbea 100644 --- a/poll.go +++ b/poll.go @@ -18,6 +18,7 @@ func (h *HyperT) pollHypervisor() { if s == nil { return } + var bytesSplice []byte bytesSplice = s.Bytes() // fmt.Fprintln(w, string(bytesSplice)) @@ -31,12 +32,26 @@ func (h *HyperT) pollHypervisor() { } state := fields[0] name := fields[1] + if state == "OFF" { + // skip locally defined libvirt vms + continue + } + h.lastDroplets[name] = time.Now() + // if _, ok := h.lastDroplets[name]; ok { + // h.lastDroplets[name] = time.Now() + // } + + // try the protobuf + d := findDroplet(name) + if d == nil { + // not sure what now? 
+ log.Log(WARN, name, "is unknown on", h.pb.Hostname, "state =", state) + log.Log(WARN, name, "this vm was probably started by hand using virsh") + log.Log(WARN, name, "todo: import vm from libvirt") + continue + } if state == "ON" { log.Log(POLL, h.pb.Hostname, "STATE:", state, "HOST:", name, "rest:", fields[2:]) - d := findDroplet(name) - if d == nil { - // not sure whawt now? - } log.Log(INFO, "ALREADY RECORDED", d.Hostname) // update the status to ON @@ -69,30 +84,25 @@ func (h *HyperT) pollHypervisor() { } d.CurrentHypervisor = h.pb.Hostname } - continue + } + for name, t := range h.lastDroplets { + dur := time.Since(t) + if dur > me.hyperPollDelay { + log.Info("droplet has probably powered down", name) + d := findDroplet(name) + if d != nil { + d.CurrentState = pb.DropletState_UNKNOWN + log.Info("set state UNKNOWN here", name) + } + } } h.lastpoll = time.Now() h.killcount = 0 // poll worked. reset killcount } -/* -func findHypervisor(name string) *HyperT { - if h, ok := me.hmap[name]; ok { - return h - } - return nil - for _, h := range me.hypers { - if h.pb.Hostname == name { - return h - } - } - return nil -} -*/ - // check the state of the cluster and return a string // that is intended to be sent to an uptime monitor like Kuma -func clusterHealthy() (bool, string) { +func uptimeCheck() (bool, string) { var good bool = true var total int var working int diff --git a/structs.go b/structs.go index 2671b52..715231a 100644 --- a/structs.go +++ b/structs.go @@ -21,13 +21,13 @@ func (b *virtigoT) Enable() { // this app's variables type virtigoT struct { cluster *pb.Cluster // basic cluster settings - delay time.Duration // how often to poll the hypervisors hmap map[*pb.Hypervisor]*HyperT // map to the local struct names []string hypers []*HyperT killcount int unstable time.Time // the last time the cluster was incorrect changed bool + hyperPollDelay time.Duration // how often to poll the hypervisors unstableTimeout time.Duration // how long a droplet can be 
unstable until it's declared dead clusterStableDuration time.Duration // how long the cluster must be stable before new droplets can be started missingDropletTimeout time.Duration // how long a droplet can be missing for @@ -35,8 +35,9 @@ type virtigoT struct { // the stuff that is needed for a hypervisor type HyperT struct { - pb *pb.Hypervisor // the Hypervisor protobuf - dog *time.Ticker // the watchdog timer itself - lastpoll time.Time // the last time the hypervisor polled - killcount int + pb *pb.Hypervisor // the Hypervisor protobuf + dog *time.Ticker // the watchdog timer itself + lastpoll time.Time // the last time the hypervisor polled + lastDroplets map[string]time.Time // the VMs in the last poll + killcount int // how many times the daemon has been forcibly killed } diff --git a/watchdog.go b/watchdog.go index be6f8cf..97baac4 100644 --- a/watchdog.go +++ b/watchdog.go @@ -39,7 +39,7 @@ func (h *HyperT) sendDirs() { } func (h *HyperT) NewWatchdog() { - h.dog = time.NewTicker(me.delay) + h.dog = time.NewTicker(me.hyperPollDelay) defer h.dog.Stop() done := make(chan bool) /*