track droplets reported from each hypervisor

Signed-off-by: Jeff Carr <jcarr@wit.com>

parent d38865a6cf
commit 8fc2fbd9c9
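In short: each HyperT now carries a lastDroplets map from droplet name to the wall-clock time that droplet was last reported by a poll, and anything older than the poll interval is treated as probably powered down. A self-contained sketch of that scheme (trimmed stand-in types, not the actual virtigo structs):

package main

import (
    "fmt"
    "time"
)

// hyper is a trimmed stand-in for virtigo's HyperT.
type hyper struct {
    hostname     string
    lastDroplets map[string]time.Time // droplet name -> last time a poll reported it
}

// seen records that a poll just reported the named droplet.
func (h *hyper) seen(name string) {
    h.lastDroplets[name] = time.Now()
}

// stale lists droplets not reported within one poll interval.
func (h *hyper) stale(pollDelay time.Duration) []string {
    var missing []string
    for name, t := range h.lastDroplets {
        if time.Since(t) > pollDelay {
            missing = append(missing, name)
        }
    }
    return missing
}

func main() {
    h := &hyper{hostname: "hv1", lastDroplets: make(map[string]time.Time)}
    h.seen("droplet1")
    fmt.Println(h.hostname, "stale droplets:", h.stale(5*time.Second)) // none yet
}

The diffs below wire this idea through the dump, HTTP, init, poll, and struct code.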
dump.go (22 changed lines)

@@ -4,7 +4,9 @@ import (
     "fmt"
     "net/http"
     "strings"
+    "time"
 
+    "go.wit.com/lib/gui/shell"
     pb "go.wit.com/lib/protobuf/virtbuf"
 )
 
@@ -44,3 +46,23 @@ func dumpDroplets(w http.ResponseWriter) {
         }
     }
 }
+
+// status of the hypervisors
+func dumpHypervisors(w http.ResponseWriter) {
+    for _, h := range me.hypers {
+        // lastpoll time.Time // the last time the hypervisor polled
+        dur := time.Since(h.lastpoll)
+        tmp := shell.FormatDuration(dur)
+        fmt.Fprintln(w, h.pb.Hostname, "killcount =", h.killcount, "lastpoll:", tmp)
+        for name, t := range h.lastDroplets {
+            dur := time.Since(t)
+            tmp := shell.FormatDuration(dur)
+            d := findDroplet(name)
+            if d == nil {
+                fmt.Fprintln(w, "\t", h.pb.Hostname, "name =", name, "lastpoll:", tmp)
+            } else {
+                fmt.Fprintln(w, "\t", h.pb.Hostname, "name =", name, "lastpoll:", tmp, d.CurrentState)
+            }
+        }
+    }
+}
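The dump leans on shell.FormatDuration to report how long ago each hypervisor and droplet was last seen. Assuming that helper renders a duration as a short human-readable string (its exact output is not shown in this commit), a stand-in with the same shape would be:

package main

import (
    "fmt"
    "time"
)

// formatDuration is a hypothetical stand-in for shell.FormatDuration;
// the real helper's exact format may differ.
func formatDuration(d time.Duration) string {
    return d.Truncate(time.Second).String() // e.g. "1m3s"
}

func main() {
    fmt.Println(formatDuration(63 * time.Second)) // prints "1m3s"
}

Fetching the /dumphypervisors route (wired up in http.go below) then yields one line per hypervisor, followed by an indented line for each droplet it reported.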
http.go (19 changed lines)

@@ -77,7 +77,7 @@ func okHandler(w http.ResponseWriter, r *http.Request) {
     }
 
     if route == "/uptime" {
-        ok, s := clusterHealthy()
+        ok, s := uptimeCheck()
         if ok {
            log.Info("Handling URL:", route, "cluster is ok", s)
            fmt.Fprintln(w, s)

@@ -113,6 +113,18 @@ func okHandler(w http.ResponseWriter, r *http.Request) {
         return
     }
 
+    // toggle poll logging
+    if route == "/poll" {
+        if POLL.Get() {
+            fmt.Fprintln(w, "POLL is true")
+            POLL.SetBool(false)
+        } else {
+            fmt.Fprintln(w, "POLL is false")
+            POLL.SetBool(true)
+        }
+        return
+    }
+
     if route == "/dumpcluster" {
         dumpCluster(w)
         return

@@ -123,6 +135,11 @@ func okHandler(w http.ResponseWriter, r *http.Request) {
         return
     }
 
+    if route == "/dumphypervisors" {
+        dumpHypervisors(w)
+        return
+    }
+
     if route == "/dumplibvirtxml" {
         virtigoxml.DumpLibvirtxmlDomainNames()
         return
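Note that the /poll handler reports the flag's value before flipping it, so the response shows the old state. A standalone sketch of the same toggle pattern, with a sync/atomic Bool standing in for the log package's POLL flag (whose Get/SetBool calls appear in the hunk above):

package main

import (
    "fmt"
    "log"
    "net/http"
    "sync/atomic"
)

var poll atomic.Bool // stand-in for the POLL logging flag

func pollHandler(w http.ResponseWriter, r *http.Request) {
    if poll.Load() {
        fmt.Fprintln(w, "POLL is true") // prints the pre-toggle value
        poll.Store(false)
    } else {
        fmt.Fprintln(w, "POLL is false")
        poll.Store(true)
    }
}

func main() {
    http.HandleFunc("/poll", pollHandler)
    log.Fatal(http.ListenAndServe(":8080", nil)) // port chosen arbitrarily for the sketch
}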
main.go (13 changed lines)

@@ -38,16 +38,17 @@ func main() {
     }
 
     // set defaults
-    me.unstable = time.Now()   // initialize the grid as unstable
-    me.delay = 5 * time.Second // how often to poll the hypervisors
+    me.unstable = time.Now() // initialize the grid as unstable
     me.changed = false
     // me.dmap = make(map[*pb.Droplet]*DropletT)
     me.hmap = make(map[*pb.Hypervisor]*HyperT)
 
     // how long a droplet can be missing until it's declared dead
     me.unstableTimeout = 17 * time.Second
+    me.missingDropletTimeout = time.Minute // not sure the difference between these values
+
+    // how often to poll the hypervisors
+    me.hyperPollDelay = 5 * time.Second
 
     // how long the cluster must be stable before new droplets can be started
     me.clusterStableDuration = 37 * time.Second

@@ -141,12 +142,14 @@ func main() {
         log.Println("result:", result)
         os.Exit(0)
     }
 
     // initialize each hypervisor
     for _, pbh := range me.cluster.Hypervisors {
-        // this is a new unknown droplet (not in the config file)
-        h := new(HyperT)
+        var h *HyperT
+        h = new(HyperT)
         h.pb = pbh
 
+        h.lastDroplets = make(map[string]time.Time)
+        h.lastpoll = time.Now()
         me.hmap[pbh] = h
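The make(map[string]time.Time) call here matters: in Go a nil map can be read but not written, so without it the first h.lastDroplets[name] = time.Now() in poll.go would panic. A minimal illustration:

package main

import "time"

type hyperT struct { // trimmed stand-in for HyperT
    lastDroplets map[string]time.Time
}

func main() {
    h := new(hyperT)
    // h.lastDroplets["vm"] = time.Now() // would panic: assignment to entry in nil map
    h.lastDroplets = make(map[string]time.Time)
    h.lastDroplets["vm"] = time.Now() // fine once the map exists
}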
poll.go (52 changed lines)

@@ -18,6 +18,7 @@ func (h *HyperT) pollHypervisor() {
     if s == nil {
         return
     }
+
     var bytesSplice []byte
     bytesSplice = s.Bytes()
     // fmt.Fprintln(w, string(bytesSplice))

@@ -31,12 +32,26 @@ func (h *HyperT) pollHypervisor() {
         }
         state := fields[0]
         name := fields[1]
+        if state == "OFF" {
+            // skip locally defined libvirt vms
+            continue
+        }
+        h.lastDroplets[name] = time.Now()
+        // if _, ok := h.lastDroplets[name]; ok {
+        //     h.lastDroplets[name] = time.Now()
+        // }
+
+        // try the protobuf
+        d := findDroplet(name)
+        if d == nil {
+            // not sure what now?
+            log.Log(WARN, name, "is unknown on", h.pb.Hostname, "state =", state)
+            log.Log(WARN, name, "this vm was probably started by hand using virsh")
+            log.Log(WARN, name, "todo: import vm from libvirt")
+            continue
+        }
         if state == "ON" {
             log.Log(POLL, h.pb.Hostname, "STATE:", state, "HOST:", name, "rest:", fields[2:])
-            d := findDroplet(name)
-            if d == nil {
-                // not sure what now?
-            }
             log.Log(INFO, "ALREADY RECORDED", d.Hostname)
 
             // update the status to ON

@@ -69,30 +84,25 @@ func (h *HyperT) pollHypervisor() {
             }
             d.CurrentHypervisor = h.pb.Hostname
         }
         continue
     }
+    for name, t := range h.lastDroplets {
+        dur := time.Since(t)
+        if dur > me.hyperPollDelay {
+            log.Info("droplet has probably powered down", name)
+            d := findDroplet(name)
+            if d != nil {
+                d.CurrentState = pb.DropletState_UNKNOWN
+                log.Info("set state UNKNOWN here", name)
+            }
+        }
+    }
     h.lastpoll = time.Now()
     h.killcount = 0 // poll worked. reset killcount
 }
 
-/*
-func findHypervisor(name string) *HyperT {
-    if h, ok := me.hmap[name]; ok {
-        return h
-    }
-    return nil
-    for _, h := range me.hypers {
-        if h.pb.Hostname == name {
-            return h
-        }
-    }
-    return nil
-}
-*/
-
 // check the state of the cluster and return a string
 // that is intended to be sent to an uptime monitor like Kuma
-func clusterHealthy() (bool, string) {
+func uptimeCheck() (bool, string) {
     var good bool = true
     var total int
     var working int
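The new loop at the bottom of pollHypervisor is where the tracking pays off: any droplet whose last-seen timestamp is older than me.hyperPollDelay (5 seconds, i.e. one poll interval) is assumed to have powered down and its protobuf state is set to UNKNOWN. A worked standalone example of that rule, with hypothetical droplet names:

package main

import (
    "fmt"
    "time"
)

func main() {
    hyperPollDelay := 5 * time.Second // matches the default set in main.go
    lastDroplets := map[string]time.Time{
        "vm1.example.com": time.Now(),                        // reported in the latest poll
        "vm2.example.com": time.Now().Add(-12 * time.Second), // missed at least one poll
    }
    for name, t := range lastDroplets {
        if time.Since(t) > hyperPollDelay {
            fmt.Println(name, "has probably powered down") // only vm2 prints
        }
    }
}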
structs.go (11 changed lines)

@@ -21,13 +21,13 @@ func (b *virtigoT) Enable() {
 // this app's variables
 type virtigoT struct {
     cluster *pb.Cluster // basic cluster settings
-    delay   time.Duration // how often to poll the hypervisors
     hmap    map[*pb.Hypervisor]*HyperT // map to the local struct
     names   []string
     hypers  []*HyperT
     killcount int
     unstable  time.Time // the last time the cluster was incorrect
     changed   bool
+    hyperPollDelay        time.Duration // how often to poll the hypervisors
     unstableTimeout       time.Duration // how long a droplet can be unstable until it's declared dead
     clusterStableDuration time.Duration // how long the cluster must be stable before new droplets can be started
     missingDropletTimeout time.Duration // how long a droplet can be missing for

@@ -35,8 +35,9 @@ type virtigoT struct {
 
 // the stuff that is needed for a hypervisor
 type HyperT struct {
-    pb        *pb.Hypervisor // the Hypervisor protobuf
-    dog       *time.Ticker   // the watchdog timer itself
-    lastpoll  time.Time      // the last time the hypervisor polled
-    killcount int
+    pb           *pb.Hypervisor       // the Hypervisor protobuf
+    dog          *time.Ticker         // the watchdog timer itself
+    lastpoll     time.Time            // the last time the hypervisor polled
+    lastDroplets map[string]time.Time // the vms seen in the last poll
+    killcount    int                  // how many times the daemon has been forcibly killed
 }

@@ -39,7 +39,7 @@ func (h *HyperT) sendDirs() {
 }
 
 func (h *HyperT) NewWatchdog() {
-    h.dog = time.NewTicker(me.delay)
+    h.dog = time.NewTicker(me.hyperPollDelay)
     defer h.dog.Stop()
     done := make(chan bool)
     /*
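With me.delay gone, the watchdog ticks at the shared me.hyperPollDelay, so a single knob drives both polling and the watchdog. The body of NewWatchdog is mostly outside this hunk; a hypothetical minimal ticker loop of that shape (an assumption, not the actual implementation) would be:

package main

import (
    "fmt"
    "time"
)

// watch fires a callback on every tick of a ticker driven by the shared
// poll interval; ticks bounds the demo so it terminates.
func watch(interval time.Duration, ticks int, poll func()) {
    dog := time.NewTicker(interval)
    defer dog.Stop()
    for i := 0; i < ticks; i++ {
        <-dog.C
        poll()
    }
}

func main() {
    // three ticks at a short interval, just to show the mechanism
    watch(100*time.Millisecond, 3, func() { fmt.Println("poll hypervisor") })
}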