track droplets reported from each hypervisor
Signed-off-by: Jeff Carr <jcarr@wit.com>
This commit is contained in:
parent
d38865a6cf
commit
8fc2fbd9c9
22
dump.go
22
dump.go
|
@ -4,7 +4,9 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"go.wit.com/lib/gui/shell"
|
||||||
pb "go.wit.com/lib/protobuf/virtbuf"
|
pb "go.wit.com/lib/protobuf/virtbuf"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -44,3 +46,23 @@ func dumpDroplets(w http.ResponseWriter) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// status of the hypervisors
|
||||||
|
func dumpHypervisors(w http.ResponseWriter) {
|
||||||
|
for _, h := range me.hypers {
|
||||||
|
// lastpoll time.Time // the last time the hypervisor polled
|
||||||
|
dur := time.Since(h.lastpoll)
|
||||||
|
tmp := shell.FormatDuration(dur)
|
||||||
|
fmt.Fprintln(w, h.pb.Hostname, "killcount =", h.killcount, "lastpoll:", tmp)
|
||||||
|
for name, t := range h.lastDroplets {
|
||||||
|
dur := time.Since(t)
|
||||||
|
tmp := shell.FormatDuration(dur)
|
||||||
|
d := findDroplet(name)
|
||||||
|
if d == nil {
|
||||||
|
fmt.Fprintln(w, "\t", h.pb.Hostname, "name =", name, "lastpoll:", tmp)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintln(w, "\t", h.pb.Hostname, "name =", name, "lastpoll:", tmp, d.CurrentState)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
19
http.go
19
http.go
|
@ -77,7 +77,7 @@ func okHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if route == "/uptime" {
|
if route == "/uptime" {
|
||||||
ok, s := clusterHealthy()
|
ok, s := uptimeCheck()
|
||||||
if ok {
|
if ok {
|
||||||
log.Info("Handling URL:", route, "cluster is ok", s)
|
log.Info("Handling URL:", route, "cluster is ok", s)
|
||||||
fmt.Fprintln(w, s)
|
fmt.Fprintln(w, s)
|
||||||
|
@ -113,6 +113,18 @@ func okHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// toggle poll logging
|
||||||
|
if route == "/poll" {
|
||||||
|
if POLL.Get() {
|
||||||
|
fmt.Fprintln(w, "POLL is true")
|
||||||
|
POLL.SetBool(false)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintln(w, "POLL is false")
|
||||||
|
POLL.SetBool(true)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
if route == "/dumpcluster" {
|
if route == "/dumpcluster" {
|
||||||
dumpCluster(w)
|
dumpCluster(w)
|
||||||
return
|
return
|
||||||
|
@ -123,6 +135,11 @@ func okHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if route == "/dumphypervisors" {
|
||||||
|
dumpHypervisors(w)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
if route == "/dumplibvirtxml" {
|
if route == "/dumplibvirtxml" {
|
||||||
virtigoxml.DumpLibvirtxmlDomainNames()
|
virtigoxml.DumpLibvirtxmlDomainNames()
|
||||||
return
|
return
|
||||||
|
|
13
main.go
13
main.go
|
@ -38,16 +38,17 @@ func main() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// set defaults
|
// set defaults
|
||||||
me.unstable = time.Now() // initialize the grid as unstable
|
me.unstable = time.Now() // initialize the grid as unstable
|
||||||
me.delay = 5 * time.Second // how often to poll the hypervisors
|
|
||||||
me.changed = false
|
me.changed = false
|
||||||
// me.dmap = make(map[*pb.Droplet]*DropletT)
|
|
||||||
me.hmap = make(map[*pb.Hypervisor]*HyperT)
|
me.hmap = make(map[*pb.Hypervisor]*HyperT)
|
||||||
|
|
||||||
// how long a droplet can be missing until it's declared dead
|
// how long a droplet can be missing until it's declared dead
|
||||||
me.unstableTimeout = 17 * time.Second
|
me.unstableTimeout = 17 * time.Second
|
||||||
me.missingDropletTimeout = time.Minute // not sure the difference between these values
|
me.missingDropletTimeout = time.Minute // not sure the difference between these values
|
||||||
|
|
||||||
|
// how often to poll the hypervisors
|
||||||
|
me.hyperPollDelay = 5 * time.Second
|
||||||
|
|
||||||
// how long the cluster must be stable before new droplets can be started
|
// how long the cluster must be stable before new droplets can be started
|
||||||
me.clusterStableDuration = 37 * time.Second
|
me.clusterStableDuration = 37 * time.Second
|
||||||
|
|
||||||
|
@ -141,12 +142,14 @@ func main() {
|
||||||
log.Println("result:", result)
|
log.Println("result:", result)
|
||||||
os.Exit(0)
|
os.Exit(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
// initialize each hypervisor
|
// initialize each hypervisor
|
||||||
for _, pbh := range me.cluster.Hypervisors {
|
for _, pbh := range me.cluster.Hypervisors {
|
||||||
// this is a new unknown droplet (not in the config file)
|
// this is a new unknown droplet (not in the config file)
|
||||||
h := new(HyperT)
|
var h *HyperT
|
||||||
|
h = new(HyperT)
|
||||||
h.pb = pbh
|
h.pb = pbh
|
||||||
|
h.lastDroplets = make(map[string]time.Time)
|
||||||
h.lastpoll = time.Now()
|
h.lastpoll = time.Now()
|
||||||
|
|
||||||
me.hmap[pbh] = h
|
me.hmap[pbh] = h
|
||||||
|
|
52
poll.go
52
poll.go
|
@ -18,6 +18,7 @@ func (h *HyperT) pollHypervisor() {
|
||||||
if s == nil {
|
if s == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
var bytesSplice []byte
|
var bytesSplice []byte
|
||||||
bytesSplice = s.Bytes()
|
bytesSplice = s.Bytes()
|
||||||
// fmt.Fprintln(w, string(bytesSplice))
|
// fmt.Fprintln(w, string(bytesSplice))
|
||||||
|
@ -31,12 +32,26 @@ func (h *HyperT) pollHypervisor() {
|
||||||
}
|
}
|
||||||
state := fields[0]
|
state := fields[0]
|
||||||
name := fields[1]
|
name := fields[1]
|
||||||
|
if state == "OFF" {
|
||||||
|
// skip locally defined libvirt vms
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
h.lastDroplets[name] = time.Now()
|
||||||
|
// if _, ok := h.lastDroplets[name]; ok {
|
||||||
|
// h.lastDroplets[name] = time.Now()
|
||||||
|
// }
|
||||||
|
|
||||||
|
// try the protobuf
|
||||||
|
d := findDroplet(name)
|
||||||
|
if d == nil {
|
||||||
|
// not sure what now?
|
||||||
|
log.Log(WARN, name, "is unknown on", h.pb.Hostname, "state =", state)
|
||||||
|
log.Log(WARN, name, "this vm was probably started by hand using virtsh")
|
||||||
|
log.Log(WARN, name, "todo: import vm from libvrit")
|
||||||
|
continue
|
||||||
|
}
|
||||||
if state == "ON" {
|
if state == "ON" {
|
||||||
log.Log(POLL, h.pb.Hostname, "STATE:", state, "HOST:", name, "rest:", fields[2:])
|
log.Log(POLL, h.pb.Hostname, "STATE:", state, "HOST:", name, "rest:", fields[2:])
|
||||||
d := findDroplet(name)
|
|
||||||
if d == nil {
|
|
||||||
// not sure whawt now?
|
|
||||||
}
|
|
||||||
log.Log(INFO, "ALREADY RECORDED", d.Hostname)
|
log.Log(INFO, "ALREADY RECORDED", d.Hostname)
|
||||||
|
|
||||||
// update the status to ON
|
// update the status to ON
|
||||||
|
@ -69,30 +84,25 @@ func (h *HyperT) pollHypervisor() {
|
||||||
}
|
}
|
||||||
d.CurrentHypervisor = h.pb.Hostname
|
d.CurrentHypervisor = h.pb.Hostname
|
||||||
}
|
}
|
||||||
continue
|
}
|
||||||
|
for name, t := range h.lastDroplets {
|
||||||
|
dur := time.Since(t)
|
||||||
|
if dur > me.hyperPollDelay {
|
||||||
|
log.Info("droplet has probably powered down", name)
|
||||||
|
d := findDroplet(name)
|
||||||
|
if d != nil {
|
||||||
|
d.CurrentState = pb.DropletState_UNKNOWN
|
||||||
|
log.Info("set state UNKNOWN here", name)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
h.lastpoll = time.Now()
|
h.lastpoll = time.Now()
|
||||||
h.killcount = 0 // poll worked. reset killcount
|
h.killcount = 0 // poll worked. reset killcount
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
func findHypervisor(name string) *HyperT {
|
|
||||||
if h, ok := me.hmap[name]; ok {
|
|
||||||
return h
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
for _, h := range me.hypers {
|
|
||||||
if h.pb.Hostname == name {
|
|
||||||
return h
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
// check the state of the cluster and return a string
|
// check the state of the cluster and return a string
|
||||||
// that is intended to be sent to an uptime monitor like Kuma
|
// that is intended to be sent to an uptime monitor like Kuma
|
||||||
func clusterHealthy() (bool, string) {
|
func uptimeCheck() (bool, string) {
|
||||||
var good bool = true
|
var good bool = true
|
||||||
var total int
|
var total int
|
||||||
var working int
|
var working int
|
||||||
|
|
11
structs.go
11
structs.go
|
@ -21,13 +21,13 @@ func (b *virtigoT) Enable() {
|
||||||
// this app's variables
|
// this app's variables
|
||||||
type virtigoT struct {
|
type virtigoT struct {
|
||||||
cluster *pb.Cluster // basic cluster settings
|
cluster *pb.Cluster // basic cluster settings
|
||||||
delay time.Duration // how often to poll the hypervisors
|
|
||||||
hmap map[*pb.Hypervisor]*HyperT // map to the local struct
|
hmap map[*pb.Hypervisor]*HyperT // map to the local struct
|
||||||
names []string
|
names []string
|
||||||
hypers []*HyperT
|
hypers []*HyperT
|
||||||
killcount int
|
killcount int
|
||||||
unstable time.Time // the last time the cluster was incorrect
|
unstable time.Time // the last time the cluster was incorrect
|
||||||
changed bool
|
changed bool
|
||||||
|
hyperPollDelay time.Duration // how often to poll the hypervisors
|
||||||
unstableTimeout time.Duration // how long a droplet can be unstable until it's declared dead
|
unstableTimeout time.Duration // how long a droplet can be unstable until it's declared dead
|
||||||
clusterStableDuration time.Duration // how long the cluster must be stable before new droplets can be started
|
clusterStableDuration time.Duration // how long the cluster must be stable before new droplets can be started
|
||||||
missingDropletTimeout time.Duration // how long a droplet can be missing for
|
missingDropletTimeout time.Duration // how long a droplet can be missing for
|
||||||
|
@ -35,8 +35,9 @@ type virtigoT struct {
|
||||||
|
|
||||||
// the stuff that is needed for a hypervisor
|
// the stuff that is needed for a hypervisor
|
||||||
type HyperT struct {
|
type HyperT struct {
|
||||||
pb *pb.Hypervisor // the Hypervisor protobuf
|
pb *pb.Hypervisor // the Hypervisor protobuf
|
||||||
dog *time.Ticker // the watchdog timer itself
|
dog *time.Ticker // the watchdog timer itself
|
||||||
lastpoll time.Time // the last time the hypervisor polled
|
lastpoll time.Time // the last time the hypervisor polled
|
||||||
killcount int
|
lastDroplets map[string]time.Time // the vm's in the last poll
|
||||||
|
killcount int // how many times the daemon has been forcably killed
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,7 +39,7 @@ func (h *HyperT) sendDirs() {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *HyperT) NewWatchdog() {
|
func (h *HyperT) NewWatchdog() {
|
||||||
h.dog = time.NewTicker(me.delay)
|
h.dog = time.NewTicker(me.hyperPollDelay)
|
||||||
defer h.dog.Stop()
|
defer h.dog.Stop()
|
||||||
done := make(chan bool)
|
done := make(chan bool)
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in New Issue