checking for the grid to be 'stable' before starting new droplets

Signed-off-by: Jeff Carr <jcarr@wit.com>
This commit is contained in:
Jeff Carr 2024-10-13 04:34:55 -05:00
parent 425e75d388
commit ccbdef1a13
4 changed files with 26 additions and 7 deletions

19
http.go
View File

@ -85,8 +85,23 @@ func okHandler(w http.ResponseWriter, r *http.Request) {
if tmp == "/start" {
start := r.URL.Query().Get("start")
log.Info("should start droplet here", start)
fmt.Fprintln(w, "should start droplet here", start)
log.Info("Handling URL:", tmp, "start droplet", start)
dur := time.Since(me.unstable) // how long has the cluster been stable?
fmt.Fprintln(w, "should start droplet here", start, shell.FormatDuration(dur))
if dur < 17*time.Second {
fmt.Fprintln(w, "grid is still to unstable")
return
}
d := findDroplet(start)
if d == nil {
fmt.Fprintln(w, "can't start unknown droplet", start)
return
}
for _, h := range me.hypers {
fmt.Fprintln(w, "could start droplet on", start, "on", h.Hostname, h.Active)
}
return
}

View File

@ -5,6 +5,7 @@ package main
import (
"embed"
"os"
"time"
"go.wit.com/dev/alexflint/arg"
"go.wit.com/log"
@ -30,6 +31,9 @@ func main() {
readDropletFile("droplets")
readHypervisorFile("hypervisor")
// initialize the grid as unstable
me.unstable = time.Now()
log.Info("create cluser for", argv.Hosts)
for _, name := range argv.Hosts {
h := addHypervisor(name)

View File

@ -139,15 +139,15 @@ func clusterHealthy() (bool, string) {
if me.killcount > 0 {
summary += "(killcount=" + fmt.Sprintf("%d", me.killcount) + ")"
}
last := time.Since(me.stable)
last := time.Since(me.unstable)
if last > 133*time.Second {
// the cluster has been stable for more than 133 seconds
s := shell.FormatDuration(last)
summary += "(unstable " + s + ")"
s := strings.TrimSpace(shell.FormatDuration(last))
summary += "(stable=" + s + ")"
}
if good {
me.stable = time.Now()
return good, "GOOD=true " + summary
}
me.unstable = time.Now()
return good, "GOOD=false " + summary
}

View File

@ -20,7 +20,7 @@ type virtigoT struct {
hypers []*HyperT
droplets []*DropletT
killcount int
stable time.Time // the latest time the cluster has been okay
unstable time.Time // the last time the cluster was incorrect
}
// the stuff that is needed for a hypervisor