2024-10-12 10:59:11 -05:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
2024-10-16 20:43:01 -05:00
|
|
|
"fmt"
|
|
|
|
"math/rand"
|
2024-10-13 02:23:30 -05:00
|
|
|
"time"
|
|
|
|
|
2024-10-13 01:33:32 -05:00
|
|
|
"go.wit.com/lib/gui/shell"
|
2024-10-23 19:15:51 -05:00
|
|
|
pb "go.wit.com/lib/protobuf/virtbuf"
|
2024-10-12 10:59:11 -05:00
|
|
|
"go.wit.com/log"
|
|
|
|
)
|
|
|
|
|
2024-10-26 12:32:17 -05:00
|
|
|
// restarts the virtigod daemon on a hypervisor via http
|
|
|
|
func (h *HyperT) RestartVirtigod() {
|
2024-10-22 17:27:24 -05:00
|
|
|
url := "http://" + h.pb.Hostname + ":2520/kill"
|
2024-10-13 01:33:32 -05:00
|
|
|
s := shell.Wget(url)
|
2024-10-26 12:32:17 -05:00
|
|
|
log.Info("EVENT RestartVirtigod", url, s)
|
2024-10-13 02:23:30 -05:00
|
|
|
h.lastpoll = time.Now()
|
|
|
|
h.killcount += 1
|
|
|
|
|
|
|
|
dur := time.Since(h.lastpoll) // Calculate the elapsed time
|
2024-10-22 17:27:24 -05:00
|
|
|
log.Info("KILLED DAEMON", h.pb.Hostname, shell.FormatDuration(dur), "curl", url)
|
2024-10-13 03:20:48 -05:00
|
|
|
me.killcount += 1
|
2024-10-17 15:29:47 -05:00
|
|
|
|
|
|
|
// mark the cluster as unstable so droplet starts can be throttled
|
|
|
|
me.unstable = time.Now()
|
2024-10-13 01:33:32 -05:00
|
|
|
}
|
2024-10-13 03:49:54 -05:00
|
|
|
|
2024-10-16 21:10:08 -05:00
|
|
|
// stableTimeout is how long must have passed since the cluster was last
// marked unstable before clusterReady() reports it as stable.
var stableTimeout = 43 * time.Second
|
|
|
|
|
2024-10-15 11:02:34 -05:00
|
|
|
// checks if the cluster is ready and stable
|
2024-10-16 20:43:01 -05:00
|
|
|
func clusterReady() (bool, string) {
|
2024-10-15 11:02:34 -05:00
|
|
|
last := time.Since(me.unstable)
|
2024-10-16 21:10:08 -05:00
|
|
|
if last > stableTimeout {
|
2024-10-15 11:02:34 -05:00
|
|
|
// the cluster has not been stable for 133 seconds
|
2024-10-16 21:10:08 -05:00
|
|
|
log.Warn("clusterReady() is stable for ", shell.FormatDuration(stableTimeout), " secs")
|
|
|
|
return true, fmt.Sprintln("clusterReady() is stable ", shell.FormatDuration(stableTimeout), " secs")
|
2024-10-15 11:02:34 -05:00
|
|
|
}
|
|
|
|
log.Warn("clusterReady() is unstable for", shell.FormatDuration(last))
|
2024-10-16 20:43:01 -05:00
|
|
|
return false, "clusterReady() is unstable for " + shell.FormatDuration(last)
|
2024-10-15 11:02:34 -05:00
|
|
|
}
|
|
|
|
|
2024-10-26 08:54:28 -05:00
|
|
|
func dropletReady(d *pb.Droplet) (bool, string) {
|
2024-10-23 19:15:51 -05:00
|
|
|
if d.CurrentState == pb.DropletState_ON {
|
2024-10-16 20:43:01 -05:00
|
|
|
return false, "EVENT start droplet is already ON"
|
2024-10-15 11:02:34 -05:00
|
|
|
}
|
2024-10-26 08:54:28 -05:00
|
|
|
if d.Starts > 2 {
|
2024-10-16 20:43:01 -05:00
|
|
|
// reason := "EVENT start droplet has already been started " + d.starts + " times"
|
2024-10-26 08:54:28 -05:00
|
|
|
return false, fmt.Sprintln("EVENT start droplet has already been started ", d.Starts, " times")
|
2024-10-15 11:02:34 -05:00
|
|
|
}
|
2024-10-16 20:43:01 -05:00
|
|
|
return true, ""
|
2024-10-15 11:02:34 -05:00
|
|
|
}
|
|
|
|
|
2024-10-26 12:32:17 -05:00
|
|
|
func (h *HyperT) start(d *pb.Droplet) (bool, string) {
|
2024-10-16 20:43:01 -05:00
|
|
|
ready, result := clusterReady()
|
|
|
|
if !ready {
|
|
|
|
return false, result
|
2024-10-15 11:02:34 -05:00
|
|
|
}
|
2024-10-26 08:54:28 -05:00
|
|
|
ready, result = dropletReady(d)
|
2024-10-16 20:43:01 -05:00
|
|
|
if !ready {
|
|
|
|
return false, result
|
2024-10-15 11:02:34 -05:00
|
|
|
}
|
|
|
|
|
2024-10-26 08:54:28 -05:00
|
|
|
url := "http://" + h.pb.Hostname + ":2520/start?start=" + d.Hostname
|
2024-10-13 03:49:54 -05:00
|
|
|
s := shell.Wget(url)
|
2024-10-16 20:43:01 -05:00
|
|
|
result = "EVENT start droplet url: " + url + "\n"
|
|
|
|
result += "EVENT start droplet response: " + s.String()
|
2024-10-15 11:02:34 -05:00
|
|
|
|
|
|
|
// increment the counter for a start attempt working
|
2024-10-26 08:54:28 -05:00
|
|
|
d.Starts += 1
|
2024-10-15 11:02:34 -05:00
|
|
|
|
|
|
|
// mark the cluster as unstable so droplet starts can be throttled
|
|
|
|
me.unstable = time.Now()
|
2024-10-16 20:43:01 -05:00
|
|
|
|
|
|
|
return true, result
|
|
|
|
}
|
|
|
|
|
2024-10-26 12:32:17 -05:00
|
|
|
func findDroplet(name string) *pb.Droplet {
|
|
|
|
for _, d := range me.cluster.Droplets {
|
|
|
|
if d.Hostname == name {
|
|
|
|
return d
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2024-10-16 20:43:01 -05:00
|
|
|
func Start(name string) (bool, string) {
|
|
|
|
var result string
|
|
|
|
|
|
|
|
d := findDroplet(name)
|
|
|
|
if d == nil {
|
2024-10-26 12:32:17 -05:00
|
|
|
result += "can't start unknown droplet: " + name
|
2024-10-16 20:43:01 -05:00
|
|
|
return false, result
|
|
|
|
}
|
|
|
|
|
2024-10-23 19:15:51 -05:00
|
|
|
if d.CurrentState == pb.DropletState_ON {
|
2024-10-26 12:32:17 -05:00
|
|
|
return false, "EVENT start droplet " + d.Hostname + " is already ON"
|
2024-10-16 21:10:08 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
dur := time.Since(me.unstable) // how long has the cluster been stable?
|
|
|
|
result = fmt.Sprintln("should start droplet", name, "here. grid stable for:", shell.FormatDuration(dur))
|
|
|
|
if dur < 17*time.Second {
|
|
|
|
result += "grid is still too unstable"
|
|
|
|
return false, result
|
|
|
|
}
|
|
|
|
|
2024-10-16 20:43:01 -05:00
|
|
|
// make the list of hypervisors that are active and can start new droplets
|
|
|
|
var pool []*HyperT
|
|
|
|
for _, h := range me.hypers {
|
2024-10-22 17:59:27 -05:00
|
|
|
result += fmt.Sprintln("could start droplet on", name, "on", h.pb.Hostname, h.pb.Active)
|
2024-10-26 08:54:28 -05:00
|
|
|
if d.PreferredHypervisor == h.pb.Hostname {
|
2024-10-16 20:43:01 -05:00
|
|
|
// the config file says this droplet should run on this hypervisor
|
2024-10-26 12:32:17 -05:00
|
|
|
a, b := h.start(d)
|
2024-10-16 20:43:01 -05:00
|
|
|
return a, result + b
|
|
|
|
}
|
|
|
|
|
2024-10-22 17:59:27 -05:00
|
|
|
if h.pb.Active != true {
|
2024-10-16 20:43:01 -05:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
pool = append(pool, h)
|
|
|
|
}
|
|
|
|
|
|
|
|
// left here as an example of how to actually do random numbers
|
|
|
|
// it's complete mathematical chaos. Randomness is simple when
|
|
|
|
// human interaction occurs -- which is exactly what happens most
|
|
|
|
// of the time. most random shit is bullshit. all you really need
|
|
|
|
// is exactly this to make sure the random functions work as they
|
|
|
|
// should. Probably, just use this everywhere in all cases. --jcarr
|
|
|
|
rand.Seed(time.Now().UnixNano())
|
|
|
|
a := 0
|
|
|
|
b := len(pool)
|
|
|
|
n := a + rand.Intn(b-a)
|
|
|
|
result += fmt.Sprintln("pool has", len(pool), "members", "rand =", n)
|
|
|
|
h := pool[n]
|
2024-10-26 12:32:17 -05:00
|
|
|
startbool, startresult := h.start(d)
|
2024-10-16 20:43:01 -05:00
|
|
|
return startbool, result + startresult
|
2024-10-13 03:49:54 -05:00
|
|
|
}
|