eindelijk weer eens een push
This commit is contained in:
@@ -0,0 +1,216 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/client-go/kubernetes"
|
||||
"k8s.io/client-go/rest"
|
||||
metricsv "k8s.io/metrics/pkg/client/clientset/versioned"
|
||||
)
|
||||
|
||||
// CPUOverloadThreshold is the CPU usage, as an integer percentage of node
// capacity, above which a node is treated as overloaded. It is populated in
// init from CPU_OVERLOAD_THRESHOLD (default 80).
var CPUOverloadThreshold int64

// MemoryOverloadThreshold is the memory usage, as an integer percentage of
// node capacity, above which a node is treated as overloaded. It is populated
// in init from MEMORY_OVERLOAD_THRESHOLD (default 80).
var MemoryOverloadThreshold int64

// CPUUnderloadThreshold is the CPU usage percentage a node must stay below
// (together with MemoryUnderloadThreshold) to be treated as underloaded. It is
// populated in init from CPU_UNDERLOAD_THRESHOLD (default 50).
var CPUUnderloadThreshold int64

// MemoryUnderloadThreshold is the memory usage percentage a node must stay
// below (together with CPUUnderloadThreshold) to be treated as underloaded. It
// is populated in init from MEMORY_UNDERLOAD_THRESHOLD (default 50).
var MemoryUnderloadThreshold int64
|
||||
|
||||
// getEnvInt64 reads the environment variable name and parses it as a base-10
// int64.
//
// It returns defaultVal when the variable is unset or empty. When the value is
// present but cannot be parsed (non-numeric or out of int64 range), the
// rejected value is logged and defaultVal is returned as well.
func getEnvInt64(name string, defaultVal int64) int64 {
	raw := os.Getenv(name)
	if len(raw) == 0 {
		return defaultVal
	}

	parsed, parseErr := strconv.ParseInt(raw, 10, 64)
	if parseErr == nil {
		return parsed
	}

	log.Printf("Invalid %s: %s, using default %d", name, raw, defaultVal)
	return defaultVal
}
|
||||
|
||||
func init() {
|
||||
CPUOverloadThreshold = getEnvInt64("CPU_OVERLOAD_THRESHOLD", 80)
|
||||
MemoryOverloadThreshold = getEnvInt64("MEMORY_OVERLOAD_THRESHOLD", 80)
|
||||
CPUUnderloadThreshold = getEnvInt64("CPU_UNDERLOAD_THRESHOLD", 50)
|
||||
MemoryUnderloadThreshold = getEnvInt64("MEMORY_UNDERLOAD_THRESHOLD", 50)
|
||||
}
|
||||
|
||||
func main() {
|
||||
config, err := rest.InClusterConfig()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
clientset, err := kubernetes.NewForConfig(config)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
metricsClient, err := metricsv.NewForConfig(config)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
for {
|
||||
balanceNodes(clientset, metricsClient)
|
||||
time.Sleep(2 * time.Minute)
|
||||
}
|
||||
}
|
||||
|
||||
func balanceNodes(clientset *kubernetes.Clientset, metricsClient *metricsv.Clientset) {
|
||||
ctx := context.Background()
|
||||
|
||||
// Get node metrics
|
||||
nodeMetricsList, err := metricsClient.MetricsV1beta1().NodeMetricses().List(ctx, metav1.ListOptions{})
|
||||
if err != nil {
|
||||
log.Println("Failed to get node metrics:", err)
|
||||
return
|
||||
}
|
||||
|
||||
nodeUsage := make(map[string]map[string]int64)
|
||||
for _, m := range nodeMetricsList.Items {
|
||||
nodeUsage[m.Name] = map[string]int64{
|
||||
"cpu": m.Usage.Cpu().MilliValue(),
|
||||
"mem": m.Usage.Memory().Value() / (1024 * 1024), // MiB
|
||||
}
|
||||
}
|
||||
|
||||
// Get node capacity
|
||||
nodes, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
|
||||
if err != nil {
|
||||
log.Println("Failed to list nodes:", err)
|
||||
return
|
||||
}
|
||||
|
||||
nodeCapacity := make(map[string]map[string]int64)
|
||||
for _, n := range nodes.Items {
|
||||
nodeCapacity[n.Name] = map[string]int64{
|
||||
"cpu": n.Status.Capacity.Cpu().MilliValue(),
|
||||
"mem": n.Status.Capacity.Memory().Value() / (1024 * 1024),
|
||||
}
|
||||
}
|
||||
|
||||
// Identify overloaded and underloaded nodes
|
||||
var overloaded, underloaded []string
|
||||
for node, usage := range nodeUsage {
|
||||
cpuPercent := usage["cpu"] * 100 / nodeCapacity[node]["cpu"]
|
||||
memPercent := usage["mem"] * 100 / nodeCapacity[node]["mem"]
|
||||
|
||||
if cpuPercent > CPUOverloadThreshold || memPercent > MemoryOverloadThreshold {
|
||||
overloaded = append(overloaded, node)
|
||||
} else if cpuPercent < CPUUnderloadThreshold && memPercent < MemoryUnderloadThreshold {
|
||||
underloaded = append(underloaded, node)
|
||||
}
|
||||
}
|
||||
|
||||
// ORIGINAL LOGGING – preserved
|
||||
fmt.Println("Overloaded nodes:", overloaded)
|
||||
fmt.Println("Underloaded nodes:", underloaded)
|
||||
|
||||
if len(overloaded) == 0 || len(underloaded) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Evict exactly ONE pod, then return
|
||||
for _, node := range overloaded {
|
||||
pods, err := clientset.CoreV1().Pods("").List(ctx, metav1.ListOptions{
|
||||
FieldSelector: fmt.Sprintf("spec.nodeName=%s", node),
|
||||
})
|
||||
if err != nil {
|
||||
log.Println("Failed to list pods for node", node, err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, pod := range pods.Items {
|
||||
if pod.Namespace == "kube-system" || isDaemonSet(&pod) {
|
||||
continue
|
||||
}
|
||||
|
||||
targetNode := pickTargetNode(underloaded, nodeUsage, nodeCapacity)
|
||||
if targetNode == "" {
|
||||
log.Println("No suitable underloaded node available")
|
||||
return
|
||||
}
|
||||
|
||||
// ORIGINAL LOGGING – preserved
|
||||
fmt.Printf(
|
||||
"Rescheduling pod %s/%s from %s to %s\n",
|
||||
pod.Namespace,
|
||||
pod.Name,
|
||||
node,
|
||||
targetNode,
|
||||
)
|
||||
|
||||
grace := int64(0)
|
||||
err = clientset.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, metav1.DeleteOptions{
|
||||
GracePeriodSeconds: &grace,
|
||||
})
|
||||
if err != nil {
|
||||
log.Println("Failed to delete pod", pod.Name, err)
|
||||
return
|
||||
}
|
||||
|
||||
// Update in-memory usage
|
||||
nodeUsage[node]["cpu"] -= estimatePodCPU(&pod)
|
||||
nodeUsage[node]["mem"] -= estimatePodMem(&pod)
|
||||
nodeUsage[targetNode]["cpu"] += estimatePodCPU(&pod)
|
||||
nodeUsage[targetNode]["mem"] += estimatePodMem(&pod)
|
||||
|
||||
// IMPORTANT: stop after one eviction
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func isDaemonSet(pod *corev1.Pod) bool {
|
||||
for _, owner := range pod.OwnerReferences {
|
||||
if owner.Kind == "DaemonSet" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// pickTargetNode chooses, from nodes, the node with the lowest combined load,
// where load is the CPU usage percentage plus the memory usage percentage
// (both integer-truncated).
//
// usage and capacity map a node name to its "cpu" and "mem" values. A node
// whose capacity entry is missing or has a non-positive "cpu" or "mem" value
// is skipped, because no percentage can be computed for it (dividing by it
// would panic). On a tie the node that appears first in nodes wins.
//
// It returns the empty string when nodes is empty or no node has usable
// capacity.
func pickTargetNode(nodes []string, usage map[string]map[string]int64, capacity map[string]map[string]int64) string {
	var (
		bestNode string
		bestLoad int64
		found    bool
	)
	for _, n := range nodes {
		// Indexing a missing (nil) inner map yields 0, so this single check
		// covers both unknown nodes and zero capacities.
		cpuCap, memCap := capacity[n]["cpu"], capacity[n]["mem"]
		if cpuCap <= 0 || memCap <= 0 {
			continue
		}

		cpuPercent := usage[n]["cpu"] * 100 / cpuCap
		memPercent := usage[n]["mem"] * 100 / memCap
		load := cpuPercent + memPercent

		if !found || load < bestLoad {
			found = true
			bestLoad = load
			bestNode = n
		}
	}
	return bestNode
}
|
||||
|
||||
// estimatePodCPU/Mem returns an approximate CPU/memory usage for the pod (from requests)
|
||||
func estimatePodCPU(pod *corev1.Pod) int64 {
|
||||
var cpu int64
|
||||
for _, c := range pod.Spec.Containers {
|
||||
if q, ok := c.Resources.Requests[corev1.ResourceCPU]; ok {
|
||||
cpu += q.MilliValue()
|
||||
}
|
||||
}
|
||||
return cpu
|
||||
}
|
||||
|
||||
func estimatePodMem(pod *corev1.Pod) int64 {
|
||||
var mem int64
|
||||
for _, c := range pod.Spec.Containers {
|
||||
if q, ok := c.Resources.Requests[corev1.ResourceMemory]; ok {
|
||||
mem += q.Value() / (1024 * 1024) // MiB
|
||||
}
|
||||
}
|
||||
return mem
|
||||
}
|
||||
Reference in New Issue
Block a user