package main import ( "context" "fmt" "log" "os" "strconv" "time" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" metricsv "k8s.io/metrics/pkg/client/clientset/versioned" ) var ( CPUOverloadThreshold int64 MemoryOverloadThreshold int64 CPUUnderloadThreshold int64 MemoryUnderloadThreshold int64 ) func getEnvInt64(name string, defaultVal int64) int64 { valStr := os.Getenv(name) if valStr == "" { return defaultVal } val, err := strconv.ParseInt(valStr, 10, 64) if err != nil { log.Printf("Invalid %s: %s, using default %d", name, valStr, defaultVal) return defaultVal } return val } func init() { CPUOverloadThreshold = getEnvInt64("CPU_OVERLOAD_THRESHOLD", 80) MemoryOverloadThreshold = getEnvInt64("MEMORY_OVERLOAD_THRESHOLD", 80) CPUUnderloadThreshold = getEnvInt64("CPU_UNDERLOAD_THRESHOLD", 50) MemoryUnderloadThreshold = getEnvInt64("MEMORY_UNDERLOAD_THRESHOLD", 50) } func main() { config, err := rest.InClusterConfig() if err != nil { log.Fatal(err) } clientset, err := kubernetes.NewForConfig(config) if err != nil { log.Fatal(err) } metricsClient, err := metricsv.NewForConfig(config) if err != nil { log.Fatal(err) } for { balanceNodes(clientset, metricsClient) time.Sleep(2 * time.Minute) } } func balanceNodes(clientset *kubernetes.Clientset, metricsClient *metricsv.Clientset) { ctx := context.Background() // Get node metrics nodeMetricsList, err := metricsClient.MetricsV1beta1().NodeMetricses().List(ctx, metav1.ListOptions{}) if err != nil { log.Println("Failed to get node metrics:", err) return } nodeUsage := make(map[string]map[string]int64) for _, m := range nodeMetricsList.Items { nodeUsage[m.Name] = map[string]int64{ "cpu": m.Usage.Cpu().MilliValue(), "mem": m.Usage.Memory().Value() / (1024 * 1024), // MiB } } // Get node capacity nodes, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) if err != nil { log.Println("Failed to list nodes:", err) return } nodeCapacity := make(map[string]map[string]int64) for _, n := range nodes.Items { nodeCapacity[n.Name] = map[string]int64{ "cpu": n.Status.Capacity.Cpu().MilliValue(), "mem": n.Status.Capacity.Memory().Value() / (1024 * 1024), } } // Identify overloaded and underloaded nodes var overloaded, underloaded []string for node, usage := range nodeUsage { cpuPercent := usage["cpu"] * 100 / nodeCapacity[node]["cpu"] memPercent := usage["mem"] * 100 / nodeCapacity[node]["mem"] if cpuPercent > CPUOverloadThreshold || memPercent > MemoryOverloadThreshold { overloaded = append(overloaded, node) } else if cpuPercent < CPUUnderloadThreshold && memPercent < MemoryUnderloadThreshold { underloaded = append(underloaded, node) } } // ORIGINAL LOGGING – preserved fmt.Println("Overloaded nodes:", overloaded) fmt.Println("Underloaded nodes:", underloaded) if len(overloaded) == 0 || len(underloaded) == 0 { return } // Evict exactly ONE pod, then return for _, node := range overloaded { pods, err := clientset.CoreV1().Pods("").List(ctx, metav1.ListOptions{ FieldSelector: fmt.Sprintf("spec.nodeName=%s", node), }) if err != nil { log.Println("Failed to list pods for node", node, err) continue } for _, pod := range pods.Items { if pod.Namespace == "kube-system" || isDaemonSet(&pod) { continue } targetNode := pickTargetNode(underloaded, nodeUsage, nodeCapacity) if targetNode == "" { log.Println("No suitable underloaded node available") return } // ORIGINAL LOGGING – preserved fmt.Printf( "Rescheduling pod %s/%s from %s to %s\n", pod.Namespace, pod.Name, node, targetNode, ) grace := int64(0) err = clientset.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, metav1.DeleteOptions{ GracePeriodSeconds: &grace, }) if err != nil { log.Println("Failed to delete pod", pod.Name, err) return } // Update in-memory usage nodeUsage[node]["cpu"] -= estimatePodCPU(&pod) nodeUsage[node]["mem"] -= estimatePodMem(&pod) nodeUsage[targetNode]["cpu"] += estimatePodCPU(&pod) nodeUsage[targetNode]["mem"] += estimatePodMem(&pod) // IMPORTANT: stop after one eviction return } } } func isDaemonSet(pod *corev1.Pod) bool { for _, owner := range pod.OwnerReferences { if owner.Kind == "DaemonSet" { return true } } return false } // pickTargetNode chooses the underloaded node with lowest combined CPU+Memory usage func pickTargetNode(nodes []string, usage map[string]map[string]int64, capacity map[string]map[string]int64) string { var bestNode string bestLoad := int64(1 << 62) // very high for _, n := range nodes { cpuPercent := usage[n]["cpu"] * 100 / capacity[n]["cpu"] memPercent := usage[n]["mem"] * 100 / capacity[n]["mem"] load := cpuPercent + memPercent if load < bestLoad { bestLoad = load bestNode = n } } return bestNode } // estimatePodCPU/Mem returns an approximate CPU/memory usage for the pod (from requests) func estimatePodCPU(pod *corev1.Pod) int64 { var cpu int64 for _, c := range pod.Spec.Containers { if q, ok := c.Resources.Requests[corev1.ResourceCPU]; ok { cpu += q.MilliValue() } } return cpu } func estimatePodMem(pod *corev1.Pod) int64 { var mem int64 for _, c := range pod.Spec.Containers { if q, ok := c.Resources.Requests[corev1.ResourceMemory]; ok { mem += q.Value() / (1024 * 1024) // MiB } } return mem }