package main import ( "context" "fmt" "log" "os" "strconv" "strings" "time" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" ) func main() { // Read environment variables notReadyGraceSec := 300 // default 5 min if val := os.Getenv("POD_REAPER_NOT_READY_GRACE"); val != "" { if i, err := strconv.Atoi(val); err == nil { notReadyGraceSec = i } } dryRun := false if val := os.Getenv("POD_REAPER_DRY_RUN"); strings.ToLower(val) == "true" { dryRun = true } log.Printf("Starting Pod Reaper: notReadyGrace=%ds dryRun=%v", notReadyGraceSec, dryRun) // Build in-cluster Kubernetes client config, err := rest.InClusterConfig() if err != nil { log.Fatalf("Failed to build kubeconfig: %v", err) } clientset, err := kubernetes.NewForConfig(config) if err != nil { log.Fatalf("Failed to create clientset: %v", err) } for { nodes, err := clientset.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{}) if err != nil { log.Println("Failed to list nodes:", err) time.Sleep(10 * time.Second) continue } for _, node := range nodes.Items { notReadyDuration := getNodeNotReadyDuration(&node) if notReadyDuration > time.Duration(notReadyGraceSec)*time.Second { log.Printf("Node %s is NotReady for %v, scanning pods...", node.Name, notReadyDuration) handlePodsOnNode(clientset, &node, dryRun) } } time.Sleep(15 * time.Second) } } func getNodeNotReadyDuration(node *v1.Node) time.Duration { for _, cond := range node.Status.Conditions { if cond.Type == v1.NodeReady { if cond.Status == v1.ConditionFalse || cond.Status == v1.ConditionUnknown { return time.Since(cond.LastTransitionTime.Time) } } } return 0 } func handlePodsOnNode(clientset *kubernetes.Clientset, node *v1.Node, dryRun bool) { pods, err := clientset.CoreV1().Pods("").List(context.Background(), metav1.ListOptions{ FieldSelector: fmt.Sprintf("spec.nodeName=%s", node.Name), }) if err != nil { log.Println("Failed to list pods:", err) return } for _, pod := range pods.Items { // Skip pods that are already terminating if pod.DeletionTimestamp != nil { continue } // Optionally skip mirror pods (static pods) if _, ok := pod.Annotations[v1.MirrorPodAnnotationKey]; ok { continue } // Check owner references deletePod := false for _, owner := range pod.OwnerReferences { if owner.Kind == "Deployment" || owner.Kind == "StatefulSet" || owner.Kind == "DaemonSet" { deletePod = true break } } if deletePod { if dryRun { log.Printf("[DRY RUN] Would delete pod %s/%s on node %s", pod.Namespace, pod.Name, node.Name) } else { log.Printf("Deleting pod %s/%s on node %s", pod.Namespace, pod.Name, node.Name) err := clientset.CoreV1().Pods(pod.Namespace).Delete(context.Background(), pod.Name, metav1.DeleteOptions{}) if err != nil { log.Println("Failed to delete pod:", err) } } } } }