Files
2026-05-31 16:07:30 +02:00

116 lines
3.5 KiB
Go

package main
import (
"context"
"fmt"
"log"
"os"
"strconv"
"strings"
"time"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
)
// main configures the reaper from the environment, builds an in-cluster
// Kubernetes client, and then loops forever: it lists all nodes and, for each
// node whose Ready condition has been False/Unknown for longer than the grace
// period, hands the node to handlePodsOnNode.
//
// Environment variables:
//   - POD_REAPER_NOT_READY_GRACE: grace period in whole seconds (default 300).
//     Values that do not parse, are negative, or would overflow time.Duration
//     are rejected with a log line and the default is kept.
//   - POD_REAPER_DRY_RUN: "true" (case-insensitive) enables dry-run mode.
func main() {
	const (
		defaultGraceSec   = 300 // default 5 min
		listRetryInterval = 10 * time.Second
		scanInterval      = 15 * time.Second
		apiTimeout        = 30 * time.Second
		// Largest number of seconds that still fits in a time.Duration.
		maxGraceSec = int64(1<<63-1) / int64(time.Second)
	)

	// Read environment variables
	notReadyGraceSec := defaultGraceSec
	if val := os.Getenv("POD_REAPER_NOT_READY_GRACE"); val != "" {
		i, err := strconv.Atoi(val)
		switch {
		case err != nil:
			log.Printf("Ignoring invalid POD_REAPER_NOT_READY_GRACE=%q (%v); using %ds", val, err, notReadyGraceSec)
		case i < 0:
			// A negative grace would make every node match, because a Ready
			// node reports a NotReady duration of 0 and 0 > negative.
			log.Printf("Ignoring negative POD_REAPER_NOT_READY_GRACE=%q; using %ds", val, notReadyGraceSec)
		case int64(i) > maxGraceSec:
			// Converting to time.Duration would overflow and could wrap negative.
			log.Printf("Ignoring out-of-range POD_REAPER_NOT_READY_GRACE=%q; using %ds", val, notReadyGraceSec)
		default:
			notReadyGraceSec = i
		}
	}
	notReadyGrace := time.Duration(notReadyGraceSec) * time.Second

	dryRun := strings.EqualFold(os.Getenv("POD_REAPER_DRY_RUN"), "true")

	log.Printf("Starting Pod Reaper: notReadyGrace=%ds dryRun=%v", notReadyGraceSec, dryRun)

	// Build in-cluster Kubernetes client
	config, err := rest.InClusterConfig()
	if err != nil {
		log.Fatalf("Failed to build kubeconfig: %v", err)
	}
	clientset, err := kubernetes.NewForConfig(config)
	if err != nil {
		log.Fatalf("Failed to create clientset: %v", err)
	}

	for {
		// Bound the list call so a hung API request cannot stall the loop.
		// cancel is called explicitly: a defer would never run in this loop.
		ctx, cancel := context.WithTimeout(context.Background(), apiTimeout)
		nodes, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
		cancel()
		if err != nil {
			log.Println("Failed to list nodes:", err)
			time.Sleep(listRetryInterval)
			continue
		}

		// Index into the slice so each node is passed by pointer without
		// copying the struct or aliasing a loop variable.
		for i := range nodes.Items {
			node := &nodes.Items[i]
			notReadyDuration := getNodeNotReadyDuration(node)
			if notReadyDuration > notReadyGrace {
				log.Printf("Node %s is NotReady for %v, scanning pods...", node.Name, notReadyDuration)
				handlePodsOnNode(clientset, node, dryRun)
			}
		}
		time.Sleep(scanInterval)
	}
}
// getNodeNotReadyDuration returns the time elapsed since the node's Ready
// condition last transitioned, provided that condition is currently False or
// Unknown. It returns 0 when the node has no Ready condition or when the
// condition has any other status.
func getNodeNotReadyDuration(node *v1.Node) time.Duration {
	conditions := node.Status.Conditions
	for i := range conditions {
		c := &conditions[i]
		if c.Type != v1.NodeReady {
			continue
		}
		switch c.Status {
		case v1.ConditionFalse, v1.ConditionUnknown:
			return time.Since(c.LastTransitionTime.Time)
		}
	}
	return 0
}
// handlePodsOnNode lists the pods bound to node across all namespaces and
// deletes each one that has an owner reference of a reapable workload kind
// (see isReapableOwnerKind). When dryRun is true, it only logs the pods it
// would delete.
//
// Pods that already carry a deletion timestamp and mirror (static) pods are
// skipped. List and delete failures are logged and otherwise ignored, so one
// failing pod does not stop the scan.
//
// NOTE(review): the delete uses default DeleteOptions (no forced grace
// period). On an unreachable node the pod may remain Terminating until the
// kubelet reports back; confirm this is the intended behavior.
func handlePodsOnNode(clientset *kubernetes.Clientset, node *v1.Node, dryRun bool) {
	pods, err := clientset.CoreV1().Pods("").List(context.Background(), metav1.ListOptions{
		FieldSelector: fmt.Sprintf("spec.nodeName=%s", node.Name),
	})
	if err != nil {
		log.Println("Failed to list pods:", err)
		return
	}

	// Iterate by index to avoid copying each Pod struct.
	for i := range pods.Items {
		pod := &pods.Items[i]

		// Skip pods that are already terminating
		if pod.DeletionTimestamp != nil {
			continue
		}
		// Skip mirror pods (static pods)
		if _, ok := pod.Annotations[v1.MirrorPodAnnotationKey]; ok {
			continue
		}

		// Check owner references
		deletePod := false
		for _, owner := range pod.OwnerReferences {
			if isReapableOwnerKind(owner.Kind) {
				deletePod = true
				break
			}
		}
		if !deletePod {
			continue
		}

		if dryRun {
			log.Printf("[DRY RUN] Would delete pod %s/%s on node %s", pod.Namespace, pod.Name, node.Name)
			continue
		}

		log.Printf("Deleting pod %s/%s on node %s", pod.Namespace, pod.Name, node.Name)
		if err := clientset.CoreV1().Pods(pod.Namespace).Delete(context.Background(), pod.Name, metav1.DeleteOptions{}); err != nil {
			log.Println("Failed to delete pod:", err)
		}
	}
}

// isReapableOwnerKind reports whether an owner reference of the given kind
// marks a pod as eligible for deletion. "ReplicaSet" is included because pods
// created by a Deployment are owned by the Deployment's ReplicaSet rather than
// by the Deployment itself; "Deployment" is kept for backward compatibility.
func isReapableOwnerKind(kind string) bool {
	switch kind {
	case "ReplicaSet", "Deployment", "StatefulSet", "DaemonSet":
		return true
	default:
		return false
	}
}