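// Pod Reaper watches cluster nodes and deletes controller-owned pods from
// nodes that have stayed NotReady longer than a configurable grace period.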
package main
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
v1 "k8s.io/api/core/v1"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/client-go/kubernetes"
|
|
"k8s.io/client-go/rest"
|
|
)
|
|
|
|
func main() {
|
|
// Read environment variables
|
|
notReadyGraceSec := 300 // default 5 min
|
|
if val := os.Getenv("POD_REAPER_NOT_READY_GRACE"); val != "" {
|
|
if i, err := strconv.Atoi(val); err == nil {
|
|
notReadyGraceSec = i
|
|
}
|
|
}
|
|
dryRun := false
|
|
if val := os.Getenv("POD_REAPER_DRY_RUN"); strings.ToLower(val) == "true" {
|
|
dryRun = true
|
|
}
|
|
|
|
log.Printf("Starting Pod Reaper: notReadyGrace=%ds dryRun=%v", notReadyGraceSec, dryRun)
|
|
|
|
// Build in-cluster Kubernetes client
|
|
config, err := rest.InClusterConfig()
|
|
if err != nil {
|
|
log.Fatalf("Failed to build kubeconfig: %v", err)
|
|
}
|
|
clientset, err := kubernetes.NewForConfig(config)
|
|
if err != nil {
|
|
log.Fatalf("Failed to create clientset: %v", err)
|
|
}
|
|
|
|
for {
|
|
nodes, err := clientset.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{})
|
|
if err != nil {
|
|
log.Println("Failed to list nodes:", err)
|
|
time.Sleep(10 * time.Second)
|
|
continue
|
|
}
|
|
|
|
for _, node := range nodes.Items {
|
|
notReadyDuration := getNodeNotReadyDuration(&node)
|
|
if notReadyDuration > time.Duration(notReadyGraceSec)*time.Second {
|
|
log.Printf("Node %s is NotReady for %v, scanning pods...", node.Name, notReadyDuration)
|
|
handlePodsOnNode(clientset, &node, dryRun)
|
|
}
|
|
}
|
|
|
|
time.Sleep(15 * time.Second)
|
|
}
|
|
}
|
|
|
|
func getNodeNotReadyDuration(node *v1.Node) time.Duration {
|
|
for _, cond := range node.Status.Conditions {
|
|
if cond.Type == v1.NodeReady {
|
|
if cond.Status == v1.ConditionFalse || cond.Status == v1.ConditionUnknown {
|
|
return time.Since(cond.LastTransitionTime.Time)
|
|
}
|
|
}
|
|
}
|
|
return 0
|
|
}
|
|
|
|
func handlePodsOnNode(clientset *kubernetes.Clientset, node *v1.Node, dryRun bool) {
|
|
pods, err := clientset.CoreV1().Pods("").List(context.Background(), metav1.ListOptions{
|
|
FieldSelector: fmt.Sprintf("spec.nodeName=%s", node.Name),
|
|
})
|
|
if err != nil {
|
|
log.Println("Failed to list pods:", err)
|
|
return
|
|
}
|
|
|
|
for _, pod := range pods.Items {
|
|
// Skip pods that are already terminating
|
|
if pod.DeletionTimestamp != nil {
|
|
continue
|
|
}
|
|
|
|
// Optionally skip mirror pods (static pods)
|
|
if _, ok := pod.Annotations[v1.MirrorPodAnnotationKey]; ok {
|
|
continue
|
|
}
|
|
|
|
// Check owner references
|
|
deletePod := false
|
|
for _, owner := range pod.OwnerReferences {
|
|
if owner.Kind == "Deployment" || owner.Kind == "StatefulSet" || owner.Kind == "DaemonSet" {
|
|
deletePod = true
|
|
break
|
|
}
|
|
}
|
|
|
|
if deletePod {
|
|
if dryRun {
|
|
log.Printf("[DRY RUN] Would delete pod %s/%s on node %s", pod.Namespace, pod.Name, node.Name)
|
|
} else {
|
|
log.Printf("Deleting pod %s/%s on node %s", pod.Namespace, pod.Name, node.Name)
|
|
err := clientset.CoreV1().Pods(pod.Namespace).Delete(context.Background(), pod.Name, metav1.DeleteOptions{})
|
|
if err != nil {
|
|
log.Println("Failed to delete pod:", err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|