@@ -362,72 +362,116 @@ pub async fn fetch_recent_logs(docker: &Docker, container_name: &str, n: usize)
362362 rendered
363363}
364364
365- /// Remove stale k3s nodes from a cluster with a reused persistent volume .
365+ /// Remove stale k3s nodes and their orphaned pods from a resumed cluster .
366366///
367367/// When a cluster container is recreated but the volume is reused, k3s registers
368368/// a new node (using the container ID as the hostname) while old node entries
369369/// persist in etcd. Pods scheduled on those stale `NotReady` nodes will never run,
370370/// causing health checks to fail.
371371///
372- /// This function identifies all `NotReady` nodes and deletes them so k3s can
373- /// reschedule workloads onto the current (Ready) node.
372+ /// This function retries with backoff until `kubectl` becomes available (k3s may
373+ /// still be initialising), then:
374+ /// 1. Deletes all `NotReady` nodes so k3s stops tracking them.
375+ /// 2. Force-deletes any pods stuck in `Terminating` so `StatefulSets` and
376+ /// Deployments can reschedule replacements on the current (Ready) node.
374377///
375378/// Returns the number of stale nodes removed.
376379pub async fn clean_stale_nodes ( docker : & Docker , name : & str ) -> Result < usize > {
380+ // Retry until kubectl is responsive. k3s can take 10-20 s to start the
381+ // API server after a container restart, so we allow up to ~45 s.
382+ const MAX_ATTEMPTS : u32 = 15 ;
383+ const RETRY_DELAY : Duration = Duration :: from_secs ( 3 ) ;
384+
377385 let container_name = container_name ( name) ;
386+ let mut stale_nodes: Vec < String > = Vec :: new ( ) ;
387+
388+ for attempt in 1 ..=MAX_ATTEMPTS {
389+ let ( output, exit_code) = exec_capture_with_exit (
390+ docker,
391+ & container_name,
392+ vec ! [
393+ "sh" . to_string( ) ,
394+ "-c" . to_string( ) ,
395+ format!(
396+ "KUBECONFIG={KUBECONFIG_PATH} kubectl get nodes \
397+ --no-headers -o custom-columns=NAME:.metadata.name,STATUS:.status.conditions[-1].status \
398+ 2>/dev/null | grep -v '\\ bTrue$' | awk '{{print $1}}'"
399+ ) ,
400+ ] ,
401+ )
402+ . await ?;
403+
404+ if exit_code == 0 {
405+ stale_nodes = output
406+ . lines ( )
407+ . map ( str:: trim)
408+ . filter ( |l| !l. is_empty ( ) )
409+ . map ( ToString :: to_string)
410+ . collect ( ) ;
411+ break ;
412+ }
413+
414+ if attempt < MAX_ATTEMPTS {
415+ tracing:: debug!(
416+ "kubectl not ready yet (attempt {attempt}/{MAX_ATTEMPTS}), retrying in {}s" ,
417+ RETRY_DELAY . as_secs( )
418+ ) ;
419+ tokio:: time:: sleep ( RETRY_DELAY ) . await ;
420+ }
421+ }
422+
423+ if stale_nodes. is_empty ( ) {
424+ return Ok ( 0 ) ;
425+ }
426+
427+ let node_list = stale_nodes. join ( " " ) ;
428+ let count = stale_nodes. len ( ) ;
429+ tracing:: info!( "removing {} stale node(s): {}" , count, node_list) ;
378430
379- // Get the list of NotReady nodes.
380- // The last condition on a node is always type=Ready; we need to check its
381- // **status** (True/False/Unknown), not its type. Nodes where the Ready
382- // condition status is not "True" are stale and should be removed.
383- let ( output, exit_code) = exec_capture_with_exit (
431+ // Step 1: delete the stale node objects.
432+ let ( _output, exit_code) = exec_capture_with_exit (
384433 docker,
385434 & container_name,
386435 vec ! [
387436 "sh" . to_string( ) ,
388437 "-c" . to_string( ) ,
389438 format!(
390- "KUBECONFIG={KUBECONFIG_PATH} kubectl get nodes \
391- --no-headers -o custom-columns=NAME:.metadata.name,STATUS:.status.conditions[-1].status \
392- 2>/dev/null | grep -v '\\ bTrue$' | awk '{{print $1}}'"
439+ "KUBECONFIG={KUBECONFIG_PATH} kubectl delete node {node_list} --ignore-not-found"
393440 ) ,
394441 ] ,
395442 )
396443 . await ?;
397444
398445 if exit_code != 0 {
399- // kubectl not ready yet or no nodes — nothing to clean
400- return Ok ( 0 ) ;
401- }
402-
403- let stale_nodes: Vec < & str > = output
404- . lines ( )
405- . map ( str:: trim)
406- . filter ( |l| !l. is_empty ( ) )
407- . collect ( ) ;
408- if stale_nodes. is_empty ( ) {
409- return Ok ( 0 ) ;
446+ tracing:: warn!( "failed to delete stale nodes (exit code {exit_code})" ) ;
410447 }
411448
412- let node_list = stale_nodes . join ( " " ) ;
413- let count = stale_nodes . len ( ) ;
414- tracing :: info! ( "removing {} stale node(s): {}" , count , node_list ) ;
415-
449+ // Step 2: force-delete pods stuck in Terminating. After the stale node is
450+ // removed, pods that were scheduled on it transition to Terminating but
451+ // will never complete graceful shutdown (the node is gone). StatefulSets
452+ // will not create a replacement until the old pod is fully deleted.
416453 let ( _output, exit_code) = exec_capture_with_exit (
417454 docker,
418455 & container_name,
419456 vec ! [
420457 "sh" . to_string( ) ,
421458 "-c" . to_string( ) ,
422459 format!(
423- "KUBECONFIG={KUBECONFIG_PATH} kubectl delete node {node_list} --ignore-not-found"
460+ "KUBECONFIG={KUBECONFIG_PATH} kubectl get pods --all-namespaces \
461+ --field-selector=status.phase=Running -o name 2>/dev/null; \
462+ for pod_line in $(KUBECONFIG={KUBECONFIG_PATH} kubectl get pods --all-namespaces \
463+ --no-headers 2>/dev/null | awk '$4 == \" Terminating\" {{print $1\" /\" $2}}'); do \
464+ ns=${{pod_line%%/*}}; pod=${{pod_line#*/}}; \
465+ KUBECONFIG={KUBECONFIG_PATH} kubectl delete pod \" $pod\" -n \" $ns\" \
466+ --force --grace-period=0 --ignore-not-found 2>/dev/null; \
467+ done"
424468 ) ,
425469 ] ,
426470 )
427471 . await ?;
428472
429473 if exit_code != 0 {
430- tracing:: warn !( "failed to delete stale nodes ( exit code {exit_code})" ) ;
474+ tracing:: debug !( "force-delete of terminating pods returned exit code {exit_code} (non-fatal )" ) ;
431475 }
432476
433477 Ok ( count)
0 commit comments