@@ -61,7 +61,8 @@ const ControllerName = "plank"
6161
6262// PodStatus constants
6363const (
64- Evicted = "Evicted"
64+ Evicted = "Evicted" // NOTE(review): use not visible in this hunk; presumably also a pod.Status.Reason value, confirm against syncPendingJob
65+ Terminated = "Terminated" // compared against pod.Status.Reason in syncPendingJob to detect a terminated pod
6566)
6667
6768// NodeStatus constants
@@ -480,6 +481,34 @@ func (r *reconciler) syncPendingJob(ctx context.Context, pj *prowv1.ProwJob) (*r
480481 r .log .WithField ("name" , pj .ObjectMeta .Name ).Debug ("Delete Pod." )
481482 return nil , ctrlruntimeclient .IgnoreNotFound (client .Delete (ctx , pod ))
482483 }
484+ } else if pod .Status .Reason == Terminated {
485+ // Pod was terminated.
486+ if pj .Spec .ErrorOnTermination {
487+ // ErrorOnTermination is enabled, complete the PJ and mark it as
488+ // errored.
489+ r .log .WithField ("error-on-termination" , true ).WithFields (pjutil .ProwJobFields (pj )).Info ("Pods Node got terminated, fail job." )
490+ pj .SetComplete ()
491+ pj .Status .State = prowv1 .ErrorState
492+ pj .Status .Description = "Job pod's node was terminated."
493+ } else {
494+ // ErrorOnTermination is disabled. Delete the pod now and recreate it in
495+ // the next resync.
496+ r .log .WithFields (pjutil .ProwJobFields (pj )).Info ("Pods Node got terminated, deleting & next sync loop will restart pod" )
497+ client , ok := r .buildClients [pj .ClusterAlias ()]
498+ if ! ok {
499+ return nil , TerminalError (fmt .Errorf ("terminated pod %s: unknown cluster alias %q" , pod .Name , pj .ClusterAlias ()))
500+ }
501+ if finalizers := sets .New [string ](pod .Finalizers ... ); finalizers .Has (kubernetesreporterapi .FinalizerName ) {
502+ // We want the end user to not see this, so we have to remove the finalizer, otherwise the pod hangs
503+ oldPod := pod .DeepCopy ()
504+ pod .Finalizers = finalizers .Delete (kubernetesreporterapi .FinalizerName ).UnsortedList ()
505+ if err := client .Patch (ctx , pod , ctrlruntimeclient .MergeFrom (oldPod )); err != nil {
506+ return nil , fmt .Errorf ("failed to patch pod trying to remove %s finalizer: %w" , kubernetesreporterapi .FinalizerName , err )
507+ }
508+ }
509+ r .log .WithField ("name" , pj .ObjectMeta .Name ).Debug ("Delete Pod." )
510+ return nil , ctrlruntimeclient .IgnoreNotFound (client .Delete (ctx , pod ))
511+ }
483512 } else if pod .DeletionTimestamp != nil && pod .Status .Reason == NodeUnreachablePodReason {
484513 // This can happen in any phase and means the node got evicted after it became unresponsive. Delete the finalizer so the pod
485514 // vanishes and we will silently re-create it in the next iteration.
0 commit comments