@@ -481,7 +481,7 @@ func (r *reconciler) syncPendingJob(ctx context.Context, pj *prowv1.ProwJob) (*r
481481 r .log .WithField ("name" , pj .ObjectMeta .Name ).Debug ("Delete Pod." )
482482 return nil , ctrlruntimeclient .IgnoreNotFound (client .Delete (ctx , pod ))
483483 }
484- } else if pod . Status . Reason == Terminated {
484+ } else if isPodTerminated ( pod ) {
485485 // Pod was terminated.
486486 if pj .Spec .ErrorOnTermination {
487487 // ErrorOnTermination is enabled, complete the PJ and mark it as
@@ -692,6 +692,33 @@ func (r *reconciler) syncPendingJob(ctx context.Context, pj *prowv1.ProwJob) (*r
692692 return nil , nil
693693}
694694
695+ func isPodTerminated (pod * corev1.Pod ) bool {
696+ // If there was a Graceful node shutdown, the Pod's status will have a
697+ // reason set to "Terminated":
698+ // https://kubernetes.io/docs/concepts/architecture/nodes/#graceful-node-shutdown
699+ if pod .Status .Reason == Terminated {
700+ return true
701+ }
702+
703+ for _ , condition := range pod .Status .Conditions {
704+ // If the node does no longer exist and the pod gets garbage collected,
705+ // this condition will be set:
706+ // https://kubernetes.io/docs/concepts/workloads/pods/disruptions/#pod-disruption-conditions
707+ if condition .Reason == "DeletionByPodGC" {
708+ return true
709+ }
710+
711+ // On GCP, before a new spot instance is started, the old pods are garbage
712+ // collected (if they have not been already by the Kubernetes PodGC):
713+ // https://github.com/kubernetes/cloud-provider-gcp/blob/25e5dcc715781316bc5e39f8b17c0d5b313453f7/cmd/gcp-controller-manager/node_csr_approver.go#L1035-L1058
714+ if condition .Reason == "DeletionByGCPControllerManager" {
715+ return true
716+ }
717+ }
718+
719+ return false
720+ }
721+
695722// syncTriggeredJob syncs jobs that do not yet have an associated test workload running
696723func (r * reconciler ) syncTriggeredJob (ctx context.Context , pj * prowv1.ProwJob ) (* reconcile.Result , error ) {
697724 prevPJ := pj .DeepCopy ()
0 commit comments