Skip to content

Commit 3221bd1

Browse files
authored
fix: fix updateRun error msg when a binding does not have expected state (#63)
1 parent 491abff commit 3221bd1

File tree

1 file changed

+13
-7
lines changed

1 file changed

+13
-7
lines changed

pkg/controllers/updaterun/execution.go

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -170,13 +170,19 @@ func (r *Reconciler) executeUpdatingStage(
170 170
}
171 171

172 172
// Now the cluster has to be updating, the binding should point to the right resource snapshot and the binding should be bound.
173-
if !isBindingSyncedWithClusterStatus(resourceSnapshotName, updateRun, binding, clusterStatus) || binding.Spec.State != placementv1beta1.BindingStateBound ||
174-
!condition.IsConditionStatusTrue(meta.FindStatusCondition(binding.Status.Conditions, string(placementv1beta1.ResourceBindingRolloutStarted)), binding.Generation) {
175-
unexpectedErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("the updating cluster `%s` in the stage %s does not match the cluster status: %+v, binding: %+v, condition: %+v",
176-
clusterStatus.ClusterName, updatingStageStatus.StageName, clusterStatus, binding.Spec, binding.GetCondition(string(placementv1beta1.ResourceBindingRolloutStarted))))
177-
klog.ErrorS(unexpectedErr, "The binding has been changed during updating, please check if there's concurrent clusterStagedUpdateRun", "clusterStagedUpdateRun", updateRunRef)
178-
markClusterUpdatingFailed(clusterStatus, updateRun.Generation, unexpectedErr.Error())
179-
return 0, fmt.Errorf("%w: %s", errStagedUpdatedAborted, unexpectedErr.Error())
173+
inSync := isBindingSyncedWithClusterStatus(resourceSnapshotName, updateRun, binding, clusterStatus)
174+
rolloutStarted := condition.IsConditionStatusTrue(meta.FindStatusCondition(binding.Status.Conditions, string(placementv1beta1.ResourceBindingRolloutStarted)), binding.Generation)
175+
if !inSync || !rolloutStarted || binding.Spec.State != placementv1beta1.BindingStateBound {
176+
// This issue mostly happens when there are concurrent updateRuns referencing the same clusterResourcePlacement but releasing different versions.
177+
// After the 1st updateRun updates the binding, and before the controller re-checks the binding status, the 2nd updateRun updates the same binding, and thus the 1st updateRun is preempted and observes the binding not matching the desired state.
178+
preemptedErr := controller.NewUserError(fmt.Errorf("the clusterResourceBinding of the updating cluster `%s` in the stage `%s` is not up-to-date with the desired status, "+
179+
"please check the status of binding `%s` and see if there is a concurrent updateRun referencing the same clusterResourcePlacement and updating the same cluster",
180+
clusterStatus.ClusterName, updatingStageStatus.StageName, klog.KObj(binding)))
181+
klog.ErrorS(preemptedErr, "The binding has been changed during updating",
182+
"bindingSpecInSync", inSync, "bindingState", binding.Spec.State,
183+
"bindingRolloutStarted", rolloutStarted, "binding", klog.KObj(binding), "clusterStagedUpdateRun", updateRunRef)
184+
markClusterUpdatingFailed(clusterStatus, updateRun.Generation, preemptedErr.Error())
185+
return 0, fmt.Errorf("%w: %s", errStagedUpdatedAborted, preemptedErr.Error())
180 186
}
181 187

182 188
finished, updateErr := checkClusterUpdateResult(binding, clusterStatus, updatingStageStatus, updateRun)

0 commit comments

Comments
 (0)