Skip to content

Commit 043957b

Browse files
🌱 Move up to date condition to ms for workers (#12959)
* Move setUpToDate condition from MS controller to Machine controller * Use MS and MD from scope instead of reading them again * Introduce util for isUpdating
1 parent 870e8a6 commit 043957b

16 files changed

+814
-474
lines changed

api/core/v1beta2/machine_types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,10 @@ const (
162162

163163
// MachineNotUpToDateReason surface when a Machine spec does not match the spec of the Machine's owner resource, e.g. KubeadmControlPlane or MachineDeployment.
164164
MachineNotUpToDateReason = "NotUpToDate"
165+
166+
// MachineUpToDateUpdatingReason surface when a Machine spec matches the spec of the Machine's owner resource,
167+
// but the Machine is still updating in-place.
168+
MachineUpToDateUpdatingReason = "Updating"
165169
)
166170

167171
// Machine's Updating condition and corresponding reasons.

controlplane/kubeadm/internal/controllers/controller.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ import (
4848
"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal"
4949
runtimeclient "sigs.k8s.io/cluster-api/exp/runtime/client"
5050
"sigs.k8s.io/cluster-api/feature"
51+
"sigs.k8s.io/cluster-api/internal/util/inplace"
5152
"sigs.k8s.io/cluster-api/internal/util/ssa"
5253
"sigs.k8s.io/cluster-api/util"
5354
"sigs.k8s.io/cluster-api/util/cache"
@@ -1039,6 +1040,16 @@ func reconcileMachineUpToDateCondition(_ context.Context, controlPlane *internal
10391040
})
10401041
continue
10411042
}
1043+
1044+
if inplace.IsUpdateInProgress(machine) {
1045+
conditions.Set(machine, metav1.Condition{
1046+
Type: clusterv1.MachineUpToDateCondition,
1047+
Status: metav1.ConditionFalse,
1048+
Reason: clusterv1.MachineUpToDateUpdatingReason,
1049+
})
1050+
continue
1051+
}
1052+
10421053
conditions.Set(machine, metav1.Condition{
10431054
Type: clusterv1.MachineUpToDateCondition,
10441055
Status: metav1.ConditionTrue,

controlplane/kubeadm/internal/controllers/controller_test.go

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2322,6 +2322,90 @@ func TestKubeadmControlPlaneReconciler_reconcileControlPlaneAndMachinesCondition
23222322
},
23232323
},
23242324
},
2325+
{
2326+
name: "Machines in place updating, machine not up-to-date date",
2327+
controlPlane: func() *internal.ControlPlane {
2328+
controlPlane, err := internal.NewControlPlane(ctx, nil, env.GetClient(), defaultCluster, defaultKCP.DeepCopy(), collections.FromMachines(
2329+
func() *clusterv1.Machine {
2330+
m := defaultMachine1.DeepCopy()
2331+
m.Annotations = map[string]string{
2332+
clusterv1.UpdateInProgressAnnotation: "",
2333+
}
2334+
return m
2335+
}(),
2336+
))
2337+
if err != nil {
2338+
panic(err)
2339+
}
2340+
return controlPlane
2341+
}(),
2342+
managementCluster: &fakeManagementCluster{
2343+
Workload: &fakeWorkloadCluster{
2344+
Workload: &internal.Workload{
2345+
Client: fake.NewClientBuilder().Build(),
2346+
},
2347+
},
2348+
},
2349+
lastProbeSuccessTime: now.Add(-3 * time.Minute),
2350+
expectKCPConditions: []metav1.Condition{
2351+
{
2352+
Type: controlplanev1.KubeadmControlPlaneInitializedCondition,
2353+
Status: metav1.ConditionTrue,
2354+
Reason: controlplanev1.KubeadmControlPlaneInitializedReason,
2355+
},
2356+
{
2357+
Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyCondition,
2358+
Status: metav1.ConditionUnknown,
2359+
Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownReason,
2360+
Message: "* Machine machine1-test:\n" +
2361+
" * EtcdMemberHealthy: Waiting for a Node with spec.providerID foo to exist",
2362+
},
2363+
{
2364+
Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyCondition,
2365+
Status: metav1.ConditionUnknown,
2366+
Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownReason,
2367+
Message: "* Machine machine1-test:\n" +
2368+
" * Control plane components: Waiting for a Node with spec.providerID foo to exist",
2369+
},
2370+
},
2371+
expectMachineConditions: []metav1.Condition{
2372+
{
2373+
Type: controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyCondition,
2374+
Status: metav1.ConditionUnknown,
2375+
Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedReason,
2376+
Message: "Waiting for a Node with spec.providerID foo to exist",
2377+
},
2378+
{
2379+
Type: controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyCondition,
2380+
Status: metav1.ConditionUnknown,
2381+
Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedReason,
2382+
Message: "Waiting for a Node with spec.providerID foo to exist",
2383+
},
2384+
{
2385+
Type: controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyCondition,
2386+
Status: metav1.ConditionUnknown,
2387+
Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedReason,
2388+
Message: "Waiting for a Node with spec.providerID foo to exist",
2389+
},
2390+
{
2391+
Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition,
2392+
Status: metav1.ConditionUnknown,
2393+
Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedReason,
2394+
Message: "Waiting for a Node with spec.providerID foo to exist",
2395+
},
2396+
{
2397+
Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyCondition,
2398+
Status: metav1.ConditionUnknown,
2399+
Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedReason,
2400+
Message: "Waiting for a Node with spec.providerID foo to exist",
2401+
},
2402+
{
2403+
Type: clusterv1.MachineUpToDateCondition,
2404+
Status: metav1.ConditionFalse,
2405+
Reason: clusterv1.MachineUpToDateUpdatingReason,
2406+
},
2407+
},
2408+
},
23252409
{
23262410
name: "Machines not up to date",
23272411
controlPlane: func() *internal.ControlPlane {

internal/controllers/machine/machine_controller.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,18 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (retres ct
254254
machine: m,
255255
}
256256

257+
// If the Machine is controlled by a MachineSet, read it.
258+
s.owningMachineSet, err = r.getOwnerMachineSet(ctx, s.machine)
259+
if err != nil {
260+
return ctrl.Result{}, err
261+
}
262+
263+
// If the MachineSet is controlled by a MachineDeployment, get it as well.
264+
s.owningMachineDeployment, err = r.getOwnerMachineDeployment(ctx, s.owningMachineSet)
265+
if err != nil {
266+
return ctrl.Result{}, err
267+
}
268+
257269
defer func() {
258270
updateRes := r.updateStatus(ctx, s)
259271
retres = util.LowestNonZeroResult(retres, updateRes)
@@ -296,6 +308,35 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (retres ct
296308
return doReconcile(ctx, reconcileNormal, s)
297309
}
298310

311+
func (r *Reconciler) getOwnerMachineSet(ctx context.Context, m *clusterv1.Machine) (*clusterv1.MachineSet, error) {
312+
machineSetKey, notFound, err := getOwnerMachineSetObjectKey(m.ObjectMeta)
313+
if err != nil || notFound || machineSetKey == nil {
314+
return nil, err
315+
}
316+
ms := &clusterv1.MachineSet{}
317+
if err := r.Client.Get(ctx, *machineSetKey, ms); err != nil {
318+
return nil, fmt.Errorf("failed to retrieve owner MachineSet for Machine %s: %w", klog.KObj(m), err)
319+
}
320+
return ms, nil
321+
}
322+
323+
func (r *Reconciler) getOwnerMachineDeployment(ctx context.Context, ms *clusterv1.MachineSet) (*clusterv1.MachineDeployment, error) {
324+
if ms == nil {
325+
return nil, nil
326+
}
327+
328+
mdName := ms.Labels[clusterv1.MachineDeploymentNameLabel]
329+
if mdName == "" {
330+
return nil, nil
331+
}
332+
333+
md := &clusterv1.MachineDeployment{}
334+
if err := r.Client.Get(ctx, client.ObjectKey{Namespace: ms.Namespace, Name: mdName}, md); err != nil {
335+
return nil, fmt.Errorf("failed to retrieve owner MachineDeployment for MachineSet %s: %w", klog.KObj(ms), err)
336+
}
337+
return md, nil
338+
}
339+
299340
func patchMachine(ctx context.Context, patchHelper *patch.Helper, machine *clusterv1.Machine, options ...patch.Option) error {
300341
// Always update the readyCondition by summarizing the state of other conditions.
301342
// A step counter is added to represent progress during the provisioning process (instead we are hiding it
@@ -378,6 +419,14 @@ type scope struct {
378419
// of the reconcile function.
379420
machine *clusterv1.Machine
380421

422+
// owningMachineSet return the MachineSet owning this machine.
423+
// Note: The value will be nil in case of stand-alone machines.
424+
owningMachineSet *clusterv1.MachineSet
425+
426+
// owningMachineDeployment return the MachineDeployment controlling the MachineSet owning this machine.
427+
// Note: The value will be nil in case of stand-alone Machines or in case of stand-alone MachineSet.
428+
owningMachineDeployment *clusterv1.MachineDeployment
429+
381430
// infraMachine is the Infrastructure Machine object that is referenced by the
382431
// Machine. It is set after reconcileInfrastructure is called.
383432
infraMachine *unstructured.Unstructured

internal/controllers/machine/machine_controller_noderef.go

Lines changed: 14 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ import (
2424

2525
"github.com/pkg/errors"
2626
corev1 "k8s.io/api/core/v1"
27-
apierrors "k8s.io/apimachinery/pkg/api/errors"
2827
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2928
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
3029
"k8s.io/apimachinery/pkg/runtime/schema"
@@ -143,7 +142,7 @@ func (r *Reconciler) reconcileNode(ctx context.Context, s *scope) (ctrl.Result,
143142
_, nodeHadInterruptibleLabel := s.node.Labels[clusterv1.InterruptibleLabel]
144143

145144
// Reconcile node taints
146-
if err := r.patchNode(ctx, remoteClient, s.node, nodeLabels, nodeAnnotations, machine); err != nil {
145+
if err := r.patchNode(ctx, remoteClient, s.node, nodeLabels, nodeAnnotations, s.owningMachineSet, s.owningMachineDeployment); err != nil {
147146
return ctrl.Result{}, errors.Wrapf(err, "failed to reconcile Node %s", klog.KObj(s.node))
148147
}
149148
if !nodeHadInterruptibleLabel && interruptible {
@@ -246,7 +245,7 @@ func (r *Reconciler) getNode(ctx context.Context, c client.Reader, providerID st
246245

247246
// PatchNode is required to workaround an issue on Node.Status.Address which is incorrectly annotated as patchStrategy=merge
248247
// and this causes SSA patch to fail in case there are two addresses with the same key https://github.com/kubernetes-sigs/cluster-api/issues/8417
249-
func (r *Reconciler) patchNode(ctx context.Context, remoteClient client.Client, node *corev1.Node, newLabels, newAnnotations map[string]string, m *clusterv1.Machine) error {
248+
func (r *Reconciler) patchNode(ctx context.Context, remoteClient client.Client, node *corev1.Node, newLabels, newAnnotations map[string]string, ms *clusterv1.MachineSet, md *clusterv1.MachineDeployment) error {
250249
newNode := node.DeepCopy()
251250

252251
// Adds the annotations from the Machine.
@@ -336,20 +335,14 @@ func (r *Reconciler) patchNode(ctx context.Context, remoteClient client.Client,
336335
hasTaintChanges := taints.RemoveNodeTaint(newNode, clusterv1.NodeUninitializedTaint)
337336

338337
// Set Taint to a node in an old MachineSet and unset Taint from a node in a new MachineSet
339-
isOutdated, notFound, err := shouldNodeHaveOutdatedTaint(ctx, r.Client, m)
338+
isOutdated, err := shouldNodeHaveOutdatedTaint(ms, md)
340339
if err != nil {
341340
return errors.Wrapf(err, "failed to check if Node %s is outdated", klog.KRef("", node.Name))
342341
}
343-
344-
// It is only possible to identify if we have to set or remove the NodeOutdatedRevisionTaint if shouldNodeHaveOutdatedTaint
345-
// found all relevant objects.
346-
// Example: when the MachineDeployment or Machineset can't be found due to a background deletion of objects.
347-
if !notFound {
348-
if isOutdated {
349-
hasTaintChanges = taints.EnsureNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges
350-
} else {
351-
hasTaintChanges = taints.RemoveNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges
352-
}
342+
if isOutdated {
343+
hasTaintChanges = taints.EnsureNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges
344+
} else {
345+
hasTaintChanges = taints.RemoveNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges
353346
}
354347

355348
if !hasAnnotationChanges && !hasLabelChanges && !hasTaintChanges {
@@ -362,47 +355,23 @@ func (r *Reconciler) patchNode(ctx context.Context, remoteClient client.Client,
362355
// shouldNodeHaveOutdatedTaint tries to compare the revision of the owning MachineSet to the MachineDeployment.
363356
// It returns notFound = true if the OwnerReference is not set or the APIServer returns NotFound for the MachineSet or MachineDeployment.
364357
// Note: This three cases could happen during background deletion of objects.
365-
func shouldNodeHaveOutdatedTaint(ctx context.Context, c client.Client, m *clusterv1.Machine) (outdated bool, notFound bool, err error) {
366-
if _, hasLabel := m.Labels[clusterv1.MachineDeploymentNameLabel]; !hasLabel {
367-
return false, false, nil
358+
func shouldNodeHaveOutdatedTaint(ms *clusterv1.MachineSet, md *clusterv1.MachineDeployment) (outdated bool, err error) {
359+
if ms == nil || md == nil {
360+
return false, nil
368361
}
369362

370-
// Resolve the MachineSet name via owner references because the label value
371-
// could also be a hash.
372-
objKey, notFound, err := getOwnerMachineSetObjectKey(m.ObjectMeta)
373-
if err != nil || notFound {
374-
return false, notFound, err
375-
}
376-
ms := &clusterv1.MachineSet{}
377-
if err := c.Get(ctx, *objKey, ms); err != nil {
378-
if apierrors.IsNotFound(err) {
379-
return false, true, nil
380-
}
381-
return false, false, err
382-
}
383-
md := &clusterv1.MachineDeployment{}
384-
objKey = &client.ObjectKey{
385-
Namespace: m.Namespace,
386-
Name: m.Labels[clusterv1.MachineDeploymentNameLabel],
387-
}
388-
if err := c.Get(ctx, *objKey, md); err != nil {
389-
if apierrors.IsNotFound(err) {
390-
return false, true, nil
391-
}
392-
return false, false, err
393-
}
394363
msRev, err := mdutil.Revision(ms)
395364
if err != nil {
396-
return false, false, err
365+
return false, err
397366
}
398367
mdRev, err := mdutil.Revision(md)
399368
if err != nil {
400-
return false, false, err
369+
return false, err
401370
}
402371
if msRev < mdRev {
403-
return true, false, nil
372+
return true, nil
404373
}
405-
return false, false, nil
374+
return false, nil
406375
}
407376

408377
func getOwnerMachineSetObjectKey(obj metav1.ObjectMeta) (*client.ObjectKey, bool, error) {

0 commit comments

Comments
 (0)