
Commit 0000172

fix: address an SSA-related issue which might cause all apply attempts (except the first one) to fail due to conflicts (#59)
1 parent fe0f3f3 commit 0000172

File tree: 3 files changed, +258 −0 lines changed
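The conflict described in the commit title shows up in an object's managedFields: the API server can hold an Update typed entry and an Apply typed entry for the same manager name, and server-side apply treats the two as different owners. The diagnostic sketch below is not part of this commit; it only lists the field managers recorded on a ConfigMap through a controller-runtime client so that such duplicate entries can be spotted. The client, namespace, and object name are illustrative.

package diag

import (
	"context"
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// listFieldManagers prints every managedFields entry on a ConfigMap so that an
// Update typed and an Apply typed entry sharing one manager name become visible.
func listFieldManagers(ctx context.Context, cl client.Client, namespace, name string) error {
	cm := &corev1.ConfigMap{}
	if err := cl.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, cm); err != nil {
		return err
	}
	for _, mf := range cm.GetManagedFields() {
		// Operation is either "Update" or "Apply".
		fmt.Printf("manager=%s operation=%s apiVersion=%s\n", mf.Manager, mf.Operation, mf.APIVersion)
	}
	return nil
}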

pkg/controllers/workapplier/apply.go

Lines changed: 50 additions & 0 deletions
@@ -288,6 +288,20 @@ func (r *Reconciler) serverSideApply(
 		inMemberClusterObj.SetResourceVersion("")
 	}
 
+	// Check if forced server-side apply is needed even if it is not turned on by the user.
+	//
+	// Note (chenyu1): This is added to address cases where Kubernetes might register
+	// Fleet (the member agent) as an Update typed field manager for the object, which blocks
+	// the same agent itself from performing a server-side apply due to conflicts,
+	// as Kubernetes considers Update typed and Apply typed field managers to be different
+	// entities, despite having the same identifier. In these cases, users will see their
+	// first apply attempt being successful, yet any subsequent update would fail due to
+	// conflicts. There are also a few other similar cases that are solved by this check;
+	// see the inner comments for specifics.
+	if shouldUseForcedServerSideApply(inMemberClusterObj) {
+		force = true
+	}
+
 	// Use server-side apply to apply the manifest object.
 	//
 	// See the Kubernetes documentation on structured merged diff for the exact behaviors.
@@ -586,3 +600,39 @@ func shouldEnableOptimisticLock(applyStrategy *fleetv1beta1.ApplyStrategy) bool
 	// Optimistic lock is enabled if the apply strategy is set to IfNotDrifted.
 	return applyStrategy.WhenToApply == fleetv1beta1.WhenToApplyTypeIfNotDrifted
 }
+
+// shouldUseForcedServerSideApply checks if forced server-side apply should be used even if
+// the force option is not turned on.
+func shouldUseForcedServerSideApply(inMemberClusterObj *unstructured.Unstructured) bool {
+	managedFields := inMemberClusterObj.GetManagedFields()
+	for idx := range managedFields {
+		mf := &managedFields[idx]
+		// workFieldManagerName is the field manager name used by Fleet; its presence
+		// suggests that some (not necessarily all) fields are managed by Fleet.
+		//
+		// `before-first-apply` is a field manager name used by Kubernetes to "properly"
+		// track field managers between non-apply and apply ops. Specifically, this
+		// manager is added when an object is being applied, but Kubernetes finds
+		// that the object does not have any managed field specified.
+		//
+		// Note (chenyu1): unfortunately this name is not exposed as a public variable. See
+		// the Kubernetes source code for more information.
+		if mf.Manager != workFieldManagerName && mf.Manager != "before-first-apply" {
+			// There exists a field manager that is neither Fleet nor the `before-first-apply`
+			// field manager, which suggests that the object (or at least some of its fields)
+			// is managed by another entity. Fleet will not enable forced server-side apply in
+			// this case and lets the user decide if forced apply is needed.
+			klog.V(2).InfoS("Found a field manager that is neither Fleet nor the `before-first-apply` field manager; Fleet will not enable forced server-side apply unless explicitly requested",
+				"fieldManager", mf.Manager,
+				"GVK", inMemberClusterObj.GroupVersionKind(), "inMemberClusterObj", klog.KObj(inMemberClusterObj))
+			return false
+		}
+	}
+
+	// All field managers are either Fleet or the `before-first-apply` field manager;
+	// use forced server-side apply to avoid confusing self-conflicts. This would
+	// allow Fleet to (correctly) assume ownership of managed fields.
+	klog.V(2).InfoS("All field managers are either Fleet or the `before-first-apply` field manager; Fleet will enable forced server-side apply",
+		"GVK", inMemberClusterObj.GroupVersionKind(), "inMemberClusterObj", klog.KObj(inMemberClusterObj))
+	return true
+}
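The hunk above only computes the force flag; the patch call itself in serverSideApply sits outside this diff. As a hypothetical sketch (not the Fleet implementation), a controller-runtime based server-side apply would typically consume such a flag by toggling client.ForceOwnership; the helper name and field owner string below are illustrative.

package sketch

import (
	"context"

	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// applyWithOptionalForce shows how a computed force flag can feed into a
// controller-runtime server-side apply call.
func applyWithOptionalForce(ctx context.Context, cl client.Client, obj *unstructured.Unstructured, force bool) error {
	applied := obj.DeepCopy()
	// An apply patch must not carry managedFields copied from a previously read object.
	applied.SetManagedFields(nil)
	opts := []client.PatchOption{client.FieldOwner("fleet-member-agent")}
	if force {
		// ForceOwnership resolves field ownership conflicts in favor of this
		// manager, which is what the shouldUseForcedServerSideApply check opts into.
		opts = append(opts, client.ForceOwnership)
	}
	return cl.Patch(ctx, applied, client.Apply, opts...)
}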

pkg/controllers/workapplier/apply_test.go

Lines changed: 91 additions & 0 deletions
@@ -28,6 +28,7 @@ import (
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/utils/ptr"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 
 	fleetv1beta1 "github.com/kubefleet-dev/kubefleet/apis/placement/v1beta1"
 )
@@ -459,3 +460,93 @@ func TestSetFleetLastAppliedAnnotation(t *testing.T) {
 		})
 	}
 }
+
+// TestShouldUseForcedServerSideApply tests the shouldUseForcedServerSideApply function.
+func TestShouldUseForcedServerSideApply(t *testing.T) {
+	testCases := []struct {
+		name                               string
+		inMemberClusterObj                 client.Object
+		wantShouldUseForcedServerSideApply bool
+	}{
+		{
+			name: "object under Fleet's management",
+			inMemberClusterObj: &corev1.ConfigMap{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: configMapName,
+					ManagedFields: []metav1.ManagedFieldsEntry{
+						{
+							Manager:   workFieldManagerName,
+							Operation: metav1.ManagedFieldsOperationUpdate,
+						},
+					},
+				},
+			},
+			wantShouldUseForcedServerSideApply: true,
+		},
+		{
+			name: "object under before-first-apply field manager's management",
+			inMemberClusterObj: &corev1.ConfigMap{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: configMapName,
+					ManagedFields: []metav1.ManagedFieldsEntry{
+						{
+							Manager:   "before-first-apply",
+							Operation: metav1.ManagedFieldsOperationUpdate,
+						},
+					},
+				},
+			},
+			wantShouldUseForcedServerSideApply: true,
+		},
+		{
+			name: "object under both Fleet and before-first-apply field managers' management",
+			inMemberClusterObj: &corev1.ConfigMap{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: configMapName,
+					ManagedFields: []metav1.ManagedFieldsEntry{
+						{
+							Manager:   "before-first-apply",
+							Operation: metav1.ManagedFieldsOperationUpdate,
+						},
+						{
+							Manager:   workFieldManagerName,
+							Operation: metav1.ManagedFieldsOperationUpdate,
+						},
+					},
+				},
+			},
+			wantShouldUseForcedServerSideApply: true,
+		},
+		{
+			name: "object under other field manager's management",
+			inMemberClusterObj: &corev1.ConfigMap{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: configMapName,
+					ManagedFields: []metav1.ManagedFieldsEntry{
+						{
+							Manager:   "before-first-apply",
+							Operation: metav1.ManagedFieldsOperationUpdate,
+						},
+						{
+							Manager:   "3rd-party-manager",
+							Operation: metav1.ManagedFieldsOperationApply,
+						},
+						{
+							Manager:   workFieldManagerName,
+							Operation: metav1.ManagedFieldsOperationUpdate,
+						},
+					},
+				},
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := shouldUseForcedServerSideApply(toUnstructured(t, tc.inMemberClusterObj))
+			if got != tc.wantShouldUseForcedServerSideApply {
+				t.Errorf("shouldUseForcedServerSideApply() = %t, want %t", got, tc.wantShouldUseForcedServerSideApply)
+			}
+		})
+	}
+}
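The new test converts each typed ConfigMap through a toUnstructured helper before calling shouldUseForcedServerSideApply. That helper already exists in the repository but is outside this diff; a minimal equivalent, assuming it simply round-trips the object through runtime.DefaultUnstructuredConverter, might look like this:

package sketch

import (
	"testing"

	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// toUnstructured is a guess at the shape of the helper the test reuses; the
// repository's own implementation is not shown here. It converts a typed object
// (including its managedFields) into *unstructured.Unstructured.
func toUnstructured(t *testing.T, obj client.Object) *unstructured.Unstructured {
	t.Helper()
	m, err := runtime.DefaultUnstructuredConverter.ToUnstructured(obj)
	if err != nil {
		t.Fatalf("failed to convert object to unstructured: %v", err)
	}
	return &unstructured.Unstructured{Object: m}
}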

test/e2e/placement_apply_strategy_test.go

Lines changed: 117 additions & 0 deletions
@@ -620,6 +620,123 @@ var _ = Describe("validating CRP when resources exists", Ordered, func() {
 	})
 })
 
+var _ = Describe("SSA", Ordered, func() {
+	Context("use server-side apply to place resources (with changes)", func() {
+		crpName := fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess())
+		nsName := fmt.Sprintf(workNamespaceNameTemplate, GinkgoParallelProcess())
+		cmName := fmt.Sprintf(appConfigMapNameTemplate, GinkgoParallelProcess())
+
+		// The key here should match the one used in the default config map.
+		cmDataKey := "data"
+		cmDataVal1 := "foobar"
+
+		BeforeAll(func() {
+			// Create the resources on the hub cluster.
+			createWorkResources()
+
+			// Create the CRP.
+			crp := &placementv1beta1.ClusterResourcePlacement{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: crpName,
+					// Add a custom finalizer; this would allow us to better observe
+					// the behavior of the controllers.
+					Finalizers: []string{customDeletionBlockerFinalizer},
+				},
+				Spec: placementv1beta1.ClusterResourcePlacementSpec{
+					ResourceSelectors: workResourceSelector(),
+					Policy: &placementv1beta1.PlacementPolicy{
+						PlacementType: placementv1beta1.PickAllPlacementType,
+					},
+					Strategy: placementv1beta1.RolloutStrategy{
+						Type: placementv1beta1.RollingUpdateRolloutStrategyType,
+						RollingUpdate: &placementv1beta1.RollingUpdateConfig{
+							MaxUnavailable:           ptr.To(intstr.FromInt(1)),
+							MaxSurge:                 ptr.To(intstr.FromInt(1)),
+							UnavailablePeriodSeconds: ptr.To(2),
+						},
+						ApplyStrategy: &placementv1beta1.ApplyStrategy{
+							Type: placementv1beta1.ApplyStrategyTypeServerSideApply,
+						},
+					},
+				},
+			}
+			Expect(hubClient.Create(ctx, crp)).To(Succeed())
+		})
+
+		It("should update CRP status as expected", func() {
+			crpStatusUpdatedActual := crpStatusUpdatedActual(workResourceIdentifiers(), allMemberClusterNames, nil, "0")
+			Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected")
+		})
+
+		It("should place the resources on all member clusters", checkIfPlacedWorkResourcesOnAllMemberClusters)
+
+		It("can update the manifests", func() {
+			Eventually(func() error {
+				cm := &corev1.ConfigMap{}
+				if err := hubClient.Get(ctx, client.ObjectKey{Name: cmName, Namespace: nsName}, cm); err != nil {
+					return fmt.Errorf("failed to get configmap: %w", err)
+				}
+
+				if cm.Data == nil {
+					cm.Data = make(map[string]string)
+				}
+				cm.Data[cmDataKey] = cmDataVal1
+
+				if err := hubClient.Update(ctx, cm); err != nil {
+					return fmt.Errorf("failed to update configmap: %w", err)
+				}
+				return nil
+			}, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update the manifests")
+		})
+
+		It("should update CRP status as expected", func() {
+			crpStatusUpdatedActual := crpStatusUpdatedActual(workResourceIdentifiers(), allMemberClusterNames, nil, "1")
+			// Longer timeout is used to allow full rollouts.
+			Eventually(crpStatusUpdatedActual, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected")
+		})
+
+		It("should refresh the resources on all member clusters", func() {
+			for idx := range allMemberClusters {
+				memberClient := allMemberClusters[idx].KubeClient
+
+				Eventually(func() error {
+					cm := &corev1.ConfigMap{}
+					if err := memberClient.Get(ctx, client.ObjectKey{Name: cmName, Namespace: nsName}, cm); err != nil {
+						return fmt.Errorf("failed to get configmap: %w", err)
+					}
+
+					// To keep things simple, here the config map for comparison is
+					// rebuilt from the retrieved data.
+					rebuiltCM := &corev1.ConfigMap{
+						ObjectMeta: metav1.ObjectMeta{
+							Name:      cm.Name,
+							Namespace: cm.Namespace,
+						},
+						Data: cm.Data,
+					}
+					wantCM := &corev1.ConfigMap{
+						ObjectMeta: metav1.ObjectMeta{
+							Name:      cmName,
+							Namespace: nsName,
+						},
+						Data: map[string]string{
+							cmDataKey: cmDataVal1,
+						},
+					}
+					if diff := cmp.Diff(rebuiltCM, wantCM); diff != "" {
+						return fmt.Errorf("configMap diff (-got, +want):\n%s", diff)
+					}
+					return nil
+				}, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to refresh the resources on %s", allMemberClusters[idx].ClusterName)
+			}
+		})
+
+		AfterAll(func() {
+			ensureCRPAndRelatedResourcesDeleted(crpName, allMemberClusters)
+		})
+	})
+})
+
 var _ = Describe("switching apply strategies", func() {
 	Context("switch from client-side apply to report diff", Ordered, func() {
 		crpName := fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess())
