diff --git a/test/e2e/drain_tool_test.go b/test/e2e/drain_tool_test.go new file mode 100644 index 000000000..d2dba0060 --- /dev/null +++ b/test/e2e/drain_tool_test.go @@ -0,0 +1,404 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "fmt" + "os/exec" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + + clusterv1beta1 "github.com/kubefleet-dev/kubefleet/apis/cluster/v1beta1" + placementv1beta1 "github.com/kubefleet-dev/kubefleet/apis/placement/v1beta1" + "github.com/kubefleet-dev/kubefleet/pkg/utils/condition" + "github.com/kubefleet-dev/kubefleet/test/e2e/framework" + testutilseviction "github.com/kubefleet-dev/kubefleet/test/utils/eviction" + toolsutils "github.com/kubefleet-dev/kubefleet/tools/utils" +) + +var _ = Describe("Drain cluster successfully", Ordered, Serial, func() { + crpName := fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess()) + var drainEvictions []placementv1beta1.ClusterResourcePlacementEviction + var drainClusters, noDrainClusters []*framework.Cluster + var noDrainClusterNames []string + var testStartTime time.Time + + BeforeAll(func() { + testStartTime = time.Now() + drainClusters = []*framework.Cluster{memberCluster1EastProd} + noDrainClusters = []*framework.Cluster{memberCluster2EastCanary, memberCluster3WestProd} + noDrainClusterNames = []string{memberCluster2EastCanaryName, memberCluster3WestProdName} + + By("creating work resources") + createWorkResources() + + // Create the CRP. + createCRP(crpName) + }) + + AfterAll(func() { + // remove drain evictions. + for _, eviction := range drainEvictions { + ensureCRPEvictionDeleted(eviction.Name) + } + // remove taints from member cluster 1 again to guarantee clean up of cordon taint on test failure. 
+		removeTaintsFromMemberClusters([]string{memberCluster1EastProdName})
+		ensureCRPAndRelatedResourcesDeleted(crpName, allMemberClusters)
+	})
+
+	It("should update cluster resource placement status as expected", func() {
+		crpStatusUpdatedActual := crpStatusUpdatedActual(workResourceIdentifiers(), allMemberClusterNames, nil, "0")
+		Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update cluster resource placement status as expected")
+	})
+
+	It("should place resources on all available member clusters", checkIfPlacedWorkResourcesOnAllMemberClusters)
+
+	It("drain cluster using binary, should succeed", func() { runDrainClusterBinary(hubClusterName, memberCluster1EastProdName) })
+
+	It("should update member cluster with cordon taint", func() {
+		taintAddedActual := memberClusterCordonTaintAddedActual(memberCluster1EastProdName)
+		Eventually(taintAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cordon taint to member cluster")
+	})
+
+	It("should update drain cluster resource placement evictions status as expected", func() {
+		var fetchError error
+		drainEvictions, fetchError = fetchDrainEvictions(crpName, memberCluster1EastProdName, testStartTime)
+		Expect(fetchError).Should(Succeed(), "Failed to fetch drain evictions")
+		for _, eviction := range drainEvictions {
+			crpEvictionStatusUpdatedActual := testutilseviction.StatusUpdatedActual(
+				ctx, hubClient, eviction.Name,
+				&testutilseviction.IsValidEviction{IsValid: true, Msg: condition.EvictionValidMessage},
+				&testutilseviction.IsExecutedEviction{IsExecuted: true, Msg: condition.EvictionAllowedNoPDBMessage})
+			Eventually(crpEvictionStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update cluster resource placement eviction status as expected")
+		}
+	})
+
+	It("should ensure no resources exist on drained clusters", func() {
+		for _, cluster := range drainClusters {
+			resourceRemovedActual := workNamespaceRemovedFromClusterActual(cluster)
+			Eventually(resourceRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to check that resources no longer exist on member cluster")
+		}
+	})
+
+	It("should update cluster resource placement status as expected", func() {
+		crpStatusUpdatedActual := crpStatusUpdatedActual(workResourceIdentifiers(), noDrainClusterNames, nil, "0")
+		Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update cluster resource placement status as expected")
+		Consistently(crpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update cluster resource placement status as expected")
+	})
+
+	It("should still place resources on the selected clusters which were not drained", func() {
+		for _, cluster := range noDrainClusters {
+			resourcePlacedActual := workNamespaceAndConfigMapPlacedOnClusterActual(cluster)
+			Eventually(resourcePlacedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to place resources on the selected clusters")
+		}
+	})
+
+	It("uncordon cluster using binary", func() { runUncordonClusterBinary(hubClusterName, memberCluster1EastProdName) })
+
+	It("should remove cordon taint from member cluster", func() {
+		taintRemovedActual := memberClusterCordonTaintRemovedActual(memberCluster1EastProdName)
+		Eventually(taintRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove cordon taint from member cluster")
+	})
+})
+
+var 
_ = Describe("Drain cluster blocked - ClusterResourcePlacementDisruptionBudget blocks evictions on all clusters", Ordered, Serial, func() { + crpName := fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess()) + var drainEvictions []placementv1beta1.ClusterResourcePlacementEviction + var testStartTime time.Time + + BeforeAll(func() { + testStartTime = time.Now() + By("creating work resources") + createWorkResources() + + // Create the CRP. + createCRP(crpName) + }) + + AfterAll(func() { + // remove drain evictions. + for _, eviction := range drainEvictions { + ensureCRPEvictionDeleted(eviction.Name) + } + // remove taints from member cluster 1 again to guarantee clean up of cordon taint on test failure. + removeTaintsFromMemberClusters([]string{memberCluster1EastProdName}) + ensureCRPDisruptionBudgetDeleted(crpName) + ensureCRPAndRelatedResourcesDeleted(crpName, allMemberClusters) + }) + + It("should update cluster resource placement status as expected", func() { + crpStatusUpdatedActual := crpStatusUpdatedActual(workResourceIdentifiers(), allMemberClusterNames, nil, "0") + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update cluster resource placement status as expected") + }) + + It("should place resources on all available member clusters", checkIfPlacedWorkResourcesOnAllMemberClusters) + + It("create cluster resource placement disruption budget to block draining", func() { + crpdb := placementv1beta1.ClusterResourcePlacementDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName, + }, + Spec: placementv1beta1.PlacementDisruptionBudgetSpec{ + MinAvailable: &intstr.IntOrString{ + Type: intstr.Int, + IntVal: int32(len(allMemberClusterNames)), + }, + }, + } + Expect(hubClient.Create(ctx, &crpdb)).To(Succeed(), "Failed to create CRP Disruption Budget %s", crpName) + }) + + It("drain cluster using binary, should fail due to CRPDB", func() { runDrainClusterBinary(hubClusterName, memberCluster1EastProdName) }) + + It("should update member cluster with cordon taint", func() { + taintAddedActual := memberClusterCordonTaintAddedActual(memberCluster1EastProdName) + Eventually(taintAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cordon taint to member cluster") + }) + + It("should update drain cluster resource placement evictions status as expected", func() { + var fetchError error + drainEvictions, fetchError = fetchDrainEvictions(crpName, memberCluster1EastProdName, testStartTime) + Expect(fetchError).Should(Succeed(), "Failed to fetch drain evictions") + for _, eviction := range drainEvictions { + crpEvictionStatusUpdatedActual := testutilseviction.StatusUpdatedActual( + ctx, hubClient, eviction.Name, + &testutilseviction.IsValidEviction{IsValid: true, Msg: condition.EvictionValidMessage}, + &testutilseviction.IsExecutedEviction{IsExecuted: false, Msg: fmt.Sprintf(condition.EvictionBlockedPDBSpecifiedMessageFmt, 3, 3)}) + Eventually(crpEvictionStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update cluster resource placement eviction status as expected") + } + }) + + It("should ensure cluster resource placement status remains unchanged", func() { + crpStatusUpdatedActual := crpStatusUpdatedActual(workResourceIdentifiers(), allMemberClusterNames, nil, "0") + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update cluster resource placement status as expected") + Consistently(crpStatusUpdatedActual, 
consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update cluster resource placement status as expected") + }) + + It("should still place resources on all available member clusters", checkIfPlacedWorkResourcesOnAllMemberClusters) + + It("uncordon cluster using binary", func() { runUncordonClusterBinary(hubClusterName, memberCluster1EastProdName) }) + + It("should remove cordon taint from member cluster", func() { + taintRemovedActual := memberClusterCordonTaintRemovedActual(memberCluster1EastProdName) + Eventually(taintRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove cordon taint from member cluster") + }) +}) + +var _ = Describe("Drain is allowed on one cluster, blocked on others - ClusterResourcePlacementDisruptionBudget blocks evictions on some clusters", Ordered, Serial, func() { + crpName := fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess()) + var drainEvictions []placementv1beta1.ClusterResourcePlacementEviction + var drainClusters, noDrainClusters []*framework.Cluster + var noDrainClusterNames []string + var testStartTime time.Time + + BeforeAll(func() { + testStartTime = time.Now() + drainClusters = []*framework.Cluster{memberCluster1EastProd} + noDrainClusters = []*framework.Cluster{memberCluster2EastCanary, memberCluster3WestProd} + noDrainClusterNames = []string{memberCluster2EastCanaryName, memberCluster3WestProdName} + + By("creating work resources") + createWorkResources() + + // Create the CRP. + createCRP(crpName) + }) + + AfterAll(func() { + // remove remaining drain evictions. + for _, eviction := range drainEvictions { + ensureCRPEvictionDeleted(eviction.Name) + } + // remove taints from member clusters 1,2 again to guarantee clean up of cordon taint on test failure. 
+ removeTaintsFromMemberClusters([]string{memberCluster1EastProdName, memberCluster2EastCanaryName}) + ensureCRPDisruptionBudgetDeleted(crpName) + ensureCRPAndRelatedResourcesDeleted(crpName, allMemberClusters) + }) + + It("should update cluster resource placement status as expected", func() { + crpStatusUpdatedActual := crpStatusUpdatedActual(workResourceIdentifiers(), allMemberClusterNames, nil, "0") + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update cluster resource placement status as expected") + }) + + It("should place resources on all available member clusters", checkIfPlacedWorkResourcesOnAllMemberClusters) + + It("create cluster resource placement disruption budget to block draining on all but one cluster", func() { + crpdb := placementv1beta1.ClusterResourcePlacementDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName, + }, + Spec: placementv1beta1.PlacementDisruptionBudgetSpec{ + MinAvailable: &intstr.IntOrString{ + Type: intstr.Int, + IntVal: int32(len(allMemberClusterNames)) - 1, + }, + }, + } + Expect(hubClient.Create(ctx, &crpdb)).To(Succeed(), "Failed to create CRP Disruption Budget %s", crpName) + }) + + It("drain cluster using binary, should succeed", func() { runDrainClusterBinary(hubClusterName, memberCluster1EastProdName) }) + + It("should update member cluster with cordon taint", func() { + taintAddedActual := memberClusterCordonTaintAddedActual(memberCluster1EastProdName) + Eventually(taintAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cordon taint to member cluster") + }) + + It("should update drain cluster resource placement evictions status as expected", func() { + var fetchError error + drainEvictions, fetchError = fetchDrainEvictions(crpName, memberCluster1EastProdName, testStartTime) + Expect(fetchError).Should(Succeed(), "Failed to fetch drain evictions") + for _, eviction := range drainEvictions { + crpEvictionStatusUpdatedActual := testutilseviction.StatusUpdatedActual( + ctx, hubClient, eviction.Name, + &testutilseviction.IsValidEviction{IsValid: true, Msg: condition.EvictionValidMessage}, + &testutilseviction.IsExecutedEviction{IsExecuted: true, Msg: fmt.Sprintf(condition.EvictionAllowedPDBSpecifiedMessageFmt, 3, 3)}) + Eventually(crpEvictionStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update cluster resource placement eviction status as expected") + } + }) + + It("remove drain evictions for member cluster 1", func() { + for _, eviction := range drainEvictions { + ensureCRPEvictionDeleted(eviction.Name) + } + }) + + It("drain cluster using binary, should fail due to CRPDB", func() { runDrainClusterBinary(hubClusterName, memberCluster2EastCanaryName) }) + + It("should update member cluster with cordon taint", func() { + taintAddedActual := memberClusterCordonTaintAddedActual(memberCluster2EastCanaryName) + Eventually(taintAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cordon taint to member cluster") + }) + + It("should update drain cluster resource placement evictions status as expected", func() { + var fetchError error + drainEvictions, fetchError = fetchDrainEvictions(crpName, memberCluster2EastCanaryName, testStartTime) + Expect(fetchError).Should(Succeed(), "Failed to fetch drain evictions") + for _, eviction := range drainEvictions { + crpEvictionStatusUpdatedActual := testutilseviction.StatusUpdatedActual( + ctx, hubClient, eviction.Name, + 
&testutilseviction.IsValidEviction{IsValid: true, Msg: condition.EvictionValidMessage},
+				&testutilseviction.IsExecutedEviction{IsExecuted: false, Msg: fmt.Sprintf(condition.EvictionBlockedPDBSpecifiedMessageFmt, 2, 2)})
+			Eventually(crpEvictionStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update cluster resource placement eviction status as expected")
+		}
+	})
+
+	It("should ensure no resources exist on drained clusters", func() {
+		for _, cluster := range drainClusters {
+			resourceRemovedActual := workNamespaceRemovedFromClusterActual(cluster)
+			Eventually(resourceRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to check that resources no longer exist on member cluster")
+		}
+	})
+
+	It("should update cluster resource placement status as expected", func() {
+		crpStatusUpdatedActual := crpStatusUpdatedActual(workResourceIdentifiers(), noDrainClusterNames, nil, "0")
+		Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update cluster resource placement status as expected")
+		Consistently(crpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update cluster resource placement status as expected")
+	})
+
+	It("should still place resources on the selected clusters which were not drained", func() {
+		for _, cluster := range noDrainClusters {
+			resourcePlacedActual := workNamespaceAndConfigMapPlacedOnClusterActual(cluster)
+			Eventually(resourcePlacedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to place resources on the selected clusters")
+		}
+	})
+
+	It("uncordon cluster using binary", func() { runUncordonClusterBinary(hubClusterName, memberCluster1EastProdName) })
+
+	It("should remove cordon taint from member cluster", func() {
+		taintRemovedActual := memberClusterCordonTaintRemovedActual(memberCluster1EastProdName)
+		Eventually(taintRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove cordon taint from member cluster")
+	})
+
+	It("uncordon cluster using binary", func() { runUncordonClusterBinary(hubClusterName, memberCluster2EastCanaryName) })
+
+	It("should remove cordon taint from member cluster", func() {
+		taintRemovedActual := memberClusterCordonTaintRemovedActual(memberCluster2EastCanaryName)
+		Eventually(taintRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove cordon taint from member cluster")
+	})
+})
+
+func runDrainClusterBinary(hubClusterName, memberClusterName string) {
+	By(fmt.Sprintf("draining cluster %s", memberClusterName))
+	cmd := exec.Command(drainBinaryPath,
+		"--hubClusterContext", hubClusterName,
+		"--clusterName", memberClusterName)
+	_, err := cmd.CombinedOutput()
+	Expect(err).ToNot(HaveOccurred(), "Drain command failed with error: %v", err)
+}
+
+func runUncordonClusterBinary(hubClusterName, memberClusterName string) {
+	By(fmt.Sprintf("uncordoning cluster %s", memberClusterName))
+	cmd := exec.Command(uncordonBinaryPath,
+		"--hubClusterContext", hubClusterName,
+		"--clusterName", memberClusterName)
+	_, err := cmd.CombinedOutput()
+	Expect(err).ToNot(HaveOccurred(), "Uncordon command failed with error: %v", err)
+}
+
+func fetchDrainEvictions(crpName, clusterName string, testStartTime time.Time) ([]placementv1beta1.ClusterResourcePlacementEviction, error) {
+	var evictionList placementv1beta1.ClusterResourcePlacementEvictionList
+	if err := hubClient.List(ctx, &evictionList); err != nil {
+
return nil, err + } + var filteredDrainEvictions []placementv1beta1.ClusterResourcePlacementEviction + for _, eviction := range evictionList.Items { + if eviction.CreationTimestamp.Time.After(testStartTime) && + eviction.Spec.PlacementName == crpName && + eviction.Spec.ClusterName == clusterName { + filteredDrainEvictions = append(filteredDrainEvictions, eviction) + } + } + return filteredDrainEvictions, nil +} + +func memberClusterCordonTaintAddedActual(mcName string) func() error { + return func() error { + var mc clusterv1beta1.MemberCluster + if err := hubClient.Get(ctx, types.NamespacedName{Name: mcName}, &mc); err != nil { + return fmt.Errorf("failed to get member cluster %s: %w", mcName, err) + } + + for _, taint := range mc.Spec.Taints { + if taint == toolsutils.CordonTaint { + return nil + } + } + return fmt.Errorf("cordon taint not found on member cluster %s", mcName) + } +} + +func memberClusterCordonTaintRemovedActual(mcName string) func() error { + return func() error { + var mc clusterv1beta1.MemberCluster + if err := hubClient.Get(ctx, types.NamespacedName{Name: mcName}, &mc); err != nil { + return fmt.Errorf("failed to get member cluster %s: %w", mcName, err) + } + + for _, taint := range mc.Spec.Taints { + if taint == toolsutils.CordonTaint { + return fmt.Errorf("cordon taint found on member cluster %s", mcName) + } + } + return nil + } +} diff --git a/test/e2e/setup.sh b/test/e2e/setup.sh index d2760fa91..9c4f17b06 100755 --- a/test/e2e/setup.sh +++ b/test/e2e/setup.sh @@ -39,12 +39,12 @@ REGIONS=("" "" "eastasia") AKS_NODE_REGIONS=("westus" "northeurope" "eastasia") # The SKUs that should be set on each node of the respective clusters; if the AKS property # provider is used. See the AKS documentation for specifics. -# +# # Note that this is for information only; kind nodes always use the same fixed setup # (total/allocatable capacity = host capacity). AKS_NODE_SKUS=("Standard_A4_v2" "Standard_B4ms" "Standard_D8s_v5" "Standard_E16_v5" "Standard_M16ms") AKS_SKU_COUNT=${#AKS_NODE_SKUS[@]} -# The number of clusters that has pre-defined configuration for testing purposes. +# The number of clusters that has pre-defined configuration for testing purposes. RESERVED_CLUSTER_COUNT=${MEMBER_CLUSTER_COUNT} # Create the kind clusters @@ -87,7 +87,7 @@ then k=$(( RANDOM % AKS_SKU_COUNT )) kubectl label node "${NODES[$j]}" beta.kubernetes.io/instance-type=${AKS_NODE_SKUS[$k]} done - done + done fi # Build the Fleet agent images @@ -207,3 +207,13 @@ do fi done +# Create tools directory if it doesn't exist +mkdir -p ../../hack/tools/bin + +# Build drain binary +echo "Building drain binary..." +go build -o ../../hack/tools/bin/kubectl-draincluster ../../tools/draincluster + +# Build uncordon binary +echo "Building uncordon binary..." 
+go build -o ../../hack/tools/bin/kubectl-uncordoncluster ../../tools/uncordoncluster diff --git a/test/e2e/setup_test.go b/test/e2e/setup_test.go index fce8c8f83..835fb6783 100644 --- a/test/e2e/setup_test.go +++ b/test/e2e/setup_test.go @@ -19,8 +19,10 @@ package e2e import ( "context" "flag" + "fmt" "log" "os" + "path/filepath" "sync" "testing" "time" @@ -163,6 +165,11 @@ var ( } ) +var ( + drainBinaryPath = filepath.Join("../../", "hack", "tools", "bin", "kubectl-draincluster") + uncordonBinaryPath = filepath.Join("../../", "hack", "tools", "bin", "kubectl-uncordoncluster") +) + var ( isAzurePropertyProviderEnabled = (os.Getenv(propertyProviderEnvVarName) == azurePropertyProviderEnvVarValue) @@ -340,6 +347,12 @@ func beforeSuiteForAllProcesses() { for i := range allMemberClusters { allMemberClusterNames = append(allMemberClusterNames, allMemberClusters[i].ClusterName) } + + // Check if drain cluster and uncordon cluster binaries exist. + _, err := os.Stat(drainBinaryPath) + Expect(os.IsNotExist(err)).To(BeFalse(), fmt.Sprintf("drain binary not found at %s", drainBinaryPath)) + _, err = os.Stat(uncordonBinaryPath) + Expect(os.IsNotExist(err)).To(BeFalse(), fmt.Sprintf("uncordon binary not found at %s", uncordonBinaryPath)) }) } diff --git a/tools/draincluster/README.md b/tools/draincluster/README.md index 07d6653f2..1effc26ac 100644 --- a/tools/draincluster/README.md +++ b/tools/draincluster/README.md @@ -3,11 +3,11 @@ 1. Build the binary for the `draincluster` tool by running the following command in the root directory of the fleet repo: ```bash -go build -o ./hack/tools/bin/kubectl-draincluster ./tools/draincluster/main.go +go build -o ./hack/tools/bin/kubectl-draincluster ./tools/draincluster/ ``` 2. Copy the binary to a directory in your `PATH` so that it can be run as a kubectl plugin. For example, you can move it to -`/usr/local/bin`: + `/usr/local/bin`: ```bash sudo cp ./hack/tools/bin/kubectl-draincluster /usr/local/bin/ @@ -33,13 +33,13 @@ The following compatible plugins are available: /usr/local/bin/kubectl-draincluster ``` -please refer to the [kubectl plugin documentation](https://kubernetes.io/docs/tasks/extend-kubectl/kubectl-plugins/) for +please refer to the [kubectl plugin documentation](https://kubernetes.io/docs/tasks/extend-kubectl/kubectl-plugins/) for more information. # Drain Member Cluster connected to a fleet -After following the steps above to build the `draincluster` tool as a kubectl plugin, you can use it to remove all -resources propagated to the member cluster from the hub cluster by any `Placement` resource. This is useful when you +After following the steps above to build the `draincluster` tool as a kubectl plugin, you can use it to remove all +resources propagated to the member cluster from the hub cluster by any `Placement` resource. This is useful when you want to temporarily move all workloads off a member cluster in preparation for an event like upgrade or reconfiguration. The `draincluster` tool can be used to drain a member cluster by running the following command: @@ -68,8 +68,8 @@ CURRENT NAME CLUSTER AUTHINFO Here you can see that the context of the hub cluster is called `hub` under the `NAME` column. -The command adds a `Taint` to the `MemberCluster` resource of the member cluster to prevent any new resources from being -propagated to the member cluster. 
Then it creates `Eviction` objects for all the `Placement` objects that have propagated +The command adds a `Taint` to the `MemberCluster` resource of the member cluster to prevent any new resources from being +propagated to the member cluster. Then it creates `Eviction` objects for all the `Placement` objects that have propagated resources to the member cluster. >> **Note**: The `draincluster` tool is a best-effort mechanism at the moment, so once the command is run successfully diff --git a/tools/draincluster/drain/drain.go b/tools/draincluster/drain.go similarity index 67% rename from tools/draincluster/drain/drain.go rename to tools/draincluster/drain.go index 37828bb44..ef64c3293 100644 --- a/tools/draincluster/drain/drain.go +++ b/tools/draincluster/drain.go @@ -1,9 +1,20 @@ /* -Copyright (c) Microsoft Corporation. -Licensed under the MIT license. +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ -package drain +package main import ( "context" @@ -32,16 +43,16 @@ const ( resourceIdentifierKeyFormat = "%s/%s/%s/%s/%s" ) -type Helper struct { - HubClient client.Client - ClusterName string +type helper struct { + hubClient client.Client + clusterName string } -func (h *Helper) Drain(ctx context.Context) (bool, error) { +func (h *helper) Drain(ctx context.Context) (bool, error) { if err := h.cordon(ctx); err != nil { - return false, fmt.Errorf("failed to cordon member cluster %s: %w", h.ClusterName, err) + return false, fmt.Errorf("failed to cordon member cluster %s: %w", h.clusterName, err) } - log.Printf("Successfully cordoned member cluster %s by adding cordon taint", h.ClusterName) + log.Printf("Successfully cordoned member cluster %s by adding cordon taint", h.clusterName) crpNameMap, err := h.fetchClusterResourcePlacementNamesToEvict(ctx) if err != nil { @@ -49,14 +60,14 @@ func (h *Helper) Drain(ctx context.Context) (bool, error) { } if len(crpNameMap) == 0 { - log.Printf("There are currently no resources propagated to %s from fleet using ClusterResourcePlacement resources", h.ClusterName) + log.Printf("There are currently no resources propagated to %s from fleet using ClusterResourcePlacement resources", h.clusterName) return true, nil } isDrainSuccessful := true // create eviction objects for all . 
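+	// Each entry in crpNameMap is a CRP that still has resources placed on this member cluster;
+	// one eviction object is created per CRP below and then polled until it reaches a terminal state.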
for crpName := range crpNameMap { - evictionName, err := generateDrainEvictionName(crpName, h.ClusterName) + evictionName, err := generateDrainEvictionName(crpName, h.clusterName) if err != nil { return false, err } @@ -70,27 +81,29 @@ func (h *Helper) Drain(ctx context.Context) (bool, error) { }, Spec: placementv1beta1.PlacementEvictionSpec{ PlacementName: crpName, - ClusterName: h.ClusterName, + ClusterName: h.clusterName, }, } - return h.HubClient.Create(ctx, &eviction) + return h.hubClient.Create(ctx, &eviction) }) if err != nil { - return false, fmt.Errorf("failed to create eviction for CRP %s: %w", crpName, err) + return false, fmt.Errorf("failed to create eviction %s for CRP %s targeting member cluster %s: %w", evictionName, crpName, h.clusterName, err) } + log.Printf("Created eviction %s for CRP %s targeting member cluster %s", evictionName, crpName, h.clusterName) + // wait until evictions reach a terminal state. var eviction placementv1beta1.ClusterResourcePlacementEviction err = wait.ExponentialBackoffWithContext(ctx, retry.DefaultBackoff, func(ctx context.Context) (bool, error) { - if err := h.HubClient.Get(ctx, types.NamespacedName{Name: evictionName}, &eviction); err != nil { - return false, fmt.Errorf("failed to get eviction %s: %w", evictionName, err) + if err := h.hubClient.Get(ctx, types.NamespacedName{Name: evictionName}, &eviction); err != nil { + return false, fmt.Errorf("failed to get eviction %s for CRP %s targeting member cluster %s: %w", evictionName, crpName, h.clusterName, err) } return evictionutils.IsEvictionInTerminalState(&eviction), nil }) if err != nil { - return false, fmt.Errorf("failed to wait for evictions to reach terminal state: %w", err) + return false, fmt.Errorf("failed to wait for eviction %s for CRP %s targeting member cluster %s to reach terminal state: %w", evictionName, crpName, h.clusterName, err) } // TODO: add safeguards to check if eviction conditions are set to unknown. @@ -100,17 +113,17 @@ func (h *Helper) Drain(ctx context.Context) (bool, error) { if validCondition.Reason == condition.EvictionInvalidMissingCRPMessage || validCondition.Reason == condition.EvictionInvalidDeletingCRPMessage || validCondition.Reason == condition.EvictionInvalidMissingCRBMessage { - log.Printf("eviction %s is invalid with reason %s for CRP %s, but drain will succeed", evictionName, validCondition.Reason, crpName) + log.Printf("eviction %s is invalid with reason %s for CRP %s targeting member cluster %s, but drain will succeed", evictionName, validCondition.Reason, crpName, h.clusterName) continue } } executedCondition := eviction.GetCondition(string(placementv1beta1.PlacementEvictionConditionTypeExecuted)) if executedCondition == nil || executedCondition.Status == metav1.ConditionFalse { isDrainSuccessful = false - log.Printf("eviction %s was not executed successfully for CRP %s", evictionName, crpName) + log.Printf("eviction %s was not executed successfully for CRP %s targeting member cluster %s", evictionName, crpName, h.clusterName) continue } - log.Printf("eviction %s was executed successfully for CRP %s", evictionName, crpName) + log.Printf("eviction %s was executed successfully for CRP %s targeting member cluster %s", evictionName, crpName, h.clusterName) // log each cluster scoped resource evicted for CRP. 
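+		// Only the cluster-scoped resources selected by the CRP are reported here; namespaced resources are omitted.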
clusterScopedResourceIdentifiers, err := h.collectClusterScopedResourcesSelectedByCRP(ctx, crpName) if err != nil { @@ -118,18 +131,18 @@ func (h *Helper) Drain(ctx context.Context) (bool, error) { continue } for _, resourceIdentifier := range clusterScopedResourceIdentifiers { - log.Printf("evicted resource %s propagated by CRP %s", generateResourceIdentifierKey(resourceIdentifier), crpName) + log.Printf("evicted resource %s propagated by CRP %s targeting member cluster %s", generateResourceIdentifierKey(resourceIdentifier), crpName, h.clusterName) } } return isDrainSuccessful, nil } -func (h *Helper) cordon(ctx context.Context) error { +func (h *helper) cordon(ctx context.Context) error { // add taint to member cluster to ensure resources aren't scheduled on it. return retry.RetryOnConflict(retry.DefaultRetry, func() error { var mc clusterv1beta1.MemberCluster - if err := h.HubClient.Get(ctx, types.NamespacedName{Name: h.ClusterName}, &mc); err != nil { + if err := h.hubClient.Get(ctx, types.NamespacedName{Name: h.clusterName}, &mc); err != nil { return err } @@ -143,13 +156,13 @@ func (h *Helper) cordon(ctx context.Context) error { // add taint to member cluster to cordon. mc.Spec.Taints = append(mc.Spec.Taints, toolsutils.CordonTaint) - return h.HubClient.Update(ctx, &mc) + return h.hubClient.Update(ctx, &mc) }) } -func (h *Helper) fetchClusterResourcePlacementNamesToEvict(ctx context.Context) (map[string]bool, error) { +func (h *helper) fetchClusterResourcePlacementNamesToEvict(ctx context.Context) (map[string]bool, error) { var crbList placementv1beta1.ClusterResourceBindingList - if err := h.HubClient.List(ctx, &crbList); err != nil { + if err := h.hubClient.List(ctx, &crbList); err != nil { return map[string]bool{}, fmt.Errorf("failed to list cluster resource bindings: %w", err) } @@ -157,7 +170,7 @@ func (h *Helper) fetchClusterResourcePlacementNamesToEvict(ctx context.Context) // find all unique CRP names for which eviction needs to occur. 
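+	// Bindings that are already being deleted are skipped, since their resources are on the way off the cluster and need no eviction.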
for i := range crbList.Items { crb := crbList.Items[i] - if crb.Spec.TargetCluster == h.ClusterName && crb.DeletionTimestamp == nil { + if crb.Spec.TargetCluster == h.clusterName && crb.DeletionTimestamp == nil { crpName, ok := crb.GetLabels()[placementv1beta1.CRPTrackingLabel] if !ok { return map[string]bool{}, fmt.Errorf("failed to get CRP name from binding %s", crb.Name) @@ -169,9 +182,9 @@ func (h *Helper) fetchClusterResourcePlacementNamesToEvict(ctx context.Context) return crpNameMap, nil } -func (h *Helper) collectClusterScopedResourcesSelectedByCRP(ctx context.Context, crpName string) ([]placementv1beta1.ResourceIdentifier, error) { +func (h *helper) collectClusterScopedResourcesSelectedByCRP(ctx context.Context, crpName string) ([]placementv1beta1.ResourceIdentifier, error) { var crp placementv1beta1.ClusterResourcePlacement - if err := h.HubClient.Get(ctx, types.NamespacedName{Name: crpName}, &crp); err != nil { + if err := h.hubClient.Get(ctx, types.NamespacedName{Name: crpName}, &crp); err != nil { return nil, fmt.Errorf("failed to get ClusterResourcePlacement %s: %w", crpName, err) } @@ -188,7 +201,8 @@ func (h *Helper) collectClusterScopedResourcesSelectedByCRP(ctx context.Context, func generateDrainEvictionName(crpName, targetCluster string) (string, error) { evictionName := fmt.Sprintf(drainEvictionNameFormat, crpName, targetCluster, uuid.NewUUID()[:uuidLength]) - if errs := validation.IsQualifiedName(evictionName); len(errs) != 0 { + // check to see if eviction name is a valid DNS1123 subdomain name https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names. + if errs := validation.IsDNS1123Subdomain(evictionName); len(errs) != 0 { return "", fmt.Errorf("failed to format a qualified name for drain eviction object with CRP name %s, cluster name %s: %v", crpName, targetCluster, errs) } return evictionName, nil @@ -198,6 +212,9 @@ func generateResourceIdentifierKey(r placementv1beta1.ResourceIdentifier) string if len(r.Group) == 0 && len(r.Namespace) == 0 { return fmt.Sprintf(resourceIdentifierKeyFormat, "''", r.Version, r.Kind, "''", r.Name) } + if len(r.Group) == 0 { + return fmt.Sprintf(resourceIdentifierKeyFormat, "''", r.Version, r.Kind, r.Namespace, r.Name) + } if len(r.Namespace) == 0 { return fmt.Sprintf(resourceIdentifierKeyFormat, r.Group, r.Version, r.Kind, "''", r.Name) } diff --git a/tools/draincluster/drain_test.go b/tools/draincluster/drain_test.go new file mode 100644 index 000000000..62c1d0205 --- /dev/null +++ b/tools/draincluster/drain_test.go @@ -0,0 +1,512 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package main + +import ( + "context" + "errors" + "fmt" + "strings" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + clusterv1beta1 "github.com/kubefleet-dev/kubefleet/apis/cluster/v1beta1" + placementv1beta1 "github.com/kubefleet-dev/kubefleet/apis/placement/v1beta1" + toolsutils "github.com/kubefleet-dev/kubefleet/tools/utils" +) + +func TestFetchClusterResourcePlacementNamesToEvict(t *testing.T) { + tests := []struct { + name string + targetCluster string + bindings []placementv1beta1.ClusterResourceBinding + wantErr error + wantMap map[string]bool + }{ + { + name: "successfully collected CRPs to evict", + targetCluster: "test-cluster1", + bindings: []placementv1beta1.ClusterResourceBinding{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-crb1", + Labels: map[string]string{ + placementv1beta1.CRPTrackingLabel: "test-crp1", + }, + }, + Spec: placementv1beta1.ResourceBindingSpec{ + TargetCluster: "test-cluster1", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-crb2", + Labels: map[string]string{ + placementv1beta1.CRPTrackingLabel: "test-crp1", + }, + }, + Spec: placementv1beta1.ResourceBindingSpec{ + TargetCluster: "test-cluster2", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-crb3", + Labels: map[string]string{ + placementv1beta1.CRPTrackingLabel: "test-crp2", + }, + }, + Spec: placementv1beta1.ResourceBindingSpec{ + TargetCluster: "test-cluster1", + }, + }, + }, + wantErr: nil, + wantMap: map[string]bool{ + "test-crp1": true, + "test-crp2": true, + }, + }, + { + name: "no CRPs to evict", + targetCluster: "test-cluster1", + bindings: []placementv1beta1.ClusterResourceBinding{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-crb1", + Labels: map[string]string{ + placementv1beta1.CRPTrackingLabel: "test-crp1", + }, + }, + Spec: placementv1beta1.ResourceBindingSpec{ + TargetCluster: "test-cluster2", + }, + }, + }, + wantErr: nil, + wantMap: map[string]bool{}, + }, + { + name: "binding missing CRP label", + targetCluster: "test-cluster1", + bindings: []placementv1beta1.ClusterResourceBinding{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-crb1", + }, + Spec: placementv1beta1.ResourceBindingSpec{ + TargetCluster: "test-cluster1", + }, + }, + }, + wantErr: errors.New("failed to get CRP name from binding test-crb1"), + wantMap: map[string]bool{}, + }, + { + name: "skip CRB with deletionTimestamp", + targetCluster: "test-cluster1", + bindings: []placementv1beta1.ClusterResourceBinding{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-crb1", + Labels: map[string]string{ + placementv1beta1.CRPTrackingLabel: "test-crp1", + }, + DeletionTimestamp: &metav1.Time{Time: time.Now()}, + Finalizers: []string{"test-finalizer"}, + }, + Spec: placementv1beta1.ResourceBindingSpec{ + TargetCluster: "test-cluster1", + }, + }, + }, + wantErr: nil, + wantMap: map[string]bool{}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + var objects []client.Object + scheme := serviceScheme(t) + for i := range tc.bindings { + objects = append(objects, &tc.bindings[i]) + } + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(objects...). 
+ Build() + h := helper{ + hubClient: fakeClient, + clusterName: tc.targetCluster, + } + gotMap, gotErr := h.fetchClusterResourcePlacementNamesToEvict(context.Background()) + if tc.wantErr == nil { + if gotErr != nil { + t.Errorf("fetchClusterResourcePlacementNamesToEvict test %s failed, got error %v, want error %v", tc.name, gotErr, tc.wantErr) + } + if diff := cmp.Diff(gotMap, tc.wantMap); diff != "" { + t.Errorf("fetchClusterResourcePlacementNamesToEvict test %s failed (-got +want):\n%s", tc.name, diff) + } + } else if gotErr == nil || gotErr.Error() != tc.wantErr.Error() { + t.Errorf("fetchClusterResourcePlacementNamesToEvict test %s failed, got error %v, want error %v", tc.name, gotErr, tc.wantErr) + } + }) + } +} + +func TestCollectClusterScopedResourcesSelectedByCRP(t *testing.T) { + tests := []struct { + name string + crpName string + crp *placementv1beta1.ClusterResourcePlacement + wantResources []placementv1beta1.ResourceIdentifier + wantErr error + }{ + { + name: "successfully collect cluster scoped resources", + crpName: "test-crp", + crp: &placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-crp", + }, + Status: placementv1beta1.ClusterResourcePlacementStatus{ + SelectedResources: []placementv1beta1.ResourceIdentifier{ + { + Group: "rbac.authorization.k8s.io", + Version: "v1", + Kind: "ClusterRole", + Name: "test-cluster-role", + }, + { + Group: "", + Version: "v1", + Kind: "ConfigMap", + Name: "test-cm", + Namespace: "test-ns", + }, + { + Group: "rbac.authorization.k8s.io", + Version: "v1", + Kind: "ClusterRoleBinding", + Name: "test-cluster-role-binding", + }, + }, + }, + }, + wantResources: []placementv1beta1.ResourceIdentifier{ + { + Group: "rbac.authorization.k8s.io", + Version: "v1", + Kind: "ClusterRole", + Name: "test-cluster-role", + }, + { + Group: "rbac.authorization.k8s.io", + Version: "v1", + Kind: "ClusterRoleBinding", + Name: "test-cluster-role-binding", + }, + }, + wantErr: nil, + }, + { + name: "no cluster scoped resources", + crpName: "test-crp", + crp: &placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-crp", + }, + Status: placementv1beta1.ClusterResourcePlacementStatus{ + SelectedResources: []placementv1beta1.ResourceIdentifier{ + { + Group: "", + Version: "v1", + Kind: "ConfigMap", + Name: "test-cm", + Namespace: "test-ns", + }, + }, + }, + }, + wantResources: nil, + wantErr: nil, + }, + { + name: "crp not found", + crpName: "test-crp", + crp: nil, + wantResources: nil, + wantErr: errors.New("failed to get ClusterResourcePlacement test-crp: clusterresourceplacements.placement.kubernetes-fleet.io \"test-crp\" not found"), + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + scheme := serviceScheme(t) + var objects []client.Object + if tc.crp != nil { + objects = append(objects, tc.crp) + } + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(objects...). 
+ Build() + + h := helper{ + hubClient: fakeClient, + } + + gotResources, gotErr := h.collectClusterScopedResourcesSelectedByCRP(context.Background(), tc.crpName) + if tc.wantErr == nil { + if gotErr != nil { + t.Errorf("collectClusterScopedResourcesSelectedByCRP test %s failed, got error %v, want error %v", tc.name, gotErr, tc.wantErr) + } + if diff := cmp.Diff(gotResources, tc.wantResources); diff != "" { + t.Errorf("collectClusterScopedResourcesSelectedByCRP (-got +want):\n%s", diff) + } + } else if gotErr == nil || gotErr.Error() != tc.wantErr.Error() { + t.Errorf("collectClusterScopedResourcesSelectedByCRP test %s failed, got error %v, want error %v", tc.name, gotErr, tc.wantErr) + } + }) + } +} + +func TestGenerateResourceIdentifierKey(t *testing.T) { + tests := []struct { + name string + resource placementv1beta1.ResourceIdentifier + want string + }{ + { + name: "cluster scoped resource with empty group", + resource: placementv1beta1.ResourceIdentifier{ + Group: "", + Version: "v1", + Kind: "Namespace", + Name: "test-ns", + }, + want: "''/v1/Namespace/''/test-ns", + }, + { + name: "cluster scoped resource with non-empty group", + resource: placementv1beta1.ResourceIdentifier{ + Group: "rbac.authorization.k8s.io", + Version: "v1", + Kind: "ClusterRole", + Name: "test-role", + }, + want: "rbac.authorization.k8s.io/v1/ClusterRole/''/test-role", + }, + { + name: "namespaced resource with empty group", + resource: placementv1beta1.ResourceIdentifier{ + Group: "", + Version: "v1", + Kind: "ConfigMap", + Name: "test-cm", + Namespace: "test-ns", + }, + want: "''/v1/ConfigMap/test-ns/test-cm", + }, + { + name: "namespaced resource with non-empty group", + resource: placementv1beta1.ResourceIdentifier{ + Group: "apps", + Version: "v1", + Kind: "Deployment", + Name: "test-deploy", + Namespace: "test-ns", + }, + want: "apps/v1/Deployment/test-ns/test-deploy", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := generateResourceIdentifierKey(tc.resource) + if got != tc.want { + t.Errorf("generateResourceIdentifierKey = %v, want %v", got, tc.want) + } + }) + } +} + +func TestGenerateDrainEvictionName(t *testing.T) { + tests := []struct { + name string + crpName string + targetCluster string + wantErr error + }{ + { + name: "valid names", + crpName: "test-crp", + targetCluster: "test-cluster", + wantErr: nil, + }, + { + name: "name has invalid characters", + crpName: "test-crp", + targetCluster: "test_cluster$", + wantErr: errors.New("failed to format a qualified name for drain eviction object"), + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + evictionName, gotErr := generateDrainEvictionName(tc.crpName, tc.targetCluster) + if tc.wantErr == nil { + if gotErr != nil { + t.Errorf("generateDrainEvictionName() got error %v, want error %v", gotErr, tc.wantErr) + } + // Verify the generated name follows the expected format + prefix := fmt.Sprintf("drain-eviction-%s-%s-", tc.crpName, tc.targetCluster) + if !strings.HasPrefix(evictionName, prefix) { + t.Errorf("generateDrainEvictionName() = %v, want prefix %v", evictionName, prefix) + } + if len(evictionName) != len(prefix)+uuidLength { + t.Errorf("generateDrainEvictionName() generated name length = %v, want %v", len(evictionName), len(prefix)+uuidLength) + } + } else if gotErr == nil || !strings.Contains(gotErr.Error(), tc.wantErr.Error()) { + t.Errorf("generateDrainEvictionName() got error %v, want error %v", gotErr, tc.wantErr) + } + }) + } +} + +func TestCordon(t *testing.T) { + tests 
:= []struct { + name string + memberCluster *clusterv1beta1.MemberCluster + wantTaints []clusterv1beta1.Taint + wantErr error + }{ + { + name: "successfully add cordon taint, no other taints present", + memberCluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + }, + Spec: clusterv1beta1.MemberClusterSpec{ + Taints: []clusterv1beta1.Taint{}, + }, + }, + wantTaints: []clusterv1beta1.Taint{toolsutils.CordonTaint}, + wantErr: nil, + }, + { + name: "cordon taint already exists", + memberCluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + }, + Spec: clusterv1beta1.MemberClusterSpec{ + Taints: []clusterv1beta1.Taint{toolsutils.CordonTaint}, + }, + }, + wantTaints: []clusterv1beta1.Taint{toolsutils.CordonTaint}, + wantErr: nil, + }, + { + name: "successfully add cordon taint, cluster has other taints", + memberCluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + }, + Spec: clusterv1beta1.MemberClusterSpec{ + Taints: []clusterv1beta1.Taint{ + { + Key: "other-key", + Value: "other-value", + Effect: "NoSchedule", + }, + }, + }, + }, + wantTaints: []clusterv1beta1.Taint{ + { + Key: "other-key", + Value: "other-value", + Effect: "NoSchedule", + }, + toolsutils.CordonTaint, + }, + wantErr: nil, + }, + { + name: "member cluster not found", + memberCluster: nil, + wantTaints: nil, + wantErr: errors.New("memberclusters.cluster.kubernetes-fleet.io \"test-cluster\" not found"), + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + scheme := serviceScheme(t) + if err := clusterv1beta1.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add cluster v1beta1 scheme: %v", err) + } + + var objects []client.Object + if tc.memberCluster != nil { + objects = append(objects, tc.memberCluster) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(objects...). + Build() + + h := helper{ + hubClient: fakeClient, + clusterName: "test-cluster", + } + + gotErr := h.cordon(context.Background()) + if tc.wantErr == nil { + if gotErr != nil { + t.Errorf("cordon test %s failed, got error %v, want error %v", tc.name, gotErr, tc.wantErr) + } + var updatedCluster clusterv1beta1.MemberCluster + if err := fakeClient.Get(context.Background(), client.ObjectKey{Name: "test-cluster"}, &updatedCluster); err != nil { + t.Errorf("failed to get updated cluster: %v", err) + } + if diff := cmp.Diff(updatedCluster.Spec.Taints, tc.wantTaints); diff != "" { + t.Errorf("cordon taints mismatch (-got +want):\n%s", diff) + } + } else if gotErr == nil || gotErr.Error() != tc.wantErr.Error() { + t.Errorf("cordon test %s failed, got error %v, want error %v", tc.name, gotErr, tc.wantErr) + } + }) + } +} + +func serviceScheme(t *testing.T) *runtime.Scheme { + scheme := runtime.NewScheme() + if err := placementv1beta1.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add placement v1beta1 scheme: %v", err) + } + return scheme +} diff --git a/tools/draincluster/main.go b/tools/draincluster/main.go index 9cdcfea9c..162d2524b 100644 --- a/tools/draincluster/main.go +++ b/tools/draincluster/main.go @@ -1,6 +1,17 @@ /* -Copyright (c) Microsoft Corporation. -Licensed under the MIT license. +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ package main @@ -14,7 +25,6 @@ import ( clusterv1beta1 "github.com/kubefleet-dev/kubefleet/apis/cluster/v1beta1" placementv1beta1 "github.com/kubefleet-dev/kubefleet/apis/placement/v1beta1" - "github.com/kubefleet-dev/kubefleet/tools/draincluster/drain" toolsutils "github.com/kubefleet-dev/kubefleet/tools/utils" ) @@ -46,14 +56,14 @@ func main() { log.Fatalf("failed to create hub cluster client: %v", err) } - drainClusterHelper := drain.Helper{ - HubClient: hubClient, - ClusterName: *clusterName, + drainClusterHelper := helper{ + hubClient: hubClient, + clusterName: *clusterName, } isDrainSuccessful, err := drainClusterHelper.Drain(ctx) if err != nil { - log.Fatalf("failed to drain member cluster %s: %v", drainClusterHelper.ClusterName, err) + log.Fatalf("failed to drain member cluster %s: %v", drainClusterHelper.clusterName, err) } if isDrainSuccessful { @@ -66,7 +76,7 @@ func main() { log.Printf("retrying drain to ensure all resources propagated from hub cluster are evicted") isDrainRetrySuccessful, err := drainClusterHelper.Drain(ctx) if err != nil { - log.Fatalf("failed to drain cluster on retry %s: %v", drainClusterHelper.ClusterName, err) + log.Fatalf("failed to drain cluster on retry %s: %v", drainClusterHelper.clusterName, err) } if isDrainRetrySuccessful { log.Printf("drain retry was successful for cluster %s", *clusterName) diff --git a/tools/uncordoncluster/README.md b/tools/uncordoncluster/README.md index 2e796f2d9..bda179bc7 100644 --- a/tools/uncordoncluster/README.md +++ b/tools/uncordoncluster/README.md @@ -3,7 +3,7 @@ 1. Build the binary for the `uncordoncluster` tool by running the following command in the root directory of the fleet repo: ```bash -go build -o ./hack/tools/bin/kubectl-uncordoncluster ./tools/uncordoncluster/main.go +go build -o ./hack/tools/bin/kubectl-uncordoncluster ./tools/uncordoncluster ``` 2. Copy the binary to a directory in your `PATH` so that it can be run as a kubectl plugin. For example, you can move it to @@ -39,8 +39,8 @@ more information. # Uncordon Member Cluster connected to a fleet -After following the steps above to build the `uncordoncluster` tool as a kubectl plugin, you can use it to uncordon a -member cluster that has been cordoned using the `draincluster` tool. +After following the steps above to build the `uncordoncluster` tool as a kubectl plugin, you can use it to uncordon a +member cluster that has been cordoned using the `draincluster` tool. ``` kubectl uncordoncluster --hubClusterContext --clusterName @@ -66,5 +66,5 @@ CURRENT NAME CLUSTER AUTHINFO Here you can see that the context of the hub cluster is called `hub` under the `NAME` column. -The command removes the `cordon` taint added to a `MemberCluster` resource by the `draincluster` tool. If the `cordon` +The command removes the `cordon` taint added to a `MemberCluster` resource by the `draincluster` tool. If the `cordon` taint is not present, the command will not have any effect. 
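+
+For example, with a hub cluster context named `hub` and a member cluster named `member-1` (names shown here are illustrative), you can confirm the cordon taint is gone by inspecting the `MemberCluster` spec:
+
+```bash
+kubectl --context hub get memberclusters.cluster.kubernetes-fleet.io member-1 -o jsonpath='{.spec.taints}'
+```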
diff --git a/tools/uncordoncluster/main.go b/tools/uncordoncluster/main.go index 7aa653db8..448417387 100644 --- a/tools/uncordoncluster/main.go +++ b/tools/uncordoncluster/main.go @@ -1,6 +1,17 @@ /* -Copyright (c) Microsoft Corporation. -Licensed under the MIT license. +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ package main @@ -14,7 +25,6 @@ import ( clusterv1beta1 "github.com/kubefleet-dev/kubefleet/apis/cluster/v1beta1" placementv1beta1 "github.com/kubefleet-dev/kubefleet/apis/placement/v1beta1" - "github.com/kubefleet-dev/kubefleet/tools/uncordoncluster/uncordon" toolsutils "github.com/kubefleet-dev/kubefleet/tools/utils" ) @@ -46,9 +56,9 @@ func main() { log.Fatalf("failed to create hub cluster client: %v", err) } - uncordonClusterHelper := uncordon.Helper{ - HubClient: hubClient, - ClusterName: *clusterName, + uncordonClusterHelper := helper{ + hubClient: hubClient, + clusterName: *clusterName, } if err = uncordonClusterHelper.Uncordon(ctx); err != nil { diff --git a/tools/uncordoncluster/uncordon.go b/tools/uncordoncluster/uncordon.go new file mode 100644 index 000000000..9fd22db81 --- /dev/null +++ b/tools/uncordoncluster/uncordon.go @@ -0,0 +1,60 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package main + +import ( + "context" + + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/retry" + "sigs.k8s.io/controller-runtime/pkg/client" + + clusterv1beta1 "github.com/kubefleet-dev/kubefleet/apis/cluster/v1beta1" + toolsutils "github.com/kubefleet-dev/kubefleet/tools/utils" +) + +type helper struct { + hubClient client.Client + clusterName string +} + +// Uncordon removes the taint from the member cluster. +func (h *helper) Uncordon(ctx context.Context) error { + return retry.RetryOnConflict(retry.DefaultRetry, func() error { + var mc clusterv1beta1.MemberCluster + if err := h.hubClient.Get(ctx, types.NamespacedName{Name: h.clusterName}, &mc); err != nil { + return err + } + + if len(mc.Spec.Taints) == 0 { + return nil + } + + // remove cordon taint from member cluster. 
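+		// Build a new taint list that keeps every taint except the cordon taint; if the cordon taint
+		// is not present, the list is unchanged and the update below effectively becomes a no-op.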
+ var newTaints []clusterv1beta1.Taint + for i := range mc.Spec.Taints { + taint := mc.Spec.Taints[i] + if taint == toolsutils.CordonTaint { + continue + } + newTaints = append(newTaints, taint) + } + mc.Spec.Taints = newTaints + + return h.hubClient.Update(ctx, &mc) + }) +} diff --git a/tools/uncordoncluster/uncordon_test.go b/tools/uncordoncluster/uncordon_test.go new file mode 100644 index 000000000..c8928ca8e --- /dev/null +++ b/tools/uncordoncluster/uncordon_test.go @@ -0,0 +1,121 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package main + +import ( + "context" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + clusterv1beta1 "github.com/kubefleet-dev/kubefleet/apis/cluster/v1beta1" + toolsutils "github.com/kubefleet-dev/kubefleet/tools/utils" +) + +func TestUncordon(t *testing.T) { + taint1 := clusterv1beta1.Taint{ + Key: "test-key1", + Value: "test-value1", + Effect: corev1.TaintEffectNoSchedule, + } + taint2 := clusterv1beta1.Taint{ + Key: "test-key1", + Value: "test-value1", + Effect: corev1.TaintEffectNoSchedule, + } + + // Define test cases + testCases := []struct { + name string + initialTaints []clusterv1beta1.Taint + wantTaints []clusterv1beta1.Taint + wantErr error + }{ + { + name: "no taints present", + initialTaints: []clusterv1beta1.Taint{}, + wantTaints: []clusterv1beta1.Taint{}, + wantErr: nil, + }, + { + name: "cordon taint present", + initialTaints: []clusterv1beta1.Taint{taint1, toolsutils.CordonTaint, taint2}, + wantTaints: []clusterv1beta1.Taint{taint1, taint2}, + wantErr: nil, + }, + { + name: "cordon taint not present", + initialTaints: []clusterv1beta1.Taint{taint1, taint2}, + wantTaints: []clusterv1beta1.Taint{taint1, taint2}, + wantErr: nil, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + s := serviceScheme(t) + // Create a fake client with initial objects + mc := &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + }, + Spec: clusterv1beta1.MemberClusterSpec{ + Taints: tc.initialTaints, + }, + } + fakeClient := fake.NewClientBuilder().WithScheme(s).WithObjects(mc).Build() + + // Initialize UncordonCluster + uncordonCluster := helper{ + hubClient: fakeClient, + clusterName: "test-cluster", + } + + // Call the Uncordon function + gotErr := uncordonCluster.Uncordon(context.Background()) + if tc.wantErr == nil { + if gotErr != nil { + t.Errorf("Uncordon test %s failed, got error %v, want error %v", tc.name, gotErr, tc.wantErr) + } + // Verify the taints + gotMC := &clusterv1beta1.MemberCluster{} + if err := fakeClient.Get(context.Background(), client.ObjectKey{Name: "test-cluster"}, gotMC); err != nil { + t.Errorf("failed to get member cluster: %v", err) + } + if diff := cmp.Diff(tc.wantTaints, gotMC.Spec.Taints, 
cmpopts.EquateEmpty()); diff != "" { + t.Errorf("Uncordon test %s failed, got taints %v, want taints %v", tc.name, gotMC.Spec.Taints, tc.wantTaints) + } + } else if gotErr == nil || gotErr.Error() != tc.wantErr.Error() { + t.Errorf("Uncordon test %s failed, got error %v, want error %v", tc.name, gotErr, tc.wantErr) + } + }) + } +} + +func serviceScheme(t *testing.T) *runtime.Scheme { + scheme := runtime.NewScheme() + if err := clusterv1beta1.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add cluster v1beta1 scheme: %v", err) + } + return scheme +}