404 changes: 404 additions & 0 deletions test/e2e/drain_tool_test.go

Large diffs are not rendered by default.

16 changes: 13 additions & 3 deletions test/e2e/setup.sh
@@ -39,12 +39,12 @@ REGIONS=("" "" "eastasia")
AKS_NODE_REGIONS=("westus" "northeurope" "eastasia")
# The SKUs that should be set on each node of the respective clusters; if the AKS property
# provider is used. See the AKS documentation for specifics.
#
#
# Note that this is for information only; kind nodes always use the same fixed setup
# (total/allocatable capacity = host capacity).
AKS_NODE_SKUS=("Standard_A4_v2" "Standard_B4ms" "Standard_D8s_v5" "Standard_E16_v5" "Standard_M16ms")
AKS_SKU_COUNT=${#AKS_NODE_SKUS[@]}
# The number of clusters that has pre-defined configuration for testing purposes.
# The number of clusters that has pre-defined configuration for testing purposes.
RESERVED_CLUSTER_COUNT=${MEMBER_CLUSTER_COUNT}

# Create the kind clusters
@@ -87,7 +87,7 @@ then
k=$(( RANDOM % AKS_SKU_COUNT ))
kubectl label node "${NODES[$j]}" beta.kubernetes.io/instance-type=${AKS_NODE_SKUS[$k]}
done
done
done
fi

# Build the Fleet agent images
@@ -207,3 +207,13 @@ do
fi
done

# Create tools directory if it doesn't exist
mkdir -p ../../hack/tools/bin

# Build drain binary
echo "Building drain binary..."
go build -o ../../hack/tools/bin/kubectl-draincluster ../../tools/draincluster

# Build uncordon binary
echo "Building uncordon binary..."
go build -o ../../hack/tools/bin/kubectl-uncordoncluster ../../tools/uncordoncluster
13 changes: 13 additions & 0 deletions test/e2e/setup_test.go
Expand Up @@ -19,8 +19,10 @@ package e2e
import (
"context"
"flag"
"fmt"
"log"
"os"
"path/filepath"
"sync"
"testing"
"time"
@@ -163,6 +165,11 @@ var (
}
)

var (
drainBinaryPath = filepath.Join("../../", "hack", "tools", "bin", "kubectl-draincluster")
uncordonBinaryPath = filepath.Join("../../", "hack", "tools", "bin", "kubectl-uncordoncluster")
)

var (
isAzurePropertyProviderEnabled = (os.Getenv(propertyProviderEnvVarName) == azurePropertyProviderEnvVarValue)

@@ -340,6 +347,12 @@ func beforeSuiteForAllProcesses() {
for i := range allMemberClusters {
allMemberClusterNames = append(allMemberClusterNames, allMemberClusters[i].ClusterName)
}

// Check if drain cluster and uncordon cluster binaries exist.
_, err := os.Stat(drainBinaryPath)
Expect(os.IsNotExist(err)).To(BeFalse(), fmt.Sprintf("drain binary not found at %s", drainBinaryPath))
_, err = os.Stat(uncordonBinaryPath)
Expect(os.IsNotExist(err)).To(BeFalse(), fmt.Sprintf("uncordon binary not found at %s", uncordonBinaryPath))
})
}

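The binaries verified above are exercised by the new `test/e2e/drain_tool_test.go` suite, whose 404-line diff is collapsed at the top of this page. As a rough illustration of the pattern only, a helper in the same package could shell out to a pre-built binary along these lines; the helper name is hypothetical and the arguments are placeholders, since the real flags are defined by the draincluster/uncordoncluster tools themselves:

```go
package e2e

import (
	"context"
	"os/exec"
)

// runTool is a hypothetical helper (not part of this PR) showing one way the
// e2e suite could invoke the pre-built plugin binaries checked above.
// The args are placeholders; the actual flags come from the tools' own CLIs.
func runTool(ctx context.Context, binaryPath string, args ...string) (string, error) {
	cmd := exec.CommandContext(ctx, binaryPath, args...)
	out, err := cmd.CombinedOutput() // capture stdout and stderr together for test logs
	return string(out), err
}
```

A test would then call something like `runTool(ctx, drainBinaryPath, /* tool-specific flags */)` and assert on the combined output.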
14 changes: 7 additions & 7 deletions tools/draincluster/README.md
@@ -3,11 +3,11 @@
1. Build the binary for the `draincluster` tool by running the following command in the root directory of the fleet repo:

```bash
go build -o ./hack/tools/bin/kubectl-draincluster ./tools/draincluster/main.go
go build -o ./hack/tools/bin/kubectl-draincluster ./tools/draincluster/
```

2. Copy the binary to a directory in your `PATH` so that it can be run as a kubectl plugin. For example, you can move it to
`/usr/local/bin`:
`/usr/local/bin`:

```bash
sudo cp ./hack/tools/bin/kubectl-draincluster /usr/local/bin/
@@ -33,13 +33,13 @@ The following compatible plugins are available:
/usr/local/bin/kubectl-draincluster
```

please refer to the [kubectl plugin documentation](https://kubernetes.io/docs/tasks/extend-kubectl/kubectl-plugins/) for
please refer to the [kubectl plugin documentation](https://kubernetes.io/docs/tasks/extend-kubectl/kubectl-plugins/) for
more information.

# Drain Member Cluster connected to a fleet

After following the steps above to build the `draincluster` tool as a kubectl plugin, you can use it to remove all
resources propagated to the member cluster from the hub cluster by any `Placement` resource. This is useful when you
After following the steps above to build the `draincluster` tool as a kubectl plugin, you can use it to remove all
resources propagated to the member cluster from the hub cluster by any `Placement` resource. This is useful when you
want to temporarily move all workloads off a member cluster in preparation for an event like upgrade or reconfiguration.

The `draincluster` tool can be used to drain a member cluster by running the following command:
@@ -68,8 +68,8 @@ CURRENT NAME CLUSTER AUTHINFO

Here you can see that the context of the hub cluster is called `hub` under the `NAME` column.

The command adds a `Taint` to the `MemberCluster` resource of the member cluster to prevent any new resources from being
propagated to the member cluster. Then it creates `Eviction` objects for all the `Placement` objects that have propagated
The command adds a `Taint` to the `MemberCluster` resource of the member cluster to prevent any new resources from being
propagated to the member cluster. Then it creates `Eviction` objects for all the `Placement` objects that have propagated
resources to the member cluster.

>> **Note**: The `draincluster` tool is a best-effort mechanism at the moment, so once the command is run successfully
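To make the README's description concrete, here is a condensed sketch, adapted from the `drain.go` changes below, of the two hub-side writes the command performs: appending the cordon taint to the `MemberCluster` spec, then creating one `ClusterResourcePlacementEviction` per CRP that has resources on the cluster. The function name, the `evictionName` parameter, and the fleet import paths are assumptions (the diff collapses the import block); the retries and terminal-state polling that `drain.go` wraps around these calls are omitted.

```go
package main

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"

	clusterv1beta1 "go.goms.io/fleet/apis/cluster/v1beta1"     // assumed import path
	placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" // assumed import path
	toolsutils "go.goms.io/fleet/tools/utils"                  // assumed import path
)

// cordonAndEvict condenses what the drain command does on the hub cluster.
func cordonAndEvict(ctx context.Context, hubClient client.Client, clusterName, crpName, evictionName string) error {
	// Cordon: taint the MemberCluster so no new placements schedule onto it.
	var mc clusterv1beta1.MemberCluster
	if err := hubClient.Get(ctx, types.NamespacedName{Name: clusterName}, &mc); err != nil {
		return err
	}
	mc.Spec.Taints = append(mc.Spec.Taints, toolsutils.CordonTaint)
	if err := hubClient.Update(ctx, &mc); err != nil {
		return err
	}

	// Evict: one ClusterResourcePlacementEviction per CRP with resources on the cluster.
	eviction := placementv1beta1.ClusterResourcePlacementEviction{
		ObjectMeta: metav1.ObjectMeta{Name: evictionName},
		Spec: placementv1beta1.PlacementEvictionSpec{
			PlacementName: crpName,
			ClusterName:   clusterName,
		},
	}
	return hubClient.Create(ctx, &eviction)
}
```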
77 changes: 47 additions & 30 deletions tools/draincluster/drain/drain.go → tools/draincluster/drain.go
@@ -1,9 +1,20 @@
/*
Copyright (c) Microsoft Corporation.
Licensed under the MIT license.
Copyright 2025 The KubeFleet Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package drain
package main

import (
"context"
@@ -32,31 +32,43 @@ const (
resourceIdentifierKeyFormat = "%s/%s/%s/%s/%s"
)

type Helper struct {
HubClient client.Client
ClusterName string
type helper struct {
hubClient client.Client
clusterName string
}

func (h *Helper) Drain(ctx context.Context) (bool, error) {
func (h *helper) Drain(ctx context.Context) (bool, error) {
if err := h.cordon(ctx); err != nil {
return false, fmt.Errorf("failed to cordon member cluster %s: %w", h.ClusterName, err)
return false, fmt.Errorf("failed to cordon member cluster %s: %w", h.clusterName, err)
}
log.Printf("Successfully cordoned member cluster %s by adding cordon taint", h.ClusterName)
log.Printf("Successfully cordoned member cluster %s by adding cordon taint", h.clusterName)

crpNameMap, err := h.fetchClusterResourcePlacementNamesToEvict(ctx)
if err != nil {
return false, err
}

if len(crpNameMap) == 0 {
log.Printf("There are currently no resources propagated to %s from fleet using ClusterResourcePlacement resources", h.ClusterName)
log.Printf("There are currently no resources propagated to %s from fleet using ClusterResourcePlacement resources", h.clusterName)
return true, nil
}

isDrainSuccessful := true
// create eviction objects for all <crpName, targetCluster>.
for crpName := range crpNameMap {
evictionName, err := generateDrainEvictionName(crpName, h.ClusterName)
evictionName, err := generateDrainEvictionName(crpName, h.clusterName)
if err != nil {
return false, err
}
@@ -70,27 +81,29 @@ func (h *Helper) Drain(ctx context.Context) (bool, error) {
},
Spec: placementv1beta1.PlacementEvictionSpec{
PlacementName: crpName,
ClusterName: h.ClusterName,
ClusterName: h.clusterName,
},
}
return h.HubClient.Create(ctx, &eviction)
return h.hubClient.Create(ctx, &eviction)
})

if err != nil {
return false, fmt.Errorf("failed to create eviction for CRP %s: %w", crpName, err)
return false, fmt.Errorf("failed to create eviction %s for CRP %s targeting member cluster %s: %w", evictionName, crpName, h.clusterName, err)
}

log.Printf("Created eviction %s for CRP %s targeting member cluster %s", evictionName, crpName, h.clusterName)

// wait until evictions reach a terminal state.
var eviction placementv1beta1.ClusterResourcePlacementEviction
err = wait.ExponentialBackoffWithContext(ctx, retry.DefaultBackoff, func(ctx context.Context) (bool, error) {
if err := h.HubClient.Get(ctx, types.NamespacedName{Name: evictionName}, &eviction); err != nil {
return false, fmt.Errorf("failed to get eviction %s: %w", evictionName, err)
if err := h.hubClient.Get(ctx, types.NamespacedName{Name: evictionName}, &eviction); err != nil {
return false, fmt.Errorf("failed to get eviction %s for CRP %s targeting member cluster %s: %w", evictionName, crpName, h.clusterName, err)
}
return evictionutils.IsEvictionInTerminalState(&eviction), nil
})

if err != nil {
return false, fmt.Errorf("failed to wait for evictions to reach terminal state: %w", err)
return false, fmt.Errorf("failed to wait for eviction %s for CRP %s targeting member cluster %s to reach terminal state: %w", evictionName, crpName, h.clusterName, err)
}

// TODO: add safeguards to check if eviction conditions are set to unknown.
@@ -100,36 +113,36 @@ func (h *Helper) Drain(ctx context.Context) (bool, error) {
if validCondition.Reason == condition.EvictionInvalidMissingCRPMessage ||
validCondition.Reason == condition.EvictionInvalidDeletingCRPMessage ||
validCondition.Reason == condition.EvictionInvalidMissingCRBMessage {
log.Printf("eviction %s is invalid with reason %s for CRP %s, but drain will succeed", evictionName, validCondition.Reason, crpName)
log.Printf("eviction %s is invalid with reason %s for CRP %s targeting member cluster %s, but drain will succeed", evictionName, validCondition.Reason, crpName, h.clusterName)
continue
}
}
executedCondition := eviction.GetCondition(string(placementv1beta1.PlacementEvictionConditionTypeExecuted))
if executedCondition == nil || executedCondition.Status == metav1.ConditionFalse {
isDrainSuccessful = false
log.Printf("eviction %s was not executed successfully for CRP %s", evictionName, crpName)
log.Printf("eviction %s was not executed successfully for CRP %s targeting member cluster %s", evictionName, crpName, h.clusterName)
continue
}
log.Printf("eviction %s was executed successfully for CRP %s", evictionName, crpName)
log.Printf("eviction %s was executed successfully for CRP %s targeting member cluster %s", evictionName, crpName, h.clusterName)
// log each cluster scoped resource evicted for CRP.
clusterScopedResourceIdentifiers, err := h.collectClusterScopedResourcesSelectedByCRP(ctx, crpName)
if err != nil {
log.Printf("failed to collect cluster scoped resources selected by CRP %s: %v", crpName, err)
continue
}
for _, resourceIdentifier := range clusterScopedResourceIdentifiers {
log.Printf("evicted resource %s propagated by CRP %s", generateResourceIdentifierKey(resourceIdentifier), crpName)
log.Printf("evicted resource %s propagated by CRP %s targeting member cluster %s", generateResourceIdentifierKey(resourceIdentifier), crpName, h.clusterName)
}
}

return isDrainSuccessful, nil
}

func (h *Helper) cordon(ctx context.Context) error {
func (h *helper) cordon(ctx context.Context) error {
// add taint to member cluster to ensure resources aren't scheduled on it.
return retry.RetryOnConflict(retry.DefaultRetry, func() error {
var mc clusterv1beta1.MemberCluster
if err := h.HubClient.Get(ctx, types.NamespacedName{Name: h.ClusterName}, &mc); err != nil {
if err := h.hubClient.Get(ctx, types.NamespacedName{Name: h.clusterName}, &mc); err != nil {
return err
}

@@ -143,21 +156,21 @@ func (h *Helper) cordon(ctx context.Context) error {
// add taint to member cluster to cordon.
mc.Spec.Taints = append(mc.Spec.Taints, toolsutils.CordonTaint)

return h.HubClient.Update(ctx, &mc)
return h.hubClient.Update(ctx, &mc)
})
}

func (h *Helper) fetchClusterResourcePlacementNamesToEvict(ctx context.Context) (map[string]bool, error) {
func (h *helper) fetchClusterResourcePlacementNamesToEvict(ctx context.Context) (map[string]bool, error) {
var crbList placementv1beta1.ClusterResourceBindingList
if err := h.HubClient.List(ctx, &crbList); err != nil {
if err := h.hubClient.List(ctx, &crbList); err != nil {
return map[string]bool{}, fmt.Errorf("failed to list cluster resource bindings: %w", err)
}

crpNameMap := make(map[string]bool)
// find all unique CRP names for which eviction needs to occur.
for i := range crbList.Items {
crb := crbList.Items[i]
if crb.Spec.TargetCluster == h.ClusterName && crb.DeletionTimestamp == nil {
if crb.Spec.TargetCluster == h.clusterName && crb.DeletionTimestamp == nil {
crpName, ok := crb.GetLabels()[placementv1beta1.CRPTrackingLabel]
if !ok {
return map[string]bool{}, fmt.Errorf("failed to get CRP name from binding %s", crb.Name)
@@ -169,9 +182,9 @@ func (h *Helper) fetchClusterResourcePlacementNamesToEvict(ctx context.Context)
return crpNameMap, nil
}

func (h *Helper) collectClusterScopedResourcesSelectedByCRP(ctx context.Context, crpName string) ([]placementv1beta1.ResourceIdentifier, error) {
func (h *helper) collectClusterScopedResourcesSelectedByCRP(ctx context.Context, crpName string) ([]placementv1beta1.ResourceIdentifier, error) {
var crp placementv1beta1.ClusterResourcePlacement
if err := h.HubClient.Get(ctx, types.NamespacedName{Name: crpName}, &crp); err != nil {
if err := h.hubClient.Get(ctx, types.NamespacedName{Name: crpName}, &crp); err != nil {
return nil, fmt.Errorf("failed to get ClusterResourcePlacement %s: %w", crpName, err)
}

@@ -188,7 +201,8 @@ func (h *Helper) collectClusterScopedResourcesSelectedByCRP(ctx context.Context,
func generateDrainEvictionName(crpName, targetCluster string) (string, error) {
evictionName := fmt.Sprintf(drainEvictionNameFormat, crpName, targetCluster, uuid.NewUUID()[:uuidLength])

if errs := validation.IsQualifiedName(evictionName); len(errs) != 0 {
// check to see if eviction name is a valid DNS1123 subdomain name https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names.
if errs := validation.IsDNS1123Subdomain(evictionName); len(errs) != 0 {
return "", fmt.Errorf("failed to format a qualified name for drain eviction object with CRP name %s, cluster name %s: %v", crpName, targetCluster, errs)
}
return evictionName, nil
@@ -198,6 +212,9 @@ func generateResourceIdentifierKey(r placementv1beta1.ResourceIdentifier) string
if len(r.Group) == 0 && len(r.Namespace) == 0 {
return fmt.Sprintf(resourceIdentifierKeyFormat, "''", r.Version, r.Kind, "''", r.Name)
}
if len(r.Group) == 0 {
return fmt.Sprintf(resourceIdentifierKeyFormat, "''", r.Version, r.Kind, r.Namespace, r.Name)
}
if len(r.Namespace) == 0 {
return fmt.Sprintf(resourceIdentifierKeyFormat, r.Group, r.Version, r.Kind, "''", r.Name)
}
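The last hunk adds a branch to `generateResourceIdentifierKey` for core-group (empty group) namespaced resources. A standalone sketch of the resulting key shapes, using a local stand-in for `placementv1beta1.ResourceIdentifier` and assuming the collapsed catch-all return uses the same `resourceIdentifierKeyFormat`:

```go
package main

import "fmt"

// ResourceIdentifier is a local stand-in for placementv1beta1.ResourceIdentifier.
type ResourceIdentifier struct {
	Group, Version, Kind, Namespace, Name string
}

// key mirrors the branching in generateResourceIdentifierKey, quoting empty
// group/namespace segments as '' so every key keeps five slash-separated parts.
func key(r ResourceIdentifier) string {
	const format = "%s/%s/%s/%s/%s" // matches resourceIdentifierKeyFormat
	switch {
	case r.Group == "" && r.Namespace == "":
		return fmt.Sprintf(format, "''", r.Version, r.Kind, "''", r.Name)
	case r.Group == "": // new branch added in this PR
		return fmt.Sprintf(format, "''", r.Version, r.Kind, r.Namespace, r.Name)
	case r.Namespace == "":
		return fmt.Sprintf(format, r.Group, r.Version, r.Kind, "''", r.Name)
	default:
		return fmt.Sprintf(format, r.Group, r.Version, r.Kind, r.Namespace, r.Name)
	}
}

func main() {
	fmt.Println(key(ResourceIdentifier{Version: "v1", Kind: "Namespace", Name: "test-ns"}))
	// ''/v1/Namespace/''/test-ns
	fmt.Println(key(ResourceIdentifier{Version: "v1", Kind: "ConfigMap", Namespace: "test-ns", Name: "app-config"}))
	// ''/v1/ConfigMap/test-ns/app-config
	fmt.Println(key(ResourceIdentifier{Group: "rbac.authorization.k8s.io", Version: "v1", Kind: "ClusterRole", Name: "reader"}))
	// rbac.authorization.k8s.io/v1/ClusterRole/''/reader
}
```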