Commit ea64c20

test: Drain tool E2Es, UTs (#22)
1 parent 4fb56df commit ea64c20

File tree

11 files changed: +1215 -58 lines

test/e2e/drain_tool_test.go

Lines changed: 404 additions & 0 deletions
Large diffs are not rendered by default.
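GitHub collapses the new 404-line test file, so its contents are not reproduced here. Purely as an illustration of the shape such a test can take, here is a minimal sketch that shells out to the built plugin binary from the e2e package; `drainBinaryPath` comes from the `setup_test.go` change below, but the spec body and the flag names are assumptions, not the commit's actual code:

```go
// Hypothetical sketch only; not the contents of drain_tool_test.go.
package e2e

import (
	"os/exec"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

var _ = Describe("drain tool", func() {
	It("drains a member cluster via the built binary", func() {
		// Flag names below are assumed for illustration; check the tool's
		// --help output for the real interface.
		cmd := exec.Command(drainBinaryPath,
			"--hubClusterContext", "hub",
			"--clusterName", "kind-cluster-1",
		)
		out, err := cmd.CombinedOutput()
		Expect(err).NotTo(HaveOccurred(), "drain binary failed: %s", string(out))
	})
})
```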

test/e2e/setup.sh

Lines changed: 13 additions & 3 deletions
```diff
@@ -39,12 +39,12 @@ REGIONS=("" "" "eastasia")
 AKS_NODE_REGIONS=("westus" "northeurope" "eastasia")
 # The SKUs that should be set on each node of the respective clusters; if the AKS property
 # provider is used. See the AKS documentation for specifics.
-#
+#
 # Note that this is for information only; kind nodes always use the same fixed setup
 # (total/allocatable capacity = host capacity).
 AKS_NODE_SKUS=("Standard_A4_v2" "Standard_B4ms" "Standard_D8s_v5" "Standard_E16_v5" "Standard_M16ms")
 AKS_SKU_COUNT=${#AKS_NODE_SKUS[@]}
-# The number of clusters that has pre-defined configuration for testing purposes.
+# The number of clusters that has pre-defined configuration for testing purposes.
 RESERVED_CLUSTER_COUNT=${MEMBER_CLUSTER_COUNT}
 
 # Create the kind clusters
@@ -87,7 +87,7 @@ then
     k=$(( RANDOM % AKS_SKU_COUNT ))
     kubectl label node "${NODES[$j]}" beta.kubernetes.io/instance-type=${AKS_NODE_SKUS[$k]}
   done
-done
+done
 fi
 
 # Build the Fleet agent images
@@ -207,3 +207,13 @@ do
 fi
 done
 
+# Create tools directory if it doesn't exist
+mkdir -p ../../hack/tools/bin
+
+# Build drain binary
+echo "Building drain binary..."
+go build -o ../../hack/tools/bin/kubectl-draincluster ../../tools/draincluster
+
+# Build uncordon binary
+echo "Building uncordon binary..."
+go build -o ../../hack/tools/bin/kubectl-uncordoncluster ../../tools/uncordoncluster
```

(Where a removed and an added line look identical, here and in the hunks below, the change is trailing whitespace only.)

test/e2e/setup_test.go

Lines changed: 13 additions & 0 deletions
```diff
@@ -19,8 +19,10 @@ package e2e
 import (
 	"context"
 	"flag"
+	"fmt"
 	"log"
 	"os"
+	"path/filepath"
 	"sync"
 	"testing"
 	"time"
@@ -163,6 +165,11 @@
 	}
 )
 
+var (
+	drainBinaryPath    = filepath.Join("../../", "hack", "tools", "bin", "kubectl-draincluster")
+	uncordonBinaryPath = filepath.Join("../../", "hack", "tools", "bin", "kubectl-uncordoncluster")
+)
+
 var (
 	isAzurePropertyProviderEnabled = (os.Getenv(propertyProviderEnvVarName) == azurePropertyProviderEnvVarValue)
 
@@ -340,6 +347,12 @@ func beforeSuiteForAllProcesses() {
 		for i := range allMemberClusters {
 			allMemberClusterNames = append(allMemberClusterNames, allMemberClusters[i].ClusterName)
 		}
+
+		// Check if drain cluster and uncordon cluster binaries exist.
+		_, err := os.Stat(drainBinaryPath)
+		Expect(os.IsNotExist(err)).To(BeFalse(), fmt.Sprintf("drain binary not found at %s", drainBinaryPath))
+		_, err = os.Stat(uncordonBinaryPath)
+		Expect(os.IsNotExist(err)).To(BeFalse(), fmt.Sprintf("uncordon binary not found at %s", uncordonBinaryPath))
 	})
 }
```

tools/draincluster/README.md

Lines changed: 7 additions & 7 deletions
````diff
@@ -3,11 +3,11 @@
 1. Build the binary for the `draincluster` tool by running the following command in the root directory of the fleet repo:
 
    ```bash
-   go build -o ./hack/tools/bin/kubectl-draincluster ./tools/draincluster/main.go
+   go build -o ./hack/tools/bin/kubectl-draincluster ./tools/draincluster/
    ```
 
 2. Copy the binary to a directory in your `PATH` so that it can be run as a kubectl plugin. For example, you can move it to
-   `/usr/local/bin`:
+   `/usr/local/bin`:
 
    ```bash
    sudo cp ./hack/tools/bin/kubectl-draincluster /usr/local/bin/
@@ -33,13 +33,13 @@ The following compatible plugins are available:
 /usr/local/bin/kubectl-draincluster
 ```
 
-please refer to the [kubectl plugin documentation](https://kubernetes.io/docs/tasks/extend-kubectl/kubectl-plugins/) for
+please refer to the [kubectl plugin documentation](https://kubernetes.io/docs/tasks/extend-kubectl/kubectl-plugins/) for
 more information.
 
 # Drain Member Cluster connected to a fleet
 
-After following the steps above to build the `draincluster` tool as a kubectl plugin, you can use it to remove all
-resources propagated to the member cluster from the hub cluster by any `Placement` resource. This is useful when you
+After following the steps above to build the `draincluster` tool as a kubectl plugin, you can use it to remove all
+resources propagated to the member cluster from the hub cluster by any `Placement` resource. This is useful when you
 want to temporarily move all workloads off a member cluster in preparation for an event like upgrade or reconfiguration.
 
 The `draincluster` tool can be used to drain a member cluster by running the following command:
@@ -68,8 +68,8 @@ CURRENT NAME CLUSTER AUTHINFO
 
 Here you can see that the context of the hub cluster is called `hub` under the `NAME` column.
 
-The command adds a `Taint` to the `MemberCluster` resource of the member cluster to prevent any new resources from being
-propagated to the member cluster. Then it creates `Eviction` objects for all the `Placement` objects that have propagated
+The command adds a `Taint` to the `MemberCluster` resource of the member cluster to prevent any new resources from being
+propagated to the member cluster. Then it creates `Eviction` objects for all the `Placement` objects that have propagated
 resources to the member cluster.
 
 >> **Note**: The `draincluster` tool is a best-effort mechanism at the moment, so once the command is run successfully
````
Lines changed: 47 additions & 30 deletions
```diff
@@ -1,9 +1,20 @@
 /*
-Copyright (c) Microsoft Corporation.
-Licensed under the MIT license.
+Copyright 2025 The KubeFleet Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
 */
 
-package drain
+package main
 
 import (
 	"context"
@@ -32,31 +43,31 @@ const (
 	resourceIdentifierKeyFormat = "%s/%s/%s/%s/%s"
 )
 
-type Helper struct {
-	HubClient   client.Client
-	ClusterName string
+type helper struct {
+	hubClient   client.Client
+	clusterName string
 }
 
-func (h *Helper) Drain(ctx context.Context) (bool, error) {
+func (h *helper) Drain(ctx context.Context) (bool, error) {
 	if err := h.cordon(ctx); err != nil {
-		return false, fmt.Errorf("failed to cordon member cluster %s: %w", h.ClusterName, err)
+		return false, fmt.Errorf("failed to cordon member cluster %s: %w", h.clusterName, err)
 	}
-	log.Printf("Successfully cordoned member cluster %s by adding cordon taint", h.ClusterName)
+	log.Printf("Successfully cordoned member cluster %s by adding cordon taint", h.clusterName)
 
 	crpNameMap, err := h.fetchClusterResourcePlacementNamesToEvict(ctx)
 	if err != nil {
 		return false, err
 	}
 
 	if len(crpNameMap) == 0 {
-		log.Printf("There are currently no resources propagated to %s from fleet using ClusterResourcePlacement resources", h.ClusterName)
+		log.Printf("There are currently no resources propagated to %s from fleet using ClusterResourcePlacement resources", h.clusterName)
 		return true, nil
 	}
 
 	isDrainSuccessful := true
 	// create eviction objects for all <crpName, targetCluster>.
 	for crpName := range crpNameMap {
-		evictionName, err := generateDrainEvictionName(crpName, h.ClusterName)
+		evictionName, err := generateDrainEvictionName(crpName, h.clusterName)
 		if err != nil {
 			return false, err
 		}
@@ -70,27 +81,29 @@ func (h *Helper) Drain(ctx context.Context) (bool, error) {
 				},
 				Spec: placementv1beta1.PlacementEvictionSpec{
 					PlacementName: crpName,
-					ClusterName:   h.ClusterName,
+					ClusterName:   h.clusterName,
 				},
 			}
-			return h.HubClient.Create(ctx, &eviction)
+			return h.hubClient.Create(ctx, &eviction)
 		})
 
 		if err != nil {
-			return false, fmt.Errorf("failed to create eviction for CRP %s: %w", crpName, err)
+			return false, fmt.Errorf("failed to create eviction %s for CRP %s targeting member cluster %s: %w", evictionName, crpName, h.clusterName, err)
 		}
 
+		log.Printf("Created eviction %s for CRP %s targeting member cluster %s", evictionName, crpName, h.clusterName)
+
 		// wait until evictions reach a terminal state.
 		var eviction placementv1beta1.ClusterResourcePlacementEviction
 		err = wait.ExponentialBackoffWithContext(ctx, retry.DefaultBackoff, func(ctx context.Context) (bool, error) {
-			if err := h.HubClient.Get(ctx, types.NamespacedName{Name: evictionName}, &eviction); err != nil {
-				return false, fmt.Errorf("failed to get eviction %s: %w", evictionName, err)
+			if err := h.hubClient.Get(ctx, types.NamespacedName{Name: evictionName}, &eviction); err != nil {
+				return false, fmt.Errorf("failed to get eviction %s for CRP %s targeting member cluster %s: %w", evictionName, crpName, h.clusterName, err)
 			}
 			return evictionutils.IsEvictionInTerminalState(&eviction), nil
 		})
 
 		if err != nil {
-			return false, fmt.Errorf("failed to wait for evictions to reach terminal state: %w", err)
+			return false, fmt.Errorf("failed to wait for eviction %s for CRP %s targeting member cluster %s to reach terminal state: %w", evictionName, crpName, h.clusterName, err)
 		}
 
 		// TODO: add safeguards to check if eviction conditions are set to unknown.
@@ -100,36 +113,36 @@ func (h *Helper) Drain(ctx context.Context) (bool, error) {
 			if validCondition.Reason == condition.EvictionInvalidMissingCRPMessage ||
 				validCondition.Reason == condition.EvictionInvalidDeletingCRPMessage ||
 				validCondition.Reason == condition.EvictionInvalidMissingCRBMessage {
-				log.Printf("eviction %s is invalid with reason %s for CRP %s, but drain will succeed", evictionName, validCondition.Reason, crpName)
+				log.Printf("eviction %s is invalid with reason %s for CRP %s targeting member cluster %s, but drain will succeed", evictionName, validCondition.Reason, crpName, h.clusterName)
 				continue
 			}
 		}
 		executedCondition := eviction.GetCondition(string(placementv1beta1.PlacementEvictionConditionTypeExecuted))
 		if executedCondition == nil || executedCondition.Status == metav1.ConditionFalse {
 			isDrainSuccessful = false
-			log.Printf("eviction %s was not executed successfully for CRP %s", evictionName, crpName)
+			log.Printf("eviction %s was not executed successfully for CRP %s targeting member cluster %s", evictionName, crpName, h.clusterName)
 			continue
 		}
-		log.Printf("eviction %s was executed successfully for CRP %s", evictionName, crpName)
+		log.Printf("eviction %s was executed successfully for CRP %s targeting member cluster %s", evictionName, crpName, h.clusterName)
 		// log each cluster scoped resource evicted for CRP.
 		clusterScopedResourceIdentifiers, err := h.collectClusterScopedResourcesSelectedByCRP(ctx, crpName)
 		if err != nil {
 			log.Printf("failed to collect cluster scoped resources selected by CRP %s: %v", crpName, err)
 			continue
 		}
 		for _, resourceIdentifier := range clusterScopedResourceIdentifiers {
-			log.Printf("evicted resource %s propagated by CRP %s", generateResourceIdentifierKey(resourceIdentifier), crpName)
+			log.Printf("evicted resource %s propagated by CRP %s targeting member cluster %s", generateResourceIdentifierKey(resourceIdentifier), crpName, h.clusterName)
 		}
 	}
 
 	return isDrainSuccessful, nil
 }
 
-func (h *Helper) cordon(ctx context.Context) error {
+func (h *helper) cordon(ctx context.Context) error {
 	// add taint to member cluster to ensure resources aren't scheduled on it.
 	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
 		var mc clusterv1beta1.MemberCluster
-		if err := h.HubClient.Get(ctx, types.NamespacedName{Name: h.ClusterName}, &mc); err != nil {
+		if err := h.hubClient.Get(ctx, types.NamespacedName{Name: h.clusterName}, &mc); err != nil {
 			return err
 		}
 
@@ -143,21 +156,21 @@ func (h *Helper) cordon(ctx context.Context) error {
 		// add taint to member cluster to cordon.
 		mc.Spec.Taints = append(mc.Spec.Taints, toolsutils.CordonTaint)
 
-		return h.HubClient.Update(ctx, &mc)
+		return h.hubClient.Update(ctx, &mc)
 	})
 }
 
-func (h *Helper) fetchClusterResourcePlacementNamesToEvict(ctx context.Context) (map[string]bool, error) {
+func (h *helper) fetchClusterResourcePlacementNamesToEvict(ctx context.Context) (map[string]bool, error) {
 	var crbList placementv1beta1.ClusterResourceBindingList
-	if err := h.HubClient.List(ctx, &crbList); err != nil {
+	if err := h.hubClient.List(ctx, &crbList); err != nil {
 		return map[string]bool{}, fmt.Errorf("failed to list cluster resource bindings: %w", err)
 	}
 
 	crpNameMap := make(map[string]bool)
 	// find all unique CRP names for which eviction needs to occur.
 	for i := range crbList.Items {
 		crb := crbList.Items[i]
-		if crb.Spec.TargetCluster == h.ClusterName && crb.DeletionTimestamp == nil {
+		if crb.Spec.TargetCluster == h.clusterName && crb.DeletionTimestamp == nil {
 			crpName, ok := crb.GetLabels()[placementv1beta1.CRPTrackingLabel]
 			if !ok {
 				return map[string]bool{}, fmt.Errorf("failed to get CRP name from binding %s", crb.Name)
@@ -169,9 +182,9 @@ func (h *Helper) fetchClusterResourcePlacementNamesToEvict(ctx context.Context)
 	return crpNameMap, nil
 }
 
-func (h *Helper) collectClusterScopedResourcesSelectedByCRP(ctx context.Context, crpName string) ([]placementv1beta1.ResourceIdentifier, error) {
+func (h *helper) collectClusterScopedResourcesSelectedByCRP(ctx context.Context, crpName string) ([]placementv1beta1.ResourceIdentifier, error) {
 	var crp placementv1beta1.ClusterResourcePlacement
-	if err := h.HubClient.Get(ctx, types.NamespacedName{Name: crpName}, &crp); err != nil {
+	if err := h.hubClient.Get(ctx, types.NamespacedName{Name: crpName}, &crp); err != nil {
 		return nil, fmt.Errorf("failed to get ClusterResourcePlacement %s: %w", crpName, err)
 	}
 
@@ -188,7 +201,8 @@ func (h *Helper) collectClusterScopedResourcesSelectedByCRP(ctx context.Context,
 func generateDrainEvictionName(crpName, targetCluster string) (string, error) {
 	evictionName := fmt.Sprintf(drainEvictionNameFormat, crpName, targetCluster, uuid.NewUUID()[:uuidLength])
 
-	if errs := validation.IsQualifiedName(evictionName); len(errs) != 0 {
+	// check to see if eviction name is a valid DNS1123 subdomain name https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names.
+	if errs := validation.IsDNS1123Subdomain(evictionName); len(errs) != 0 {
 		return "", fmt.Errorf("failed to format a qualified name for drain eviction object with CRP name %s, cluster name %s: %v", crpName, targetCluster, errs)
 	}
 	return evictionName, nil
```
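One practical effect of swapping `validation.IsQualifiedName` for `validation.IsDNS1123Subdomain`: the generated name concatenates a CRP name, a cluster name, and a UUID fragment, so it can exceed the 63-character segment limit that `IsQualifiedName` enforces, whereas the DNS-1123 subdomain rule used for Kubernetes object names allows up to 253 characters. A minimal standalone sketch (not part of the commit):

```go
package main

import (
	"fmt"
	"strings"

	"k8s.io/apimachinery/pkg/util/validation"
)

func main() {
	// 71 characters: a legal Kubernetes object name, yet longer than the
	// 63-character limit that IsQualifiedName applies to its name segment.
	name := "drain-eviction-" + strings.Repeat("a", 50) + "-abcde"

	fmt.Println(validation.IsQualifiedName(name))    // non-empty: rejected
	fmt.Println(validation.IsDNS1123Subdomain(name)) // empty: accepted
}
```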
```diff
@@ -198,6 +212,9 @@ func generateResourceIdentifierKey(r placementv1beta1.ResourceIdentifier) string
 	if len(r.Group) == 0 && len(r.Namespace) == 0 {
 		return fmt.Sprintf(resourceIdentifierKeyFormat, "''", r.Version, r.Kind, "''", r.Name)
 	}
+	if len(r.Group) == 0 {
+		return fmt.Sprintf(resourceIdentifierKeyFormat, "''", r.Version, r.Kind, r.Namespace, r.Name)
+	}
 	if len(r.Namespace) == 0 {
 		return fmt.Sprintf(resourceIdentifierKeyFormat, r.Group, r.Version, r.Kind, "''", r.Name)
 	}
```
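The added branch handles core-group (empty `Group`) namespaced resources, which previously would have fallen through to a format with a bare empty group segment. For reference, hypothetical inputs and the keys each branch produces (the fully qualified fall-through case is outside this hunk but implied by the format string):

```go
// Illustrative outputs of generateResourceIdentifierKey with the
// "%s/%s/%s/%s/%s" (group/version/kind/namespace/name) format, where "''"
// marks an empty field; the inputs are made-up examples.
//
//   core, cluster-scoped:     ''/v1/Namespace/''/my-ns
//   core, namespaced:         ''/v1/ConfigMap/my-ns/my-cm   (the branch added here)
//   grouped, cluster-scoped:  rbac.authorization.k8s.io/v1/ClusterRole/''/reader
//   grouped, namespaced:      apps/v1/Deployment/my-ns/my-app
```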
