diff --git a/README.md b/README.md index 18b9d6a78..251601ee7 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Coverage Status](https://coveralls.io/repos/github/kubernetes-sigs/azurelustre-csi-driver/badge.svg?branch=main)](https://coveralls.io/github/kubernetes-sigs/azurelustre-csi-driver?branch=main) [![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2Fkubernetes-sigs%2Fazurelustre-csi-driver.svg?type=shield)](https://app.fossa.com/projects/git%2Bgithub.com%2Fkubernetes-sigs%2Fazurelustre-csi-driver?ref=badge_shield) -### About +## About This driver allows Kubernetes to access Azure Lustre file system. @@ -12,7 +12,7 @@ This driver allows Kubernetes to access Azure Lustre file system.   -### Container Images & Kubernetes Compatibility: +### Container Images & Kubernetes Compatibility | Driver version | Image | Supported k8s version | Lustre client version | |-----------------|-----------------------------------------------------------------|-----------------------|-----------------------| diff --git a/deploy/rbac-csi-azurelustre-node.yaml b/deploy/rbac-csi-azurelustre-node.yaml index 6953c057a..555bdb537 100644 --- a/deploy/rbac-csi-azurelustre-node.yaml +++ b/deploy/rbac-csi-azurelustre-node.yaml @@ -14,6 +14,9 @@ rules: - apiGroups: [""] resources: ["secrets"] verbs: ["get", "list"] + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get", "patch"] --- kind: ClusterRoleBinding diff --git a/docs/csi-debug.md b/docs/csi-debug.md index 12d6f6b73..ca752616d 100644 --- a/docs/csi-debug.md +++ b/docs/csi-debug.md @@ -448,6 +448,75 @@ Check for solutions in [Resolving Common Errors](errors.md) --- +## Pod Scheduling and Node Readiness Issues + +### Pods Stuck in Pending Status with Taint-Related Errors + +**Symptoms:** + +- Pods requiring Azure Lustre storage remain in `Pending` status +- Pod events show taint-related scheduling failures +- Error messages mentioning `azurelustre.csi.azure.com/agent-not-ready` taint + +**Check pod scheduling status:** + +```sh +kubectl describe pod +``` + +Look for events such as: + +- `Warning FailedScheduling ... node(s) had taint {azurelustre.csi.azure.com/agent-not-ready: }, that the pod didn't tolerate` +- `0/X nodes are available: X node(s) had taint {azurelustre.csi.azure.com/agent-not-ready}` + +**Check node taints:** + +```sh +kubectl describe nodes | grep -A5 -B5 "azurelustre.csi.azure.com/agent-not-ready" +``` + +**Check CSI driver readiness on nodes:** + +```sh +# Check if CSI driver pods are running on all nodes +kubectl get pods -n kube-system -l app=csi-azurelustre-node -o wide + +# Check CSI driver logs for startup issues +kubectl logs -n kube-system -l app=csi-azurelustre-node -c azurelustre --tail=100 | grep -i "taint\|ready\|error" +``` + +**Common causes and solutions:** + +1. **CSI Driver Still Starting**: Wait for CSI driver pods to reach `Running` status + + ```sh + kubectl wait --for=condition=ready pod -l app=csi-azurelustre-node -n kube-system --timeout=300s + ``` + +2. **Lustre Module Loading Issues**: Check if Lustre kernel modules are properly loaded + + ```sh + kubectl exec -n kube-system -c azurelustre -- lsmod | grep lustre + ``` + +3. 
**Manual Taint Removal** (Emergency only - not recommended for production): + + ```sh + kubectl taint nodes <node-name> azurelustre.csi.azure.com/agent-not-ready:NoSchedule- + ``` + +**Verify taint removal functionality:** + +Check that startup taint removal is enabled in the CSI driver: + +```sh +kubectl logs -n kube-system -l app=csi-azurelustre-node -c azurelustre | grep -i "remove.*taint" +``` + +The log output should show taint removal activity when the driver becomes ready. + +--- + ## Get Azure Lustre Driver Version ```sh diff --git a/docs/driver-parameters.md b/docs/driver-parameters.md index 2c00723c7..b43f18ee2 100644 --- a/docs/driver-parameters.md +++ b/docs/driver-parameters.md @@ -4,6 +4,29 @@ These are the parameters to be passed into the custom StorageClass that users mu For more information, see the [Azure Managed Lustre Filesystem (AMLFS) service documentation](https://learn.microsoft.com/en-us/azure/azure-managed-lustre/) and the [AMLFS CSI documentation](https://learn.microsoft.com/en-us/azure/azure-managed-lustre/use-csi-driver-kubernetes). +## CSI Driver Configuration Parameters + +These parameters control the behavior of the Azure Lustre CSI driver itself and are typically configured during driver installation rather than in StorageClass definitions. + +### Node Startup Taint Management + +Name | Meaning | Available Value | Default Value | Configuration Method +--- | --- | --- | --- | --- +remove-not-ready-taint | Controls whether the CSI driver automatically removes startup taints from nodes when the driver becomes ready. This ensures pods are only scheduled to nodes where the CSI driver is fully operational and ready to mount Lustre filesystems. Nodes should have a taint of the form: `azurelustre.csi.azure.com/agent-not-ready:NoSchedule` | `true`, `false` | `true` | Command-line flag `--remove-not-ready-taint` in driver deployment + +#### Startup Taint Details + +When enabled (default), the Azure Lustre CSI driver will: + +1. **Monitor Node Readiness**: Check if the CSI driver is fully initialized on the node +2. **Remove Blocking Taint**: Automatically remove the `azurelustre.csi.azure.com/agent-not-ready:NoSchedule` taint when ready + +This mechanism prevents pods requiring Azure Lustre storage from being scheduled to nodes where: + +- Lustre kernel modules are not yet loaded +- CSI driver components are not fully initialized +- Network connectivity to Lustre filesystems is not established + ## Dynamic Provisioning (Create an AMLFS Cluster through AKS) - Public Preview > **Public Preview Notice**: Dynamic provisioning functionality is currently in public preview. Some features may not be supported or may have constrained capabilities.
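For the startup taint mechanism described above to have any effect, the `azurelustre.csi.azure.com/agent-not-ready:NoSchedule` taint must already be present on a node before application pods are scheduled to it, ideally applied at node-pool provisioning time (for example, through the `--node-taints` option of `az aks nodepool add`) rather than after the node is Ready. A minimal sketch using `kubectl`, assuming a hypothetical node name; the driver matches the taint by key only, so any value works:

```sh
# Hypothetical node name; substitute one from `kubectl get nodes`.
NODE=aks-nodepool1-12345678-vmss000000

# Apply the startup taint so pods that need Azure Lustre storage wait for the driver.
kubectl taint nodes "$NODE" azurelustre.csi.azure.com/agent-not-ready=startup:NoSchedule

# Once the csi-azurelustre-node pod on this node is ready, the taint should be
# removed automatically and this prints no agent-not-ready entry.
kubectl get node "$NODE" -o jsonpath='{.spec.taints}'
```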
diff --git a/docs/errors.md b/docs/errors.md index 4c5cf0dec..54d8d26fa 100644 --- a/docs/errors.md +++ b/docs/errors.md @@ -11,6 +11,9 @@ This document describes common errors that can occur during volume creation and - [Error: Resource not found](#error-resource-not-found) - [Error: Cannot create AMLFS cluster, not enough IP addresses available](#error-cannot-create-amlfs-cluster-not-enough-ip-addresses-available) - [Error: Reached Azure Subscription Quota Limit for AMLFS Clusters](#error-reached-azure-subscription-quota-limit-for-amlfs-clusters) +- [Pod Scheduling Errors](#pod-scheduling-errors) + - [Node Readiness and Taint Errors](#node-readiness-and-taint-errors) + - [Error: Node had taint azurelustre.csi.azure.com/agent-not-ready](#error-node-had-taint-azurelustrecsiazurecomagent-not-ready) - [Volume Mounting Errors](#volume-mounting-errors) - [Node Mount Errors](#node-mount-errors) - [Error: Could not mount target](#error-could-not-mount-target) @@ -31,7 +34,7 @@ This document describes common errors that can occur during volume creation and - [Controller Logs](#controller-logs) - [Node Logs](#node-logs) - [Comprehensive Log Collection](#comprehensive-log-collection) - + --- ## Volume Creation Errors @@ -211,6 +214,87 @@ There is not enough room in the /subscriptions//resourceGroups//prov --- +## Pod Scheduling Errors + +### Node Readiness and Taint Errors + +#### Error: Node had taint azurelustre.csi.azure.com/agent-not-ready + +**Symptoms:** + +- Pods requiring Azure Lustre storage remain stuck in `Pending` status +- Pod events show taint-related scheduling failures: + - `Warning FailedScheduling ... node(s) had taint {azurelustre.csi.azure.com/agent-not-ready: }, that the pod didn't tolerate` + - `0/X nodes are available: X node(s) had taint {azurelustre.csi.azure.com/agent-not-ready}` +- Kubectl describe pod shows scheduling failures due to taints + +**Possible Causes:** + +- CSI driver is still initializing on nodes +- Lustre kernel modules are not yet loaded +- CSI driver failed to start properly on affected nodes +- Node is not ready to handle Azure Lustre volume allocations +- CSI driver startup taint removal is disabled + +**Debugging Steps:** + +```bash +# Check pod scheduling status +kubectl describe pod | grep -A10 Events + +# Check which nodes have the taint +kubectl describe nodes | grep -A5 -B5 "azurelustre.csi.azure.com/agent-not-ready" + +# Verify CSI driver pod status on nodes +kubectl get pods -n kube-system -l app=csi-azurelustre-node -o wide + +# Check CSI driver startup logs +kubectl logs -n kube-system -l app=csi-azurelustre-node -c azurelustre --tail=100 | grep -i "taint\|ready\|error" + +# Verify taint removal is enabled (should be true by default) +kubectl logs -n kube-system -l app=csi-azurelustre-node -c azurelustre | grep -i "remove.*taint" +``` + +**Resolution:** + +1. **Wait for CSI Driver Readiness** (most common case): + + ```bash + # Wait for CSI driver pods to reach Running status + kubectl wait --for=condition=ready pod -l app=csi-azurelustre-node -n kube-system --timeout=300s + ``` + + The taint should be automatically removed once the CSI driver is fully operational. + +2. **Check Lustre Module Loading**: + + ```bash + # Verify Lustre modules are loaded on nodes + kubectl exec -n kube-system -c azurelustre -- lsmod | grep lustre + ``` + +3. 
**Verify CSI Driver Configuration**: + + ```bash + # Check if taint removal is enabled (default: true) + kubectl get deployment csi-azurelustre-node -n kube-system -o yaml | grep "remove-not-ready-taint" + ``` + +4. **Emergency Manual Taint Removal** (not recommended for production): + + ```bash + # Only use if CSI driver is confirmed working but taint persists + kubectl taint nodes azurelustre.csi.azure.com/agent-not-ready:NoSchedule- + ``` + +**Prevention:** + +- Ensure CSI driver has sufficient time to initialize during cluster updates +- Monitor CSI driver health during node scaling operations +- Use pod disruption budgets to prevent scheduling issues during maintenance + +--- + ## Volume Mounting Errors ### Node Mount Errors diff --git a/docs/install-csi-driver.md b/docs/install-csi-driver.md index e3acedc88..cb3c31ed0 100644 --- a/docs/install-csi-driver.md +++ b/docs/install-csi-driver.md @@ -39,54 +39,6 @@ This document explains how to install Azure Lustre CSI driver on a kubernetes cl csi-azurelustre-node-g6sfx 3/3 Running 0 30s ``` -### Verifying CSI Driver Readiness for Lustre Operations - -Before mounting Azure Lustre filesystems, it's important to verify that the CSI driver nodes are fully initialized and ready for Lustre operations. The driver includes **enhanced LNet validation** that performs comprehensive readiness checks: - -- Load required kernel modules (lnet, lustre) -- Configure LNet networking with valid Network Identifiers (NIDs) -- Verify LNet self-ping functionality -- Validate all network interfaces are operational -- Complete all initialization steps - -#### Readiness Validation - -The CSI driver deployment includes automated probes for container health monitoring: - -- **Liveness Probe**: `/healthz` (Port 29763) - HTTP endpoint for basic container health -- **Container Status**: Kubernetes readiness based on container startup and basic health checks - -#### Verification Steps - -1. **Check pod readiness status:** - ```shell - kubectl get -n kube-system pod -l app=csi-azurelustre-node -o wide - ``` - All node pods should show `READY` status as `3/3` and `STATUS` as `Running`. - -2. **Verify probe configuration:** - ```shell - kubectl describe -n kube-system pod -l app=csi-azurelustre-node - ``` - Look for exec-based readiness and startup probe configuration in the pod description: - - `Readiness: exec [/app/readinessProbe.sh]` - - `Startup: exec [/app/readinessProbe.sh]` - - In the Events section, you may see initial startup probe failures during LNet initialization: - - `Warning Unhealthy ... Startup probe failed: Node pod detected - performing Lustre-specific readiness checks` - - This is normal during the initialization phase. Once LNet is fully operational, the probes will succeed and no more failure events will appear. - -3. **Monitor validation logs:** - ```shell - kubectl logs -n kube-system -l app=csi-azurelustre-node -c azurelustre --tail=20 - ``` - Look for CSI driver startup and successful GRPC operation logs indicating driver initialization is complete. - -> **Note**: If you encounter readiness or initialization issues, see the [CSI Driver Troubleshooting Guide](csi-debug.md#enhanced-lnet-validation-troubleshooting) for detailed debugging steps. - -**Important**: The enhanced validation ensures the driver reports ready only when LNet is fully functional for Lustre operations. Wait for all CSI driver node pods to pass enhanced readiness checks before creating PersistentVolumes or mounting Lustre filesystems. 
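When probe status alone is not conclusive, the readiness check that the probes execute can also be run by hand. A sketch, reusing the example pod name from the listing above and the `/app/readinessProbe.sh` probe path; the `lctl` call assumes the Lustre client utilities are available inside the `azurelustre` container:

```sh
# Run the driver's own readiness script directly in one node pod (example pod name).
kubectl -n kube-system exec csi-azurelustre-node-g6sfx -c azurelustre -- /app/readinessProbe.sh

# Optionally list the local LNet NIDs on the same node to confirm LNet is configured.
kubectl -n kube-system exec csi-azurelustre-node-g6sfx -c azurelustre -- lctl list_nids
```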
- ## Default instructions for production release ### Install with kubectl (current production release) @@ -122,8 +74,7 @@ The CSI driver deployment includes automated probes for container health monitor csi-azurelustre-node-g6sfx 3/3 Running 0 30s ``` - -### Verifying CSI Driver Readiness for Lustre Operations +## Verifying CSI Driver Readiness for Lustre Operations Before mounting Azure Lustre filesystems, it is important to verify that the CSI driver nodes are fully initialized and ready for Lustre operations. The driver includes enhanced LNet validation that performs comprehensive readiness checks: @@ -133,7 +84,7 @@ Before mounting Azure Lustre filesystems, it is important to verify that the CSI - Validate all network interfaces are operational - Complete all initialization steps -#### Enhanced Readiness Validation +### Enhanced Readiness Validation The CSI driver deployment includes automated **exec-based readiness probes** for accurate readiness detection: @@ -143,24 +94,50 @@ The CSI driver deployment includes automated **exec-based readiness probes** for #### Verification Steps 1. **Check pod readiness status:** + ```shell kubectl get -n kube-system pod -l app=csi-azurelustre-node -o wide ``` + All node pods should show `READY` status as `3/3` and `STATUS` as `Running`. 2. **Verify probe configuration:** + ```shell kubectl describe -n kube-system pod -l app=csi-azurelustre-node ``` + Look for exec-based readiness and startup probe configuration and check that no recent probe failures appear in the Events section. 3. **Monitor validation logs:** + ```shell kubectl logs -n kube-system -l app=csi-azurelustre-node -c azurelustre --tail=20 ``` + Look for CSI driver startup and successful GRPC operation logs indicating driver initialization is complete. > **Note**: If you encounter readiness or initialization issues, see the [CSI Driver Troubleshooting Guide](csi-debug.md#enhanced-lnet-validation-troubleshooting) for detailed debugging steps. **Important**: The enhanced validation ensures the driver reports ready only when LNet is fully functional for Lustre operations. Wait for all CSI driver node pods to pass enhanced readiness checks before creating PersistentVolumes or mounting Lustre filesystems. +## Startup Taints + +When the CSI driver starts on each node, it automatically removes the following taint if present: + +- **Taint Key**: `azurelustre.csi.azure.com/agent-not-ready` +- **Taint Effect**: `NoSchedule` + +This ensures that: + +1. **Node Readiness**: Pods requiring Azure Lustre storage are only scheduled to nodes where the CSI driver is fully initialized +2. 
**Lustre Client Ready**: The node has successfully loaded Lustre kernel modules and networking components + +### Configuring Startup Taint Behavior + +The startup taint functionality is enabled by default but can be configured during installation: + +- **Default Behavior**: Startup taint removal is **enabled** by default +- **Disable Taint Removal**: To disable, set `--remove-not-ready-taint=false` in the driver deployment + +For most AKS users, the default behavior provides optimal pod scheduling and should not be changed diff --git a/go.mod b/go.mod index 9427f449c..1335d24af 100644 --- a/go.mod +++ b/go.mod @@ -19,7 +19,9 @@ require ( golang.org/x/net v0.44.0 google.golang.org/grpc v1.75.1 google.golang.org/protobuf v1.36.9 + k8s.io/api v0.31.13 k8s.io/apimachinery v0.31.13 + k8s.io/client-go v1.5.2 k8s.io/klog/v2 v2.130.1 k8s.io/kubernetes v1.31.13 k8s.io/mount-utils v0.31.6 @@ -122,10 +124,8 @@ require ( gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/api v0.31.13 // indirect k8s.io/apiextensions-apiserver v0.31.1 // indirect k8s.io/apiserver v0.31.13 // indirect - k8s.io/client-go v1.5.2 // indirect k8s.io/cloud-provider v0.31.1 // indirect k8s.io/component-base v0.31.13 // indirect k8s.io/component-helpers v0.31.13 // indirect diff --git a/pkg/azurelustre/azurelustre.go b/pkg/azurelustre/azurelustre.go index 3c69f1279..887612afd 100644 --- a/pkg/azurelustre/azurelustre.go +++ b/pkg/azurelustre/azurelustre.go @@ -18,15 +18,23 @@ package azurelustre import ( "context" + "encoding/json" "fmt" "os" "strings" "sync" + "time" "github.com/Azure/azure-sdk-for-go/sdk/azidentity" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v6" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storagecache/armstoragecache/v4" "github.com/container-storage-interface/spec/lib/go/csi" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + k8stypes "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" "k8s.io/klog/v2" mount "k8s.io/mount-utils" utilexec "k8s.io/utils/exec" @@ -45,8 +53,13 @@ const ( volumeIDTemplate = "%s#%s#%s#%s#%s#%s" subnetTemplate = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/virtualNetworks/%s/subnets/%s" + DefaultAzureConfigFileEnv = "AZURE_CONFIG_FILE" + DefaultConfigFilePathLinux = "/etc/kubernetes/azure.json" + amlFilesystemNameMaxLength = 80 + AgentNotReadyNodeTaintKeySuffix = "/agent-not-ready" + podNameKey = "csi.storage.k8s.io/pod.name" podNamespaceKey = "csi.storage.k8s.io/pod.namespace" podUIDKey = "csi.storage.k8s.io/pod.uid" @@ -103,6 +116,7 @@ type DriverOptions struct { EnableAzureLustreMockMount bool EnableAzureLustreMockDynProv bool WorkingMountDir string + RemoveNotReadyTaint bool } // LustreSkuValue describes the increment and maximum size of a given Lustre sku @@ -122,7 +136,7 @@ type Driver struct { enableAzureLustreMockMount bool // enableAzureLustreMockDynProv is only for testing, DO NOT set as true in non-testing scenario enableAzureLustreMockDynProv bool - mounter *mount.SafeFormatAndMount // TODO_JUSJIN: check any other alternatives + mounter *mount.SafeFormatAndMount forceMounter *mount.MounterForceUnmounter volLockMap *util.LockMap // Directory to temporarily mount to for subdirectory creation @@ -136,6 +150,13 @@ type Driver struct { resourceGroup string location string dynamicProvisioner DynamicProvisionerInterface + + removeNotReadyTaint bool 
+ kubeClient kubernetes.Interface + // taintRemovalInitialDelay is the initial delay for node taint removal + taintRemovalInitialDelay time.Duration + // taintRemovalBackoff is the exponential backoff configuration for node taint removal + taintRemovalBackoff wait.Backoff } // NewDriver Creates a NewCSIDriver object. Assumes vendor version is equal to driver version & @@ -147,6 +168,7 @@ func NewDriver(options *DriverOptions) *Driver { enableAzureLustreMockMount: options.EnableAzureLustreMockMount, enableAzureLustreMockDynProv: options.EnableAzureLustreMockDynProv, workingMountDir: options.WorkingMountDir, + removeNotReadyTaint: options.RemoveNotReadyTaint, } d.Name = options.DriverName d.Version = driverVersion @@ -158,15 +180,21 @@ func NewDriver(options *DriverOptions) *Driver { ctx := context.Background() - // Will need to change if we ever support non-AKS clusters - AKSConfigFile := "/etc/kubernetes/azure.json" - az := &azure.Cloud{} + + credFile, ok := os.LookupEnv(DefaultAzureConfigFileEnv) + if ok && strings.TrimSpace(credFile) != "" { + klog.V(2).Infof("%s env var set as %v", DefaultAzureConfigFileEnv, credFile) + } else { + credFile = DefaultConfigFilePathLinux + klog.V(2).Infof("use default %s env var: %v", DefaultAzureConfigFileEnv, credFile) + } + config, err := configloader.Load[azure.Config](ctx, nil, &configloader.FileLoaderConfig{ - FilePath: AKSConfigFile, + FilePath: credFile, }) if err != nil { - klog.V(2).Infof("failed to get cloud config from file %s: %v", AKSConfigFile, err) + klog.V(2).Infof("failed to get cloud config from file %s: %v", credFile, err) } if config == nil { @@ -191,7 +219,18 @@ func NewDriver(options *DriverOptions) *Driver { d.cloud = az d.resourceGroup = config.ResourceGroup d.location = config.Location - + // Get kubernetes client for taint removal functionality + kubeClient, err := getKubeClient() + if err != nil { + klog.Warningf("failed to get kubernetes client: %v", err) + } + d.kubeClient = kubeClient + d.taintRemovalInitialDelay = 1 * time.Second + d.taintRemovalBackoff = wait.Backoff{ + Duration: 500 * time.Millisecond, + Factor: 2, + Steps: 10, // Max delay = 0.5 * 2^9 = ~4 minutes + } cred, err := azidentity.NewDefaultAzureCredential(nil) if err != nil { klog.Warningf("failed to obtain a credential: %v", err) @@ -278,6 +317,14 @@ func (d *Driver) Run(endpoint string, testBool bool) { d.AddVolumeCapabilityAccessModes(volumeCapabilities) d.AddNodeServiceCapabilities(nodeServiceCapabilities) + // Remove taint from node to indicate driver startup success + // This is done at the last possible moment to prevent race conditions or false positive removals + if d.kubeClient != nil && d.removeNotReadyTaint && d.NodeID != "" { + time.AfterFunc(d.taintRemovalInitialDelay, func() { + removeTaintInBackground(d.kubeClient, d.NodeID, d.Name, d.taintRemovalBackoff, removeNotReadyTaint) + }) + } + s := csicommon.NewNonBlockingGRPCServer() // Driver d act as IdentityServer, ControllerServer and NodeServer s.Start(endpoint, d, d, d, testBool) @@ -317,3 +364,93 @@ func getLustreVolFromID(id string) (*lustreVolume, error) { return vol, nil } + +// getKubeClient creates a kubernetes client from the in-cluster config +func getKubeClient() (kubernetes.Interface, error) { + // Use in-cluster config since this driver is designed for AKS environments + config, err := rest.InClusterConfig() + if err != nil { + return nil, fmt.Errorf("failed to get in-cluster config: %w", err) + } + return kubernetes.NewForConfig(config) +} + +// JSONPatch represents a JSON patch 
operation +type JSONPatch struct { + OP string `json:"op"` + Path string `json:"path"` + Value interface{} `json:"value,omitempty"` +} + +// removeTaintInBackground removes the taint from the node in a goroutine with retry logic +// TODO: We could test this properly with synctest when we move to go 1.25 +func removeTaintInBackground(k8sClient kubernetes.Interface, nodeName, driverName string, backoff wait.Backoff, removalFunc func(kubernetes.Interface, string, string) error) { + klog.V(2).Infof("starting background node taint removal for node %s", nodeName) + go func() { + backoffErr := wait.ExponentialBackoff(backoff, func() (bool, error) { + err := removalFunc(k8sClient, nodeName, driverName) + if err != nil { + klog.ErrorS(err, "unexpected failure when attempting to remove node taint(s)") + return false, nil + } + return true, nil + }) + + if backoffErr != nil { + klog.ErrorS(backoffErr, "retries exhausted, giving up attempting to remove node taint(s)") + } + }() +} + +// removeNotReadyTaint removes the taint azurelustre.csi.azure.com/agent-not-ready from the local node +// This taint can be optionally applied by users to prevent startup race conditions such as +// https://github.com/kubernetes/kubernetes/issues/95911 +func removeNotReadyTaint(clientset kubernetes.Interface, nodeName, driverName string) error { + ctx := context.Background() + node, err := clientset.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) + if err != nil { + return err + } + + taintKeyToRemove := driverName + AgentNotReadyNodeTaintKeySuffix + klog.V(2).Infof("removing taint with key %s from local node %s", taintKeyToRemove, nodeName) + var taintsToKeep []corev1.Taint + for _, taint := range node.Spec.Taints { + klog.V(5).Infof("checking taint key %s, value %s, effect %s", taint.Key, taint.Value, taint.Effect) + if taint.Key != taintKeyToRemove { + taintsToKeep = append(taintsToKeep, taint) + } else { + klog.V(2).Infof("queued taint for removal with key %s, effect %s", taint.Key, taint.Effect) + } + } + + if len(taintsToKeep) == len(node.Spec.Taints) { + klog.V(2).Infof("no taints to remove on node, skipping taint removal") + return nil + } + + patchRemoveTaints := []JSONPatch{ + { + OP: "test", + Path: "/spec/taints", + Value: node.Spec.Taints, + }, + { + OP: "replace", + Path: "/spec/taints", + Value: taintsToKeep, + }, + } + + patch, err := json.Marshal(patchRemoveTaints) + if err != nil { + return err + } + + _, err = clientset.CoreV1().Nodes().Patch(ctx, nodeName, k8stypes.JSONPatchType, patch, metav1.PatchOptions{}) + if err != nil { + return err + } + klog.V(2).Infof("removed taint with key %s from local node %s successfully", taintKeyToRemove, nodeName) + return nil +} diff --git a/pkg/azurelustre/azurelustre_test.go b/pkg/azurelustre/azurelustre_test.go index 7f67f889b..332dc3962 100644 --- a/pkg/azurelustre/azurelustre_test.go +++ b/pkg/azurelustre/azurelustre_test.go @@ -27,8 +27,14 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" + corev1 "k8s.io/api/core/v1" + storagev1 "k8s.io/api/storage/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + kubefake "k8s.io/client-go/kubernetes/fake" azure "sigs.k8s.io/cloud-provider-azure/pkg/provider" ) @@ -125,14 +131,107 @@ func (f *FakeDynamicProvisioner) GetSkuValuesForLocation(_ context.Context, loca } func TestNewDriver(t *testing.T) { + fakeConfigFile := "fake-cred-file.json" + fakeConfigContent 
:= `{ + "tenantId": "fake-tenant-id", + "subscriptionId": "fake-subscription-id", + "aadClientId": "fake-client-id", + "aadClientSecret": "fake-client-secret", + "resourceGroup": "fake-resource-group", + "location": "fake-location", +}` + + if err := os.WriteFile(fakeConfigFile, []byte(fakeConfigContent), 0o600); err != nil { + t.Error(err) + } + + defer func() { + if err := os.Remove(fakeConfigFile); err != nil { + t.Error(err) + } + }() + + t.Setenv(DefaultAzureConfigFileEnv, fakeConfigFile) + driverOptions := DriverOptions{ NodeID: fakeNodeID, - DriverName: DefaultDriverName, + DriverName: fakeDriverName, + EnableAzureLustreMockMount: false, + EnableAzureLustreMockDynProv: true, + WorkingMountDir: "/tmp", + RemoveNotReadyTaint: true, + } + d := NewDriver(&driverOptions) + assert.NotNil(t, d) + assert.NotNil(t, d.cloud) + assert.NotNil(t, d.dynamicProvisioner) + assert.Equal(t, "fake-resource-group", d.resourceGroup) + assert.Equal(t, "fake-location", d.location) + assert.Equal(t, fakeNodeID, d.NodeID) + assert.Equal(t, fakeDriverName, d.Name) + assert.Equal(t, "fake-subscription-id", d.cloud.SubscriptionID) + assert.Equal(t, "fake-tenant-id", d.cloud.TenantID) + assert.Equal(t, "fake-client-id", d.cloud.AADClientID) + assert.Equal(t, "fake-client-secret", d.cloud.AADClientSecret) + assert.Equal(t, "fake-location", d.cloud.Location) + assert.Equal(t, "fake-resource-group", d.cloud.ResourceGroup) + assert.Equal(t, "/tmp", d.workingMountDir) + assert.True(t, d.enableAzureLustreMockDynProv, "enableAzureLustreMockDynProv should be true") + assert.False(t, d.enableAzureLustreMockMount, "enableAzureLustreMockMount should be false") + assert.True(t, d.removeNotReadyTaint, "removeNotReadyTaint should be true") +} + +func TestNewDriverInvalidConfigFileLocation(t *testing.T) { + fakeConfigFile := "fake-cred-file.json" + + if err := os.Remove(fakeConfigFile); err != nil && !os.IsNotExist(err) { + t.Error(err) + } + + t.Setenv(DefaultAzureConfigFileEnv, fakeConfigFile) + + driverOptions := DriverOptions{ + NodeID: fakeNodeID, + DriverName: fakeDriverName, EnableAzureLustreMockMount: false, EnableAzureLustreMockDynProv: true, + WorkingMountDir: "/tmp", + RemoveNotReadyTaint: true, } d := NewDriver(&driverOptions) assert.NotNil(t, d) + assert.Equal(t, &azure.Cloud{}, d.cloud) + assert.Equal(t, &DynamicProvisioner{}, d.dynamicProvisioner) +} + +func TestNewDriverInvalidConfigFileContents(t *testing.T) { + invalidConfigFile := "fake-cred-file.json" + invalidConfigContent := `;;;....invalid########` + + if err := os.WriteFile(invalidConfigFile, []byte(invalidConfigContent), 0o600); err != nil { + t.Error(err) + } + + defer func() { + if err := os.Remove(invalidConfigFile); err != nil { + t.Error(err) + } + }() + + t.Setenv(DefaultAzureConfigFileEnv, invalidConfigFile) + + driverOptions := DriverOptions{ + NodeID: fakeNodeID, + DriverName: fakeDriverName, + EnableAzureLustreMockMount: false, + EnableAzureLustreMockDynProv: true, + WorkingMountDir: "/tmp", + RemoveNotReadyTaint: true, + } + d := NewDriver(&driverOptions) + assert.NotNil(t, d) + assert.Equal(t, &azure.Cloud{}, d.cloud) + assert.Equal(t, &DynamicProvisioner{}, d.dynamicProvisioner) } func TestIsCorruptedDir(t *testing.T) { @@ -413,3 +512,151 @@ func TestPopulateSubnetPropertiesFromCloudConfig(t *testing.T) { t.Run(tc.name, tc.testFunc) } } + +func TestRemoveNotReadyTaint(t *testing.T) { + expectedNotReadyTaint := DefaultDriverName + AgentNotReadyNodeTaintKeySuffix + testCases := []struct { + name string + nodeName string + nodeExists bool + 
initialTaints []corev1.Taint + expectedError bool + expectedTaints []string + }{ + { + name: "Other taints are ignored", + nodeName: "test-node", + nodeExists: true, + initialTaints: []corev1.Taint{ + { + Key: "other-taint", + Value: "value", + Effect: corev1.TaintEffectNoSchedule, + }, + }, + expectedError: false, + expectedTaints: []string{"other-taint"}, + }, + { + name: "Removes agent-not-ready taint", + nodeName: "test-node", + nodeExists: true, + initialTaints: []corev1.Taint{ + { + Key: expectedNotReadyTaint, + Value: "NotReady", + Effect: corev1.TaintEffectNoSchedule, + }, + }, + expectedError: false, + expectedTaints: []string{}, + }, + { + name: "Leaves other taints when removing agent-not-ready taint", + nodeName: "test-node", + nodeExists: true, + initialTaints: []corev1.Taint{ + { + Key: expectedNotReadyTaint, + Value: "NotReady", + Effect: corev1.TaintEffectNoSchedule, + }, + { + Key: "other-taint", + Value: "value", + Effect: corev1.TaintEffectNoSchedule, + }, + }, + expectedError: false, + expectedTaints: []string{"other-taint"}, + }, + { + name: "Handles node with no taints", + nodeName: "test-node", + nodeExists: true, + initialTaints: []corev1.Taint{}, + expectedError: false, + expectedTaints: []string{}, + }, + { + name: "Handles node that doesn't exist", + nodeName: "nonexistent-node", + nodeExists: false, + initialTaints: []corev1.Taint{}, + expectedError: true, + expectedTaints: []string{}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ctx := context.Background() + + // Create fake kubernetes client + fakeClient := kubefake.NewSimpleClientset() + + // Create node if it should exist + if tc.nodeExists { + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: tc.nodeName, + }, + Spec: corev1.NodeSpec{ + Taints: tc.initialTaints, + }, + Status: corev1.NodeStatus{ + Allocatable: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + }, + } + _, err := fakeClient.CoreV1().Nodes().Create(ctx, node, metav1.CreateOptions{}) + require.NoError(t, err) + + // Create CSINode for taint removal function + csiNode := &storagev1.CSINode{ + ObjectMeta: metav1.ObjectMeta{ + Name: tc.nodeName, + }, + Spec: storagev1.CSINodeSpec{ + Drivers: []storagev1.CSINodeDriver{ + { + Name: DefaultDriverName, + }, + }, + }, + } + _, err = fakeClient.StorageV1().CSINodes().Create(ctx, csiNode, metav1.CreateOptions{}) + require.NoError(t, err) + } + + // Create driver with fake client + d := NewFakeDriver() + d.NodeID = tc.nodeName + d.kubeClient = fakeClient + d.removeNotReadyTaint = true + + // Test removeNotReadyTaint function + err := removeNotReadyTaint(fakeClient, tc.nodeName, DefaultDriverName) + + if tc.expectedError { + assert.Error(t, err) + } else { + require.NoError(t, err) + + // Verify taint was removed if it existed + if tc.nodeExists { + node, err := fakeClient.CoreV1().Nodes().Get(ctx, tc.nodeName, metav1.GetOptions{}) + require.NoError(t, err) + + actualTaints := make([]string, len(node.Spec.Taints)) + for i, taint := range node.Spec.Taints { + actualTaints[i] = taint.Key + } + assert.Equal(t, tc.expectedTaints, actualTaints) + } + } + }) + } +} diff --git a/pkg/azurelustreplugin/main.go b/pkg/azurelustreplugin/main.go index 0808dfcf4..f9f284f91 100644 --- a/pkg/azurelustreplugin/main.go +++ b/pkg/azurelustreplugin/main.go @@ -33,6 +33,7 @@ var ( enableAzureLustreMockMount = flag.Bool("enable-azurelustre-mock-mount", false, "Whether enable mock mount(only for 
testing)") enableAzureLustreMockDynProv = flag.Bool("enable-azurelustre-mock-dyn-prov", true, "Whether enable mock dynamic provisioning(only for testing)") workingMountDir = flag.String("working-mount-dir", "/tmp", "working directory for provisioner to mount lustre filesystems temporarily") + removeNotReadyTaint = flag.Bool("remove-not-ready-taint", true, "remove NotReady taint from node when node is ready") ) func main() { @@ -63,6 +64,7 @@ func handle() { EnableAzureLustreMockMount: *enableAzureLustreMockMount, EnableAzureLustreMockDynProv: *enableAzureLustreMockDynProv, WorkingMountDir: *workingMountDir, + RemoveNotReadyTaint: *removeNotReadyTaint, } driver := azurelustre.NewDriver(&driverOptions) if driver == nil {