diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c75e74d..9ba57855 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ ## unreleased +## v4.0.2-beta.1 - 2026.06.18 + +Fixes a regression from v4.0.1 where restaging a non-LUKS volume could fail because the staging mount check compared +path strings that never match (/dev/sdX vs /dev/disk/by-id/...). This caused NodeStageVolume to reject the existing +mount. The check now compares kernel device numbers instead of path strings, so the same underlying device is +recognized regardless of which path was used to mount it. + ## v4.0.1 - 2026.06.04 Fixes a LUKS volume handling bug that could leave a volume stuck attached to a node after pod termination, surfacing later as a `Multi-Attach error` diff --git a/Makefile b/Makefile index 90c35b8e..6b7ec907 100644 --- a/Makefile +++ b/Makefile @@ -65,7 +65,7 @@ test: .PHONY: test-integration test-integration: @echo "==> Started integration tests" - @env GO111MODULE=on go test -race -count 1 -v $(TESTARGS) -tags integration -timeout 20m ./test/... + @env GO111MODULE=on go test -race -count 1 -v $(TESTARGS) -tags integration -parallel 4 -timeout 20m ./test/... .PHONY: build build: compile diff --git a/README.md b/README.md index e3a008af..6aa8fed5 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ secret `my-pvc-luks-key`. ## Releases The cloudscale.ch CSI plugin follows [semantic versioning](https://semver.org/). -The current version is: **`v4.0.1`**. +The current version is: **`v4.0.2-beta.1`**. * Bug fixes will be released as a `PATCH` update. * New features (such as CSI spec bumps) will be released as a `MINOR` update. 
@@ -92,15 +92,15 @@ We recommend using the latest cloudscale.ch CSI driver compatible with your Kube | 1.25 | v3.3.0 | v3.5.6 | | 1.26 | v3.3.0 | v3.5.6 | | 1.27 | v3.3.0 | v3.5.6 | -| 1.28 | v3.3.0 | v4.0.1 | -| 1.29 | v3.3.0 | v4.0.1 | -| 1.30 | v3.3.0 | v4.0.1 | -| 1.31 | v3.3.0 | v4.0.1 | -| 1.32 | v3.3.0 | v4.0.1 | -| 1.33 | v3.3.0 | v4.0.1 | -| 1.34 [1] | v3.3.0 | v4.0.1 | -| 1.35 | v3.4.1 | v4.0.1 | -| 1.36 | v3.4.1 | v4.0.1 | +| 1.28 | v3.3.0 | v4.0.2-beta.1 | +| 1.29 | v3.3.0 | v4.0.2-beta.1 | +| 1.30 | v3.3.0 | v4.0.2-beta.1 | +| 1.31 | v3.3.0 | v4.0.2-beta.1 | +| 1.32 | v3.3.0 | v4.0.2-beta.1 | +| 1.33 | v3.3.0 | v4.0.2-beta.1 | +| 1.34 [1] | v3.3.0 | v4.0.2-beta.1 | +| 1.35 | v3.4.1 | v4.0.2-beta.1 | +| 1.36 | v3.4.1 | v4.0.2-beta.1 | [1] Prometheus `kubelet_volume_stats_*` metrics not available in 1.34.0 and 1.34.1 due to a [bug in Kubelet](https://github.com/kubernetes/kubernetes/issues/133847). Fixed in `1.34.2`. @@ -216,10 +216,10 @@ $ helm install -g -n kube-system --set controller.image.tag=dev --set node.image Before you continue, be sure to checkout to a [tagged release](https://github.com/cloudscale-ch/csi-cloudscale/releases). Always use the [latest stable version](https://github.com/cloudscale-ch/csi-cloudscale/releases/latest) -For example, to use the latest stable version (`v4.0.1`) you can execute the following command: +For example, to use the current pre-release version (`v4.0.2-beta.1`) you can execute the following command: ``` -$ kubectl apply -f https://raw.githubusercontent.com/cloudscale-ch/csi-cloudscale/master/deploy/kubernetes/releases/csi-cloudscale-v4.0.1.yaml +$ kubectl apply -f https://raw.githubusercontent.com/cloudscale-ch/csi-cloudscale/master/deploy/kubernetes/releases/csi-cloudscale-v4.0.2-beta.1.yaml ``` The storage classes `cloudscale-volume-ssd` and `cloudscale-volume-bulk` will be created.
The @@ -446,15 +446,15 @@ $ git push origin After it's merged to master, [create a new Github release](https://github.com/cloudscale-ch/csi-cloudscale/releases/new) from -master with the version `v4.0.1` and then publish a new docker build: +master with the version `v4.0.2-beta.1` and then publish a new docker build: ``` $ git checkout master $ make publish ``` -This will create a binary with version `v4.0.1` and docker image pushed to -`cloudscalech/cloudscale-csi-plugin:v4.0.1` +This will create a binary with version `v4.0.2-beta.1` and docker image pushed to +`cloudscalech/cloudscale-csi-plugin:v4.0.2-beta.1` ### Release a pre-release version diff --git a/VERSION b/VERSION index 82f24fdf..502c3c9f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -v4.0.1 +v4.0.2-beta.1 diff --git a/charts/csi-cloudscale/Chart.yaml b/charts/csi-cloudscale/Chart.yaml index 52e4b2e1..b515a246 100644 --- a/charts/csi-cloudscale/Chart.yaml +++ b/charts/csi-cloudscale/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: csi-cloudscale description: A Container Storage Interface Driver for cloudscale.ch volumes. 
type: application -version: 1.5.1 -appVersion: "4.0.1" +version: 1.5.2-beta.1 +appVersion: "4.0.2-beta.1" home: https://github.com/cloudscale-ch/csi-cloudscale sources: - https://github.com/cloudscale-ch/csi-cloudscale.git diff --git a/charts/csi-cloudscale/values.yaml b/charts/csi-cloudscale/values.yaml index 727180f5..655e7621 100644 --- a/charts/csi-cloudscale/values.yaml +++ b/charts/csi-cloudscale/values.yaml @@ -107,7 +107,7 @@ controller: image: registry: quay.io repository: cloudscalech/cloudscale-csi-plugin - tag: v4.0.1 + tag: v4.0.2-beta.1 pullPolicy: IfNotPresent serviceAccountName: logLevel: info @@ -123,7 +123,7 @@ node: image: registry: quay.io repository: cloudscalech/cloudscale-csi-plugin - tag: v4.0.1 + tag: v4.0.2-beta.1 pullPolicy: IfNotPresent nodeSelector: {} tolerations: [] diff --git a/deploy/kubernetes/releases/csi-cloudscale-v4.0.2-beta.1.yaml b/deploy/kubernetes/releases/csi-cloudscale-v4.0.2-beta.1.yaml new file mode 100644 index 00000000..539aff29 --- /dev/null +++ b/deploy/kubernetes/releases/csi-cloudscale-v4.0.2-beta.1.yaml @@ -0,0 +1,484 @@ +--- +# Source: csi-cloudscale/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: csi-cloudscale-controller-sa + namespace: kube-system +--- +# Source: csi-cloudscale/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: csi-cloudscale-node-sa + namespace: kube-system + +--- +# Source: csi-cloudscale/templates/storageclass.yaml +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: cloudscale-volume-ssd + namespace: kube-system + annotations: + storageclass.kubernetes.io/is-default-class: "true" +provisioner: csi.cloudscale.ch +allowVolumeExpansion: true +reclaimPolicy: Delete +volumeBindingMode: Immediate +parameters: + csi.cloudscale.ch/volume-type: ssd +--- +# Source: csi-cloudscale/templates/storageclass.yaml +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: cloudscale-volume-ssd-luks + 
namespace: kube-system +provisioner: csi.cloudscale.ch +allowVolumeExpansion: true +reclaimPolicy: Delete +volumeBindingMode: Immediate +parameters: + csi.cloudscale.ch/volume-type: ssd + csi.cloudscale.ch/luks-encrypted: "true" + csi.cloudscale.ch/luks-cipher: "aes-xts-plain64" + csi.cloudscale.ch/luks-key-size: "512" + csi.storage.k8s.io/node-stage-secret-namespace: ${pvc.namespace} + csi.storage.k8s.io/node-stage-secret-name: ${pvc.name}-luks-key +--- +# Source: csi-cloudscale/templates/storageclass.yaml +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: cloudscale-volume-bulk + namespace: kube-system +provisioner: csi.cloudscale.ch +allowVolumeExpansion: true +reclaimPolicy: Delete +volumeBindingMode: Immediate +parameters: + csi.cloudscale.ch/volume-type: bulk +--- +# Source: csi-cloudscale/templates/storageclass.yaml +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: cloudscale-volume-bulk-luks + namespace: kube-system +provisioner: csi.cloudscale.ch +allowVolumeExpansion: true +reclaimPolicy: Delete +volumeBindingMode: Immediate +parameters: + csi.cloudscale.ch/volume-type: bulk + csi.cloudscale.ch/luks-encrypted: "true" + csi.cloudscale.ch/luks-cipher: "aes-xts-plain64" + csi.cloudscale.ch/luks-key-size: "512" + csi.storage.k8s.io/node-stage-secret-namespace: ${pvc.namespace} + csi.storage.k8s.io/node-stage-secret-name: ${pvc.name}-luks-key + +--- +# Source: csi-cloudscale/templates/rbac.yaml +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-provisioner-role +rules: + - apiGroups: [""] + resources: ["persistentvolumes"] + verbs: ["get", "list", "watch", "create", "patch", "delete"] + - apiGroups: [""] + resources: ["persistentvolumeclaims"] + verbs: ["get", "list", "watch", "update"] + - apiGroups: ["storage.k8s.io"] + resources: ["storageclasses"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["events"] + verbs: ["list", "watch", "create", "update", 
"patch"] + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshots"] + verbs: ["get", "list", "watch", "update"] + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshotcontents"] + verbs: ["get", "list"] + - apiGroups: [ "coordination.k8s.io" ] + resources: [ "leases" ] + verbs: [ "get", "list", "watch", "create", "update", "patch", "delete" ] + - apiGroups: [ "storage.k8s.io" ] + resources: [ "csinodes" ] + verbs: [ "get", "list", "watch" ] + - apiGroups: [ "" ] + resources: [ "nodes" ] + verbs: [ "get", "list", "watch" ] + - apiGroups: ["storage.k8s.io"] + resources: ["volumeattachments"] + verbs: ["get", "list", "watch"] +--- +# Source: csi-cloudscale/templates/rbac.yaml +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-attacher-role +rules: + - apiGroups: [""] + resources: ["persistentvolumes"] + verbs: ["get", "list", "watch", "update", "patch"] + - apiGroups: ["storage.k8s.io"] + resources: ["csinodes"] + verbs: ["get", "list", "watch"] + - apiGroups: ["storage.k8s.io"] + resources: ["volumeattachments"] + verbs: ["get", "list", "watch", "update", "patch"] + - apiGroups: ["storage.k8s.io"] + resources: ["volumeattachments/status"] + verbs: ["patch"] +--- +# Source: csi-cloudscale/templates/rbac.yaml +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-snapshotter-role +rules: + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshots"] + verbs: [ "get", "list", "watch", "update" ] + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshotcontents"] + verbs: [ "get", "list", "watch", "update", "patch" ] + - apiGroups: [ "snapshot.storage.k8s.io" ] + resources: [ "volumesnapshotcontents/status" ] + verbs: [ "update", "patch" ] + - apiGroups: [ "snapshot.storage.k8s.io" ] + resources: [ "volumesnapshotclasses" ] + verbs: [ "get", "list", "watch" ] + - apiGroups: [""] + resources: ["events"] + verbs: ["list", 
"watch", "create", "update", "patch"] +--- +# Source: csi-cloudscale/templates/rbac.yaml +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-resizer-role +rules: + - apiGroups: [""] + resources: ["persistentvolumes"] + verbs: ["get", "list", "watch", "update", "patch"] + - apiGroups: [""] + resources: ["persistentvolumeclaims"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["persistentvolumeclaims/status"] + verbs: ["update", "patch"] + - apiGroups: [""] + resources: ["events"] + verbs: ["list", "watch", "create", "update", "patch"] + - apiGroups: ["storage.k8s.io"] + resources: ["volumeattributesclasses"] + verbs: ["get", "list", "watch"] +--- +# Source: csi-cloudscale/templates/rbac.yaml +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-node-driver-registrar-role + namespace: kube-system +rules: + - apiGroups: [""] + resources: ["events"] + verbs: ["get", "list", "watch", "create", "update", "patch"] +--- +# Source: csi-cloudscale/templates/rbac.yaml +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-provisioner-binding +subjects: + - kind: ServiceAccount + name: csi-cloudscale-controller-sa + namespace: kube-system +roleRef: + kind: ClusterRole + name: csi-cloudscale-provisioner-role + apiGroup: rbac.authorization.k8s.io +--- +# Source: csi-cloudscale/templates/rbac.yaml +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-snapshotter-binding +subjects: + - kind: ServiceAccount + name: csi-cloudscale-controller-sa + namespace: kube-system +roleRef: + kind: ClusterRole + name: csi-cloudscale-snapshotter-role + apiGroup: rbac.authorization.k8s.io +--- +# Source: csi-cloudscale/templates/rbac.yaml +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: 
csi-cloudscale-resizer-binding +subjects: + - kind: ServiceAccount + name: csi-cloudscale-controller-sa + namespace: kube-system +roleRef: + kind: ClusterRole + name: csi-cloudscale-resizer-role + apiGroup: rbac.authorization.k8s.io +--- +# Source: csi-cloudscale/templates/rbac.yaml +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-attacher-binding +subjects: + - kind: ServiceAccount + name: csi-cloudscale-controller-sa + namespace: kube-system +roleRef: + kind: ClusterRole + name: csi-cloudscale-attacher-role + apiGroup: rbac.authorization.k8s.io +--- +# Source: csi-cloudscale/templates/rbac.yaml +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-node-driver-registrar-binding +subjects: + - kind: ServiceAccount + name: csi-cloudscale-node-sa + namespace: kube-system +roleRef: + kind: ClusterRole + name: csi-cloudscale-node-driver-registrar-role + apiGroup: rbac.authorization.k8s.io + +--- +# Source: csi-cloudscale/templates/daemonset.yaml +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: csi-cloudscale-node + namespace: kube-system +spec: + selector: + matchLabels: + app: csi-cloudscale-node + template: + metadata: + labels: + app: csi-cloudscale-node + role: csi-cloudscale + spec: + priorityClassName: system-node-critical + serviceAccountName: csi-cloudscale-node-sa + hostNetwork: true + containers: + - name: csi-node-driver-registrar + image: "registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.15.0" + imagePullPolicy: IfNotPresent + args: + - "--v=5" + - "--csi-address=$(ADDRESS)" + - "--kubelet-registration-path=$(DRIVER_REG_SOCK_PATH)" + lifecycle: + preStop: + exec: + command: ["/bin/sh", "-c", "rm -rf /registration/csi.cloudscale.ch /registration/csi.cloudscale.ch-reg.sock"] + env: + - name: ADDRESS + value: /csi/csi.sock + - name: DRIVER_REG_SOCK_PATH + value: /var/lib/kubelet/plugins/csi.cloudscale.ch/csi.sock + - name: KUBE_NODE_NAME + 
valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - name: plugin-dir + mountPath: /csi/ + - name: registration-dir + mountPath: /registration/ + - name: csi-cloudscale-plugin + image: "quay.io/cloudscalech/cloudscale-csi-plugin:v4.0.2-beta.1" + imagePullPolicy: IfNotPresent + args : + - "--endpoint=$(CSI_ENDPOINT)" + - "--url=$(CLOUDSCALE_API_URL)" + - "--log-level=info" + env: + - name: CSI_ENDPOINT + value: unix:///csi/csi.sock + - name: CLOUDSCALE_API_URL + value: https://api.cloudscale.ch/ + - name: CLOUDSCALE_MAX_CSI_VOLUMES_PER_NODE + value: "125" + - name: CLOUDSCALE_ACCESS_TOKEN + valueFrom: + secretKeyRef: + name: cloudscale + key: access-token + securityContext: + privileged: true + capabilities: + add: ["SYS_ADMIN"] + allowPrivilegeEscalation: true + volumeMounts: + - name: plugin-dir + mountPath: /csi + - name: pods-mount-dir + mountPath: /var/lib/kubelet + # needed so that any mounts setup inside this container are + # propagated back to the host machine. + mountPropagation: "Bidirectional" + - name: device-dir + mountPath: /dev + - name: tmpfs + mountPath: /tmp + volumes: + - name: registration-dir + hostPath: + path: /var/lib/kubelet/plugins_registry/ + type: DirectoryOrCreate + - name: plugin-dir + hostPath: + path: /var/lib/kubelet/plugins/csi.cloudscale.ch + type: DirectoryOrCreate + - name: pods-mount-dir + hostPath: + path: /var/lib/kubelet + type: Directory + - name: device-dir + hostPath: + path: /dev + # to make sure temporary stored luks keys never touch a disk + - name: tmpfs + emptyDir: + medium: Memory + +--- +# Source: csi-cloudscale/templates/statefulset.yaml +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: csi-cloudscale-controller + namespace: kube-system +spec: + serviceName: "csi-cloudscale" + selector: + matchLabels: + app: csi-cloudscale-controller + replicas: 1 + template: + metadata: + labels: + app: csi-cloudscale-controller + role: csi-cloudscale + spec: + hostNetwork: true + priorityClassName: 
system-cluster-critical + serviceAccount: csi-cloudscale-controller-sa + containers: + - name: csi-provisioner + image: "registry.k8s.io/sig-storage/csi-provisioner:v5.3.0" + imagePullPolicy: IfNotPresent + args: + - "--csi-address=$(ADDRESS)" + - "--default-fstype=ext4" + - "--v=5" + - "--feature-gates=Topology=false" + env: + - name: ADDRESS + value: /var/lib/csi/sockets/pluginproxy/csi.sock + volumeMounts: + - name: socket-dir + mountPath: /var/lib/csi/sockets/pluginproxy/ + - name: csi-attacher + image: "registry.k8s.io/sig-storage/csi-attacher:v4.10.0" + imagePullPolicy: IfNotPresent + args: + - "--csi-address=$(ADDRESS)" + - "--v=5" + env: + - name: ADDRESS + value: /var/lib/csi/sockets/pluginproxy/csi.sock + volumeMounts: + - name: socket-dir + mountPath: /var/lib/csi/sockets/pluginproxy/ + - name: csi-resizer + image: "registry.k8s.io/sig-storage/csi-resizer:v2.0.0" + args: + - "--csi-address=$(ADDRESS)" + - "--timeout=30s" + - "--v=5" + - "--handle-volume-inuse-error=false" + env: + - name: ADDRESS + value: /var/lib/csi/sockets/pluginproxy/csi.sock + imagePullPolicy: IfNotPresent + volumeMounts: + - name: socket-dir + mountPath: /var/lib/csi/sockets/pluginproxy/ + - name: csi-snapshotter + image: "registry.k8s.io/sig-storage/csi-snapshotter:v8.4.0" + args: + - "--csi-address=$(CSI_ENDPOINT)" + - "--v=5" + env: + - name: CSI_ENDPOINT + value: unix:///var/lib/csi/sockets/pluginproxy/csi.sock + volumeMounts: + - name: socket-dir + mountPath: /var/lib/csi/sockets/pluginproxy/ + - name: csi-cloudscale-plugin + image: "quay.io/cloudscalech/cloudscale-csi-plugin:v4.0.2-beta.1" + args : + - "--endpoint=$(CSI_ENDPOINT)" + - "--url=$(CLOUDSCALE_API_URL)" + - "--log-level=info" + env: + - name: CSI_ENDPOINT + value: unix:///var/lib/csi/sockets/pluginproxy/csi.sock + - name: CLOUDSCALE_API_URL + value: https://api.cloudscale.ch/ + - name: CLOUDSCALE_ACCESS_TOKEN + valueFrom: + secretKeyRef: + name: cloudscale + key: access-token + imagePullPolicy: IfNotPresent + 
volumeMounts: + - name: socket-dir + mountPath: /var/lib/csi/sockets/pluginproxy/ + volumes: + - name: socket-dir + emptyDir: {} + +--- +# Source: csi-cloudscale/templates/csi_driver.yaml +apiVersion: storage.k8s.io/v1 +kind: CSIDriver +metadata: + name: csi.cloudscale.ch +spec: + attachRequired: true + podInfoOnMount: true + +--- +# Source: csi-cloudscale/templates/volumesnapshotclass.yaml +apiVersion: snapshot.storage.k8s.io/v1 +kind: VolumeSnapshotClass +metadata: + name: cloudscale-snapshots +driver: csi.cloudscale.ch +deletionPolicy: Delete diff --git a/driver/driver_test.go b/driver/driver_test.go index 6c8e0b9e..201b0d17 100644 --- a/driver/driver_test.go +++ b/driver/driver_test.go @@ -37,6 +37,7 @@ import ( "github.com/kubernetes-csi/csi-test/v5/pkg/sanity" "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" + "golang.org/x/sys/unix" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "k8s.io/mount-utils" @@ -57,23 +58,9 @@ func TestDriverSuite(t *testing.T) { t.Fatalf("failed to remove unix domain socket file %s, error: %s", socket, err) } - serverId := "987654" - initialServers := map[string]*cloudscale.Server{ - serverId: {UUID: serverId}, - } - cloudscaleClient := NewFakeClient(initialServers) - fm := &fakeMounter{ - mounted: map[string]string{}, - } - driver := &Driver{ - endpoint: endpoint, - serverId: serverId, - zone: DefaultZone.Slug, - cloudscaleClient: cloudscaleClient, - mounter: fm, - log: logrus.New().WithField("test_enabed", true), - volumeLocks: NewVolumeLocks(), - } + fm := newFakeMounter() + driver := newTestDriver(t, fm) + driver.endpoint = endpoint defer driver.Stop() go func() { @@ -122,7 +109,17 @@ func NewFakeClient(initialServers map[string]*cloudscale.Server) *cloudscale.Cli type fakeMounter struct { mounted map[string]string - mu sync.RWMutex + // blockDeviceNumbers maps a path the production code stats as a block + // device to its st_rdev value (the device the file represents). 
+ blockDeviceNumbers map[string]uint64 + // filesystemDeviceNumbers maps a path the production code stats for its + // backing fs to its st_dev value (the device backing the mount). + filesystemDeviceNumbers map[string]uint64 + // nextDevNum is used by Mount to allocate a fresh synthetic device number + // for each unique source, mirroring the kernel invariant that a mount's + // st_dev equals the source's st_rdev. + nextDevNum uint64 + mu sync.RWMutex } func (f *fakeMounter) Format(source, fsType string, luksContext LuksContext, log *logrus.Entry) error { @@ -133,6 +130,20 @@ func (f *fakeMounter) Mount(source, target, fsType string, luksContext LuksConte f.mu.Lock() defer f.mu.Unlock() f.mounted[target] = source + + // LUKS mounts go through /dev/mapper/, not the raw source. + expectedDevice := source + if luksContext.EncryptionEnabled { + expectedDevice = "/dev/mapper/" + luksContext.VolumeName + } + + devNum, ok := f.blockDeviceNumbers[expectedDevice] + if !ok { + f.nextDevNum++ + devNum = f.nextDevNum + f.blockDeviceNumbers[expectedDevice] = devNum + } + f.filesystemDeviceNumbers[target] = devNum return nil } @@ -140,6 +151,7 @@ func (f *fakeMounter) Unmount(target string, luksContext LuksContext, log *logru f.mu.Lock() defer f.mu.Unlock() delete(f.mounted, target) + delete(f.filesystemDeviceNumbers, target) return nil } @@ -160,6 +172,33 @@ func (f *fakeMounter) FindAbsoluteDeviceByIDPath(volumeName string, log *logrus. 
func (f *fakeMounter) IsFormatted(source string, luksContext LuksContext, log *logrus.Entry) (bool, error) { return true, nil } + +func (f *fakeMounter) IsBlockDevice(volumePath string) (bool, error) { + f.mu.RLock() + defer f.mu.RUnlock() + _, ok := f.blockDeviceNumbers[volumePath] + return ok, nil +} + +func (f *fakeMounter) GetBlockDeviceNumber(path string) (uint64, error) { + f.mu.RLock() + defer f.mu.RUnlock() + n, ok := f.blockDeviceNumbers[path] + if !ok { + return 0, fmt.Errorf("fakeMounter: %s not registered as a block device", path) + } + return n, nil +} + +func (f *fakeMounter) GetFilesystemDeviceNumber(path string) (uint64, error) { + f.mu.RLock() + defer f.mu.RUnlock() + n, ok := f.filesystemDeviceNumbers[path] + if !ok { + return 0, fmt.Errorf("fakeMounter: %s not registered with a filesystem device number", path) + } + return n, nil +} func (f *fakeMounter) GetMountInfo(target string, log *logrus.Entry) (*MountInfo, error) { f.mu.RLock() defer f.mu.RUnlock() @@ -371,10 +410,6 @@ type FakeServerServiceOperations struct { servers map[string]*cloudscale.Server } -func (f *fakeMounter) IsBlockDevice(volumePath string) (bool, error) { - return false, nil -} - func (f *FakeServerServiceOperations) Create(ctx context.Context, createRequest *cloudscale.ServerRequest) (*cloudscale.Server, error) { panic("implement me") } @@ -577,30 +612,15 @@ func (f *FakeBlockingMounter) FinalizeVolumeAttachmentAndFindPath(logger *logrus // NewFakeBlockingMounter creates a new FakeBlockingMounter with the given channel. func NewFakeBlockingMounter(readyToExecute chan chan struct{}) *FakeBlockingMounter { return &FakeBlockingMounter{ - fakeMounter: &fakeMounter{ - mounted: map[string]string{}, - }, + fakeMounter: newFakeMounter(), ReadyToExecute: readyToExecute, } } // initBlockingDriver creates a Driver with a FakeBlockingMounter for concurrency testing. 
func initBlockingDriver(t *testing.T, readyToExecute chan chan struct{}) *Driver { - serverId := "987654" - initialServers := map[string]*cloudscale.Server{ - serverId: {UUID: serverId}, - } - cloudscaleClient := NewFakeClient(initialServers) - - return &Driver{ - endpoint: "unix:///tmp/csi-test.sock", - serverId: serverId, - zone: DefaultZone.Slug, - cloudscaleClient: cloudscaleClient, - mounter: NewFakeBlockingMounter(readyToExecute), - log: logrus.New().WithField("test_enabled", true), - volumeLocks: NewVolumeLocks(), - } + t.Helper() + return newTestDriver(t, NewFakeBlockingMounter(readyToExecute)) } // TestNodeStageVolume_ConcurrentSameVolume tests that concurrent NodeStageVolume @@ -856,6 +876,145 @@ func TestNodeOperations_CrossOperationLocking(t *testing.T) { <-respStage } +// newFakeMounter returns a zero-state fakeMounter. +func newFakeMounter() *fakeMounter { + return &fakeMounter{ + mounted: map[string]string{}, + blockDeviceNumbers: map[string]uint64{}, + filesystemDeviceNumbers: map[string]uint64{}, + } +} + +// newTestDriver builds a Driver wired to the given mounter, a fake cloudscale +// client containing a single server, and a dummy unix endpoint. +func newTestDriver(t *testing.T, m Mounter) *Driver { + t.Helper() + const serverId = "987654" + return &Driver{ + endpoint: "unix:///tmp/csi-test.sock", + serverId: serverId, + zone: DefaultZone.Slug, + cloudscaleClient: NewFakeClient(map[string]*cloudscale.Server{ + serverId: {UUID: serverId}, + }), + mounter: m, + log: logrus.New().WithField("test_enabled", true), + volumeLocks: NewVolumeLocks(), + } +} + +// initDriverWithFakeMounter builds a Driver with a fakeMounter for NodeStageVolume tests. 
+func initDriverWithFakeMounter(t *testing.T) (*Driver, *fakeMounter) { + t.Helper() + fm := newFakeMounter() + return newTestDriver(t, fm), fm +} + +func makeStageReq(volumeID, stagingPath, volumeName string, luks bool) *csi.NodeStageVolumeRequest { + pubCtx := map[string]string{ + PublishInfoVolumeName: volumeName, + LuksEncryptedAttribute: "false", + } + if luks { + pubCtx[LuksEncryptedAttribute] = "true" + pubCtx[LuksCipherAttribute] = "aes-xts-plain64" + pubCtx[LuksKeySizeAttribute] = "512" + } + return &csi.NodeStageVolumeRequest{ + VolumeId: volumeID, + StagingTargetPath: stagingPath, + VolumeCapability: &csi.VolumeCapability{ + AccessMode: &csi.VolumeCapability_AccessMode{ + Mode: csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER, + }, + AccessType: &csi.VolumeCapability_Mount{ + Mount: &csi.VolumeCapability_MountVolume{FsType: "ext4"}, + }, + }, + Secrets: map[string]string{LuksKeyAttribute: "test-luks-key"}, + PublishContext: pubCtx, + } +} + +// TestNodeStageVolume_Restage_NonLUKS_ByIdEquivalent guards against the v4.0.1 regression: an already-staged volume +// gets a second NodeStageVolume call (e.g. after kubelet restart). The source path recorded for the existing mount +// (a /dev/disk/by-id/... path) differs from the path the driver resolves for the volume, but both refer to the same +// major:minor device number, so NodeStageVolume must succeed. +func TestNodeStageVolume_Restage_NonLUKS_ByIdEquivalent(t *testing.T) { + driver, fm := initDriverWithFakeMounter(t) + vol, err := driver.cloudscaleClient.Volumes.Create(t.Context(), &cloudscale.VolumeCreateRequest{ + Name: "vol", SizeGB: 1, Type: "ssd", + }) + if err != nil { + t.Fatalf("create volume: %v", err) + } + stagingPath := "/mnt/staging-restage-nonluks-ok" + // Simulate a prior successful stage: mount table records the by-id path + // the driver passed to mount(2); the source the production code will + // resolve via FinalizeVolumeAttachmentAndFindPath is "SomePath" (fake's + // return).
Both have the same kernel device number and therefore the staging must succeed. + fm.mounted[stagingPath] = "/dev/disk/by-id/scsi-X" + fm.blockDeviceNumbers["SomePath"] = unix.Mkdev(8, 16) + fm.filesystemDeviceNumbers[stagingPath] = unix.Mkdev(8, 16) + + _, err = driver.NodeStageVolume(context.Background(), makeStageReq(vol.UUID, stagingPath, vol.Name, false)) + if err != nil { + t.Fatalf("expected no error on re-stage, got: %v", err) + } +} + +// TestNodeStageVolume_Restage_NonLUKS_StaleDevice ensures the driver rejects a stale mount. +// The staging path is mounted from a different physical device than the one we expect. +func TestNodeStageVolume_Restage_NonLUKS_StaleDevice(t *testing.T) { + driver, fm := initDriverWithFakeMounter(t) + vol, err := driver.cloudscaleClient.Volumes.Create(t.Context(), &cloudscale.VolumeCreateRequest{ + Name: "vol", SizeGB: 1, Type: "ssd", + }) + if err != nil { + t.Fatalf("create volume: %v", err) + } + stagingPath := "/mnt/staging-restage-nonluks-stale" + + fm.mounted[stagingPath] = "/dev/disk/by-id/scsi-X" + fm.blockDeviceNumbers["SomePath"] = unix.Mkdev(8, 16) // what we expect + fm.filesystemDeviceNumbers[stagingPath] = unix.Mkdev(8, 32) // actually mounted device + + _, err = driver.NodeStageVolume(context.Background(), makeStageReq(vol.UUID, stagingPath, vol.Name, false)) + if err == nil { + t.Fatal("expected FailedPrecondition on stale mount, got nil") + } + st, _ := status.FromError(err) + if st.Code() != codes.FailedPrecondition { + t.Fatalf("expected FailedPrecondition, got %v: %v", st.Code(), err) + } +} + +// TestNodeStageVolume_Restage_LUKS_RawDeviceMount ensures the LUKS check rejects a stale mount. 
+func TestNodeStageVolume_Restage_LUKS_RawDeviceMount(t *testing.T) { + driver, fm := initDriverWithFakeMounter(t) + vol, err := driver.cloudscaleClient.Volumes.Create(t.Context(), &cloudscale.VolumeCreateRequest{ + Name: "vol-luks", SizeGB: 1, Type: "ssd", + }) + if err != nil { + t.Fatalf("create volume: %v", err) + } + + stagingPath := "/mnt/staging-restage-luks-stale" + mapper := "/dev/mapper/" + vol.Name + fm.mounted[stagingPath] = "/dev/sdb" + fm.blockDeviceNumbers[mapper] = unix.Mkdev(252, 0) // we expect the mapper + fm.filesystemDeviceNumbers[stagingPath] = unix.Mkdev(8, 16) // but the mount is from raw sdb + + _, err = driver.NodeStageVolume(context.Background(), makeStageReq(vol.UUID, stagingPath, vol.Name, true)) + if err == nil { + t.Fatal("expected FailedPrecondition on stale LUKS mount, got nil") + } + st, _ := status.FromError(err) + if st.Code() != codes.FailedPrecondition { + t.Fatalf("expected FailedPrecondition, got %v: %v", st.Code(), err) + } +} + // createVolumeForTest is a helper that creates a CSI volume and returns the volume ID. func createVolumeForTest(t *testing.T, driver *Driver, name string) string { t.Helper() diff --git a/driver/mounter.go b/driver/mounter.go index ca5fd5a2..7f4f0446 100644 --- a/driver/mounter.go +++ b/driver/mounter.go @@ -107,6 +107,17 @@ type Mounter interface { // IsBlockDevice checks whether the device at the path is a block device IsBlockDevice(volumePath string) (bool, error) + // GetBlockDeviceNumber runs stat on path, expecting a block device file and + // returns the device number it represents (st_rdev). Symlinks are + // followed. Returns an error if path is not a block device. + GetBlockDeviceNumber(path string) (uint64, error) + + // GetFilesystemDeviceNumber runs stat on path and returns the device number of + // the filesystem the path lives on (st_dev). Symlinks are followed. 
Returns + // an error if path is a block device, because st_dev would refer to devtmpfs + // rather than the filesystem the caller is asking about. + GetFilesystemDeviceNumber(path string) (uint64, error) + GetDeviceName(mounter mount.Interface, mountPath string) (string, error) FindAbsoluteDeviceByIDPath(volumeName string, log *logrus.Entry) (string, error) @@ -685,3 +696,25 @@ func (m *mounter) IsBlockDevice(devicePath string) (bool, error) { return (stat.Mode & unix.S_IFMT) == unix.S_IFBLK, nil } + +func (m *mounter) GetBlockDeviceNumber(path string) (uint64, error) { + var st unix.Stat_t + if err := unix.Stat(path, &st); err != nil { + return 0, err + } + if st.Mode&unix.S_IFMT != unix.S_IFBLK { + return 0, fmt.Errorf("%s is not a block device", path) + } + return uint64(st.Rdev), nil +} + +func (m *mounter) GetFilesystemDeviceNumber(path string) (uint64, error) { + var st unix.Stat_t + if err := unix.Stat(path, &st); err != nil { + return 0, err + } + if st.Mode&unix.S_IFMT == unix.S_IFBLK { + return 0, fmt.Errorf("%s is a block device, expected a filesystem path", path) + } + return uint64(st.Dev), nil +} diff --git a/driver/node.go b/driver/node.go index 3205268c..4ae0be1c 100644 --- a/driver/node.go +++ b/driver/node.go @@ -19,14 +19,14 @@ package driver import ( "context" + "errors" "fmt" "os" - "path/filepath" "strconv" - "strings" "github.com/container-storage-interface/spec/lib/go/csi" "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "k8s.io/mount-utils" @@ -158,25 +158,50 @@ func (d *Driver) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolumeRe return nil, status.Error(codes.Internal, err.Error()) } } else { - // Something is already mounted at the staging path. 
Verify it is - // mounted from the device we just resolved before declaring success — - // otherwise a stale mount left by an earlier (failed or racing) stage - // operation can be silently accepted, which is the same class of bug - // as the LUKS mapping reuse in luksOpen. - expected := source + // Something is already mounted at the staging path. Compare by devices using kernel device number to ensure + // the same underlying device is there. + expectedDevice := source if luksContext.EncryptionEnabled { - expected = "/dev/mapper/" + luksContext.VolumeName - } else if resolved, err := filepath.EvalSymlinks(source); err == nil { - // findmnt reports the kernel-resolved device, so compare against - // the canonical form. Fall back to the literal source on resolve - // failure — the mismatch will then surface as a loud error. - expected = resolved + expectedDevice = "/dev/mapper/" + luksContext.VolumeName } - if strings.TrimSpace(mountInfo.Source) != expected { + ll = ll.WithFields(logrus.Fields{ + "expected_device": expectedDevice, + }) + ll.Info("resolving device numbers for expectedDevice and stagingTargetPath") + + expectedDevNum, err := d.mounter.GetBlockDeviceNumber(expectedDevice) + if err != nil { + // A missing expected device while something else is mounted at the + // staging path is also a stale mount. 
+ if errors.Is(err, os.ErrNotExist) { + return nil, status.Errorf(codes.FailedPrecondition, + "stage path %s is mounted but expected device %s does not exist, refusing to reuse stale mount", + stagingTargetPath, expectedDevice) + } + return nil, status.Errorf(codes.Internal, + "failed to resolve device number for %s: %v", expectedDevice, err) + } + mountDevNum, err := d.mounter.GetFilesystemDeviceNumber(stagingTargetPath) + if err != nil { + return nil, status.Errorf(codes.Internal, + "failed to resolve device number for stage path %s: %v", + stagingTargetPath, err) + } + + ll = ll.WithFields(logrus.Fields{ + "expected_device_num": expectedDevNum, + "mount_dev_num": mountDevNum, + }) + ll.Info("checking if device numbers match") + + if expectedDevNum != mountDevNum { return nil, status.Errorf(codes.FailedPrecondition, - "stage path %s is mounted from %q, expected %s, refusing to reuse stale mount", - stagingTargetPath, mountInfo.Source, expected) + "stage path %s is mounted from device %d:%d, expected %s (%d:%d), refusing to reuse stale mount", + stagingTargetPath, + unix.Major(mountDevNum), unix.Minor(mountDevNum), + expectedDevice, + unix.Major(expectedDevNum), unix.Minor(expectedDevNum)) } ll.Info("source device is already mounted to the stagingTargetPath path") } diff --git a/test/kubernetes/integration_test.go b/test/kubernetes/integration_test.go index 0cbee6d3..5ec52a7e 100644 --- a/test/kubernetes/integration_test.go +++ b/test/kubernetes/integration_test.go @@ -10,6 +10,7 @@ import ( "encoding/json" "errors" "fmt" + "io" "log" "net/http" "os" @@ -18,14 +19,13 @@ import ( "testing" "time" - "github.com/cloudscale-ch/cloudscale-go-sdk/v7" "github.com/cloudscale-ch/csi-cloudscale/driver" - "github.com/stretchr/testify/assert" - "golang.org/x/oauth2" - "k8s.io/client-go/rest" + "github.com/cloudscale-ch/cloudscale-go-sdk/v7" snapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v6/apis/volumesnapshot/v1" snapshotclientset 
"github.com/kubernetes-csi/external-snapshotter/client/v6/clientset/versioned" + "github.com/stretchr/testify/assert" + "golang.org/x/oauth2" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" kubeerrors "k8s.io/apimachinery/pkg/api/errors" @@ -36,9 +36,11 @@ import ( "k8s.io/apimachinery/pkg/selection" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/clientcmd" "k8s.io/client-go/tools/remotecommand" + "k8s.io/utils/ptr" ) const ( @@ -118,6 +120,7 @@ func TestNode_Zone_Annotation(t *testing.T) { } func TestPod_Single_SSD_Volume(t *testing.T) { + t.Parallel() podDescriptor := TestPodDescriptor{ Kind: "Pod", Name: pseudoUuid(), @@ -163,6 +166,7 @@ func TestPod_Single_SSD_Volume(t *testing.T) { } func TestPod_Create_Volume_From_Snapshot(t *testing.T) { + t.Parallel() podDescriptor := TestPodDescriptor{ Kind: "Pod", Name: pseudoUuid(), @@ -279,6 +283,7 @@ func TestPod_Create_Volume_From_Snapshot(t *testing.T) { } func TestPod_Single_SSD_Luks_Volume_Snapshot(t *testing.T) { + t.Parallel() podDescriptor := TestPodDescriptor{ Kind: "Pod", Name: pseudoUuid(), @@ -401,6 +406,7 @@ func TestPod_Single_SSD_Luks_Volume_Snapshot(t *testing.T) { } func TestPod_Create_Raw_Block_Volume_From_Snapshot(t *testing.T) { + t.Parallel() podDescriptor := TestPodDescriptor{ Kind: "Pod", Name: pseudoUuid(), @@ -514,6 +520,7 @@ func TestPod_Create_Raw_Block_Volume_From_Snapshot(t *testing.T) { } func TestPod_Snapshot_Restore_Larger_Size(t *testing.T) { + t.Parallel() podDescriptor := TestPodDescriptor{ Kind: "Pod", Name: pseudoUuid(), @@ -599,6 +606,7 @@ func TestPod_Snapshot_Restore_Larger_Size(t *testing.T) { // Test that restoring a LUKS-encrypted snapshot into a larger volume results in // the block device and filesystem both being expanded. 
func TestPod_Luks_Snapshot_Restore_Larger_Size(t *testing.T) { + t.Parallel() podDescriptor := TestPodDescriptor{ Kind: "Pod", Name: pseudoUuid(), @@ -698,6 +706,7 @@ func TestPod_Luks_Snapshot_Restore_Larger_Size(t *testing.T) { } func TestCreateMultipleSnapshots_DifferentVolumes(t *testing.T) { + t.Parallel() // Create two independent volumes, each with one snapshot, to verify that // the CSI driver correctly handles creating snapshots from different // volumes and the snapshot controller can reconcile them concurrently. @@ -790,6 +799,7 @@ func TestCreateMultipleSnapshots_DifferentVolumes(t *testing.T) { } func TestCreateMultipleSnapshots_SameVolume(t *testing.T) { + t.Parallel() // Create two snapshots from the same volume to verify that the CSI driver // correctly creates distinct snapshots. This exercises the name-based // idempotency check in CreateSnapshot: the driver must not return an @@ -865,12 +875,13 @@ func TestCreateMultipleSnapshots_SameVolume(t *testing.T) { } func TestPod_Single_SSD_Raw_Volume(t *testing.T) { + t.Parallel() podDescriptor := TestPodDescriptor{ Kind: "Pod", Name: pseudoUuid(), Volumes: []TestPodVolume{ { - ClaimName: "csi-pod-ssd-pvc", + ClaimName: "csi-pod-ssd-raw-pvc", SizeGB: 5, StorageClass: "cloudscale-volume-ssd", Block: true, @@ -911,6 +922,7 @@ func TestPod_Single_SSD_Raw_Volume(t *testing.T) { } func TestPod_Single_Bulk_Volume(t *testing.T) { + t.Parallel() podDescriptor := TestPodDescriptor{ Kind: "Pod", Name: pseudoUuid(), @@ -955,12 +967,13 @@ func TestPod_Single_Bulk_Volume(t *testing.T) { } func TestPod_Single_Bulk_Raw_Volume(t *testing.T) { + t.Parallel() podDescriptor := TestPodDescriptor{ Kind: "Pod", Name: pseudoUuid(), Volumes: []TestPodVolume{ { - ClaimName: "csi-pod-bulk-pvc", + ClaimName: "csi-pod-bulk-raw-pvc", SizeGB: 100, StorageClass: "cloudscale-volume-bulk", Block: true, @@ -1000,6 +1013,7 @@ func TestPod_Single_Bulk_Raw_Volume(t *testing.T) { } func TestDeployment_Single_SSD_Volume(t *testing.T) { + 
t.Parallel() podDescriptor := TestPodDescriptor{ Kind: "Deployment", Name: pseudoUuid(), @@ -1044,6 +1058,7 @@ func TestDeployment_Single_SSD_Volume(t *testing.T) { } func TestPod_Multi_SSD_Volume(t *testing.T) { + t.Parallel() podDescriptor := TestPodDescriptor{ Kind: "Pod", Name: pseudoUuid(), @@ -1085,6 +1100,7 @@ func TestPod_Multi_SSD_Volume(t *testing.T) { } func TestPod_Multiple_Volumes(t *testing.T) { + t.Parallel() podDescriptor := TestPodDescriptor{ Kind: "Pod", Name: pseudoUuid(), @@ -1140,6 +1156,7 @@ func TestPod_Multiple_Volumes(t *testing.T) { } func TestPod_Single_SSD_Luks_Volume(t *testing.T) { + t.Parallel() podDescriptor := TestPodDescriptor{ Kind: "Pod", Name: pseudoUuid(), @@ -1187,7 +1204,89 @@ func TestPod_Single_SSD_Luks_Volume(t *testing.T) { waitCloudscaleVolumeDeleted(t, pvc.Spec.VolumeName) } +// TestPod_KubeletRestart_RestageSucceeds ensures restaging the same path (after kubelet restart) works. +// This is a regression test against the bug introduced in v4.0.1. +// After kubelet restart, kubelet rebuilds its actual-state from disk and re-issues NodeStageVolume against an already +// staged path. 
+func TestPod_KubeletRestart_RestageSucceeds(t *testing.T) { + cases := []struct { + name string + storageClass string + luksKey string + }{ + {"non-luks", "cloudscale-volume-ssd", ""}, + {"luks", "cloudscale-volume-ssd-luks", "secret"}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + podDescriptor := TestPodDescriptor{ + Kind: "Pod", + Name: pseudoUuid(), + Volumes: []TestPodVolume{ + { + ClaimName: "restage-pvc-" + tc.name, + SizeGB: 1, + StorageClass: tc.storageClass, + LuksKey: tc.luksKey, + }, + }, + } + + pod := makeKubernetesPod(t, podDescriptor) + pvcs := makeKubernetesPVCs(t, podDescriptor) + assert.Equal(t, 1, len(pvcs)) + waitForPod(t, client, pod.Name) + + defer cleanup(t, podDescriptor) + + nodeName, err := getNodeName(pod.Namespace, pod.Name) + assert.NoError(t, err) + + restartTime := time.Now() + + if err := restartKubeletOnNode(t, nodeName); err != nil { + t.Fatalf("restart kubelet on %s: %v", nodeName, err) + } + + // Give kubelet some time to restart and re-stage. + time.Sleep(60 * time.Second) + + // fetch logs but with a bit of grace period to ensure we don't miss anything important + // and counter possible clock-skew (though that should normally not happen). + logs, err := getCSINodeLogsSince(t.Context(), nodeName, restartTime.Add(-60*time.Second)) + if err != nil { + t.Fatalf("fetch csi-node logs: %v", err) + } + + if strings.Contains(logs, "refusing to reuse stale mount") { + t.Fatalf("csi-node logged stale-mount rejection after kubelet restart (regression of v4.0.1 bug):\n%s", + extractStaleMountLines(logs)) + } + + // Require that re-stage actually happened + if !strings.Contains(logs, "node stage volume called") && + !strings.Contains(logs, "source device is already mounted") { + t.Fatalf("no NodeStageVolume call observed in csi-node logs after kubelet restart on %s; "+ + "test cannot prove the stale-mount check was exercised", + nodeName) + } + + // Pod must still be Running with its volume usable. 
+ refreshed, err := client.CoreV1().Pods(pod.Namespace).Get(context.Background(), pod.Name, metav1.GetOptions{}) + assert.NoError(t, err) + assert.Equal(t, v1.PodRunning, refreshed.Status.Phase) + + boundPVC := getPVC(t, client, pvcs[0].Name) + info, err := getVolumeInfo(t, pod, boundPVC.Spec.VolumeName) + assert.NoError(t, err) + assert.Equal(t, "ext4", info.Filesystem) + }) + } +} + func TestPod_Single_Bulk_Luks_Volume(t *testing.T) { + t.Parallel() podDescriptor := TestPodDescriptor{ Kind: "Pod", Name: pseudoUuid(), @@ -1249,14 +1348,16 @@ var resizeCases = []struct { } func TestPersistentVolume_Resize(t *testing.T) { + t.Parallel() for _, tt := range resizeCases { t.Run(fmt.Sprintf("%v %v", tt.storageClass, tt.block), func(t *testing.T) { + t.Parallel() podDescriptor := TestPodDescriptor{ Kind: "Pod", Name: pseudoUuid(), Volumes: []TestPodVolume{ { - ClaimName: "csi-pod-ssd-pvc", + ClaimName: fmt.Sprintf("csi-pod-resize-pvc-%v-%v", tt.storageClass, tt.block), SizeGB: tt.initialSizeGB, StorageClass: tt.storageClass, LuksKey: tt.LuksKey, @@ -1364,6 +1465,7 @@ func TestPersistentVolume_Resize(t *testing.T) { } func TestVolumeStats(t *testing.T) { + t.Parallel() pvcName := fmt.Sprintf("csi-pvc-stats-%v", pseudoUuid()) podName := pseudoUuid() podDescriptor := TestPodDescriptor{ @@ -1920,7 +2022,8 @@ func getPod(t *testing.T, client kubernetes.Interface, name string) *v1.Pod { // loads the volume with the given name from the cloudscale.ch API func getCloudscaleVolume(t *testing.T, volumeName string) cloudscale.Volume { - ctx, _ := context.WithTimeout(context.Background(), 30*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() volumes, err := cloudscaleClient.Volumes.List(ctx, cloudscale.WithNameFilter(volumeName)) assert.NoError(t, err) @@ -1933,8 +2036,9 @@ func waitCloudscaleVolumeDeleted(t *testing.T, volumeName string) { start := time.Now() for { - ctx, _ := context.WithTimeout(context.Background(), 
30*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) volumes, err := cloudscaleClient.Volumes.List(ctx, cloudscale.WithNameFilter(volumeName)) + cancel() if len(volumes) == 0 { t.Logf("volume %v is deleted on cloudscale", volumeName) return @@ -2128,6 +2232,111 @@ func ExecCommand(podNamespace string, podName string, command ...string) (string return execOut.String(), nil } +// restartKubeletOnNode runs `systemctl restart kubelet` on the given node by +// creating an ephemeral privileged debug pod that uses nsenter to enter the host PID +// namespace. +func restartKubeletOnNode(t *testing.T, nodeName string) error { + t.Helper() + podName := "debug-restart-kubelet-" + pseudoUuid() + debugPod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + Namespace: "kube-system", + }, + Spec: v1.PodSpec{ + NodeName: nodeName, + HostPID: true, + RestartPolicy: v1.RestartPolicyNever, + Tolerations: []v1.Toleration{ + {Operator: v1.TolerationOpExists}, + }, + Containers: []v1.Container{ + { + Name: "ctr", + Image: "busybox:1.36", + // nsenter attaches to PID 1's namespaces (the host's init, + // visible because HostPID is true) and runs the command there, + // so systemctl talks to the host's systemd instead of the + // container. Flags: -t 1 target PID, -m mount ns (host + // filesystem / unit files), -u UTS ns (hostname), -i IPC ns + // (D-Bus socket), -n network ns, -p PID ns (so systemd sees + // host PIDs). 
+ Command: []string{ + "sh", "-c", + "nsenter -t 1 -m -u -i -n -p -- systemctl restart kubelet", + }, + SecurityContext: &v1.SecurityContext{Privileged: ptr.To(true)}, + }, + }, + }, + } + + created, err := client.CoreV1().Pods("kube-system").Create(t.Context(), debugPod, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("create debug pod: %w", err) + } + defer func() { + _ = client.CoreV1().Pods("kube-system").Delete(t.Context(), created.Name, metav1.DeleteOptions{}) + }() + + ctx, cancel := context.WithTimeout(t.Context(), 2*time.Minute) + defer cancel() + return wait.PollUntilContextTimeout(ctx, 2*time.Second, 2*time.Minute, true, + func(ctx context.Context) (bool, error) { + p, err := client.CoreV1().Pods("kube-system").Get(ctx, created.Name, metav1.GetOptions{}) + if err != nil { + return false, nil + } + switch p.Status.Phase { + case v1.PodSucceeded: + return true, nil + case v1.PodFailed: + return false, fmt.Errorf("debug pod failed: %s", p.Status.Message) + } + return false, nil + }) +} + +// getCSINodeLogsSince fetches recent csi-node plugin logs for the given node. 
+func getCSINodeLogsSince(ctx context.Context, nodeName string, since time.Time) (string, error) { + ctx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + + pods, err := client.CoreV1().Pods("kube-system").List(ctx, metav1.ListOptions{ + LabelSelector: "app=csi-cloudscale-node,role=csi-cloudscale", + }) + if err != nil { + return "", err + } + for _, p := range pods.Items { + if p.Spec.NodeName != nodeName { + continue + } + sinceTime := metav1.NewTime(since) + stream, err := client.CoreV1().Pods(p.Namespace).GetLogs(p.Name, &v1.PodLogOptions{ + Container: "csi-cloudscale-plugin", + SinceTime: &sinceTime, + }).Stream(ctx) + if err != nil { + return "", err + } + buf, err := io.ReadAll(stream) + stream.Close() + return string(buf), err + } + return "", fmt.Errorf("no csi-node pod found on node %s", nodeName) +} + +func extractStaleMountLines(logs string) string { + var out []string + for _, line := range strings.Split(logs, "\n") { + if strings.Contains(line, "refusing to reuse stale mount") { + out = append(out, line) + } + } + return strings.Join(out, "\n") +} + // Metrics Handling type MetricsSet struct {