Skip to content

Commit 028a78b

Browse files
committed
Use force unmount and explicitly unmount bad mount points
There have been cases where the logic to cleanup a mount point has caused the driver to get into a bad state. This is most obvious when a subdirectory is mounted to a volume and a parent directory of that subdirectory is deleted. The Lustre driver doesn't handle that case in the way that Kubernetes expects and returns invalid data. To avoid this scenario causing our driver to get into a bad state, leak mount points, etc, we must explicitly check that we can read the necessary information about the mount point, and if not, explicitly unmount that mount point before allowing Kubernetes to clean up the directory. To ensure that we don't end up in a bad state, this change enables force unmounting as well. The force unmount will only occur after a timeout has expired, since force unmounts can cause issues with the Lustre driver. However, in this case, it is better if we are in a bad enough situation to be able to eventually return to a good state rather than require manual intervention.
1 parent 4613c77 commit 028a78b

File tree

4 files changed

+60
-3
lines changed

4 files changed

+60
-3
lines changed

pkg/azurelustre/azurelustre.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ type Driver struct {
9191
// enableAzureLustreMockMount is only for testing, DO NOT set as true in non-testing scenario
9292
enableAzureLustreMockMount bool
9393
mounter *mount.SafeFormatAndMount // TODO_JUSJIN: check any other alternatives
94+
forceMounter *mount.MounterForceUnmounter
9495
volLockMap *util.LockMap
9596
// Directory to temporarily mount to for subdirectory creation
9697
workingMountDir string
@@ -132,6 +133,13 @@ func (d *Driver) Run(endpoint string, testBool bool) {
132133
Interface: mount.New(""),
133134
Exec: utilexec.New(),
134135
}
136+
forceUnmounter, ok := d.mounter.Interface.(mount.MounterForceUnmounter)
137+
if ok {
138+
klog.V(4).Infof("Using force unmounter interface")
139+
d.forceMounter = &forceUnmounter
140+
} else {
141+
klog.Fatalf("Mounter does not support force unmount")
142+
}
135143

136144
// TODO_JUSJIN: revisit these caps
137145
// Initialize default library driver

pkg/azurelustre/fake_mount.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package azurelustre
1919
import (
2020
"fmt"
2121
"strings"
22+
"time"
2223

2324
mount "k8s.io/mount-utils"
2425
)
@@ -69,3 +70,7 @@ func (f *fakeMounter) IsLikelyNotMountPoint(file string) (bool, error) {
6970
}
7071
return true, nil
7172
}
73+
74+
func (f *fakeMounter) UnmountWithForce(target string, _ time.Duration) error {
75+
return f.Unmount(target)
76+
}

pkg/azurelustre/nodeserver.go

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"os"
2222
"path/filepath"
2323
"strings"
24+
"time"
2425

2526
"github.com/container-storage-interface/spec/lib/go/csi"
2627
"golang.org/x/net/context"
@@ -317,10 +318,44 @@ func (d *Driver) NodeUnpublishVolume(
317318
}
318319

319320
func unmountVolumeAtPath(d *Driver, targetPath string) error {
321+
shouldUnmountBadPath := false
322+
320323
d.kernelModuleLock.Lock()
321324
defer d.kernelModuleLock.Unlock()
322-
err := mount.CleanupMountPoint(targetPath, d.mounter,
323-
true /*extensiveMountPointCheck*/)
325+
326+
parent := filepath.Dir(targetPath)
327+
klog.V(2).Infof("Listing dir: %s", parent)
328+
entries, err := os.ReadDir(parent)
329+
if err != nil {
330+
klog.Warningf("could not list directory %s, will explicitly unmount path before cleanup %s: %q", parent, targetPath, err)
331+
shouldUnmountBadPath = true
332+
}
333+
334+
for _, e := range entries {
335+
if e.Name() == filepath.Base(targetPath) {
336+
_, err := e.Info()
337+
if err != nil {
338+
klog.Warningf("could not get info for entry %s, will explicitly unmount path before cleanup %s: %q", e.Name(), targetPath, err)
339+
shouldUnmountBadPath = true
340+
}
341+
}
342+
}
343+
344+
if shouldUnmountBadPath {
345+
// In these cases, if we only ran mount.CleanupMountWithForce,
346+
// it would have issues trying to stat the directory before
347+
// cleanup, so we need to explicitly unmount the path, with
348+
// force if necessary. Then the directory can be cleaned up
349+
// by the mount.CleanupMountWithForce call.
350+
klog.V(4).Infof("unmounting bad mount: %s)", targetPath)
351+
forceUnmounter := *d.forceMounter
352+
if err := forceUnmounter.UnmountWithForce(targetPath, 30*time.Second); err != nil {
353+
klog.Warningf("couldn't unmount %s: %q", targetPath, err)
354+
}
355+
}
356+
357+
err = mount.CleanupMountWithForce(targetPath, *d.forceMounter,
358+
true /*extensiveMountPointCheck*/, 10*time.Second)
324359
return err
325360
}
326361

@@ -653,7 +688,7 @@ func (d *Driver) internalUnmount(mountPath string) error {
653688

654689
klog.V(4).Infof("internally unmounting %v", target)
655690

656-
err = mount.CleanupMountPoint(target, d.mounter, true)
691+
err = mount.CleanupMountWithForce(target, *d.forceMounter, true, 10*time.Second)
657692
if err != nil {
658693
err = status.Errorf(codes.Internal, "failed to unmount staging target %q: %v", target, err)
659694
}

pkg/azurelustre/nodeserver_test.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,9 @@ func TestEnsureMountPoint(t *testing.T) {
115115
Interface: fakeMounter,
116116
Exec: fakeExec,
117117
}
118+
forceMounter, ok := d.mounter.Interface.(mount.MounterForceUnmounter)
119+
require.True(t, ok, "Mounter should implement MounterForceUnmounter")
120+
d.forceMounter = &forceMounter
118121

119122
for _, test := range tests {
120123
err := makeDir(alreadyExistTarget)
@@ -542,6 +545,9 @@ func TestNodePublishVolume(t *testing.T) {
542545
Interface: fakeMounter,
543546
Exec: fakeExec,
544547
}
548+
forceMounter, ok := d.mounter.Interface.(mount.MounterForceUnmounter)
549+
require.True(t, ok, "Mounter should implement MounterForceUnmounter")
550+
d.forceMounter = &forceMounter
545551
d.workingMountDir = workingMountDir
546552
err := makeDir(targetTest)
547553
require.NoError(t, err)
@@ -736,6 +742,9 @@ func TestNodeUnpublishVolume(t *testing.T) {
736742
Interface: fakeMounter,
737743
Exec: fakeExec,
738744
}
745+
forceMounter, ok := d.mounter.Interface.(mount.MounterForceUnmounter)
746+
require.True(t, ok, "Mounter should implement MounterForceUnmounter")
747+
d.forceMounter = &forceMounter
739748
err := makeDir(targetTest)
740749
require.NoError(t, err)
741750

0 commit comments

Comments
 (0)