Use force unmount and explicitly unmount bad mount points

dabradley · dabradley · commit 028a78b462e1 · 2024-11-25T15:28:06.000-05:00
There have been cases where the logic to cleanup a mount point
has caused the driver to get into a bad state. This is most
obvious when a subdirectory is mounted to a volume and a parent
directory of that subdirectory is deleted. The Lustre driver
doesn't handle that case in the way that Kubernetes expects
and returns invalid data. To avoid this scenario causing our
driver to get into a bad state, leak mount points, etc, we
must explicitly check that we can read the necessary information
about the mount point, and if not, explicitly unmount that
mount point before allowing Kubernetes to clean up the directory.
To ensure that we don't end up in a bad state, this change
enables force unmounting as well. The force unmount will only
occur after a timeout has expired, since force unmounts can
cause issues with the Lustre driver. However, in this case, it
is better if we are in a bad enough situation to be able to
eventually return to a good state rather than require manual
intervention.
diff --git a/pkg/azurelustre/azurelustre.go b/pkg/azurelustre/azurelustre.go
@@ -91,6 +91,7 @@ type Driver struct {
 	// enableAzureLustreMockMount is only for testing, DO NOT set as true in non-testing scenario
 	enableAzureLustreMockMount bool
 	mounter                    *mount.SafeFormatAndMount // TODO_JUSJIN: check any other alternatives
+	forceMounter               *mount.MounterForceUnmounter
 	volLockMap                 *util.LockMap
 	// Directory to temporarily mount to for subdirectory creation
 	workingMountDir string
@@ -132,6 +133,13 @@ func (d *Driver) Run(endpoint string, testBool bool) {
 		Interface: mount.New(""),
 		Exec:      utilexec.New(),
 	}
+	forceUnmounter, ok := d.mounter.Interface.(mount.MounterForceUnmounter)
+	if ok {
+		klog.V(4).Infof("Using force unmounter interface")
+		d.forceMounter = &forceUnmounter
+	} else {
+		klog.Fatalf("Mounter does not support force unmount")
+	}
 
 	// TODO_JUSJIN: revisit these caps
 	// Initialize default library driver
diff --git a/pkg/azurelustre/fake_mount.go b/pkg/azurelustre/fake_mount.go
@@ -19,6 +19,7 @@ package azurelustre
 import (
 	"fmt"
 	"strings"
+	"time"
 
 	mount "k8s.io/mount-utils"
 )
@@ -69,3 +70,7 @@ func (f *fakeMounter) IsLikelyNotMountPoint(file string) (bool, error) {
 	}
 	return true, nil
 }
+
+func (f *fakeMounter) UnmountWithForce(target string, _ time.Duration) error {
+	return f.Unmount(target)
+}
diff --git a/pkg/azurelustre/nodeserver.go b/pkg/azurelustre/nodeserver.go
@@ -21,6 +21,7 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
+	"time"
 
 	"github.com/container-storage-interface/spec/lib/go/csi"
 	"golang.org/x/net/context"
@@ -317,10 +318,44 @@ func (d *Driver) NodeUnpublishVolume(
 }
 
 func unmountVolumeAtPath(d *Driver, targetPath string) error {
+	shouldUnmountBadPath := false
+
 	d.kernelModuleLock.Lock()
 	defer d.kernelModuleLock.Unlock()
-	err := mount.CleanupMountPoint(targetPath, d.mounter,
-		true /*extensiveMountPointCheck*/)
+
+	parent := filepath.Dir(targetPath)
+	klog.V(2).Infof("Listing dir: %s", parent)
+	entries, err := os.ReadDir(parent)
+	if err != nil {
+		klog.Warningf("could not list directory %s, will explicitly unmount path before cleanup %s: %q", parent, targetPath, err)
+		shouldUnmountBadPath = true
+	}
+
+	for _, e := range entries {
+		if e.Name() == filepath.Base(targetPath) {
+			_, err := e.Info()
+			if err != nil {
+				klog.Warningf("could not get info for entry %s, will explicitly unmount path before cleanup %s: %q", e.Name(), targetPath, err)
+				shouldUnmountBadPath = true
+			}
+		}
+	}
+
+	if shouldUnmountBadPath {
+		// In these cases, if we only ran mount.CleanupMountWithForce,
+		// it would have issues trying to stat the directory before
+		// cleanup, so we need to explicitly unmount the path, with
+		// force if necessary. Then the directory can be cleaned up
+		// by the mount.CleanupMountWithForce call.
+		klog.V(4).Infof("unmounting bad mount: %s)", targetPath)
+		forceUnmounter := *d.forceMounter
+		if err := forceUnmounter.UnmountWithForce(targetPath, 30*time.Second); err != nil {
+			klog.Warningf("couldn't unmount %s: %q", targetPath, err)
+		}
+	}
+
+	err = mount.CleanupMountWithForce(targetPath, *d.forceMounter,
+		true /*extensiveMountPointCheck*/, 10*time.Second)
 	return err
 }
 
@@ -653,7 +688,7 @@ func (d *Driver) internalUnmount(mountPath string) error {
 
 	klog.V(4).Infof("internally unmounting %v", target)
 
-	err = mount.CleanupMountPoint(target, d.mounter, true)
+	err = mount.CleanupMountWithForce(target, *d.forceMounter, true, 10*time.Second)
 	if err != nil {
 		err = status.Errorf(codes.Internal, "failed to unmount staging target %q: %v", target, err)
 	}
diff --git a/pkg/azurelustre/nodeserver_test.go b/pkg/azurelustre/nodeserver_test.go
@@ -115,6 +115,9 @@ func TestEnsureMountPoint(t *testing.T) {
 		Interface: fakeMounter,
 		Exec:      fakeExec,
 	}
+	forceMounter, ok := d.mounter.Interface.(mount.MounterForceUnmounter)
+	require.True(t, ok, "Mounter should implement MounterForceUnmounter")
+	d.forceMounter = &forceMounter
 
 	for _, test := range tests {
 		err := makeDir(alreadyExistTarget)
@@ -542,6 +545,9 @@ func TestNodePublishVolume(t *testing.T) {
 			Interface: fakeMounter,
 			Exec:      fakeExec,
 		}
+		forceMounter, ok := d.mounter.Interface.(mount.MounterForceUnmounter)
+		require.True(t, ok, "Mounter should implement MounterForceUnmounter")
+		d.forceMounter = &forceMounter
 		d.workingMountDir = workingMountDir
 		err := makeDir(targetTest)
 		require.NoError(t, err)
@@ -736,6 +742,9 @@ func TestNodeUnpublishVolume(t *testing.T) {
 			Interface: fakeMounter,
 			Exec:      fakeExec,
 		}
+		forceMounter, ok := d.mounter.Interface.(mount.MounterForceUnmounter)
+		require.True(t, ok, "Mounter should implement MounterForceUnmounter")
+		d.forceMounter = &forceMounter
 		err := makeDir(targetTest)
 		require.NoError(t, err)
 

Original file line number	Diff line number	Diff line change
`@@ -19,6 +19,7 @@ package azurelustre`
`19`	`19`	`import (`
`20`	`20`	`"fmt"`
`21`	`21`	`"strings"`
	`22`	`+ "time"`
`22`	`23`
`23`	`24`	`mount "k8s.io/mount-utils"`
`24`	`25`	`)`
`@@ -69,3 +70,7 @@ func (f *fakeMounter) IsLikelyNotMountPoint(file string) (bool, error) {`
`69`	`70`	`}`
`70`	`71`	`return true, nil`
`71`	`72`	`}`
	`73`	`+`
	`74`	`+func (f *fakeMounter) UnmountWithForce(target string, _ time.Duration) error {`
	`75`	`+ return f.Unmount(target)`
	`76`	`+}`