From a59b8a28d23b1f265eb066e760b56d72ad29e91f Mon Sep 17 00:00:00 2001
From: Robin Nyman
Date: Thu, 13 Jun 2024 11:48:36 +0300
Subject: [PATCH] [cinder-csi-plugin] retry mount operation with rescan (#2610)

If the initial formatting and mounting fails in NodeStageVolume, try to
rescan the device and retry the operation. This prevents failures when
the device information is reported incorrectly, which would otherwise
block the mount.

Signed-off-by: NymanRobin
---
 pkg/csi/cinder/nodeserver.go              | 21 ++++++++++++++-
 pkg/util/blockdevice/blockdevice_linux.go | 33 ++++++++++++++++++++---
 2 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/pkg/csi/cinder/nodeserver.go b/pkg/csi/cinder/nodeserver.go
index 3c1faf1fe9..2c4879d4cc 100644
--- a/pkg/csi/cinder/nodeserver.go
+++ b/pkg/csi/cinder/nodeserver.go
@@ -398,7 +398,7 @@ func (ns *nodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
 			options = append(options, collectMountOptions(fsType, mountFlags)...)
 		}
 		// Mount
-		err = m.Mounter().FormatAndMount(devicePath, stagingTarget, fsType, options)
+		err = ns.formatAndMountRetry(devicePath, stagingTarget, fsType, options)
 		if err != nil {
 			return nil, status.Error(codes.Internal, err.Error())
 		}
@@ -426,6 +426,25 @@ func (ns *nodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
 	return &csi.NodeStageVolumeResponse{}, nil
 }
 
+// formatAndMountRetry attempts to format and mount a device at the given path.
+// If the initial mount fails, it rescans the device and retries the mount operation.
+func (ns *nodeServer) formatAndMountRetry(devicePath, stagingTarget, fsType string, options []string) error {
+	m := ns.Mount
+	err := m.Mounter().FormatAndMount(devicePath, stagingTarget, fsType, options)
+	if err != nil {
+		klog.Infof("Initial format and mount failed: %v. Attempting rescan.", err)
+		// Attempting rescan if the initial mount fails
+		rescanErr := blockdevice.RescanDevice(devicePath)
+		if rescanErr != nil {
+			klog.Infof("Rescan failed: %v. Returning original mount error.", rescanErr)
+			return err
+		}
+		klog.Infof("Rescan succeeded, retrying format and mount")
+		err = m.Mounter().FormatAndMount(devicePath, stagingTarget, fsType, options)
+	}
+	return err
+}
+
 func (ns *nodeServer) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnstageVolumeRequest) (*csi.NodeUnstageVolumeResponse, error) {
 	klog.V(4).Infof("NodeUnstageVolume: called with args %+v", protosanitizer.StripSecrets(*req))
 
diff --git a/pkg/util/blockdevice/blockdevice_linux.go b/pkg/util/blockdevice/blockdevice_linux.go
index 738bc09d20..a043f7012e 100644
--- a/pkg/util/blockdevice/blockdevice_linux.go
+++ b/pkg/util/blockdevice/blockdevice_linux.go
@@ -85,6 +85,18 @@ func checkBlockDeviceSize(devicePath string, deviceMountPath string, newSize int
 	return nil
 }
 
+// triggerRescan writes "1" to blockDeviceRescanPath to request a rescan of
+// the block device. A write failure is logged and returned to the caller.
+func triggerRescan(blockDeviceRescanPath string) error {
+	klog.V(4).Infof("Rescanning %q block device geometry", blockDeviceRescanPath)
+	err := os.WriteFile(blockDeviceRescanPath, []byte{'1'}, 0666)
+	if err != nil {
+		klog.Errorf("Error rescanning new block device geometry: %v", err)
+		return err
+	}
+	return nil
+}
+
 func RescanBlockDeviceGeometry(devicePath string, deviceMountPath string, newSize int64) error {
 	if newSize == 0 {
 		klog.Error("newSize is empty, skipping the block device rescan")
@@ -106,13 +118,28 @@ func RescanBlockDeviceGeometry(devicePath string, deviceMountPath string, newSiz
 	}
 
 	klog.V(3).Infof("Resolved block device path from %q to %q", devicePath, blockDeviceRescanPath)
-	klog.V(4).Infof("Rescanning %q block device geometry", devicePath)
-	err = os.WriteFile(blockDeviceRescanPath, []byte{'1'}, 0666)
+	err = triggerRescan(blockDeviceRescanPath)
 	if err != nil {
-		klog.Errorf("Error rescanning new block device geometry: %v", err)
 		// no need to run checkBlockDeviceSize second time here, return the saved error
 		return bdSizeErr
 	}
 
 	return checkBlockDeviceSize(devicePath, deviceMountPath, newSize)
 }
+
+// RescanDevice resolves the rescan path of the block device at devicePath and
+// triggers a rescan of it. It returns an error when the rescan path cannot be
+// resolved or the rescan cannot be triggered.
+func RescanDevice(devicePath string) error {
+	blockDeviceRescanPath, err := findBlockDeviceRescanPath(devicePath)
+	if err != nil {
+		return fmt.Errorf("device %q does not have rescan path: %w", devicePath, err)
+	}
+
+	klog.V(3).Infof("Resolved block device path from %q to %q", devicePath, blockDeviceRescanPath)
+	err = triggerRescan(blockDeviceRescanPath)
+	if err != nil {
+		return fmt.Errorf("rescanning block device geometry of %q: %w", devicePath, err)
+	}
+	return nil
+}