From b23e274b36a241586a5f346eeb4b7135ffba1945 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Tue, 23 Jul 2024 11:00:55 +0200 Subject: [PATCH 1/3] loopback: use fstat on the open file descriptor move the stat call later after the file is already opened so it is less vulnerable to the file being removed between the stat and the open syscall. Signed-off-by: Giuseppe Scrivano --- pkg/loopback/attach_loopback.go | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/pkg/loopback/attach_loopback.go b/pkg/loopback/attach_loopback.go index 40d8fd2b89..bfdafcdf78 100644 --- a/pkg/loopback/attach_loopback.go +++ b/pkg/loopback/attach_loopback.go @@ -6,6 +6,7 @@ package loopback import ( "errors" "fmt" + "io/fs" "os" "syscall" @@ -53,26 +54,28 @@ func openNextAvailableLoopback(index int, sparseName string, sparseFile *os.File target := fmt.Sprintf("/dev/loop%d", index) index++ - fi, err := os.Stat(target) + // OpenFile adds O_CLOEXEC + loopFile, err = os.OpenFile(target, os.O_RDWR, 0o644) if err != nil { - if os.IsNotExist(err) { - logrus.Error("There are no more loopback devices available.") + if errors.Is(err, fs.ErrNotExist) { + continue } + logrus.Errorf("Opening loopback device: %s", err) return nil, ErrAttachLoopbackDevice } + fi, err := loopFile.Stat() + if err != nil { + loopFile.Close() + logrus.Errorf("Stat loopback device: %s", err) + return nil, ErrAttachLoopbackDevice + } if fi.Mode()&os.ModeDevice != os.ModeDevice { + loopFile.Close() logrus.Errorf("Loopback device %s is not a block device.", target) continue } - // OpenFile adds O_CLOEXEC - loopFile, err = os.OpenFile(target, os.O_RDWR, 0o644) - if err != nil { - logrus.Errorf("Opening loopback device: %s", err) - return nil, ErrAttachLoopbackDevice - } - // Try to attach to the loop file if err := ioctlLoopSetFd(loopFile.Fd(), sparseFile.Fd()); err != nil { loopFile.Close() From 01c633e609c2fbec55f07aa9aa2354a4f3bdafd1 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Tue, 23 Jul 2024 12:14:01 +0200 Subject: [PATCH 2/3] loopback: fix race condition opening loopback device the loopback device file could be already used/removed by another process. Since the process is inherently racy, just grab the next available index and try again until it succeeds. Closes: https://github.com/containers/storage/issues/2038 Signed-off-by: Giuseppe Scrivano --- pkg/loopback/attach_loopback.go | 30 ++++++++++++-------- pkg/loopback/attach_test.go | 49 +++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 11 deletions(-) create mode 100644 pkg/loopback/attach_test.go diff --git a/pkg/loopback/attach_loopback.go b/pkg/loopback/attach_loopback.go index bfdafcdf78..f9f0134984 100644 --- a/pkg/loopback/attach_loopback.go +++ b/pkg/loopback/attach_loopback.go @@ -40,7 +40,7 @@ func getNextFreeLoopbackIndex() (int, error) { return index, err } -func openNextAvailableLoopback(index int, sparseName string, sparseFile *os.File) (loopFile *os.File, err error) { +func openNextAvailableLoopback(sparseName string, sparseFile *os.File) (loopFile *os.File, err error) { // Read information about the loopback file. var st syscall.Stat_t err = syscall.Fstat(int(sparseFile.Fd()), &st) @@ -49,15 +49,31 @@ func openNextAvailableLoopback(index int, sparseName string, sparseFile *os.File return nil, ErrAttachLoopbackDevice } + // upper bound to avoid infinite loop + remaining := 1000 + // Start looking for a free /dev/loop for { + if remaining == 0 { + logrus.Errorf("No free loopback devices available") + return nil, ErrAttachLoopbackDevice + } + remaining-- + + index, err := getNextFreeLoopbackIndex() + if err != nil { + logrus.Debugf("Error retrieving the next available loopback: %s", err) + return nil, err + } + target := fmt.Sprintf("/dev/loop%d", index) - index++ // OpenFile adds O_CLOEXEC loopFile, err = os.OpenFile(target, os.O_RDWR, 0o644) if err != nil { if errors.Is(err, fs.ErrNotExist) { + // Another process could have taken the loopback device in the meantime. So repeat + // the process with the next loopback device. continue } logrus.Errorf("Opening loopback device: %s", err) @@ -127,14 +143,6 @@ func AttachLoopDeviceRO(sparseName string) (loop *os.File, err error) { } func attachLoopDevice(sparseName string, readonly bool) (loop *os.File, err error) { - // Try to retrieve the next available loopback device via syscall. - // If it fails, we discard error and start looping for a - // loopback from index 0. - startIndex, err := getNextFreeLoopbackIndex() - if err != nil { - logrus.Debugf("Error retrieving the next available loopback: %s", err) - } - var sparseFile *os.File // OpenFile adds O_CLOEXEC @@ -149,7 +157,7 @@ func attachLoopDevice(sparseName string, readonly bool) (loop *os.File, err erro } defer sparseFile.Close() - loopFile, err := openNextAvailableLoopback(startIndex, sparseName, sparseFile) + loopFile, err := openNextAvailableLoopback(sparseName, sparseFile) if err != nil { return nil, err } diff --git a/pkg/loopback/attach_test.go b/pkg/loopback/attach_test.go new file mode 100644 index 0000000000..8ae2f1143d --- /dev/null +++ b/pkg/loopback/attach_test.go @@ -0,0 +1,49 @@ +//go:build linux && cgo +// +build linux,cgo + +package loopback + +import ( + "os" + "sync" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const ( + maxDevicesPerGoroutine = 1000 + maxGoroutines = 10 +) + +func TestAttachLoopbackDeviceRace(t *testing.T) { + createLoopbackDevice := func() { + // Create a file to use as a backing file + f, err := os.CreateTemp(t.TempDir(), "loopback-test") + require.NoError(t, err) + defer f.Close() + + defer os.Remove(f.Name()) + + lp, err := AttachLoopDevice(f.Name()) + assert.NoError(t, err) + assert.NotNil(t, lp, "loopback device file should not be nil") + if lp != nil { + lp.Close() + } + } + + wg := sync.WaitGroup{} + + for i := 0; i < maxGoroutines; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < maxDevicesPerGoroutine; i++ { + createLoopbackDevice() + } + }() + } + wg.Wait() +} From 998e6d433d3aaab0408c8d033aea3e7d43b4a198 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Tue, 23 Jul 2024 13:00:17 +0200 Subject: [PATCH 3/3] loopback: treat ENXIO as ENOENT Signed-off-by: Giuseppe Scrivano --- pkg/loopback/attach_loopback.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/loopback/attach_loopback.go b/pkg/loopback/attach_loopback.go index f9f0134984..067dd7cd90 100644 --- a/pkg/loopback/attach_loopback.go +++ b/pkg/loopback/attach_loopback.go @@ -11,6 +11,7 @@ import ( "syscall" "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" ) // Loopback related errors @@ -71,7 +72,9 @@ func openNextAvailableLoopback(sparseName string, sparseFile *os.File) (loopFile // OpenFile adds O_CLOEXEC loopFile, err = os.OpenFile(target, os.O_RDWR, 0o644) if err != nil { - if errors.Is(err, fs.ErrNotExist) { + // The kernel returns ENXIO when opening a device that is in the "deleting" or "rundown" state, so + // just treat ENXIO as if the device does not exist. + if errors.Is(err, fs.ErrNotExist) || errors.Is(err, unix.ENXIO) { // Another process could have taken the loopback device in the meantime. So repeat // the process with the next loopback device. continue