From 12f67d47f1f1a19604cf93a3aadf6ab42ba379ac Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Sat, 13 Jan 2018 09:13:02 -0800 Subject: [PATCH] Fix a possible race during PutObject() (#5376) Under any concurrent removeObjects in progress might have removed the parents of the same prefix for which there is an ongoing putObject request. An inconsistent situation may arise as explained below even under sufficient locking. PutObject is almost successful at the last stage when a temporary file is renamed to its actual namespace at `a/b/c/object1`. Concurrently a RemoveObject is also in progress at the same prefix for an `a/b/c/object2`. To create the object1 at location `a/b/c` PutObject has to create all the parents recursively. ``` a/b/c - os.MkdirAll loops through has now created 'a/' and 'b/' about to create 'c/' a/b/c/object2 - at this point 'c/' and 'object2' are deleted about to delete b/ ``` Now for os.MkdirAll loop the expected situation is that top level parent 'a/b/' exists which it created , such that it can create 'c/' - since removeObject and putObject do not compete for lock due to holding locks at different resources. removeObject proceeds to delete parent 'b/' since 'c/' is not yet present, once deleted 'os.MkdirAll' would receive an error as syscall.ENOENT which would fail the putObject request. This PR tries to address this issue by implementing a safer/guarded approach where we would retry an operation such as `os.MkdirAll` and `os.Rename` if both operations observe syscall.ENOENT. Fixes #5254 --- cmd/fs-v1-helpers.go | 36 +++--------- cmd/fs-v1-helpers_test.go | 22 ------- cmd/fs-v1.go | 2 +- cmd/os-reliable.go | 121 ++++++++++++++++++++++++++++++++++++++ cmd/os-reliable_test.go | 78 ++++++++++++++++++++++++ cmd/posix.go | 32 ++-------- 6 files changed, 211 insertions(+), 80 deletions(-) create mode 100644 cmd/os-reliable.go create mode 100644 cmd/os-reliable_test.go diff --git a/cmd/fs-v1-helpers.go b/cmd/fs-v1-helpers.go index b01c51244..138d5bea3 100644 --- a/cmd/fs-v1-helpers.go +++ b/cmd/fs-v1-helpers.go @@ -1,5 +1,5 @@ /* - * Minio Cloud Storage, (C) 2016 Minio, Inc. + * Minio Cloud Storage, (C) 2016, 2017, 2018 Minio, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,6 @@ package cmd import ( - "fmt" "io" "os" pathutil "path" @@ -91,24 +90,6 @@ func fsRemoveDir(dirPath string) (err error) { return nil } -// Creates a new directory, parent dir is also recursively created -// if it doesn't exist. -func fsMkdirAll(dirPath string) (err error) { - if dirPath == "" { - return errors.Trace(errInvalidArgument) - } - - if err = checkPathLength(dirPath); err != nil { - return errors.Trace(err) - } - - if err = os.MkdirAll(dirPath, 0777); err != nil { - return errors.Trace(err) - } - - return nil -} - // Creates a new directory, parent dir should exist // otherwise returns an error. If directory already // exists returns an error. Windows long paths @@ -277,7 +258,7 @@ func fsCreateFile(filePath string, reader io.Reader, buf []byte, fallocSize int6 return 0, errors.Trace(err) } - if err := os.MkdirAll(pathutil.Dir(filePath), 0777); err != nil { + if err := mkdirAll(pathutil.Dir(filePath), 0777); err != nil { return 0, errors.Trace(err) } @@ -374,19 +355,16 @@ func fsRenameFile(sourcePath, destPath string) error { if err := checkPathLength(destPath); err != nil { return errors.Trace(err) } + // Verify if source path exists. - if _, err := os.Stat((sourcePath)); err != nil { + if _, err := os.Stat(sourcePath); err != nil { return osErrToFSFileErr(err) } - if err := os.MkdirAll(pathutil.Dir(destPath), 0777); err != nil { - return errors.Trace(err) - } - if err := os.Rename((sourcePath), (destPath)); err != nil { - if isSysErrCrossDevice(err) { - return errors.Trace(fmt.Errorf("%s (%s)->(%s)", errCrossDeviceLink, sourcePath, destPath)) - } + + if err := renameAll(sourcePath, destPath); err != nil { return errors.Trace(err) } + return nil } diff --git a/cmd/fs-v1-helpers_test.go b/cmd/fs-v1-helpers_test.go index b24df2940..0fc6177b0 100644 --- a/cmd/fs-v1-helpers_test.go +++ b/cmd/fs-v1-helpers_test.go @@ -28,28 +28,6 @@ import ( "github.com/minio/minio/pkg/lock" ) -// Tests - fsMkdirAll() -func TestFSMkdirAll(t *testing.T) { - // create posix test setup - _, path, err := newPosixTestSetup() - if err != nil { - t.Fatalf("Unable to create posix test setup, %s", err) - } - defer os.RemoveAll(path) - - if err = fsMkdirAll(""); errors.Cause(err) != errInvalidArgument { - t.Fatal("Unexpected error", err) - } - - if err = fsMkdirAll(pathJoin(path, "my-obj-del-0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001")); errors.Cause(err) != errFileNameTooLong { - t.Fatal("Unexpected error", err) - } - - if err = fsMkdirAll(pathJoin(path, "success-vol", "success-object")); err != nil { - t.Fatal("Unexpected error", err) - } -} - func TestFSRenameFile(t *testing.T) { // create posix test setup _, path, err := newPosixTestSetup() diff --git a/cmd/fs-v1.go b/cmd/fs-v1.go index a2b469fb4..226362bff 100644 --- a/cmd/fs-v1.go +++ b/cmd/fs-v1.go @@ -665,7 +665,7 @@ func (fs fsObjects) putObject(bucket string, object string, data *hash.Reader, m if fs.parentDirIsObject(bucket, path.Dir(object)) { return ObjectInfo{}, toObjectErr(errors.Trace(errFileAccessDenied), bucket, object) } - if err = fsMkdirAll(pathJoin(fs.fsPath, bucket, object)); err != nil { + if err = mkdirAll(pathJoin(fs.fsPath, bucket, object), 0777); err != nil { return ObjectInfo{}, toObjectErr(err, bucket, object) } var fi os.FileInfo diff --git a/cmd/os-reliable.go b/cmd/os-reliable.go new file mode 100644 index 000000000..627a3065a --- /dev/null +++ b/cmd/os-reliable.go @@ -0,0 +1,121 @@ +/* + * Minio Cloud Storage, (C) 2018 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cmd + +import ( + "fmt" + "os" + "path" +) + +// Wrapper functions to os.MkdirAll, which calls reliableMkdirAll +// this is to ensure that if there is a racy parent directory +// delete in between we can simply retry the operation. +func mkdirAll(dirPath string, mode os.FileMode) (err error) { + if dirPath == "" { + return errInvalidArgument + } + + if err = checkPathLength(dirPath); err != nil { + return err + } + + if err = reliableMkdirAll(dirPath, mode); err != nil { + // File path cannot be verified since one of the parents is a file. + if isSysErrNotDir(err) { + return errFileAccessDenied + } else if isSysErrPathNotFound(err) { + // This is a special case should be handled only for + // windows, because windows API does not return "not a + // directory" error message. Handle this specifically here. + return errFileAccessDenied + } + } + return err +} + +// Reliably retries os.MkdirAll if for some reason os.MkdirAll returns +// syscall.ENOENT (parent does not exist). +func reliableMkdirAll(dirPath string, mode os.FileMode) (err error) { + i := 0 + for { + // Creates all the parent directories, with mode 0777 mkdir honors system umask. + if err = os.MkdirAll(dirPath, mode); err != nil { + // Retry only for the first retryable error. + if os.IsNotExist(err) && i == 0 { + i++ + continue + } + } + break + } + return err +} + +// Wrapper function to os.Rename, which calls reliableMkdirAll +// and reliableRenameAll. This is to ensure that if there is a +// racy parent directory delete in between we can simply retry +// the operation. +func renameAll(srcFilePath, dstFilePath string) (err error) { + if srcFilePath == "" || dstFilePath == "" { + return errInvalidArgument + } + + if err = checkPathLength(srcFilePath); err != nil { + return err + } + if err = checkPathLength(dstFilePath); err != nil { + return err + } + + if err = reliableRename(srcFilePath, dstFilePath); err != nil { + if isSysErrNotDir(err) { + return errFileAccessDenied + } else if isSysErrPathNotFound(err) { + // This is a special case should be handled only for + // windows, because windows API does not return "not a + // directory" error message. Handle this specifically here. + return errFileAccessDenied + } else if isSysErrCrossDevice(err) { + return fmt.Errorf("%s (%s)->(%s)", errCrossDeviceLink, srcFilePath, dstFilePath) + } else if os.IsNotExist(err) { + return errFileNotFound + } + } + return err +} + +// Reliably retries os.RenameAll if for some reason os.RenameAll returns +// syscall.ENOENT (parent does not exist). +func reliableRename(srcFilePath, dstFilePath string) (err error) { + i := 0 + for { + if err = reliableMkdirAll(path.Dir(dstFilePath), 0777); err != nil { + return err + } + // After a successful parent directory create attempt a renameAll. + if err = os.Rename(srcFilePath, dstFilePath); err != nil { + // Retry only for the first retryable error. + if os.IsNotExist(err) && i == 0 { + i++ + continue + } + } + break + } + return err +} diff --git a/cmd/os-reliable_test.go b/cmd/os-reliable_test.go new file mode 100644 index 000000000..f5405c0b0 --- /dev/null +++ b/cmd/os-reliable_test.go @@ -0,0 +1,78 @@ +/* + * Minio Cloud Storage, (C) 2018 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cmd + +import ( + "os" + "testing" + + "github.com/minio/minio/pkg/errors" +) + +// Tests - mkdirAll() +func TestOSMkdirAll(t *testing.T) { + // create posix test setup + _, path, err := newPosixTestSetup() + if err != nil { + t.Fatalf("Unable to create posix test setup, %s", err) + } + defer os.RemoveAll(path) + + if err = mkdirAll("", 0777); errors.Cause(err) != errInvalidArgument { + t.Fatal("Unexpected error", err) + } + + if err = mkdirAll(pathJoin(path, "my-obj-del-0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001"), 0777); errors.Cause(err) != errFileNameTooLong { + t.Fatal("Unexpected error", err) + } + + if err = mkdirAll(pathJoin(path, "success-vol", "success-object"), 0777); err != nil { + t.Fatal("Unexpected error", err) + } +} + +// Tests - renameAll() +func TestOSRenameAll(t *testing.T) { + // create posix test setup + _, path, err := newPosixTestSetup() + if err != nil { + t.Fatalf("Unable to create posix test setup, %s", err) + } + defer os.RemoveAll(path) + + if err = mkdirAll(pathJoin(path, "testvolume1"), 0777); err != nil { + t.Fatal(err) + } + if err = renameAll("", "foo"); err != errInvalidArgument { + t.Fatal(err) + } + if err = renameAll("foo", ""); err != errInvalidArgument { + t.Fatal(err) + } + if err = renameAll(pathJoin(path, "testvolume1"), pathJoin(path, "testvolume2")); err != nil { + t.Fatal(err) + } + if err = renameAll(pathJoin(path, "testvolume1"), pathJoin(path, "testvolume2")); errors.Cause(err) != errFileNotFound { + t.Fatal(err) + } + if err = renameAll(pathJoin(path, "my-obj-del-0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001"), pathJoin(path, "testvolume2")); errors.Cause(err) != errFileNameTooLong { + t.Fatal("Unexpected error", err) + } + if err = renameAll(pathJoin(path, "testvolume1"), pathJoin(path, "my-obj-del-0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001")); errors.Cause(err) != errFileNameTooLong { + t.Fatal("Unexpected error", err) + } +} diff --git a/cmd/posix.go b/cmd/posix.go index 1184c8a5b..3949e8b19 100644 --- a/cmd/posix.go +++ b/cmd/posix.go @@ -1,5 +1,5 @@ /* - * Minio Cloud Storage, (C) 2016, 2017 Minio, Inc. + * Minio Cloud Storage, (C) 2016, 2017, 2018 Minio, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -674,14 +674,7 @@ func (s *posix) createFile(volume, path string) (f *os.File, err error) { } else { // Create top level directories if they don't exist. // with mode 0777 mkdir honors system umask. - if err = os.MkdirAll(slashpath.Dir(filePath), 0777); err != nil { - // File path cannot be verified since one of the parents is a file. - if isSysErrNotDir(err) { - return nil, errFileAccessDenied - } else if isSysErrPathNotFound(err) { - // Add specific case for windows. - return nil, errFileAccessDenied - } + if err = mkdirAll(slashpath.Dir(filePath), 0777); err != nil { return nil, err } } @@ -970,25 +963,8 @@ func (s *posix) RenameFile(srcVolume, srcPath, dstVolume, dstPath string) (err e } // Destination does not exist, hence proceed with the rename. } - // Creates all the parent directories, with mode 0777 mkdir honors system umask. - if err = os.MkdirAll(slashpath.Dir(dstFilePath), 0777); err != nil { - // File path cannot be verified since one of the parents is a file. - if isSysErrNotDir(err) { - return errFileAccessDenied - } else if isSysErrPathNotFound(err) { - // This is a special case should be handled only for - // windows, because windows API does not return "not a - // directory" error message. Handle this specifically here. - return errFileAccessDenied - } - return err - } - // Finally attempt a rename. - err = os.Rename((srcFilePath), (dstFilePath)) - if err != nil { - if os.IsNotExist(err) { - return errFileNotFound - } + + if err = renameAll(srcFilePath, dstFilePath); err != nil { return err }