From 331e6199df012fc7bae79a4f8905cbcac9f112e3 Mon Sep 17 00:00:00 2001 From: sdong Date: Wed, 11 Mar 2020 16:20:13 -0700 Subject: [PATCH] Include more information in file lock failure (#6507) Summary: When users fail to open a DB with file lock failure, it is sometimes hard for users to debug. We now include the time the lock is acquired and the thread ID that acquired the lock, to help users debug problems like this. Default Env's thread ID is used. Since type of lockedFiles is changed, rename it to follow naming convention too. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6507 Test Plan: Add a unit test and improve an existing test to validate the case. Differential Revision: D20378333 fbshipit-source-id: 312fe0e9733fd1d1e9969c321b90ce523cf4708a --- HISTORY.md | 1 + db/db_basic_test.cc | 7 ++++++- env/fs_posix.cc | 48 ++++++++++++++++++++++++++++++------------- util/filelock_test.cc | 7 ++++++- 4 files changed, 47 insertions(+), 16 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 49e6cb9fc..7208b07e9 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -12,6 +12,7 @@ ### New Features * Basic support for user timestamp in iterator. Seek/SeekToFirst/Next and lower/upper bounds are supported. Reverse iteration is not supported. Merge is not considered. +* When file lock failure when the lock is held by the current process, return acquiring time and thread ID in the error message. ## 6.8.0 (02/24/2020) ### Java API Changes diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index e2fd94027..393c87aeb 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -324,7 +324,12 @@ TEST_F(DBBasicTest, CheckLock) { ASSERT_OK(TryReopen(options)); // second open should fail - ASSERT_TRUE(!(DB::Open(options, dbname_, &localdb)).ok()); + Status s = DB::Open(options, dbname_, &localdb); + ASSERT_NOK(s); +#ifdef OS_LINUX + ASSERT_TRUE(s.ToString().find("lock hold by current process") != + std::string::npos); +#endif // OS_LINUX } while (ChangeCompactOptions()); } diff --git a/env/fs_posix.cc b/env/fs_posix.cc index 0f1a41ac2..14c9d546d 100644 --- a/env/fs_posix.cc +++ b/env/fs_posix.cc @@ -82,8 +82,13 @@ inline mode_t GetDBFileMode(bool allow_non_owner_access) { } // list of pathnames that are locked -static std::set lockedFiles; -static port::Mutex mutex_lockedFiles; +// Only used for error message. +struct LockHoldingInfo { + int64_t acquire_time; + uint64_t acquiring_thread; +}; +static std::map locked_files; +static port::Mutex mutex_locked_files; static int LockOrUnlock(int fd, bool lock) { errno = 0; @@ -699,9 +704,16 @@ class PosixFileSystem : public FileSystem { *lock = nullptr; IOStatus result; - mutex_lockedFiles.Lock(); - // If it already exists in the lockedFiles set, then it is already locked, - // and fail this lock attempt. Otherwise, insert it into lockedFiles. + LockHoldingInfo lhi; + int64_t current_time = 0; + // Ignore status code as the time is only used for error message. + Env::Default()->GetCurrentTime(¤t_time); + lhi.acquire_time = current_time; + lhi.acquiring_thread = Env::Default()->GetThreadID(); + + mutex_locked_files.Lock(); + // If it already exists in the locked_files set, then it is already locked, + // and fail this lock attempt. Otherwise, insert it into locked_files. // This check is needed because fcntl() does not detect lock conflict // if the fcntl is issued by the same thread that earlier acquired // this lock. @@ -709,10 +721,18 @@ class PosixFileSystem : public FileSystem { // Otherwise, we will open a new file descriptor. Locks are associated with // a process, not a file descriptor and when *any* file descriptor is // closed, all locks the process holds for that *file* are released - if (lockedFiles.insert(fname).second == false) { - mutex_lockedFiles.Unlock(); + const auto it_success = locked_files.insert({fname, lhi}); + if (it_success.second == false) { + mutex_locked_files.Unlock(); errno = ENOLCK; - return IOError("lock ", fname, errno); + LockHoldingInfo& prev_info = it_success.first->second; + // Note that the thread ID printed is the same one as the one in + // posix logger, but posix logger prints it hex format. + return IOError("lock hold by current process, acquire time " + + ToString(prev_info.acquire_time) + + " acquiring thread " + + ToString(prev_info.acquiring_thread), + fname, errno); } int fd; @@ -727,7 +747,7 @@ class PosixFileSystem : public FileSystem { } else if (LockOrUnlock(fd, true) == -1) { // if there is an error in locking, then remove the pathname from // lockedfiles - lockedFiles.erase(fname); + locked_files.erase(fname); result = IOError("While lock file", fname, errno); close(fd); } else { @@ -738,7 +758,7 @@ class PosixFileSystem : public FileSystem { *lock = my_lock; } - mutex_lockedFiles.Unlock(); + mutex_locked_files.Unlock(); return result; } @@ -746,10 +766,10 @@ class PosixFileSystem : public FileSystem { IODebugContext* /*dbg*/) override { PosixFileLock* my_lock = reinterpret_cast(lock); IOStatus result; - mutex_lockedFiles.Lock(); + mutex_locked_files.Lock(); // If we are unlocking, then verify that we had locked it earlier, - // it should already exist in lockedFiles. Remove it from lockedFiles. - if (lockedFiles.erase(my_lock->filename) != 1) { + // it should already exist in locked_files. Remove it from locked_files. + if (locked_files.erase(my_lock->filename) != 1) { errno = ENOLCK; result = IOError("unlock", my_lock->filename, errno); } else if (LockOrUnlock(my_lock->fd_, false) == -1) { @@ -757,7 +777,7 @@ class PosixFileSystem : public FileSystem { } close(my_lock->fd_); delete my_lock; - mutex_lockedFiles.Unlock(); + mutex_locked_files.Unlock(); return result; } diff --git a/util/filelock_test.cc b/util/filelock_test.cc index 20efd513f..00790a024 100644 --- a/util/filelock_test.cc +++ b/util/filelock_test.cc @@ -10,6 +10,7 @@ #include #include "test_util/testharness.h" #include "util/coding.h" +#include "util/string_util.h" namespace ROCKSDB_NAMESPACE { @@ -120,7 +121,11 @@ TEST_F(LockTest, LockBySameThread) { ASSERT_TRUE( AssertFileIsLocked() ); // re-acquire the lock on the same file. This should fail. - ASSERT_TRUE(LockFile(&lock2).IsIOError()); + Status s = LockFile(&lock2); + ASSERT_TRUE(s.IsIOError()); + // Validate that error message contains current thread ID. + ASSERT_TRUE(s.ToString().find(ToString(Env::Default()->GetThreadID())) != + std::string::npos); // check the file is locked ASSERT_TRUE( AssertFileIsLocked() );