Include more information in file lock failure (#6507)

Summary:
When users fail to open a DB with file lock failure, it is sometimes hard for users to debug. We now include the time the lock is acquired and the thread ID that acquired the lock, to help users debug problems like this. Default Env's thread ID is used.

Since type of lockedFiles is changed, rename it to follow naming convention too.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6507

Test Plan: Add a unit test and improve an existing test to validate the case.

Differential Revision: D20378333

fbshipit-source-id: 312fe0e9733fd1d1e9969c321b90ce523cf4708a
main
sdong 5 years ago committed by Facebook GitHub Bot
parent 37a635cfe6
commit 331e6199df
  1. 1
      HISTORY.md
  2. 7
      db/db_basic_test.cc
  3. 48
      env/fs_posix.cc
  4. 7
      util/filelock_test.cc

@ -12,6 +12,7 @@
### New Features ### New Features
* Basic support for user timestamp in iterator. Seek/SeekToFirst/Next and lower/upper bounds are supported. Reverse iteration is not supported. Merge is not considered. * Basic support for user timestamp in iterator. Seek/SeekToFirst/Next and lower/upper bounds are supported. Reverse iteration is not supported. Merge is not considered.
* When file lock failure when the lock is held by the current process, return acquiring time and thread ID in the error message.
## 6.8.0 (02/24/2020) ## 6.8.0 (02/24/2020)
### Java API Changes ### Java API Changes

@ -324,7 +324,12 @@ TEST_F(DBBasicTest, CheckLock) {
ASSERT_OK(TryReopen(options)); ASSERT_OK(TryReopen(options));
// second open should fail // second open should fail
ASSERT_TRUE(!(DB::Open(options, dbname_, &localdb)).ok()); Status s = DB::Open(options, dbname_, &localdb);
ASSERT_NOK(s);
#ifdef OS_LINUX
ASSERT_TRUE(s.ToString().find("lock hold by current process") !=
std::string::npos);
#endif // OS_LINUX
} while (ChangeCompactOptions()); } while (ChangeCompactOptions());
} }

48
env/fs_posix.cc vendored

@ -82,8 +82,13 @@ inline mode_t GetDBFileMode(bool allow_non_owner_access) {
} }
// list of pathnames that are locked // list of pathnames that are locked
static std::set<std::string> lockedFiles; // Only used for error message.
static port::Mutex mutex_lockedFiles; struct LockHoldingInfo {
int64_t acquire_time;
uint64_t acquiring_thread;
};
static std::map<std::string, LockHoldingInfo> locked_files;
static port::Mutex mutex_locked_files;
static int LockOrUnlock(int fd, bool lock) { static int LockOrUnlock(int fd, bool lock) {
errno = 0; errno = 0;
@ -699,9 +704,16 @@ class PosixFileSystem : public FileSystem {
*lock = nullptr; *lock = nullptr;
IOStatus result; IOStatus result;
mutex_lockedFiles.Lock(); LockHoldingInfo lhi;
// If it already exists in the lockedFiles set, then it is already locked, int64_t current_time = 0;
// and fail this lock attempt. Otherwise, insert it into lockedFiles. // Ignore status code as the time is only used for error message.
Env::Default()->GetCurrentTime(&current_time);
lhi.acquire_time = current_time;
lhi.acquiring_thread = Env::Default()->GetThreadID();
mutex_locked_files.Lock();
// If it already exists in the locked_files set, then it is already locked,
// and fail this lock attempt. Otherwise, insert it into locked_files.
// This check is needed because fcntl() does not detect lock conflict // This check is needed because fcntl() does not detect lock conflict
// if the fcntl is issued by the same thread that earlier acquired // if the fcntl is issued by the same thread that earlier acquired
// this lock. // this lock.
@ -709,10 +721,18 @@ class PosixFileSystem : public FileSystem {
// Otherwise, we will open a new file descriptor. Locks are associated with // Otherwise, we will open a new file descriptor. Locks are associated with
// a process, not a file descriptor and when *any* file descriptor is // a process, not a file descriptor and when *any* file descriptor is
// closed, all locks the process holds for that *file* are released // closed, all locks the process holds for that *file* are released
if (lockedFiles.insert(fname).second == false) { const auto it_success = locked_files.insert({fname, lhi});
mutex_lockedFiles.Unlock(); if (it_success.second == false) {
mutex_locked_files.Unlock();
errno = ENOLCK; errno = ENOLCK;
return IOError("lock ", fname, errno); LockHoldingInfo& prev_info = it_success.first->second;
// Note that the thread ID printed is the same one as the one in
// posix logger, but posix logger prints it hex format.
return IOError("lock hold by current process, acquire time " +
ToString(prev_info.acquire_time) +
" acquiring thread " +
ToString(prev_info.acquiring_thread),
fname, errno);
} }
int fd; int fd;
@ -727,7 +747,7 @@ class PosixFileSystem : public FileSystem {
} else if (LockOrUnlock(fd, true) == -1) { } else if (LockOrUnlock(fd, true) == -1) {
// if there is an error in locking, then remove the pathname from // if there is an error in locking, then remove the pathname from
// lockedfiles // lockedfiles
lockedFiles.erase(fname); locked_files.erase(fname);
result = IOError("While lock file", fname, errno); result = IOError("While lock file", fname, errno);
close(fd); close(fd);
} else { } else {
@ -738,7 +758,7 @@ class PosixFileSystem : public FileSystem {
*lock = my_lock; *lock = my_lock;
} }
mutex_lockedFiles.Unlock(); mutex_locked_files.Unlock();
return result; return result;
} }
@ -746,10 +766,10 @@ class PosixFileSystem : public FileSystem {
IODebugContext* /*dbg*/) override { IODebugContext* /*dbg*/) override {
PosixFileLock* my_lock = reinterpret_cast<PosixFileLock*>(lock); PosixFileLock* my_lock = reinterpret_cast<PosixFileLock*>(lock);
IOStatus result; IOStatus result;
mutex_lockedFiles.Lock(); mutex_locked_files.Lock();
// If we are unlocking, then verify that we had locked it earlier, // If we are unlocking, then verify that we had locked it earlier,
// it should already exist in lockedFiles. Remove it from lockedFiles. // it should already exist in locked_files. Remove it from locked_files.
if (lockedFiles.erase(my_lock->filename) != 1) { if (locked_files.erase(my_lock->filename) != 1) {
errno = ENOLCK; errno = ENOLCK;
result = IOError("unlock", my_lock->filename, errno); result = IOError("unlock", my_lock->filename, errno);
} else if (LockOrUnlock(my_lock->fd_, false) == -1) { } else if (LockOrUnlock(my_lock->fd_, false) == -1) {
@ -757,7 +777,7 @@ class PosixFileSystem : public FileSystem {
} }
close(my_lock->fd_); close(my_lock->fd_);
delete my_lock; delete my_lock;
mutex_lockedFiles.Unlock(); mutex_locked_files.Unlock();
return result; return result;
} }

@ -10,6 +10,7 @@
#include <vector> #include <vector>
#include "test_util/testharness.h" #include "test_util/testharness.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
@ -120,7 +121,11 @@ TEST_F(LockTest, LockBySameThread) {
ASSERT_TRUE( AssertFileIsLocked() ); ASSERT_TRUE( AssertFileIsLocked() );
// re-acquire the lock on the same file. This should fail. // re-acquire the lock on the same file. This should fail.
ASSERT_TRUE(LockFile(&lock2).IsIOError()); Status s = LockFile(&lock2);
ASSERT_TRUE(s.IsIOError());
// Validate that error message contains current thread ID.
ASSERT_TRUE(s.ToString().find(ToString(Env::Default()->GetThreadID())) !=
std::string::npos);
// check the file is locked // check the file is locked
ASSERT_TRUE( AssertFileIsLocked() ); ASSERT_TRUE( AssertFileIsLocked() );

Loading…
Cancel
Save