Fix WAL log data corruption #8723 (#8746)

Summary:
Fix WAL log data corruption when using DBOptions.manual_wal_flush(true) and WriteOptions.sync(true) together (https://github.com/facebook/rocksdb/issues/8723)

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8746

Reviewed By: ajkr

Differential Revision: D30758468

Pulled By: riversand963

fbshipit-source-id: 07c20899d5f2447dc77861b4845efc68a59aa4e8
main
eharry 3 years ago committed by Facebook GitHub Bot
parent a5566d508b
commit 0b6be7eb68
  1. 1
      HISTORY.md
  2. 16
      db/db_impl/db_impl_write.cc
  3. 33
      db/db_test.cc

@ -9,6 +9,7 @@
* Fix a race in BackupEngine if RateLimiter is reconfigured during concurrent Restore operations. * Fix a race in BackupEngine if RateLimiter is reconfigured during concurrent Restore operations.
* Fix a bug on POSIX in which failure to create a lock file (e.g. out of space) can prevent future LockFile attempts in the same process on the same file from succeeding. * Fix a bug on POSIX in which failure to create a lock file (e.g. out of space) can prevent future LockFile attempts in the same process on the same file from succeeding.
* Fix a bug that backup_rate_limiter and restore_rate_limiter in BackupEngine could not limit read rates. * Fix a bug that backup_rate_limiter and restore_rate_limiter in BackupEngine could not limit read rates.
* Fix WAL log data corruption when using DBOptions.manual_wal_flush(true) and WriteOptions.sync(true) together. The sync WAL should work with locked log_write_mutex_.
### New Features ### New Features
* RemoteCompaction's interface now includes `db_name`, `db_id`, `session_id`, which could help the user uniquely identify compaction job between db instances and sessions. * RemoteCompaction's interface now includes `db_name`, `db_id`, `session_id`, which could help the user uniquely identify compaction job between db instances and sessions.

@ -1124,6 +1124,18 @@ IOStatus DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group,
// writer thread, so no one will push to logs_, // writer thread, so no one will push to logs_,
// - as long as other threads don't modify it, it's safe to read // - as long as other threads don't modify it, it's safe to read
// from std::deque from multiple threads concurrently. // from std::deque from multiple threads concurrently.
//
// Sync operation should work with locked log_write_mutex_, because:
// when DBOptions.manual_wal_flush_ is set,
// FlushWAL function will be invoked by another thread.
// if without locked log_write_mutex_, the log file may get data
// corruption
const bool needs_locking = manual_wal_flush_ && !two_write_queues_;
if (UNLIKELY(needs_locking)) {
log_write_mutex_.Lock();
}
for (auto& log : logs_) { for (auto& log : logs_) {
io_s = log.writer->file()->Sync(immutable_db_options_.use_fsync); io_s = log.writer->file()->Sync(immutable_db_options_.use_fsync);
if (!io_s.ok()) { if (!io_s.ok()) {
@ -1131,6 +1143,10 @@ IOStatus DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group,
} }
} }
if (UNLIKELY(needs_locking)) {
log_write_mutex_.Unlock();
}
if (io_s.ok() && need_log_dir_sync) { if (io_s.ok() && need_log_dir_sync) {
// We only sync WAL directory the first time WAL syncing is // We only sync WAL directory the first time WAL syncing is
// requested, so that in case users never turn on WAL sync, // requested, so that in case users never turn on WAL sync,

@ -4098,6 +4098,39 @@ TEST_F(DBTest, ConcurrentFlushWAL) {
} }
} }
// This test failure will be caught with a probability
TEST_F(DBTest, ManualFlushWalAndWriteRace) {
Options options;
options.env = env_;
options.manual_wal_flush = true;
options.create_if_missing = true;
DestroyAndReopen(options);
WriteOptions wopts;
wopts.sync = true;
port::Thread writeThread([&]() {
for (int i = 0; i < 100; i++) {
auto istr = ToString(i);
dbfull()->Put(wopts, "key_" + istr, "value_" + istr);
}
});
port::Thread flushThread([&]() {
for (int i = 0; i < 100; i++) {
ASSERT_OK(dbfull()->FlushWAL(false));
}
});
writeThread.join();
flushThread.join();
ASSERT_OK(dbfull()->Put(wopts, "foo1", "value1"));
ASSERT_OK(dbfull()->Put(wopts, "foo2", "value2"));
Reopen(options);
ASSERT_EQ("value1", Get("foo1"));
ASSERT_EQ("value2", Get("foo2"));
}
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
TEST_F(DBTest, DynamicMemtableOptions) { TEST_F(DBTest, DynamicMemtableOptions) {
const uint64_t k64KB = 1 << 16; const uint64_t k64KB = 1 << 16;

Loading…
Cancel
Save