From b666f8544501198f39a8b7b16b9fbd5544e2c43a Mon Sep 17 00:00:00 2001
From: sdong
Date: Tue, 13 Sep 2016 16:38:28 -0700
Subject: [PATCH] Consider more factors when determining preallocation size of WAL files

Summary: Currently the WAL file preallocation size is 1.1 * write_buffer_size. This, however, will be over-estimated if options.db_write_buffer_size or options.max_total_wal_size is set and is much smaller.

Test Plan: Add a unit test.

Reviewers: andrewkr, yiwu

Reviewed By: yiwu

Subscribers: leveldb, andrewkr, dhruba

Differential Revision: https://reviews.facebook.net/D63957
---
 db/db_impl.cc     | 24 +++++++++++--
 db/db_impl.h      |  2 ++
 db/db_test2.cc    |  1 -
 db/db_test_util.h | 13 ++++++-
 db/db_wal_test.cc | 89 +++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 124 insertions(+), 5 deletions(-)

diff --git a/db/db_impl.cc b/db/db_impl.cc
index cefa98913..6cf9e7d09 100644
--- a/db/db_impl.cc
+++ b/db/db_impl.cc
@@ -3682,6 +3682,24 @@ bool DBImpl::MCOverlap(ManualCompaction* m, ManualCompaction* m1) {
   return true;
 }
 
+uint64_t DBImpl::GetWalPreallocateBlockSize(uint64_t write_buffer_size) const {
+  uint64_t bsize = write_buffer_size / 10 + write_buffer_size;
+  // Some users might set very high write_buffer_size and rely on
+  // max_total_wal_size or other parameters to control the WAL size.
+  if (db_options_.max_total_wal_size > 0) {
+    bsize = std::min(bsize, db_options_.max_total_wal_size);
+  }
+  if (db_options_.db_write_buffer_size > 0) {
+    bsize = std::min(bsize, db_options_.db_write_buffer_size);
+  }
+  if (db_options_.write_buffer_manager &&
+      db_options_.write_buffer_manager->enabled()) {
+    bsize = std::min(bsize, db_options_.write_buffer_manager->buffer_size());
+  }
+
+  return bsize;
+}
+
 namespace {
 struct IterState {
   IterState(DBImpl* _db, InstrumentedMutex* _mu, SuperVersion* _super_version,
@@ -4995,8 +5013,7 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) {
         // Our final size should be less than write_buffer_size
         // (compression, etc) but err on the side of caution.
         lfile->SetPreallocationBlockSize(
-            mutable_cf_options.write_buffer_size / 10 +
-            mutable_cf_options.write_buffer_size);
+            GetWalPreallocateBlockSize(mutable_cf_options.write_buffer_size));
         unique_ptr<WritableFileWriter> file_writer(
             new WritableFileWriter(std::move(lfile), opt_env_opt));
         new_log = new log::Writer(std::move(file_writer), new_log_number,
@@ -5747,7 +5764,8 @@ Status DB::Open(const DBOptions& db_options, const std::string& dbname,
         LogFileName(impl->db_options_.wal_dir, new_log_number),
         &lfile, opt_env_options);
     if (s.ok()) {
-      lfile->SetPreallocationBlockSize((max_write_buffer_size / 10) + max_write_buffer_size);
+      lfile->SetPreallocationBlockSize(
+          impl->GetWalPreallocateBlockSize(max_write_buffer_size));
       impl->logfile_number_ = new_log_number;
       unique_ptr<WritableFileWriter> file_writer(
           new WritableFileWriter(std::move(lfile), opt_env_options));
diff --git a/db/db_impl.h b/db/db_impl.h
index 51b058d51..277e09bfd 100644
--- a/db/db_impl.h
+++ b/db/db_impl.h
@@ -1067,6 +1067,8 @@ class DBImpl : public DB {
   bool ShouldntRunManualCompaction(ManualCompaction* m);
   bool HaveManualCompaction(ColumnFamilyData* cfd);
   bool MCOverlap(ManualCompaction* m, ManualCompaction* m1);
+
+  uint64_t GetWalPreallocateBlockSize(uint64_t write_buffer_size) const;
 };
 
 // Sanitize db options.  The caller should delete result.info_log if
diff --git a/db/db_test2.cc b/db/db_test2.cc
index 95383daa8..670c045e4 100644
--- a/db/db_test2.cc
+++ b/db/db_test2.cc
@@ -2065,7 +2065,6 @@ TEST_F(DBTest2, ReadAmpBitmapLiveInCacheAfterDBClose) {
   ASSERT_EQ(total_useful_bytes_iter1 + total_useful_bytes_iter2,
             total_loaded_bytes_iter1 + total_loaded_bytes_iter2);
 }
-
 }  // namespace rocksdb
 
 int main(int argc, char** argv) {
diff --git a/db/db_test_util.h b/db/db_test_util.h
index 68dcde52f..629211d5b 100644
--- a/db/db_test_util.h
+++ b/db/db_test_util.h
@@ -310,7 +310,18 @@ class SpecialEnv : public EnvWrapper {
         return s;
       }
       Status Truncate(uint64_t size) override { return base_->Truncate(size); }
-      Status Close() override { return base_->Close(); }
+      Status Close() override {
+// SyncPoint is not supported in Released Windows Mode.
+#if !(defined NDEBUG) || !defined(OS_WIN)
+        // Check preallocation size
+        // preallocation size is never passed to base file.
+        size_t preallocation_size = preallocation_block_size();
+        TEST_SYNC_POINT_CALLBACK("DBTestWalFile.GetPreallocationStatus",
+                                 &preallocation_size);
+#endif  // !(defined NDEBUG) || !defined(OS_WIN)
+
+        return base_->Close();
+      }
       Status Flush() override { return base_->Flush(); }
       Status Sync() override {
         ++env_->sync_counter_;
diff --git a/db/db_wal_test.cc b/db/db_wal_test.cc
index 7e29fbb70..0d2a74e0a 100644
--- a/db/db_wal_test.cc
+++ b/db/db_wal_test.cc
@@ -291,6 +291,95 @@ TEST_F(DBWALTest, RecoveryWithEmptyLog) {
   } while (ChangeOptions());
 }
 
+#if !(defined NDEBUG) || !defined(OS_WIN)
+TEST_F(DBWALTest, PreallocateBlock) {
+  Options options = CurrentOptions();
+  options.write_buffer_size = 10 * 1000 * 1000;
+  options.max_total_wal_size = 0;
+
+  size_t expected_preallocation_size = static_cast<size_t>(
+      options.write_buffer_size + options.write_buffer_size / 10);
+
+  DestroyAndReopen(options);
+
+  std::atomic<int> called(0);
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "DBTestWalFile.GetPreallocationStatus", [&](void* arg) {
+        ASSERT_TRUE(arg != nullptr);
+        size_t preallocation_size = *(static_cast<size_t*>(arg));
+        ASSERT_EQ(expected_preallocation_size, preallocation_size);
+        called.fetch_add(1);
+      });
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+  Put("", "");
+  Flush();
+  Put("", "");
+  Close();
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+  ASSERT_EQ(2, called.load());
+
+  options.max_total_wal_size = 1000 * 1000;
+  expected_preallocation_size = static_cast<size_t>(options.max_total_wal_size);
+  Reopen(options);
+  called.store(0);
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "DBTestWalFile.GetPreallocationStatus", [&](void* arg) {
+        ASSERT_TRUE(arg != nullptr);
+        size_t preallocation_size = *(static_cast<size_t*>(arg));
+        ASSERT_EQ(expected_preallocation_size, preallocation_size);
+        called.fetch_add(1);
+      });
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+  Put("", "");
+  Flush();
+  Put("", "");
+  Close();
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+  ASSERT_EQ(2, called.load());
+
+  options.db_write_buffer_size = 800 * 1000;
+  expected_preallocation_size =
+      static_cast<size_t>(options.db_write_buffer_size);
+  Reopen(options);
+  called.store(0);
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "DBTestWalFile.GetPreallocationStatus", [&](void* arg) {
+        ASSERT_TRUE(arg != nullptr);
+        size_t preallocation_size = *(static_cast<size_t*>(arg));
+        ASSERT_EQ(expected_preallocation_size, preallocation_size);
+        called.fetch_add(1);
+      });
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+  Put("", "");
+  Flush();
+  Put("", "");
+  Close();
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+  ASSERT_EQ(2, called.load());
+
+  expected_preallocation_size = 700 * 1000;
+  std::shared_ptr<WriteBufferManager> write_buffer_manager =
+      std::make_shared<WriteBufferManager>(static_cast<uint64_t>(700 * 1000));
+  options.write_buffer_manager = write_buffer_manager;
+  Reopen(options);
+  called.store(0);
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "DBTestWalFile.GetPreallocationStatus", [&](void* arg) {
+        ASSERT_TRUE(arg != nullptr);
+        size_t preallocation_size = *(static_cast<size_t*>(arg));
+        ASSERT_EQ(expected_preallocation_size, preallocation_size);
+        called.fetch_add(1);
+      });
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+  Put("", "");
+  Flush();
+  Put("", "");
+  Close();
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+  ASSERT_EQ(2, called.load());
+}
+#endif  // !(defined NDEBUG) || !defined(OS_WIN)
+
 #ifndef ROCKSDB_LITE
 TEST_F(DBWALTest, GetSortedWalFiles) {
   do {