From 991120fa10716ea371463188f6638d0401c1a935 Mon Sep 17 00:00:00 2001
From: Sagar Vemuri
Date: Mon, 16 Jul 2018 14:24:33 -0700
Subject: [PATCH] Allow ttl to be changed dynamically (#4133)

Summary:
Allow ttl to be changed dynamically.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/4133

Differential Revision: D8845440

Pulled By: sagar0

fbshipit-source-id: c8c87ae643b3a8c4123e4c037c4645efc094a2d3
---
 HISTORY.md                         |  3 +-
 db/db_compaction_test.cc           | 56 ++++++++++++++++++++++++++++--
 db/version_set.cc                  | 11 +++---
 db/version_set.h                   |  3 +-
 include/rocksdb/advanced_options.h |  2 ++
 options/cf_options.cc              |  3 +-
 options/cf_options.h               |  5 +--
 options/options_helper.cc          |  4 ++-
 8 files changed, 72 insertions(+), 15 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index ffaf5f516..c27d2824e 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -11,7 +11,8 @@
 * Add a new table property, "rocksdb.num.range-deletions", which counts the number of range deletion tombstones in the table.
 * Improve the performance of iterators doing long range scans by using readahead, when using direct IO.
 * pin_top_level_index_and_filter (default true) in BlockBasedTableOptions can be used in combination with cache_index_and_filter_blocks to prefetch and pin the top-level index of partitioned index and filter blocks in cache. It has no impact when cache_index_and_filter_blocks is false.
-* Avoid memcpy when reading mmap files with OpenReadOnly and max_open_files==-1
+* Avoid memcpy when reading mmap files with OpenReadOnly and max_open_files==-1.
+* Support dynamically changing `ColumnFamilyOptions::ttl` via `SetOptions()`.

 ### Bug Fixes
 * fix deadlock with enable_pipelined_write=true and max_successive_merges > 0

diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc
index 20dd09541..b28f5a2e5 100644
--- a/db/db_compaction_test.cc
+++ b/db/db_compaction_test.cc
@@ -3376,14 +3376,13 @@ TEST_F(DBCompactionTest, LevelCompactExpiredTtlFiles) {
     }
     Flush();
   }
-  Flush();
   dbfull()->TEST_WaitForCompact();
   MoveFilesToLevel(3);
   ASSERT_EQ("0,0,0,2", FilesPerLevel());

+  // Delete previously written keys.
   for (int i = 0; i < kNumLevelFiles; ++i) {
     for (int j = 0; j < kNumKeysPerFile; ++j) {
-      // Overwrite previous keys with smaller, but predictable, values.
       ASSERT_OK(Delete(Key(i * kNumKeysPerFile + j)));
     }
     Flush();
@@ -3396,7 +3395,7 @@ TEST_F(DBCompactionTest, LevelCompactExpiredTtlFiles) {
   env_->addon_time_.fetch_add(36 * 60 * 60);  // 36 hours
   ASSERT_EQ("0,2,0,2", FilesPerLevel());

-  // Just do a siimple write + flush so that the Ttl expired files get
+  // Just do a simple write + flush so that the Ttl expired files get
   // compacted.
   ASSERT_OK(Put("a", "1"));
   Flush();
@@ -3410,6 +3409,57 @@ TEST_F(DBCompactionTest, LevelCompactExpiredTtlFiles) {
   // All non-L0 files are deleted, as they contained only deleted data.
   ASSERT_EQ("1", FilesPerLevel());
   rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+
+  // Test dynamically changing ttl.
+
+  env_->addon_time_.store(0);
+  DestroyAndReopen(options);
+
+  for (int i = 0; i < kNumLevelFiles; ++i) {
+    for (int j = 0; j < kNumKeysPerFile; ++j) {
+      ASSERT_OK(
+          Put(Key(i * kNumKeysPerFile + j), RandomString(&rnd, kValueSize)));
+    }
+    Flush();
+  }
+  dbfull()->TEST_WaitForCompact();
+  MoveFilesToLevel(3);
+  ASSERT_EQ("0,0,0,2", FilesPerLevel());
+
+  // Delete previously written keys.
+  for (int i = 0; i < kNumLevelFiles; ++i) {
+    for (int j = 0; j < kNumKeysPerFile; ++j) {
+      ASSERT_OK(Delete(Key(i * kNumKeysPerFile + j)));
+    }
+    Flush();
+  }
+  dbfull()->TEST_WaitForCompact();
+  ASSERT_EQ("2,0,0,2", FilesPerLevel());
+  MoveFilesToLevel(1);
+  ASSERT_EQ("0,2,0,2", FilesPerLevel());
+
+  // Move time forward by 12 hours, and make sure that compaction still doesn't
+  // trigger as ttl is set to 24 hours.
+  env_->addon_time_.fetch_add(12 * 60 * 60);
+  ASSERT_OK(Put("a", "1"));
+  Flush();
+  dbfull()->TEST_WaitForCompact();
+  ASSERT_EQ("1,2,0,2", FilesPerLevel());
+
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
+        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
+        ASSERT_TRUE(compaction->compaction_reason() == CompactionReason::kTtl);
+      });
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+
+  // Dynamically change ttl to 10 hours.
+  // This should trigger a ttl compaction, as 12 hours have already passed.
+  ASSERT_OK(dbfull()->SetOptions({{"ttl", "36000"}}));
+  dbfull()->TEST_WaitForCompact();
+  // All non-L0 files are deleted, as they contained only deleted data.
+  ASSERT_EQ("1", FilesPerLevel());
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
 }

 TEST_F(DBCompactionTest, CompactRangeDelayedByL0FileCount) {
diff --git a/db/version_set.cc b/db/version_set.cc
index ced79de75..d7aafd71e 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -1665,8 +1665,8 @@ void VersionStorageInfo::ComputeCompactionScore(
   }
   ComputeFilesMarkedForCompaction();
   ComputeBottommostFilesMarkedForCompaction();
-  if (immutable_cf_options.ttl > 0) {
-    ComputeExpiredTtlFiles(immutable_cf_options);
+  if (mutable_cf_options.ttl > 0) {
+    ComputeExpiredTtlFiles(immutable_cf_options, mutable_cf_options.ttl);
   }
   EstimateCompactionBytesNeeded(mutable_cf_options);
 }
@@ -1695,8 +1695,8 @@ void VersionStorageInfo::ComputeFilesMarkedForCompaction() {
 }

 void VersionStorageInfo::ComputeExpiredTtlFiles(
-    const ImmutableCFOptions& ioptions) {
-  assert(ioptions.ttl > 0);
+    const ImmutableCFOptions& ioptions, const uint64_t ttl) {
+  assert(ttl > 0);

   expired_ttl_files_.clear();

@@ -1713,8 +1713,7 @@ void VersionStorageInfo::ComputeExpiredTtlFiles(
         f->fd.table_reader->GetTableProperties() != nullptr) {
       auto creation_time =
           f->fd.table_reader->GetTableProperties()->creation_time;
-      if (creation_time > 0 &&
-          creation_time < (current_time - ioptions.ttl)) {
+      if (creation_time > 0 && creation_time < (current_time - ttl)) {
         expired_ttl_files_.emplace_back(level, f);
       }
     }
diff --git a/db/version_set.h b/db/version_set.h
index 269592b6c..6fe205651 100644
--- a/db/version_set.h
+++ b/db/version_set.h
@@ -137,7 +137,8 @@ class VersionStorageInfo {

   // This computes ttl_expired_files_ and is called by
   // ComputeCompactionScore()
-  void ComputeExpiredTtlFiles(const ImmutableCFOptions& ioptions);
+  void ComputeExpiredTtlFiles(const ImmutableCFOptions& ioptions,
+                              const uint64_t ttl);

   // This computes bottommost_files_marked_for_compaction_ and is called by
   // ComputeCompactionScore() or UpdateOldestSnapshot().
diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h
index 66807f77e..940a6f6b7 100644
--- a/include/rocksdb/advanced_options.h
+++ b/include/rocksdb/advanced_options.h
@@ -596,6 +596,8 @@ struct AdvancedColumnFamilyOptions {
   // Enabled only for level compaction for now.
   //
   // Default: 0 (disabled)
+  //
+  // Dynamically changeable through SetOptions() API
   uint64_t ttl = 0;

   // Create ColumnFamilyOptions with default values for all fields
diff --git a/options/cf_options.cc b/options/cf_options.cc
index 01aa6dcc4..d70d53930 100644
--- a/options/cf_options.cc
+++ b/options/cf_options.cc
@@ -75,7 +75,6 @@ ImmutableCFOptions::ImmutableCFOptions(const ImmutableDBOptions& db_options,
       max_subcompactions(db_options.max_subcompactions),
       memtable_insert_with_hint_prefix_extractor(
           cf_options.memtable_insert_with_hint_prefix_extractor.get()),
-      ttl(cf_options.ttl),
       cf_paths(cf_options.cf_paths) {}

 // Multiple two operands. If they overflow, return op1.
@@ -168,6 +167,8 @@ void MutableCFOptions::Dump(Logger* log) const {
                  max_bytes_for_level_base);
   ROCKS_LOG_INFO(log, "      max_bytes_for_level_multiplier: %f",
                  max_bytes_for_level_multiplier);
+  ROCKS_LOG_INFO(log, "                                 ttl: %" PRIu64,
+                 ttl);
   std::string result;
   char buf[10];
   for (const auto m : max_bytes_for_level_multiplier_additional) {
diff --git a/options/cf_options.h b/options/cf_options.h
index df7f82f44..1658bf427 100644
--- a/options/cf_options.h
+++ b/options/cf_options.h
@@ -119,8 +119,6 @@ struct ImmutableCFOptions {

   const SliceTransform* memtable_insert_with_hint_prefix_extractor;

-  uint64_t ttl;
-
   std::vector<DbPath> cf_paths;
 };

@@ -149,6 +147,7 @@ struct MutableCFOptions {
         target_file_size_multiplier(options.target_file_size_multiplier),
         max_bytes_for_level_base(options.max_bytes_for_level_base),
         max_bytes_for_level_multiplier(options.max_bytes_for_level_multiplier),
+        ttl(options.ttl),
         max_bytes_for_level_multiplier_additional(
             options.max_bytes_for_level_multiplier_additional),
         compaction_options_fifo(options.compaction_options_fifo),
@@ -181,6 +180,7 @@ struct MutableCFOptions {
         target_file_size_multiplier(0),
         max_bytes_for_level_base(0),
         max_bytes_for_level_multiplier(0),
+        ttl(0),
         compaction_options_fifo(),
         max_sequential_skip_in_iterations(0),
         paranoid_file_checks(false),
@@ -228,6 +228,7 @@ struct MutableCFOptions {
   int target_file_size_multiplier;
   uint64_t max_bytes_for_level_base;
   double max_bytes_for_level_multiplier;
+  uint64_t ttl;
   std::vector<int> max_bytes_for_level_multiplier_additional;
   CompactionOptionsFIFO compaction_options_fifo;
   CompactionOptionsUniversal compaction_options_universal;
diff --git a/options/options_helper.cc b/options/options_helper.cc
index a8064c830..455b53503 100644
--- a/options/options_helper.cc
+++ b/options/options_helper.cc
@@ -168,6 +168,7 @@ ColumnFamilyOptions BuildColumnFamilyOptions(
       mutable_cf_options.max_bytes_for_level_base;
   cf_opts.max_bytes_for_level_multiplier =
       mutable_cf_options.max_bytes_for_level_multiplier;
+  cf_opts.ttl = mutable_cf_options.ttl;

   cf_opts.max_bytes_for_level_multiplier_additional.clear();
   for (auto value :
@@ -1867,7 +1868,8 @@ std::unordered_map<std::string, OptionTypeInfo>
          offsetof(struct MutableCFOptions, compaction_options_universal)}},
     {"ttl",
      {offset_of(&ColumnFamilyOptions::ttl), OptionType::kUInt64T,
-      OptionVerificationType::kNormal, false, 0}}};
+      OptionVerificationType::kNormal, true,
+      offsetof(struct MutableCFOptions, ttl)}}};

 std::unordered_map<std::string, OptionTypeInfo>
     OptionsHelper::fifo_compaction_options_type_info = {
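
A minimal usage sketch of what this patch enables, for readers who want to try it: open a DB with a 24-hour ttl, then tighten it on the live instance through SetOptions(), mirroring the {{"ttl", "36000"}} call in the test above. The database path and the main() scaffolding are illustrative assumptions, not part of the patch.

#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  // ttl is in seconds; SST files whose creation_time is older than this
  // become candidates for compaction (level compaction only, per this patch).
  options.ttl = 24 * 60 * 60;  // 24 hours

  rocksdb::DB* db = nullptr;
  // "/tmp/ttl_demo" is an illustrative path.
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/ttl_demo", &db);
  assert(s.ok());

  // ... normal Puts/Gets/Flushes ...

  // Now that ttl lives in MutableCFOptions, it can be changed without
  // reopening the DB. Lowering it to 10 hours makes any file older than
  // that eligible for a ttl compaction.
  s = db->SetOptions({{"ttl", "36000"}});
  assert(s.ok());

  delete db;
  return 0;
}

As the test above verifies through the LevelCompactionPicker::PickCompaction:Return sync point, compactions picked this way report CompactionReason::kTtl.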