From 77eab5c85aa7a05d2200339738373f8b0b921e81 Mon Sep 17 00:00:00 2001 From: sdong Date: Mon, 25 Nov 2019 17:11:26 -0800 Subject: [PATCH] Make default value of options.ttl to be 30 days when it is supported. (#6073) Summary: By default options.ttl is disabled. We believe a better default will be 30 days, which means deleted data in the database will be removed from SST files slightly after 30 days, for most of the cases. Make the default UINT64_MAX - 1 to indicate that it is not overridden by users. Change the default of periodic_compaction_seconds from UINT64_MAX to UINT64_MAX - 1 too, to be consistent. Also fix a small bug in the previous periodic_compaction_seconds default code. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6073 Test Plan: Add unit tests for it. Differential Revision: D18669626 fbshipit-source-id: 957cd4374cafc1557d45a0ba002010552a378cc8 --- HISTORY.md | 5 ++- db/column_family.cc | 42 +++++++++++++++++++------ db/compaction/compaction_picker_test.cc | 2 ++ db/db_compaction_test.cc | 2 +- db/db_options_test.cc | 15 +++++++++ db/db_test.cc | 30 ++++++++++++++++++ db/db_universal_compaction_test.cc | 3 +- db/version_set.cc | 9 +++--- include/rocksdb/advanced_options.h | 14 ++++++--- 9 files changed, 100 insertions(+), 22 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index a49ec50ad..b39230a3f 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -3,12 +3,15 @@ ### Public API Change * RocksDB release 4.1 or older will not be able to open DB generated by the new release. 4.2 was released on Feb 23, 2016. * TTL Compactions in Level compaction style now initiate successive cascading compactions on a key range so that it reaches the bottom level quickly on TTL expiry. `creation_time` table property for compaction output files is now set to the minimum of the creation times of all compaction inputs. -* Changed the default value of periodic_compaction_seconds to `UINT64_MAX` which allows RocksDB to auto-tune periodic compaction scheduling. 
When using the default value, periodic compactions are now auto-enabled if a compaction filter is used. A value of `0` will turn off the feature completely. * With FIFO compaction style, options.periodic_compaction_seconds will have the same meaning as options.ttl. Whichever stricter will be used. With the default options.periodic_compaction_seconds value with options.ttl's default of 0, RocksDB will give a default of 30 days. * Added an API GetCreationTimeOfOldestFile(uint64_t* creation_time) to get the file_creation_time of the oldest SST file in the DB. * An unlikely usage of FilterPolicy is no longer supported. Calling GetFilterBitsBuilder() on the FilterPolicy returned by NewBloomFilterPolicy will now cause an assertion violation in debug builds, because RocksDB has internally migrated to a more elaborate interface that is expected to evolve further. Custom implementations of FilterPolicy should work as before, except those wrapping the return of NewBloomFilterPolicy, which will require a new override of a protected function in FilterPolicy. * The option BackupableDBOptions::max_valid_backups_to_open is now only used when opening BackupEngineReadOnly. When opening a read/write BackupEngine, anything but the default value logs a warning and is treated as the default. This change ensures that backup deletion has proper accounting of shared files to ensure they are deleted when no longer referenced by a backup. +### Default Option Changes +* Changed the default value of periodic_compaction_seconds to `UINT64_MAX - 1` which allows RocksDB to auto-tune periodic compaction scheduling. When using the default value, periodic compactions are now auto-enabled if a compaction filter is used. A value of `0` will turn off the feature completely. +* Changed the default value of ttl to `UINT64_MAX - 1` which allows RocksDB to auto-tune ttl value. When using the default value, TTL will be auto-enabled to 30 days, when the feature is supported. 
To revert to the old behavior, you can explicitly set it to 0. + ### New Features * Universal compaction to support options.periodic_compaction_seconds. A full compaction will be triggered if any file is over the threshold. * `GetLiveFilesMetaData` and `GetColumnFamilyMetaData` now expose the file number of SST files as well as the oldest blob file referenced by each SST. diff --git a/db/column_family.cc b/db/column_family.cc index 6fb5391e9..f66759818 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -188,6 +188,11 @@ Status CheckCFPathsSupported(const DBOptions& db_options, return Status::OK(); } +namespace { +const uint64_t kDefaultTtl = 0xfffffffffffffffe; +const uint64_t kDefaultPeriodicCompSecs = 0xfffffffffffffffe; +}; // namespace + ColumnFamilyOptions SanitizeOptions(const ImmutableDBOptions& db_options, const ColumnFamilyOptions& src) { ColumnFamilyOptions result = src; @@ -343,8 +348,20 @@ ColumnFamilyOptions SanitizeOptions(const ImmutableDBOptions& db_options, result.max_compaction_bytes = result.target_file_size_base * 25; } - const uint64_t kDefaultPeriodicCompSecs = 0xffffffffffffffff; - const uint64_t kDefaultTtlSecs = 30 * 24 * 60 * 60; + bool is_block_based_table = + (result.table_factory->Name() == BlockBasedTableFactory().Name()); + + const uint64_t kAdjustedTtl = 30 * 24 * 60 * 60; + if (result.ttl == kDefaultTtl) { + if (is_block_based_table && + result.compaction_style != kCompactionStyleFIFO) { + result.ttl = kAdjustedTtl; + } else { + result.ttl = 0; + } + } + + const uint64_t kAdjustedPeriodicCompSecs = 30 * 24 * 60 * 60; // Turn on periodic compactions and set them to occur once every 30 days if // compaction filters are used and periodic_compaction_seconds is set to the @@ -352,16 +369,19 @@ ColumnFamilyOptions SanitizeOptions(const ImmutableDBOptions& db_options, if (result.compaction_style != kCompactionStyleFIFO) { if ((result.compaction_filter != nullptr || result.compaction_filter_factory != nullptr) && - 
result.periodic_compaction_seconds == kDefaultPeriodicCompSecs) { - result.periodic_compaction_seconds = kDefaultTtlSecs; + result.periodic_compaction_seconds == kDefaultPeriodicCompSecs && + is_block_based_table) { + result.periodic_compaction_seconds = kAdjustedPeriodicCompSecs; } } else { // result.compaction_style == kCompactionStyleFIFO if (result.ttl == 0) { - if (result.periodic_compaction_seconds == kDefaultPeriodicCompSecs) { - result.periodic_compaction_seconds = kDefaultTtlSecs; + if (is_block_based_table) { + if (result.periodic_compaction_seconds == kDefaultPeriodicCompSecs) { + result.periodic_compaction_seconds = kAdjustedPeriodicCompSecs; + } + result.ttl = result.periodic_compaction_seconds; } - result.ttl = result.periodic_compaction_seconds; } else if (result.periodic_compaction_seconds != 0) { result.ttl = std::min(result.ttl, result.periodic_compaction_seconds); } @@ -379,6 +399,10 @@ ColumnFamilyOptions SanitizeOptions(const ImmutableDBOptions& db_options, } } + if (result.periodic_compaction_seconds == kDefaultPeriodicCompSecs) { + result.periodic_compaction_seconds = 0; + } + return result; } @@ -1209,7 +1233,7 @@ Status ColumnFamilyData::ValidateOptions( return s; } - if (cf_options.ttl > 0) { + if (cf_options.ttl > 0 && cf_options.ttl != kDefaultTtl) { if (cf_options.table_factory->Name() != BlockBasedTableFactory().Name()) { return Status::NotSupported( "TTL is only supported in Block-Based Table format. 
"); @@ -1217,7 +1241,7 @@ Status ColumnFamilyData::ValidateOptions( } if (cf_options.periodic_compaction_seconds > 0 && - cf_options.periodic_compaction_seconds < port::kMaxUint64) { + cf_options.periodic_compaction_seconds != kDefaultPeriodicCompSecs) { if (cf_options.table_factory->Name() != BlockBasedTableFactory().Name()) { return Status::NotSupported( "Periodic Compaction is only supported in " diff --git a/db/compaction/compaction_picker_test.cc b/db/compaction/compaction_picker_test.cc index df86bd985..d593d6465 100644 --- a/db/compaction/compaction_picker_test.cc +++ b/db/compaction/compaction_picker_test.cc @@ -57,6 +57,8 @@ class CompactionPickerTest : public testing::Test { log_buffer_(InfoLogLevel::INFO_LEVEL, &logger_), file_num_(1), vstorage_(nullptr) { + mutable_cf_options_.ttl = 0; + mutable_cf_options_.periodic_compaction_seconds = 0; // ioptions_.compaction_pri = kMinOverlappingRatio has its own set of // tests to cover. ioptions_.compaction_pri = kByCompensatedSize; diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index eaae16f54..3c2452e4c 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -3911,7 +3911,7 @@ TEST_F(DBCompactionTest, LevelPeriodicCompactionWithCompactionFilters) { for (CompactionFilterType comp_filter_type : {kUseCompactionFilter, kUseCompactionFilterFactory}) { // Assert that periodic compactions are not enabled. 
- ASSERT_EQ(port::kMaxUint64, options.periodic_compaction_seconds); + ASSERT_EQ(port::kMaxUint64 - 1, options.periodic_compaction_seconds); if (comp_filter_type == kUseCompactionFilter) { options.compaction_filter = &test_compaction_filter; diff --git a/db/db_options_test.cc b/db/db_options_test.cc index 103c58c3b..cb031e62e 100644 --- a/db/db_options_test.cc +++ b/db/db_options_test.cc @@ -635,6 +635,21 @@ TEST_F(DBOptionsTest, SanitizeUniversalTTLCompaction) { ASSERT_EQ(100, dbfull()->GetOptions().periodic_compaction_seconds); } +TEST_F(DBOptionsTest, SanitizeTtlDefault) { + Options options; + Reopen(options); + ASSERT_EQ(30 * 24 * 60 * 60, dbfull()->GetOptions().ttl); + + options.compaction_style = kCompactionStyleLevel; + options.ttl = 0; + Reopen(options); + ASSERT_EQ(0, dbfull()->GetOptions().ttl); + + options.ttl = 100; + Reopen(options); + ASSERT_EQ(100, dbfull()->GetOptions().ttl); +} + TEST_F(DBOptionsTest, SanitizeFIFOPeriodicCompaction) { Options options; options.compaction_style = kCompactionStyleFIFO; diff --git a/db/db_test.cc b/db/db_test.cc index e365fd3ce..6ea5e9e00 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -3311,6 +3311,22 @@ TEST_F(DBTest, FIFOCompactionStyleWithCompactionAndDelete) { } } +// Check that FIFO-with-TTL is supported regardless of max_open_files. +TEST_F(DBTest, FIFOCompactionWithTTLAndMaxOpenFilesTest) { + Options options; + options.compaction_style = kCompactionStyleFIFO; + options.create_if_missing = true; + options.ttl = 600; // seconds + + // TTL is now supported with max_open_files != -1. + options.max_open_files = 100; + options = CurrentOptions(options); + ASSERT_OK(TryReopen(options)); + + options.max_open_files = -1; + ASSERT_OK(TryReopen(options)); +} + // Check that FIFO-with-TTL is supported only with BlockBasedTableFactory. 
TEST_F(DBTest, FIFOCompactionWithTTLAndVariousTableFormatsTest) { Options options; @@ -4812,6 +4828,7 @@ TEST_F(DBTest, DynamicCompactionOptions) { // Even more FIFOCompactionTests are at DBTest.FIFOCompaction* . TEST_F(DBTest, DynamicFIFOCompactionOptions) { Options options; + options.ttl = 0; options.create_if_missing = true; DestroyAndReopen(options); @@ -6165,6 +6182,19 @@ TEST_F(DBTest, FailWhenCompressionNotSupportedTest) { } } +TEST_F(DBTest, CreateColumnFamilyShouldFailOnIncompatibleOptions) { + Options options = CurrentOptions(); + options.max_open_files = 100; + Reopen(options); + + ColumnFamilyOptions cf_options(options); + // ttl is now supported even when max_open_files is not -1. + cf_options.ttl = 3600; + ColumnFamilyHandle* handle; + ASSERT_OK(db_->CreateColumnFamily(cf_options, "pikachu", &handle)); + delete handle; +} + #ifndef ROCKSDB_LITE TEST_F(DBTest, RowCache) { Options options = CurrentOptions(); diff --git a/db/db_universal_compaction_test.cc b/db/db_universal_compaction_test.cc index 2fe03650b..522f4a2d8 100644 --- a/db/db_universal_compaction_test.cc +++ b/db/db_universal_compaction_test.cc @@ -2166,9 +2166,10 @@ TEST_F(DBTestUniversalCompaction2, PeriodicCompactionDefault) { ASSERT_EQ(30 * 24 * 60 * 60, dbfull()->GetOptions().periodic_compaction_seconds); + options.ttl = 60 * 24 * 60 * 60; options.compaction_filter = nullptr; Reopen(options); - ASSERT_EQ(options.periodic_compaction_seconds, + ASSERT_EQ(60 * 24 * 60 * 60, dbfull()->GetOptions().periodic_compaction_seconds); } diff --git a/db/version_set.cc b/db/version_set.cc index 938f609ff..2393503d5 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2442,8 +2442,7 @@ void VersionStorageInfo::ComputeCompactionScore( if (mutable_cf_options.ttl > 0) { ComputeExpiredTtlFiles(immutable_cf_options, mutable_cf_options.ttl); } - if (mutable_cf_options.periodic_compaction_seconds > 0 && - mutable_cf_options.periodic_compaction_seconds < port::kMaxUint64) { + if 
(mutable_cf_options.periodic_compaction_seconds > 0) { ComputeFilesMarkedForPeriodicCompaction( immutable_cf_options, mutable_cf_options.periodic_compaction_seconds); } @@ -2514,12 +2513,12 @@ void VersionStorageInfo::ComputeFilesMarkedForPeriodicCompaction( } const uint64_t current_time = static_cast<uint64_t>(temp_current_time); - assert(periodic_compaction_seconds <= current_time); - // Disable periodic compaction if periodic_compaction_seconds > current_time. - // This also help handle the underflow case. + // If periodic_compaction_seconds > current_time, no file can possibly qualify + // for periodic compaction. if (periodic_compaction_seconds > current_time) { return; } + const uint64_t allowed_time_limit = current_time - periodic_compaction_seconds; diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index 90663ff0e..d4e986a11 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -654,11 +654,15 @@ struct AdvancedColumnFamilyOptions { // unit: seconds. Ex: 1 day = 1 * 24 * 60 * 60 // In FIFO, this option will have the same meaning as // periodic_compaction_seconds. Whichever stricter will be used. + // 0 means disabled. + // UINT64_MAX - 1 (0xfffffffffffffffe) is a special flag to allow RocksDB to + // pick the default. // - // Default: 0 (disabled) + // Default: 30 days for leveled compaction + block based table. Disabled + // otherwise. // // Dynamically changeable through SetOptions() API - uint64_t ttl = 0; + uint64_t ttl = 0xfffffffffffffffe; // Files older than this value will be picked up for compaction, and // re-written to the same level as they were before. @@ -676,7 +680,7 @@ struct AdvancedColumnFamilyOptions { // // Values: // 0: Turn off Periodic compactions. - // UINT64_MAX (i.e 0xffffffffffffffff): Let RocksDB control this feature + // UINT64_MAX - 1 (i.e 0xfffffffffffffffe): Let RocksDB control this feature + // as needed. 
For now, RocksDB will change this value to 30 days // (i.e 30 * 24 * 60 * 60) so that every file goes through the compaction // process at least once every 30 days if not compacted sooner. @@ -684,10 +688,10 @@ struct AdvancedColumnFamilyOptions { // when this value is left default, and ttl is left to 0, 30 days will be // used. Otherwise, min(ttl, periodic_compaction_seconds) will be used. // - // Default: UINT64_MAX (allow RocksDB to auto-tune) + // Default: UINT64_MAX - 1 (allow RocksDB to auto-tune) // // Dynamically changeable through SetOptions() API - uint64_t periodic_compaction_seconds = 0xffffffffffffffff; + uint64_t periodic_compaction_seconds = 0xfffffffffffffffe; // If this option is set then 1 in N blocks are compressed // using a fast (lz4) and slow (zstd) compression algorithm.