diff --git a/HISTORY.md b/HISTORY.md index c308cbb7d..5b24ca866 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -6,6 +6,7 @@ * With level_compaction_dynamic_level_bytes = true, level multiplier may be adjusted automatically when Level 0 to 1 compaction is lagged behind. * Introduced DB option `atomic_flush`. If true, RocksDB supports flushing multiple column families and atomically committing the result to MANIFEST. Useful when WAL is disabled. * Added `num_deletions` and `num_merge_operands` members to `TableProperties`. +* Added "rocksdb.min-obsolete-sst-number-to-keep" DB property that reports the lower bound on SST file numbers that are being kept from deletion, even if the SSTs are obsolete. ### Bug Fixes * Fix corner case where a write group leader blocked due to write stall blocks other writers in queue with WriteOptions::no_slowdown set. diff --git a/db/db_impl.h b/db/db_impl.h index 255d02101..4fb4e002e 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -491,6 +491,14 @@ class DBImpl : public DB { uint64_t MinLogNumberToKeep(); + // Returns the lower bound file number for SSTs that won't be deleted, even if + // they're obsolete. This lower bound is used internally to prevent newly + // created flush/compaction output files from being deleted before they're + // installed. This technique avoids the need for tracking the exact numbers of + // files pending creation, although it prevents more files than necessary from + // being deleted. + uint64_t MinObsoleteSstNumberToKeep(); + // Returns the list of live files in 'live' and the list // of all files in the filesystem in 'candidate_files'. // If force == false and the last call was less than diff --git a/db/db_impl_files.cc b/db/db_impl_files.cc index 99fb1df40..fe09e062c 100644 --- a/db/db_impl_files.cc +++ b/db/db_impl_files.cc @@ -20,6 +20,7 @@ #include "util/sst_file_manager_impl.h" namespace rocksdb { + uint64_t DBImpl::MinLogNumberToKeep() { if (allow_2pc()) { return versions_->min_log_number_to_keep_2pc(); @@ -28,6 +29,14 @@ uint64_t DBImpl::MinLogNumberToKeep() { } } +uint64_t DBImpl::MinObsoleteSstNumberToKeep() { + mutex_.AssertHeld(); + if (!pending_outputs_.empty()) { + return *pending_outputs_.begin(); + } + return std::numeric_limits::max(); +} + // * Returns the list of live files in 'sst_live' // If it's doing full scan: // * Returns the list of all files in the filesystem in diff --git a/db/db_properties_test.cc b/db/db_properties_test.cc index e496ad6c2..115cf682f 100644 --- a/db/db_properties_test.cc +++ b/db/db_properties_test.cc @@ -1529,6 +1529,64 @@ TEST_F(DBPropertiesTest, SstFilesSize) { ASSERT_TRUE(listener->callback_triggered); } +TEST_F(DBPropertiesTest, MinObsoleteSstNumberToKeep) { + class TestListener : public EventListener { + public: + void OnTableFileCreated(const TableFileCreationInfo& info) override { + if (info.reason == TableFileCreationReason::kCompaction) { + // Verify the property indicates that SSTs created by a running + // compaction cannot be deleted. + uint64_t created_file_num; + FileType created_file_type; + std::string filename = + info.file_path.substr(info.file_path.rfind('/') + 1); + ASSERT_TRUE( + ParseFileName(filename, &created_file_num, &created_file_type)); + ASSERT_EQ(kTableFile, created_file_type); + + uint64_t keep_sst_lower_bound; + ASSERT_TRUE( + db_->GetIntProperty(DB::Properties::kMinObsoleteSstNumberToKeep, + &keep_sst_lower_bound)); + + ASSERT_LE(keep_sst_lower_bound, created_file_num); + validated_ = true; + } + } + + void SetDB(DB* db) { db_ = db; } + + int GetNumCompactions() { return num_compactions_; } + + // True if we've verified the property for at least one output file + bool Validated() { return validated_; } + + private: + int num_compactions_ = 0; + bool validated_ = false; + DB* db_ = nullptr; + }; + + const int kNumL0Files = 4; + + std::shared_ptr listener = std::make_shared(); + + Options options = CurrentOptions(); + options.listeners.push_back(listener); + options.level0_file_num_compaction_trigger = kNumL0Files; + DestroyAndReopen(options); + listener->SetDB(db_); + + for (int i = 0; i < kNumL0Files; ++i) { + // Make sure they overlap in keyspace to prevent trivial move + Put("key1", "val"); + Put("key2", "val"); + Flush(); + } + dbfull()->TEST_WaitForCompact(); + ASSERT_TRUE(listener->Validated()); +} + TEST_F(DBPropertiesTest, BlockCacheProperties) { Options options; uint64_t value; diff --git a/db/internal_stats.cc b/db/internal_stats.cc index 906d00795..b330a40f1 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -231,6 +231,8 @@ static const std::string current_version_number = "current-super-version-number"; static const std::string estimate_live_data_size = "estimate-live-data-size"; static const std::string min_log_number_to_keep_str = "min-log-number-to-keep"; +static const std::string min_obsolete_sst_number_to_keep_str = + "min-obsolete-sst-number-to-keep"; static const std::string base_level_str = "base-level"; static const std::string total_sst_files_size = "total-sst-files-size"; static const std::string live_sst_files_size = "live-sst-files-size"; @@ -310,6 +312,8 @@ const std::string DB::Properties::kEstimateLiveDataSize = rocksdb_prefix + estimate_live_data_size; const std::string DB::Properties::kMinLogNumberToKeep = rocksdb_prefix + min_log_number_to_keep_str; +const std::string DB::Properties::kMinObsoleteSstNumberToKeep = + rocksdb_prefix + min_obsolete_sst_number_to_keep_str; const std::string DB::Properties::kTotalSstFilesSize = rocksdb_prefix + total_sst_files_size; const std::string DB::Properties::kLiveSstFilesSize = @@ -430,6 +434,9 @@ const std::unordered_map {DB::Properties::kMinLogNumberToKeep, {false, nullptr, &InternalStats::HandleMinLogNumberToKeep, nullptr, nullptr}}, + {DB::Properties::kMinObsoleteSstNumberToKeep, + {false, nullptr, &InternalStats::HandleMinObsoleteSstNumberToKeep, + nullptr, nullptr}}, {DB::Properties::kBaseLevel, {false, nullptr, &InternalStats::HandleBaseLevel, nullptr, nullptr}}, {DB::Properties::kTotalSstFilesSize, @@ -826,6 +833,13 @@ bool InternalStats::HandleMinLogNumberToKeep(uint64_t* value, DBImpl* db, return true; } +bool InternalStats::HandleMinObsoleteSstNumberToKeep(uint64_t* value, + DBImpl* db, + Version* /*version*/) { + *value = db->MinObsoleteSstNumberToKeep(); + return true; +} + bool InternalStats::HandleActualDelayedWriteRate(uint64_t* value, DBImpl* db, Version* /*version*/) { const WriteController& wc = db->write_controller(); diff --git a/db/internal_stats.h b/db/internal_stats.h index 620ae4734..6f0127513 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -533,6 +533,8 @@ class InternalStats { bool HandleEstimateLiveDataSize(uint64_t* value, DBImpl* db, Version* version); bool HandleMinLogNumberToKeep(uint64_t* value, DBImpl* db, Version* version); + bool HandleMinObsoleteSstNumberToKeep(uint64_t* value, DBImpl* db, + Version* version); bool HandleActualDelayedWriteRate(uint64_t* value, DBImpl* db, Version* version); bool HandleIsWriteStopped(uint64_t* value, DBImpl* db, Version* version); diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index f7517b91f..07e6d0f6d 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -572,6 +572,11 @@ class DB { // log files that should be kept. static const std::string kMinLogNumberToKeep; + // "rocksdb.min-obsolete-sst-number-to-keep" - return the minimum file + // number for an obsolete SST to be kept. The max value of `uint64_t` + // will be returned if all obsolete files can be deleted. + static const std::string kMinObsoleteSstNumberToKeep; + // "rocksdb.total-sst-files-size" - returns total size (bytes) of all SST // files. // WARNING: may slow down online queries if there are too many files. @@ -670,6 +675,7 @@ class DB { // "rocksdb.current-super-version-number" // "rocksdb.estimate-live-data-size" // "rocksdb.min-log-number-to-keep" + // "rocksdb.min-obsolete-sst-number-to-keep" // "rocksdb.total-sst-files-size" // "rocksdb.live-sst-files-size" // "rocksdb.base-level"