Add DB property for SST files kept from deletion (#4618)

Summary:
This property can help debug why SST files aren't being deleted. Previously we only had the property "rocksdb.is-file-deletions-enabled". However, even when that returned true, obsolete SSTs may still not be deleted due to the coarse-grained mechanism we use to prevent newly created SSTs from being accidentally deleted. That coarse-grained mechanism uses a lower bound file number for SSTs that should not be deleted, and this property exposes that lower bound.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4618

Differential Revision: D12898179

Pulled By: ajkr

fbshipit-source-id: fe68acc041ddbcc9276bbd48976524d95aafc776
main
Andrew Kryczka 6 years ago committed by Facebook Github Bot
parent a29053b648
commit fffac43cfb
  1. 1
      HISTORY.md
  2. 8
      db/db_impl.h
  3. 9
      db/db_impl_files.cc
  4. 58
      db/db_properties_test.cc
  5. 14
      db/internal_stats.cc
  6. 2
      db/internal_stats.h
  7. 6
      include/rocksdb/db.h

@ -6,6 +6,7 @@
* With level_compaction_dynamic_level_bytes = true, level multiplier may be adjusted automatically when Level 0 to 1 compaction is lagged behind. * With level_compaction_dynamic_level_bytes = true, level multiplier may be adjusted automatically when Level 0 to 1 compaction is lagged behind.
* Introduced DB option `atomic_flush`. If true, RocksDB supports flushing multiple column families and atomically committing the result to MANIFEST. Useful when WAL is disabled. * Introduced DB option `atomic_flush`. If true, RocksDB supports flushing multiple column families and atomically committing the result to MANIFEST. Useful when WAL is disabled.
* Added `num_deletions` and `num_merge_operands` members to `TableProperties`. * Added `num_deletions` and `num_merge_operands` members to `TableProperties`.
* Added "rocksdb.min-obsolete-sst-number-to-keep" DB property that reports the lower bound on SST file numbers that are being kept from deletion, even if the SSTs are obsolete.
### Bug Fixes ### Bug Fixes
* Fix corner case where a write group leader blocked due to write stall blocks other writers in queue with WriteOptions::no_slowdown set. * Fix corner case where a write group leader blocked due to write stall blocks other writers in queue with WriteOptions::no_slowdown set.

@ -491,6 +491,14 @@ class DBImpl : public DB {
uint64_t MinLogNumberToKeep(); uint64_t MinLogNumberToKeep();
// Returns the lower bound file number for SSTs that won't be deleted, even if
// they're obsolete. This lower bound is used internally to prevent newly
// created flush/compaction output files from being deleted before they're
// installed. This technique avoids the need for tracking the exact numbers of
// files pending creation, although it prevents more files than necessary from
// being deleted.
uint64_t MinObsoleteSstNumberToKeep();
// Returns the list of live files in 'live' and the list // Returns the list of live files in 'live' and the list
// of all files in the filesystem in 'candidate_files'. // of all files in the filesystem in 'candidate_files'.
// If force == false and the last call was less than // If force == false and the last call was less than

@ -20,6 +20,7 @@
#include "util/sst_file_manager_impl.h" #include "util/sst_file_manager_impl.h"
namespace rocksdb { namespace rocksdb {
uint64_t DBImpl::MinLogNumberToKeep() { uint64_t DBImpl::MinLogNumberToKeep() {
if (allow_2pc()) { if (allow_2pc()) {
return versions_->min_log_number_to_keep_2pc(); return versions_->min_log_number_to_keep_2pc();
@ -28,6 +29,14 @@ uint64_t DBImpl::MinLogNumberToKeep() {
} }
} }
uint64_t DBImpl::MinObsoleteSstNumberToKeep() {
mutex_.AssertHeld();
if (!pending_outputs_.empty()) {
return *pending_outputs_.begin();
}
return std::numeric_limits<uint64_t>::max();
}
// * Returns the list of live files in 'sst_live' // * Returns the list of live files in 'sst_live'
// If it's doing full scan: // If it's doing full scan:
// * Returns the list of all files in the filesystem in // * Returns the list of all files in the filesystem in

@ -1529,6 +1529,64 @@ TEST_F(DBPropertiesTest, SstFilesSize) {
ASSERT_TRUE(listener->callback_triggered); ASSERT_TRUE(listener->callback_triggered);
} }
TEST_F(DBPropertiesTest, MinObsoleteSstNumberToKeep) {
class TestListener : public EventListener {
public:
void OnTableFileCreated(const TableFileCreationInfo& info) override {
if (info.reason == TableFileCreationReason::kCompaction) {
// Verify the property indicates that SSTs created by a running
// compaction cannot be deleted.
uint64_t created_file_num;
FileType created_file_type;
std::string filename =
info.file_path.substr(info.file_path.rfind('/') + 1);
ASSERT_TRUE(
ParseFileName(filename, &created_file_num, &created_file_type));
ASSERT_EQ(kTableFile, created_file_type);
uint64_t keep_sst_lower_bound;
ASSERT_TRUE(
db_->GetIntProperty(DB::Properties::kMinObsoleteSstNumberToKeep,
&keep_sst_lower_bound));
ASSERT_LE(keep_sst_lower_bound, created_file_num);
validated_ = true;
}
}
void SetDB(DB* db) { db_ = db; }
int GetNumCompactions() { return num_compactions_; }
// True if we've verified the property for at least one output file
bool Validated() { return validated_; }
private:
int num_compactions_ = 0;
bool validated_ = false;
DB* db_ = nullptr;
};
const int kNumL0Files = 4;
std::shared_ptr<TestListener> listener = std::make_shared<TestListener>();
Options options = CurrentOptions();
options.listeners.push_back(listener);
options.level0_file_num_compaction_trigger = kNumL0Files;
DestroyAndReopen(options);
listener->SetDB(db_);
for (int i = 0; i < kNumL0Files; ++i) {
// Make sure they overlap in keyspace to prevent trivial move
Put("key1", "val");
Put("key2", "val");
Flush();
}
dbfull()->TEST_WaitForCompact();
ASSERT_TRUE(listener->Validated());
}
TEST_F(DBPropertiesTest, BlockCacheProperties) { TEST_F(DBPropertiesTest, BlockCacheProperties) {
Options options; Options options;
uint64_t value; uint64_t value;

@ -231,6 +231,8 @@ static const std::string current_version_number =
"current-super-version-number"; "current-super-version-number";
static const std::string estimate_live_data_size = "estimate-live-data-size"; static const std::string estimate_live_data_size = "estimate-live-data-size";
static const std::string min_log_number_to_keep_str = "min-log-number-to-keep"; static const std::string min_log_number_to_keep_str = "min-log-number-to-keep";
static const std::string min_obsolete_sst_number_to_keep_str =
"min-obsolete-sst-number-to-keep";
static const std::string base_level_str = "base-level"; static const std::string base_level_str = "base-level";
static const std::string total_sst_files_size = "total-sst-files-size"; static const std::string total_sst_files_size = "total-sst-files-size";
static const std::string live_sst_files_size = "live-sst-files-size"; static const std::string live_sst_files_size = "live-sst-files-size";
@ -310,6 +312,8 @@ const std::string DB::Properties::kEstimateLiveDataSize =
rocksdb_prefix + estimate_live_data_size; rocksdb_prefix + estimate_live_data_size;
const std::string DB::Properties::kMinLogNumberToKeep = const std::string DB::Properties::kMinLogNumberToKeep =
rocksdb_prefix + min_log_number_to_keep_str; rocksdb_prefix + min_log_number_to_keep_str;
const std::string DB::Properties::kMinObsoleteSstNumberToKeep =
rocksdb_prefix + min_obsolete_sst_number_to_keep_str;
const std::string DB::Properties::kTotalSstFilesSize = const std::string DB::Properties::kTotalSstFilesSize =
rocksdb_prefix + total_sst_files_size; rocksdb_prefix + total_sst_files_size;
const std::string DB::Properties::kLiveSstFilesSize = const std::string DB::Properties::kLiveSstFilesSize =
@ -430,6 +434,9 @@ const std::unordered_map<std::string, DBPropertyInfo>
{DB::Properties::kMinLogNumberToKeep, {DB::Properties::kMinLogNumberToKeep,
{false, nullptr, &InternalStats::HandleMinLogNumberToKeep, nullptr, {false, nullptr, &InternalStats::HandleMinLogNumberToKeep, nullptr,
nullptr}}, nullptr}},
{DB::Properties::kMinObsoleteSstNumberToKeep,
{false, nullptr, &InternalStats::HandleMinObsoleteSstNumberToKeep,
nullptr, nullptr}},
{DB::Properties::kBaseLevel, {DB::Properties::kBaseLevel,
{false, nullptr, &InternalStats::HandleBaseLevel, nullptr, nullptr}}, {false, nullptr, &InternalStats::HandleBaseLevel, nullptr, nullptr}},
{DB::Properties::kTotalSstFilesSize, {DB::Properties::kTotalSstFilesSize,
@ -826,6 +833,13 @@ bool InternalStats::HandleMinLogNumberToKeep(uint64_t* value, DBImpl* db,
return true; return true;
} }
bool InternalStats::HandleMinObsoleteSstNumberToKeep(uint64_t* value,
DBImpl* db,
Version* /*version*/) {
*value = db->MinObsoleteSstNumberToKeep();
return true;
}
bool InternalStats::HandleActualDelayedWriteRate(uint64_t* value, DBImpl* db, bool InternalStats::HandleActualDelayedWriteRate(uint64_t* value, DBImpl* db,
Version* /*version*/) { Version* /*version*/) {
const WriteController& wc = db->write_controller(); const WriteController& wc = db->write_controller();

@ -533,6 +533,8 @@ class InternalStats {
bool HandleEstimateLiveDataSize(uint64_t* value, DBImpl* db, bool HandleEstimateLiveDataSize(uint64_t* value, DBImpl* db,
Version* version); Version* version);
bool HandleMinLogNumberToKeep(uint64_t* value, DBImpl* db, Version* version); bool HandleMinLogNumberToKeep(uint64_t* value, DBImpl* db, Version* version);
bool HandleMinObsoleteSstNumberToKeep(uint64_t* value, DBImpl* db,
Version* version);
bool HandleActualDelayedWriteRate(uint64_t* value, DBImpl* db, bool HandleActualDelayedWriteRate(uint64_t* value, DBImpl* db,
Version* version); Version* version);
bool HandleIsWriteStopped(uint64_t* value, DBImpl* db, Version* version); bool HandleIsWriteStopped(uint64_t* value, DBImpl* db, Version* version);

@ -572,6 +572,11 @@ class DB {
// log files that should be kept. // log files that should be kept.
static const std::string kMinLogNumberToKeep; static const std::string kMinLogNumberToKeep;
// "rocksdb.min-obsolete-sst-number-to-keep" - return the minimum file
// number for an obsolete SST to be kept. The max value of `uint64_t`
// will be returned if all obsolete files can be deleted.
static const std::string kMinObsoleteSstNumberToKeep;
// "rocksdb.total-sst-files-size" - returns total size (bytes) of all SST // "rocksdb.total-sst-files-size" - returns total size (bytes) of all SST
// files. // files.
// WARNING: may slow down online queries if there are too many files. // WARNING: may slow down online queries if there are too many files.
@ -670,6 +675,7 @@ class DB {
// "rocksdb.current-super-version-number" // "rocksdb.current-super-version-number"
// "rocksdb.estimate-live-data-size" // "rocksdb.estimate-live-data-size"
// "rocksdb.min-log-number-to-keep" // "rocksdb.min-log-number-to-keep"
// "rocksdb.min-obsolete-sst-number-to-keep"
// "rocksdb.total-sst-files-size" // "rocksdb.total-sst-files-size"
// "rocksdb.live-sst-files-size" // "rocksdb.live-sst-files-size"
// "rocksdb.base-level" // "rocksdb.base-level"

Loading…
Cancel
Save