Make trash-to-DB size ratio limit configurable

Summary:
Allow users to configure the trash-to-DB size ratio limit, so
that rate limits for deletes can be enforced even when larger portions of
the database are being deleted.
Closes https://github.com/facebook/rocksdb/pull/3158

Differential Revision: D6304897

Pulled By: gdavidsson

fbshipit-source-id: a28dd13059ebab7d4171b953ed91ce383a84d6b3
main
Gustav Davidsson 7 years ago committed by Facebook Github Bot
parent 32e31d49d1
commit 2d04ed65e4
  1. 1
      HISTORY.md
  2. 13
      db/db_sst_test.cc
  3. 14
      include/rocksdb/sst_file_manager.h
  4. 9
      util/delete_scheduler.cc
  5. 22
      util/delete_scheduler.h
  6. 11
      util/delete_scheduler_test.cc
  7. 24
      util/sst_file_manager_impl.cc
  8. 9
      util/sst_file_manager_impl.h

@ -11,6 +11,7 @@
* API call `DB::SetPreserveDeletesSequenceNumber(SequenceNumber seqnum)` was added, users who wish to preserve deletes are expected to periodically call this function to advance the cutoff seqnum (all deletes made before this seqnum can be dropped by DB). It's user responsibility to figure out how to advance the seqnum in the way so the tombstones are kept for the desired period of time, yet are eventually processed in time and don't eat up too much space. * API call `DB::SetPreserveDeletesSequenceNumber(SequenceNumber seqnum)` was added, users who wish to preserve deletes are expected to periodically call this function to advance the cutoff seqnum (all deletes made before this seqnum can be dropped by DB). It's user responsibility to figure out how to advance the seqnum in the way so the tombstones are kept for the desired period of time, yet are eventually processed in time and don't eat up too much space.
* `ReadOptions::iter_start_seqnum` was added; if set to something > 0 user will see 2 changes in iterators behavior 1) only keys written with sequence larger than this parameter would be returned and 2) the `Slice` returned by iter->key() now points to the memory that keeps User-oriented representation of the internal key, rather than user key. New struct `FullKey` was added to represent internal keys, along with a new helper function `ParseFullKey(const Slice& internal_key, FullKey* result);`. * `ReadOptions::iter_start_seqnum` was added; if set to something > 0 user will see 2 changes in iterators behavior 1) only keys written with sequence larger than this parameter would be returned and 2) the `Slice` returned by iter->key() now points to the memory that keeps User-oriented representation of the internal key, rather than user key. New struct `FullKey` was added to represent internal keys, along with a new helper function `ParseFullKey(const Slice& internal_key, FullKey* result);`.
* Deprecate trash_dir param in NewSstFileManager, right now we will rename deleted files to <name>.trash instead of moving them to trash directory * Deprecate trash_dir param in NewSstFileManager, right now we will rename deleted files to <name>.trash instead of moving them to trash directory
* Allow setting a custom trash/DB size ratio limit in the SstFileManager; once the trash size exceeds this ratio, files that are to be scheduled for deletion are deleted immediately, regardless of any delete rate limit.
* Return an error on write if write_options.sync = true and write_options.disableWAL = true to warn user of inconsistent options. Previously we will not write to WAL and not respecting the sync options in this case. * Return an error on write if write_options.sync = true and write_options.disableWAL = true to warn user of inconsistent options. Previously we will not write to WAL and not respecting the sync options in this case.
### New Features ### New Features

@ -330,11 +330,11 @@ TEST_F(DBSSTTest, RateLimitedDelete) {
int64_t rate_bytes_per_sec = 1024 * 10; // 10 Kbs / Sec int64_t rate_bytes_per_sec = 1024 * 10; // 10 Kbs / Sec
Status s; Status s;
options.sst_file_manager.reset( options.sst_file_manager.reset(
NewSstFileManager(env_, nullptr, "", 0, false, &s)); NewSstFileManager(env_, nullptr, "", 0, false, &s, 0));
ASSERT_OK(s); ASSERT_OK(s);
options.sst_file_manager->SetDeleteRateBytesPerSecond(rate_bytes_per_sec); options.sst_file_manager->SetDeleteRateBytesPerSecond(rate_bytes_per_sec);
auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get()); auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
sfm->delete_scheduler()->TEST_SetMaxTrashDBRatio(1.1); sfm->delete_scheduler()->SetMaxTrashDBRatio(1.1);
ASSERT_OK(TryReopen(options)); ASSERT_OK(TryReopen(options));
// Create 4 files in L0 // Create 4 files in L0
@ -396,10 +396,11 @@ TEST_F(DBSSTTest, DeleteSchedulerMultipleDBPaths) {
int64_t rate_bytes_per_sec = 1024 * 1024; // 1 Mb / Sec int64_t rate_bytes_per_sec = 1024 * 1024; // 1 Mb / Sec
Status s; Status s;
options.sst_file_manager.reset( options.sst_file_manager.reset(
NewSstFileManager(env_, nullptr, "", rate_bytes_per_sec, false, &s)); NewSstFileManager(env_, nullptr, "", rate_bytes_per_sec, false, &s,
/* max_trash_db_ratio= */ 1.1));
ASSERT_OK(s); ASSERT_OK(s);
auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get()); auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
sfm->delete_scheduler()->TEST_SetMaxTrashDBRatio(1.1);
DestroyAndReopen(options); DestroyAndReopen(options);
@ -459,7 +460,7 @@ TEST_F(DBSSTTest, DestroyDBWithRateLimitedDelete) {
options.disable_auto_compactions = true; options.disable_auto_compactions = true;
options.env = env_; options.env = env_;
options.sst_file_manager.reset( options.sst_file_manager.reset(
NewSstFileManager(env_, nullptr, "", 0, false, &s)); NewSstFileManager(env_, nullptr, "", 0, false, &s, 0));
ASSERT_OK(s); ASSERT_OK(s);
DestroyAndReopen(options); DestroyAndReopen(options);
@ -477,7 +478,7 @@ TEST_F(DBSSTTest, DestroyDBWithRateLimitedDelete) {
auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get()); auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
sfm->SetDeleteRateBytesPerSecond(1024 * 1024); sfm->SetDeleteRateBytesPerSecond(1024 * 1024);
sfm->delete_scheduler()->TEST_SetMaxTrashDBRatio(1.1); sfm->delete_scheduler()->SetMaxTrashDBRatio(1.1);
ASSERT_OK(DestroyDB(dbname_, options)); ASSERT_OK(DestroyDB(dbname_, options));
sfm->WaitForEmptyTrash(); sfm->WaitForEmptyTrash();
// We have deleted the 4 sst files in the delete_scheduler // We have deleted the 4 sst files in the delete_scheduler

@ -57,6 +57,14 @@ class SstFileManager {
// zero means disable delete rate limiting and delete files immediately // zero means disable delete rate limiting and delete files immediately
// thread-safe // thread-safe
virtual void SetDeleteRateBytesPerSecond(int64_t delete_rate) = 0; virtual void SetDeleteRateBytesPerSecond(int64_t delete_rate) = 0;
// Return trash/DB size ratio where new files will be deleted immediately
// thread-safe
virtual double GetMaxTrashDBRatio() = 0;
// Update trash/DB size ratio where new files will be deleted immediately
// thread-safe
virtual void SetMaxTrashDBRatio(double ratio) = 0;
}; };
// Create a new SstFileManager that can be shared among multiple RocksDB // Create a new SstFileManager that can be shared among multiple RocksDB
@ -75,9 +83,13 @@ class SstFileManager {
// if user provide trash_dir we will schedule deletes for files in the dir // if user provide trash_dir we will schedule deletes for files in the dir
// @param status: If not nullptr, status will contain any errors that happened // @param status: If not nullptr, status will contain any errors that happened
// during creating the missing trash_dir or deleting existing files in trash. // during creating the missing trash_dir or deleting existing files in trash.
// @param max_trash_db_ratio: If the trash size accounts for more than this
// fraction of the total DB size, we will start deleting new files passed to
// DeleteScheduler immediately
extern SstFileManager* NewSstFileManager( extern SstFileManager* NewSstFileManager(
Env* env, std::shared_ptr<Logger> info_log = nullptr, Env* env, std::shared_ptr<Logger> info_log = nullptr,
std::string trash_dir = "", int64_t rate_bytes_per_sec = 0, std::string trash_dir = "", int64_t rate_bytes_per_sec = 0,
bool delete_existing_trash = true, Status* status = nullptr); bool delete_existing_trash = true, Status* status = nullptr,
double max_trash_db_ratio = 0.25);
} // namespace rocksdb } // namespace rocksdb

@ -21,7 +21,8 @@ namespace rocksdb {
DeleteScheduler::DeleteScheduler(Env* env, int64_t rate_bytes_per_sec, DeleteScheduler::DeleteScheduler(Env* env, int64_t rate_bytes_per_sec,
Logger* info_log, Logger* info_log,
SstFileManagerImpl* sst_file_manager) SstFileManagerImpl* sst_file_manager,
double max_trash_db_ratio)
: env_(env), : env_(env),
total_trash_size_(0), total_trash_size_(0),
rate_bytes_per_sec_(rate_bytes_per_sec), rate_bytes_per_sec_(rate_bytes_per_sec),
@ -29,8 +30,10 @@ DeleteScheduler::DeleteScheduler(Env* env, int64_t rate_bytes_per_sec,
closing_(false), closing_(false),
cv_(&mu_), cv_(&mu_),
info_log_(info_log), info_log_(info_log),
sst_file_manager_(sst_file_manager) { sst_file_manager_(sst_file_manager),
max_trash_db_ratio_(max_trash_db_ratio) {
assert(sst_file_manager != nullptr); assert(sst_file_manager != nullptr);
assert(max_trash_db_ratio >= 0);
bg_thread_.reset( bg_thread_.reset(
new port::Thread(&DeleteScheduler::BackgroundEmptyTrash, this)); new port::Thread(&DeleteScheduler::BackgroundEmptyTrash, this));
} }
@ -50,7 +53,7 @@ Status DeleteScheduler::DeleteFile(const std::string& file_path) {
Status s; Status s;
if (rate_bytes_per_sec_.load() <= 0 || if (rate_bytes_per_sec_.load() <= 0 ||
total_trash_size_.load() > total_trash_size_.load() >
sst_file_manager_->GetTotalSize() * max_trash_db_ratio_) { sst_file_manager_->GetTotalSize() * max_trash_db_ratio_.load()) {
// Rate limiting is disabled or trash size makes up more than // Rate limiting is disabled or trash size makes up more than
// max_trash_db_ratio_ (default 25%) of the total DB size // max_trash_db_ratio_ (default 25%) of the total DB size
TEST_SYNC_POINT("DeleteScheduler::DeleteFile"); TEST_SYNC_POINT("DeleteScheduler::DeleteFile");

@ -33,7 +33,8 @@ class SstFileManagerImpl;
class DeleteScheduler { class DeleteScheduler {
public: public:
DeleteScheduler(Env* env, int64_t rate_bytes_per_sec, Logger* info_log, DeleteScheduler(Env* env, int64_t rate_bytes_per_sec, Logger* info_log,
SstFileManagerImpl* sst_file_manager); SstFileManagerImpl* sst_file_manager,
double max_trash_db_ratio);
~DeleteScheduler(); ~DeleteScheduler();
@ -42,7 +43,7 @@ class DeleteScheduler {
// Set delete rate limit in bytes per second // Set delete rate limit in bytes per second
void SetRateBytesPerSecond(int64_t bytes_per_sec) { void SetRateBytesPerSecond(int64_t bytes_per_sec) {
return rate_bytes_per_sec_.store(bytes_per_sec); rate_bytes_per_sec_.store(bytes_per_sec);
} }
// Mark file as trash directory and schedule its deletion // Mark file as trash directory and schedule its deletion
@ -58,9 +59,15 @@ class DeleteScheduler {
uint64_t GetTotalTrashSize() { return total_trash_size_.load(); } uint64_t GetTotalTrashSize() { return total_trash_size_.load(); }
void TEST_SetMaxTrashDBRatio(double r) { // Return trash/DB size ratio where new files will be deleted immediately
// Reads the currently configured maximum trash/DB size ratio from
// max_trash_db_ratio_ and hands it back to the caller.
double GetMaxTrashDBRatio() {
  const double current_ratio = max_trash_db_ratio_.load();
  return current_ratio;
}
// Update trash/DB size ratio where new files will be deleted immediately
void SetMaxTrashDBRatio(double r) {
assert(r >= 0); assert(r >= 0);
max_trash_db_ratio_ = r; max_trash_db_ratio_.store(r);
} }
static const std::string kTrashExtension; static const std::string kTrashExtension;
@ -105,9 +112,10 @@ class DeleteScheduler {
InstrumentedMutex file_move_mu_; InstrumentedMutex file_move_mu_;
Logger* info_log_; Logger* info_log_;
SstFileManagerImpl* sst_file_manager_; SstFileManagerImpl* sst_file_manager_;
// If the trash size constitutes for more than 25% of the total DB size // If the trash size constitutes for more than this fraction of the total DB
// we will start deleting new files passed to DeleteScheduler immediately // size we will start deleting new files passed to DeleteScheduler
double max_trash_db_ratio_ = 0.25; // immediately
std::atomic<double> max_trash_db_ratio_;
static const uint64_t kMicrosInSecond = 1000 * 1000LL; static const uint64_t kMicrosInSecond = 1000 * 1000LL;
}; };

@ -83,12 +83,13 @@ class DeleteSchedulerTest : public testing::Test {
} }
void NewDeleteScheduler() { void NewDeleteScheduler() {
// Tests in this file are for DeleteScheduler component and don't create any
// DBs, so we need to set max_trash_db_ratio to 100% (instead of default
// 25%)
sst_file_mgr_.reset( sst_file_mgr_.reset(
new SstFileManagerImpl(env_, nullptr, rate_bytes_per_sec_)); new SstFileManagerImpl(env_, nullptr, rate_bytes_per_sec_,
/* max_trash_db_ratio= */ 1.1));
delete_scheduler_ = sst_file_mgr_->delete_scheduler(); delete_scheduler_ = sst_file_mgr_->delete_scheduler();
// Tests in this file are for DeleteScheduler component and dont create any
// DBs, so we need to use set this value to 100% (instead of default 25%)
delete_scheduler_->TEST_SetMaxTrashDBRatio(1.1);
} }
Env* env_; Env* env_;
@ -517,7 +518,7 @@ TEST_F(DeleteSchedulerTest, ImmediateDeleteOn25PercDBSize) {
rate_bytes_per_sec_ = 1; // 1 byte per sec (very slow trash delete) rate_bytes_per_sec_ = 1; // 1 byte per sec (very slow trash delete)
NewDeleteScheduler(); NewDeleteScheduler();
delete_scheduler_->TEST_SetMaxTrashDBRatio(0.25); delete_scheduler_->SetMaxTrashDBRatio(0.25);
std::vector<std::string> generated_files; std::vector<std::string> generated_files;
for (int i = 0; i < num_files; i++) { for (int i = 0; i < num_files; i++) {

@ -17,12 +17,14 @@ namespace rocksdb {
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
SstFileManagerImpl::SstFileManagerImpl(Env* env, std::shared_ptr<Logger> logger, SstFileManagerImpl::SstFileManagerImpl(Env* env, std::shared_ptr<Logger> logger,
int64_t rate_bytes_per_sec) int64_t rate_bytes_per_sec,
double max_trash_db_ratio)
: env_(env), : env_(env),
logger_(logger), logger_(logger),
total_files_size_(0), total_files_size_(0),
max_allowed_space_(0), max_allowed_space_(0),
delete_scheduler_(env, rate_bytes_per_sec, logger.get(), this) {} delete_scheduler_(env, rate_bytes_per_sec, logger.get(), this,
max_trash_db_ratio) {}
SstFileManagerImpl::~SstFileManagerImpl() {} SstFileManagerImpl::~SstFileManagerImpl() {}
@ -93,6 +95,14 @@ void SstFileManagerImpl::SetDeleteRateBytesPerSecond(int64_t delete_rate) {
return delete_scheduler_.SetRateBytesPerSecond(delete_rate); return delete_scheduler_.SetRateBytesPerSecond(delete_rate);
} }
// Reports the maximum trash/DB size ratio by asking delete_scheduler_ for
// its current setting.
double SstFileManagerImpl::GetMaxTrashDBRatio() {
  const double scheduler_ratio = delete_scheduler_.GetMaxTrashDBRatio();
  return scheduler_ratio;
}
// Updates the maximum trash/DB size ratio by forwarding the new value to
// delete_scheduler_.
//
// @param ratio: new trash/DB size ratio limit, passed through unchanged.
void SstFileManagerImpl::SetMaxTrashDBRatio(double ratio) {
  // Plain call rather than `return <void expression>;`: this function
  // returns nothing.
  delete_scheduler_.SetMaxTrashDBRatio(ratio);
}
Status SstFileManagerImpl::ScheduleFileDeletion(const std::string& file_path) { Status SstFileManagerImpl::ScheduleFileDeletion(const std::string& file_path) {
return delete_scheduler_.DeleteFile(file_path); return delete_scheduler_.DeleteFile(file_path);
} }
@ -128,9 +138,11 @@ void SstFileManagerImpl::OnDeleteFileImpl(const std::string& file_path) {
SstFileManager* NewSstFileManager(Env* env, std::shared_ptr<Logger> info_log, SstFileManager* NewSstFileManager(Env* env, std::shared_ptr<Logger> info_log,
std::string trash_dir, std::string trash_dir,
int64_t rate_bytes_per_sec, int64_t rate_bytes_per_sec,
bool delete_existing_trash, Status* status) { bool delete_existing_trash, Status* status,
double max_trash_db_ratio) {
SstFileManagerImpl* res = SstFileManagerImpl* res =
new SstFileManagerImpl(env, info_log, rate_bytes_per_sec); new SstFileManagerImpl(env, info_log, rate_bytes_per_sec,
max_trash_db_ratio);
// trash_dir is deprecated and not needed anymore, but if user passed it // trash_dir is deprecated and not needed anymore, but if user passed it
// we will still remove files in it. // we will still remove files in it.
@ -166,7 +178,8 @@ SstFileManager* NewSstFileManager(Env* env, std::shared_ptr<Logger> info_log,
SstFileManager* NewSstFileManager(Env* env, std::shared_ptr<Logger> info_log, SstFileManager* NewSstFileManager(Env* env, std::shared_ptr<Logger> info_log,
std::string trash_dir, std::string trash_dir,
int64_t rate_bytes_per_sec, int64_t rate_bytes_per_sec,
bool delete_existing_trash, Status* status) { bool delete_existing_trash, Status* status,
double max_trash_db_ratio) {
if (status) { if (status) {
*status = *status =
Status::NotSupported("SstFileManager is not supported in ROCKSDB_LITE"); Status::NotSupported("SstFileManager is not supported in ROCKSDB_LITE");
@ -177,4 +190,3 @@ SstFileManager* NewSstFileManager(Env* env, std::shared_ptr<Logger> info_log,
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
} // namespace rocksdb } // namespace rocksdb

@ -25,7 +25,8 @@ class Logger;
class SstFileManagerImpl : public SstFileManager { class SstFileManagerImpl : public SstFileManager {
public: public:
explicit SstFileManagerImpl(Env* env, std::shared_ptr<Logger> logger, explicit SstFileManagerImpl(Env* env, std::shared_ptr<Logger> logger,
int64_t rate_bytes_per_sec); int64_t rate_bytes_per_sec,
double max_trash_db_ratio);
~SstFileManagerImpl(); ~SstFileManagerImpl();
@ -67,6 +68,12 @@ class SstFileManagerImpl : public SstFileManager {
// Update the delete rate limit in bytes per second. // Update the delete rate limit in bytes per second.
virtual void SetDeleteRateBytesPerSecond(int64_t delete_rate) override; virtual void SetDeleteRateBytesPerSecond(int64_t delete_rate) override;
// Return trash/DB size ratio where new files will be deleted immediately
virtual double GetMaxTrashDBRatio() override;
// Update trash/DB size ratio where new files will be deleted immediately
virtual void SetMaxTrashDBRatio(double ratio) override;
// Mark file as trash and schedule its deletion. // Mark file as trash and schedule its deletion.
virtual Status ScheduleFileDeletion(const std::string& file_path); virtual Status ScheduleFileDeletion(const std::string& file_path);

Loading…
Cancel
Save