SstFileManager: add bytes_max_delete_chunk

Summary:
Add `bytes_max_delete_chunk` in SstFileManager so that we can drop a large file in multiple batches.
Closes https://github.com/facebook/rocksdb/pull/3640

Differential Revision: D7358679

Pulled By: siying

fbshipit-source-id: ef17f0da2f5723dbece2669485a9b91b3edc0bb7
main
Siying Dong 7 years ago committed by Facebook Github Bot
parent 88c3e26cc0
commit 118058ba69
  1. 1
      HISTORY.md
  2. 6
      include/rocksdb/sst_file_manager.h
  3. 51
      util/delete_scheduler.cc
  4. 5
      util/delete_scheduler.h
  5. 30
      util/delete_scheduler_test.cc
  6. 13
      util/sst_file_manager_impl.cc
  7. 3
      util/sst_file_manager_impl.h

@ -17,6 +17,7 @@
* Avoid unnecessarily flushing in `CompactRange()` when the range specified by the user does not overlap unflushed memtables. * Avoid unnecessarily flushing in `CompactRange()` when the range specified by the user does not overlap unflushed memtables.
* If `ColumnFamilyOptions::max_subcompactions` is set greater than one, we now parallelize large manual level-based compactions. * If `ColumnFamilyOptions::max_subcompactions` is set greater than one, we now parallelize large manual level-based compactions.
* Add "rocksdb.live-sst-files-size" DB property to return total bytes of all SST files belong to the latest LSM tree. * Add "rocksdb.live-sst-files-size" DB property to return total bytes of all SST files belong to the latest LSM tree.
* NewSstFileManager to add an argument bytes_max_delete_chunk with default 64MB. With this argument, a file larger than 64MB will be ftruncated multiple times based on this size.
### Bug Fixes ### Bug Fixes
* Fix a leak in prepared_section_completed_ where the zeroed entries would not removed from the map. * Fix a leak in prepared_section_completed_ where the zeroed entries would not removed from the map.

@ -96,10 +96,14 @@ class SstFileManager {
// @param max_trash_db_ratio: If the trash size constitutes for more than this // @param max_trash_db_ratio: If the trash size constitutes for more than this
// fraction of the total DB size we will start deleting new files passed to // fraction of the total DB size we will start deleting new files passed to
// DeleteScheduler immediately // DeleteScheduler immediately
// @param bytes_max_delete_chunk: if a single file is larger than delete chunk,
// ftruncate the file by this size each time, rather than dropping the whole
// file. 0 means to always delete the whole file.
extern SstFileManager* NewSstFileManager( extern SstFileManager* NewSstFileManager(
Env* env, std::shared_ptr<Logger> info_log = nullptr, Env* env, std::shared_ptr<Logger> info_log = nullptr,
std::string trash_dir = "", int64_t rate_bytes_per_sec = 0, std::string trash_dir = "", int64_t rate_bytes_per_sec = 0,
bool delete_existing_trash = true, Status* status = nullptr, bool delete_existing_trash = true, Status* status = nullptr,
double max_trash_db_ratio = 0.25); double max_trash_db_ratio = 0.25,
uint64_t bytes_max_delete_chunk = 64 * 1024 * 1024);
} // namespace rocksdb } // namespace rocksdb

@ -22,11 +22,13 @@ namespace rocksdb {
DeleteScheduler::DeleteScheduler(Env* env, int64_t rate_bytes_per_sec, DeleteScheduler::DeleteScheduler(Env* env, int64_t rate_bytes_per_sec,
Logger* info_log, Logger* info_log,
SstFileManagerImpl* sst_file_manager, SstFileManagerImpl* sst_file_manager,
double max_trash_db_ratio) double max_trash_db_ratio,
uint64_t bytes_max_delete_chunk)
: env_(env), : env_(env),
total_trash_size_(0), total_trash_size_(0),
rate_bytes_per_sec_(rate_bytes_per_sec), rate_bytes_per_sec_(rate_bytes_per_sec),
pending_files_(0), pending_files_(0),
bytes_max_delete_chunk_(bytes_max_delete_chunk),
closing_(false), closing_(false),
cv_(&mu_), cv_(&mu_),
info_log_(info_log), info_log_(info_log),
@ -208,15 +210,18 @@ void DeleteScheduler::BackgroundEmptyTrash() {
// Get new file to delete // Get new file to delete
std::string path_in_trash = queue_.front(); std::string path_in_trash = queue_.front();
queue_.pop();
// We dont need to hold the lock while deleting the file // We dont need to hold the lock while deleting the file
mu_.Unlock(); mu_.Unlock();
uint64_t deleted_bytes = 0; uint64_t deleted_bytes = 0;
bool is_complete = true;
// Delete file from trash and update total_penlty value // Delete file from trash and update total_penlty value
Status s = DeleteTrashFile(path_in_trash, &deleted_bytes); Status s = DeleteTrashFile(path_in_trash, &deleted_bytes, &is_complete);
total_deleted_bytes += deleted_bytes; total_deleted_bytes += deleted_bytes;
mu_.Lock(); mu_.Lock();
if (is_complete) {
queue_.pop();
}
if (!s.ok()) { if (!s.ok()) {
bg_errors_[path_in_trash] = s; bg_errors_[path_in_trash] = s;
@ -236,7 +241,9 @@ void DeleteScheduler::BackgroundEmptyTrash() {
TEST_SYNC_POINT_CALLBACK("DeleteScheduler::BackgroundEmptyTrash:Wait", TEST_SYNC_POINT_CALLBACK("DeleteScheduler::BackgroundEmptyTrash:Wait",
&total_penlty); &total_penlty);
if (is_complete) {
pending_files_--; pending_files_--;
}
if (pending_files_ == 0) { if (pending_files_ == 0) {
// Unblock WaitForEmptyTrash since there are no more files waiting // Unblock WaitForEmptyTrash since there are no more files waiting
// to be deleted // to be deleted
@ -247,23 +254,49 @@ void DeleteScheduler::BackgroundEmptyTrash() {
} }
Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash, Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash,
uint64_t* deleted_bytes) { uint64_t* deleted_bytes,
bool* is_complete) {
uint64_t file_size; uint64_t file_size;
Status s = env_->GetFileSize(path_in_trash, &file_size); Status s = env_->GetFileSize(path_in_trash, &file_size);
if (s.ok()) { *is_complete = true;
TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:DeleteFile"); TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:DeleteFile");
s = env_->DeleteFile(path_in_trash); if (s.ok()) {
bool need_full_delete = true;
if (bytes_max_delete_chunk_ != 0 && file_size > bytes_max_delete_chunk_) {
unique_ptr<WritableFile> wf;
Status my_status =
env_->ReopenWritableFile(path_in_trash, &wf, EnvOptions());
if (my_status.ok()) {
my_status = wf->Truncate(file_size - bytes_max_delete_chunk_);
if (my_status.ok()) {
TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:Fsync");
my_status = wf->Fsync();
}
}
if (my_status.ok()) {
*deleted_bytes = bytes_max_delete_chunk_;
need_full_delete = false;
*is_complete = false;
} else {
ROCKS_LOG_WARN(info_log_,
"Failed to partially delete %s from trash -- %s",
path_in_trash.c_str(), my_status.ToString().c_str());
}
} }
if (need_full_delete) {
s = env_->DeleteFile(path_in_trash);
*deleted_bytes = file_size;
sst_file_manager_->OnDeleteFile(path_in_trash);
}
}
if (!s.ok()) { if (!s.ok()) {
// Error while getting file size or while deleting // Error while getting file size or while deleting
ROCKS_LOG_ERROR(info_log_, "Failed to delete %s from trash -- %s", ROCKS_LOG_ERROR(info_log_, "Failed to delete %s from trash -- %s",
path_in_trash.c_str(), s.ToString().c_str()); path_in_trash.c_str(), s.ToString().c_str());
*deleted_bytes = 0; *deleted_bytes = 0;
} else { } else {
*deleted_bytes = file_size; total_trash_size_.fetch_sub(*deleted_bytes);
total_trash_size_.fetch_sub(file_size);
sst_file_manager_->OnDeleteFile(path_in_trash);
} }
return s; return s;

@ -34,7 +34,7 @@ class DeleteScheduler {
public: public:
DeleteScheduler(Env* env, int64_t rate_bytes_per_sec, Logger* info_log, DeleteScheduler(Env* env, int64_t rate_bytes_per_sec, Logger* info_log,
SstFileManagerImpl* sst_file_manager, SstFileManagerImpl* sst_file_manager,
double max_trash_db_ratio); double max_trash_db_ratio, uint64_t bytes_max_delete_chunk);
~DeleteScheduler(); ~DeleteScheduler();
@ -82,7 +82,7 @@ class DeleteScheduler {
Status MarkAsTrash(const std::string& file_path, std::string* path_in_trash); Status MarkAsTrash(const std::string& file_path, std::string* path_in_trash);
Status DeleteTrashFile(const std::string& path_in_trash, Status DeleteTrashFile(const std::string& path_in_trash,
uint64_t* deleted_bytes); uint64_t* deleted_bytes, bool* is_complete);
void BackgroundEmptyTrash(); void BackgroundEmptyTrash();
@ -97,6 +97,7 @@ class DeleteScheduler {
std::queue<std::string> queue_; std::queue<std::string> queue_;
// Number of trash files that are waiting to be deleted // Number of trash files that are waiting to be deleted
int32_t pending_files_; int32_t pending_files_;
uint64_t bytes_max_delete_chunk_;
// Errors that happened in BackgroundEmptyTrash (file_path => error) // Errors that happened in BackgroundEmptyTrash (file_path => error)
std::map<std::string, Status> bg_errors_; std::map<std::string, Status> bg_errors_;
// Set to true in ~DeleteScheduler() to force BackgroundEmptyTrash to stop // Set to true in ~DeleteScheduler() to force BackgroundEmptyTrash to stop

@ -99,7 +99,7 @@ class DeleteSchedulerTest : public testing::Test {
// 25%) // 25%)
sst_file_mgr_.reset( sst_file_mgr_.reset(
new SstFileManagerImpl(env_, nullptr, rate_bytes_per_sec_, new SstFileManagerImpl(env_, nullptr, rate_bytes_per_sec_,
/* max_trash_db_ratio= */ 1.1)); /* max_trash_db_ratio= */ 1.1, 128 * 1024));
delete_scheduler_ = sst_file_mgr_->delete_scheduler(); delete_scheduler_ = sst_file_mgr_->delete_scheduler();
} }
@ -436,6 +436,34 @@ TEST_F(DeleteSchedulerTest, StartBGEmptyTrashMultipleTimes) {
rocksdb::SyncPoint::GetInstance()->EnableProcessing(); rocksdb::SyncPoint::GetInstance()->EnableProcessing();
} }
TEST_F(DeleteSchedulerTest, DeletePartialFile) {
int bg_delete_file = 0;
int bg_fsync = 0;
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"DeleteScheduler::DeleteTrashFile:DeleteFile",
[&](void*) { bg_delete_file++; });
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"DeleteScheduler::DeleteTrashFile:Fsync", [&](void*) { bg_fsync++; });
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
rate_bytes_per_sec_ = 1024 * 1024; // 1 MB / sec
NewDeleteScheduler();
// Should delete in 4 batch
ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile("data_1", 500 * 1024)));
ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile("data_2", 100 * 1024)));
// Should delete in 2 batch
ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile("data_2", 200 * 1024)));
delete_scheduler_->WaitForEmptyTrash();
auto bg_errors = delete_scheduler_->GetBackgroundErrors();
ASSERT_EQ(bg_errors.size(), 0);
ASSERT_EQ(7, bg_delete_file);
ASSERT_EQ(4, bg_fsync);
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
}
// 1- Create a DeleteScheduler with very slow rate limit (1 Byte / sec) // 1- Create a DeleteScheduler with very slow rate limit (1 Byte / sec)
// 2- Delete 100 files using DeleteScheduler // 2- Delete 100 files using DeleteScheduler
// 3- Delete the DeleteScheduler (call the destructor while queue is not empty) // 3- Delete the DeleteScheduler (call the destructor while queue is not empty)

@ -18,7 +18,8 @@ namespace rocksdb {
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
SstFileManagerImpl::SstFileManagerImpl(Env* env, std::shared_ptr<Logger> logger, SstFileManagerImpl::SstFileManagerImpl(Env* env, std::shared_ptr<Logger> logger,
int64_t rate_bytes_per_sec, int64_t rate_bytes_per_sec,
double max_trash_db_ratio) double max_trash_db_ratio,
uint64_t bytes_max_delete_chunk)
: env_(env), : env_(env),
logger_(logger), logger_(logger),
total_files_size_(0), total_files_size_(0),
@ -26,7 +27,7 @@ SstFileManagerImpl::SstFileManagerImpl(Env* env, std::shared_ptr<Logger> logger,
cur_compactions_reserved_size_(0), cur_compactions_reserved_size_(0),
max_allowed_space_(0), max_allowed_space_(0),
delete_scheduler_(env, rate_bytes_per_sec, logger.get(), this, delete_scheduler_(env, rate_bytes_per_sec, logger.get(), this,
max_trash_db_ratio) {} max_trash_db_ratio, bytes_max_delete_chunk) {}
SstFileManagerImpl::~SstFileManagerImpl() {} SstFileManagerImpl::~SstFileManagerImpl() {}
@ -196,10 +197,11 @@ SstFileManager* NewSstFileManager(Env* env, std::shared_ptr<Logger> info_log,
std::string trash_dir, std::string trash_dir,
int64_t rate_bytes_per_sec, int64_t rate_bytes_per_sec,
bool delete_existing_trash, Status* status, bool delete_existing_trash, Status* status,
double max_trash_db_ratio) { double max_trash_db_ratio,
uint64_t bytes_max_delete_chunk) {
SstFileManagerImpl* res = SstFileManagerImpl* res =
new SstFileManagerImpl(env, info_log, rate_bytes_per_sec, new SstFileManagerImpl(env, info_log, rate_bytes_per_sec,
max_trash_db_ratio); max_trash_db_ratio, bytes_max_delete_chunk);
// trash_dir is deprecated and not needed anymore, but if user passed it // trash_dir is deprecated and not needed anymore, but if user passed it
// we will still remove files in it. // we will still remove files in it.
@ -236,7 +238,8 @@ SstFileManager* NewSstFileManager(Env* env, std::shared_ptr<Logger> info_log,
std::string trash_dir, std::string trash_dir,
int64_t rate_bytes_per_sec, int64_t rate_bytes_per_sec,
bool delete_existing_trash, Status* status, bool delete_existing_trash, Status* status,
double max_trash_db_ratio) { double max_trash_db_ratio,
uint64_t bytes_max_delete_chunk) {
if (status) { if (status) {
*status = *status =
Status::NotSupported("SstFileManager is not supported in ROCKSDB_LITE"); Status::NotSupported("SstFileManager is not supported in ROCKSDB_LITE");

@ -27,7 +27,8 @@ class SstFileManagerImpl : public SstFileManager {
public: public:
explicit SstFileManagerImpl(Env* env, std::shared_ptr<Logger> logger, explicit SstFileManagerImpl(Env* env, std::shared_ptr<Logger> logger,
int64_t rate_bytes_per_sec, int64_t rate_bytes_per_sec,
double max_trash_db_ratio); double max_trash_db_ratio,
uint64_t bytes_max_delete_chunk);
~SstFileManagerImpl(); ~SstFileManagerImpl();

Loading…
Cancel
Save