Summary: Introduce DeleteScheduler that allow enforcing a rate limit on file deletion Instead of deleting files immediately, files are moved to trash directory and deleted in a background thread that apply sleep penalty between deletes if needed. I have updated PurgeObsoleteFiles and PurgeObsoleteWALFiles to use the delete_scheduler instead of env_->DeleteFile Test Plan: added delete_scheduler_test existing unit tests Reviewers: kradhakrishnan, anthony, rven, yhchiang, sdong Reviewed By: sdong Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D43221main
parent
102ac118b2
commit
c45a57b41e
@ -0,0 +1,62 @@ |
|||||||
|
// Copyright (c) 2015, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#include <map> |
||||||
|
#include <string> |
||||||
|
|
||||||
|
#include "rocksdb/status.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class Env; |
||||||
|
class Logger; |
||||||
|
|
||||||
|
// DeleteScheduler allow the DB to enforce a rate limit on file deletion,
|
||||||
|
// Instead of deleteing files immediately, files are moved to trash_dir
|
||||||
|
// and deleted in a background thread that apply sleep penlty between deletes
|
||||||
|
// if they are happening in a rate faster than rate_bytes_per_sec,
|
||||||
|
//
|
||||||
|
// Rate limiting can be turned off by setting rate_bytes_per_sec = 0, In this
|
||||||
|
// case DeleteScheduler will delete files immediately.
|
||||||
|
class DeleteScheduler { |
||||||
|
public: |
||||||
|
virtual ~DeleteScheduler() {} |
||||||
|
|
||||||
|
// Return delete rate limit in bytes per second
|
||||||
|
virtual int64_t GetRateBytesPerSecond() = 0; |
||||||
|
|
||||||
|
// Move file to trash directory and schedule it's deletion
|
||||||
|
virtual Status DeleteFile(const std::string& fname) = 0; |
||||||
|
|
||||||
|
// Return a map containing errors that happened in the background thread
|
||||||
|
// file_path => error status
|
||||||
|
virtual std::map<std::string, Status> GetBackgroundErrors() = 0; |
||||||
|
}; |
||||||
|
|
||||||
|
// Create a new DeleteScheduler that can be shared among multiple RocksDB
|
||||||
|
// instances to control the file deletion rate.
|
||||||
|
//
|
||||||
|
// @env: Pointer to Env object, please see "rocksdb/env.h".
|
||||||
|
// @trash_dir: Path to the directory where deleted files will be moved into
|
||||||
|
// to be deleted in a background thread while applying rate limiting. If this
|
||||||
|
// directory dont exist, it will be created. This directory should not be
|
||||||
|
// used by any other process or any other DeleteScheduler.
|
||||||
|
// @rate_bytes_per_sec: How many bytes should be deleted per second, If this
|
||||||
|
// value is set to 1024 (1 Kb / sec) and we deleted a file of size 4 Kb
|
||||||
|
// in 1 second, we will wait for another 3 seconds before we delete other
|
||||||
|
// files, Set to 0 to disable rate limiting.
|
||||||
|
// @info_log: If not nullptr, info_log will be used to log errors.
|
||||||
|
// @delete_exisitng_trash: If set to true, the newly created DeleteScheduler
|
||||||
|
// will delete files that already exist in trash_dir.
|
||||||
|
// @status: If not nullptr, status will contain any errors that happened during
|
||||||
|
// creating the missing trash_dir or deleting existing files in trash.
|
||||||
|
extern DeleteScheduler* NewDeleteScheduler( |
||||||
|
Env* env, const std::string& trash_dir, int64_t rate_bytes_per_sec, |
||||||
|
std::shared_ptr<Logger> info_log = nullptr, |
||||||
|
bool delete_exisitng_trash = true, Status* status = nullptr); |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,228 @@ |
|||||||
|
// Copyright (c) 2015, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#include <thread> |
||||||
|
#include <vector> |
||||||
|
|
||||||
|
#include "port/port.h" |
||||||
|
#include "rocksdb/env.h" |
||||||
|
#include "util/delete_scheduler_impl.h" |
||||||
|
#include "util/mutexlock.h" |
||||||
|
#include "util/sync_point.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
DeleteSchedulerImpl::DeleteSchedulerImpl(Env* env, const std::string& trash_dir, |
||||||
|
int64_t rate_bytes_per_sec, |
||||||
|
std::shared_ptr<Logger> info_log) |
||||||
|
: env_(env), |
||||||
|
trash_dir_(trash_dir), |
||||||
|
rate_bytes_per_sec_(rate_bytes_per_sec), |
||||||
|
pending_files_(0), |
||||||
|
closing_(false), |
||||||
|
cv_(&mu_), |
||||||
|
info_log_(info_log) { |
||||||
|
if (rate_bytes_per_sec_ == 0) { |
||||||
|
// Rate limiting is disabled
|
||||||
|
bg_thread_.reset(); |
||||||
|
} else { |
||||||
|
bg_thread_.reset( |
||||||
|
new std::thread(&DeleteSchedulerImpl::BackgroundEmptyTrash, this)); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
DeleteSchedulerImpl::~DeleteSchedulerImpl() { |
||||||
|
{ |
||||||
|
MutexLock l(&mu_); |
||||||
|
closing_ = true; |
||||||
|
cv_.SignalAll(); |
||||||
|
} |
||||||
|
if (bg_thread_) { |
||||||
|
bg_thread_->join(); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
Status DeleteSchedulerImpl::DeleteFile(const std::string& file_path) { |
||||||
|
if (rate_bytes_per_sec_ == 0) { |
||||||
|
// Rate limiting is disabled
|
||||||
|
return env_->DeleteFile(file_path); |
||||||
|
} |
||||||
|
|
||||||
|
// Move file to trash
|
||||||
|
std::string path_in_trash; |
||||||
|
Status s = MoveToTrash(file_path, &path_in_trash); |
||||||
|
if (!s.ok()) { |
||||||
|
Log(InfoLogLevel::ERROR_LEVEL, info_log_, |
||||||
|
"Failed to move %s to trash directory (%s)", file_path.c_str(), |
||||||
|
trash_dir_.c_str()); |
||||||
|
return env_->DeleteFile(file_path); |
||||||
|
} |
||||||
|
|
||||||
|
// Add file to delete queue
|
||||||
|
{ |
||||||
|
MutexLock l(&mu_); |
||||||
|
queue_.push(path_in_trash); |
||||||
|
pending_files_++; |
||||||
|
if (pending_files_ == 1) { |
||||||
|
cv_.SignalAll(); |
||||||
|
} |
||||||
|
} |
||||||
|
return s; |
||||||
|
} |
||||||
|
|
||||||
|
std::map<std::string, Status> DeleteSchedulerImpl::GetBackgroundErrors() { |
||||||
|
MutexLock l(&mu_); |
||||||
|
return bg_errors_; |
||||||
|
} |
||||||
|
|
||||||
|
Status DeleteSchedulerImpl::MoveToTrash(const std::string& file_path, |
||||||
|
std::string* path_in_trash) { |
||||||
|
Status s; |
||||||
|
// Figure out the name of the file in trash folder
|
||||||
|
size_t idx = file_path.rfind("/"); |
||||||
|
if (idx == std::string::npos || idx == file_path.size() - 1) { |
||||||
|
return Status::InvalidArgument("file_path is corrupted"); |
||||||
|
} |
||||||
|
*path_in_trash = trash_dir_ + file_path.substr(idx); |
||||||
|
std::string unique_suffix = ""; |
||||||
|
|
||||||
|
if (*path_in_trash == file_path) { |
||||||
|
// This file is already in trash
|
||||||
|
return s; |
||||||
|
} |
||||||
|
|
||||||
|
// TODO(tec) : Implement Env::RenameFileIfNotExist and remove
|
||||||
|
// file_move_mu mutex.
|
||||||
|
MutexLock l(&file_move_mu_); |
||||||
|
while (true) { |
||||||
|
s = env_->FileExists(*path_in_trash + unique_suffix); |
||||||
|
if (s.IsNotFound()) { |
||||||
|
// We found a path for our file in trash
|
||||||
|
*path_in_trash += unique_suffix; |
||||||
|
s = env_->RenameFile(file_path, *path_in_trash); |
||||||
|
break; |
||||||
|
} else if (s.ok()) { |
||||||
|
// Name conflict, generate new random suffix
|
||||||
|
unique_suffix = env_->GenerateUniqueId(); |
||||||
|
} else { |
||||||
|
// Error during FileExists call, we cannot continue
|
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
return s; |
||||||
|
} |
||||||
|
|
||||||
|
void DeleteSchedulerImpl::BackgroundEmptyTrash() { |
||||||
|
TEST_SYNC_POINT("DeleteSchedulerImpl::BackgroundEmptyTrash"); |
||||||
|
|
||||||
|
while (true) { |
||||||
|
MutexLock l(&mu_); |
||||||
|
while (queue_.empty() && !closing_) { |
||||||
|
cv_.Wait(); |
||||||
|
} |
||||||
|
|
||||||
|
if (closing_) { |
||||||
|
return; |
||||||
|
} |
||||||
|
|
||||||
|
// Delete all files in queue_
|
||||||
|
uint64_t start_time = env_->NowMicros(); |
||||||
|
uint64_t total_deleted_bytes = 0; |
||||||
|
while (!queue_.empty() && !closing_) { |
||||||
|
std::string path_in_trash = queue_.front(); |
||||||
|
queue_.pop(); |
||||||
|
|
||||||
|
// We dont need to hold the lock while deleting the file
|
||||||
|
mu_.Unlock(); |
||||||
|
uint64_t deleted_bytes = 0; |
||||||
|
// Delete file from trash and update total_penlty value
|
||||||
|
Status s = DeleteTrashFile(path_in_trash, &deleted_bytes); |
||||||
|
total_deleted_bytes += deleted_bytes; |
||||||
|
mu_.Lock(); |
||||||
|
|
||||||
|
if (!s.ok()) { |
||||||
|
bg_errors_[path_in_trash] = s; |
||||||
|
} |
||||||
|
|
||||||
|
// Apply penlty if necessary
|
||||||
|
uint64_t total_penlty = |
||||||
|
((total_deleted_bytes * kMicrosInSecond) / rate_bytes_per_sec_); |
||||||
|
while (!closing_ && !cv_.TimedWait(start_time + total_penlty)) {} |
||||||
|
|
||||||
|
pending_files_--; |
||||||
|
if (pending_files_ == 0) { |
||||||
|
// Unblock TEST_WaitForEmptyTrash since there are no more files waiting
|
||||||
|
// to be deleted
|
||||||
|
cv_.SignalAll(); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
Status DeleteSchedulerImpl::DeleteTrashFile(const std::string& path_in_trash, |
||||||
|
uint64_t* deleted_bytes) { |
||||||
|
uint64_t file_size; |
||||||
|
Status s = env_->GetFileSize(path_in_trash, &file_size); |
||||||
|
if (s.ok()) { |
||||||
|
TEST_SYNC_POINT("DeleteSchedulerImpl::DeleteTrashFile:DeleteFile"); |
||||||
|
s = env_->DeleteFile(path_in_trash); |
||||||
|
} |
||||||
|
|
||||||
|
if (!s.ok()) { |
||||||
|
// Error while getting file size or while deleting
|
||||||
|
Log(InfoLogLevel::ERROR_LEVEL, info_log_, |
||||||
|
"Failed to delete %s from trash -- %s", path_in_trash.c_str(), |
||||||
|
s.ToString().c_str()); |
||||||
|
*deleted_bytes = 0; |
||||||
|
} else { |
||||||
|
*deleted_bytes = file_size; |
||||||
|
} |
||||||
|
|
||||||
|
return s; |
||||||
|
} |
||||||
|
|
||||||
|
void DeleteSchedulerImpl::TEST_WaitForEmptyTrash() { |
||||||
|
MutexLock l(&mu_); |
||||||
|
while (pending_files_ > 0 && !closing_) { |
||||||
|
cv_.Wait(); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
DeleteScheduler* NewDeleteScheduler(Env* env, const std::string& trash_dir, |
||||||
|
int64_t rate_bytes_per_sec, |
||||||
|
std::shared_ptr<Logger> info_log, |
||||||
|
bool delete_exisitng_trash, |
||||||
|
Status* status) { |
||||||
|
DeleteScheduler* res = |
||||||
|
new DeleteSchedulerImpl(env, trash_dir, rate_bytes_per_sec, info_log); |
||||||
|
|
||||||
|
Status s; |
||||||
|
if (trash_dir != "") { |
||||||
|
s = env->CreateDirIfMissing(trash_dir); |
||||||
|
if (s.ok() && delete_exisitng_trash) { |
||||||
|
std::vector<std::string> files_in_trash; |
||||||
|
s = env->GetChildren(trash_dir, &files_in_trash); |
||||||
|
if (s.ok()) { |
||||||
|
for (const std::string& trash_file : files_in_trash) { |
||||||
|
if (trash_file == "." || trash_file == "..") { |
||||||
|
continue; |
||||||
|
} |
||||||
|
Status file_delete = res->DeleteFile(trash_dir + "/" + trash_file); |
||||||
|
if (s.ok() && !file_delete.ok()) { |
||||||
|
s = file_delete; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if (status) { |
||||||
|
*status = s; |
||||||
|
} |
||||||
|
|
||||||
|
return res; |
||||||
|
} |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,81 @@ |
|||||||
|
// Copyright (c) 2015, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#include <map> |
||||||
|
#include <queue> |
||||||
|
#include <string> |
||||||
|
#include <thread> |
||||||
|
|
||||||
|
#include "port/port.h" |
||||||
|
|
||||||
|
#include "rocksdb/delete_scheduler.h" |
||||||
|
#include "rocksdb/status.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class Env; |
||||||
|
class Logger; |
||||||
|
|
||||||
|
class DeleteSchedulerImpl : public DeleteScheduler { |
||||||
|
public: |
||||||
|
DeleteSchedulerImpl(Env* env, const std::string& trash_dir, |
||||||
|
int64_t rate_bytes_per_sec, |
||||||
|
std::shared_ptr<Logger> info_log); |
||||||
|
|
||||||
|
~DeleteSchedulerImpl(); |
||||||
|
|
||||||
|
// Return delete rate limit in bytes per second
|
||||||
|
int64_t GetRateBytesPerSecond() { return rate_bytes_per_sec_; } |
||||||
|
|
||||||
|
// Move file to trash directory and schedule it's deletion
|
||||||
|
Status DeleteFile(const std::string& fname); |
||||||
|
|
||||||
|
// Wait for all files being deleteing in the background to finish or for
|
||||||
|
// destructor to be called.
|
||||||
|
void TEST_WaitForEmptyTrash(); |
||||||
|
|
||||||
|
// Return a map containing errors that happened in BackgroundEmptyTrash
|
||||||
|
// file_path => error status
|
||||||
|
std::map<std::string, Status> GetBackgroundErrors(); |
||||||
|
|
||||||
|
private: |
||||||
|
Status MoveToTrash(const std::string& file_path, std::string* path_in_trash); |
||||||
|
|
||||||
|
Status DeleteTrashFile(const std::string& path_in_trash, |
||||||
|
uint64_t* deleted_bytes); |
||||||
|
|
||||||
|
void BackgroundEmptyTrash(); |
||||||
|
|
||||||
|
Env* env_; |
||||||
|
// Path to the trash directory
|
||||||
|
std::string trash_dir_; |
||||||
|
// Maximum number of bytes that should be deleted per second
|
||||||
|
int64_t rate_bytes_per_sec_; |
||||||
|
// Mutex to protect queue_, pending_files_, bg_errors_, closing_
|
||||||
|
port::Mutex mu_; |
||||||
|
// Queue of files in trash that need to be deleted
|
||||||
|
std::queue<std::string> queue_; |
||||||
|
// Number of files in trash that are waiting to be deleted
|
||||||
|
int32_t pending_files_; |
||||||
|
// Errors that happened in BackgroundEmptyTrash (file_path => error)
|
||||||
|
std::map<std::string, Status> bg_errors_; |
||||||
|
// Set to true in ~DeleteSchedulerImpl() to force BackgroundEmptyTrash to stop
|
||||||
|
bool closing_; |
||||||
|
// Condition variable signaled in these conditions
|
||||||
|
// - pending_files_ value change from 0 => 1
|
||||||
|
// - pending_files_ value change from 1 => 0
|
||||||
|
// - closing_ value is set to true
|
||||||
|
port::CondVar cv_; |
||||||
|
// Background thread running BackgroundEmptyTrash
|
||||||
|
std::unique_ptr<std::thread> bg_thread_; |
||||||
|
// Mutex to protect threads from file name conflicts
|
||||||
|
port::Mutex file_move_mu_; |
||||||
|
std::shared_ptr<Logger> info_log_; |
||||||
|
static const uint64_t kMicrosInSecond = 1000 * 1000LL; |
||||||
|
}; |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,439 @@ |
|||||||
|
// Copyright (c) 2015, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#ifndef __STDC_FORMAT_MACROS |
||||||
|
#define __STDC_FORMAT_MACROS |
||||||
|
#endif |
||||||
|
|
||||||
|
#include <inttypes.h> |
||||||
|
#include <atomic> |
||||||
|
#include <thread> |
||||||
|
#include <vector> |
||||||
|
|
||||||
|
#include "rocksdb/delete_scheduler.h" |
||||||
|
#include "rocksdb/env.h" |
||||||
|
#include "rocksdb/options.h" |
||||||
|
#include "util/delete_scheduler_impl.h" |
||||||
|
#include "util/string_util.h" |
||||||
|
#include "util/sync_point.h" |
||||||
|
#include "util/testharness.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class DeleteSchedulerTest : public testing::Test { |
||||||
|
public: |
||||||
|
DeleteSchedulerTest() : env_(Env::Default()) { |
||||||
|
dummy_files_dir_ = test::TmpDir(env_) + "/dummy_data_dir"; |
||||||
|
DestroyAndCreateDir(dummy_files_dir_); |
||||||
|
trash_dir_ = test::TmpDir(env_) + "/trash"; |
||||||
|
DestroyAndCreateDir(trash_dir_); |
||||||
|
} |
||||||
|
|
||||||
|
~DeleteSchedulerTest() { |
||||||
|
rocksdb::SyncPoint::GetInstance()->DisableProcessing(); |
||||||
|
rocksdb::SyncPoint::GetInstance()->LoadDependency({}); |
||||||
|
rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks(); |
||||||
|
DestroyDir(dummy_files_dir_); |
||||||
|
if (delete_scheduler_ != nullptr) { |
||||||
|
delete delete_scheduler_; |
||||||
|
delete_scheduler_ = nullptr; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
void WaitForEmptyTrash() { |
||||||
|
reinterpret_cast<DeleteSchedulerImpl*>(delete_scheduler_) |
||||||
|
->TEST_WaitForEmptyTrash(); |
||||||
|
} |
||||||
|
|
||||||
|
void DestroyDir(const std::string& dir) { |
||||||
|
if (env_->FileExists(dir).IsNotFound()) { |
||||||
|
return; |
||||||
|
} |
||||||
|
std::vector<std::string> files_in_dir; |
||||||
|
EXPECT_OK(env_->GetChildren(dir, &files_in_dir)); |
||||||
|
for (auto& file_in_dir : files_in_dir) { |
||||||
|
if (file_in_dir == "." || file_in_dir == "..") { |
||||||
|
continue; |
||||||
|
} |
||||||
|
EXPECT_OK(env_->DeleteFile(dir + "/" + file_in_dir)); |
||||||
|
} |
||||||
|
EXPECT_OK(env_->DeleteDir(dir)); |
||||||
|
} |
||||||
|
|
||||||
|
void DestroyAndCreateDir(const std::string& dir) { |
||||||
|
DestroyDir(dir); |
||||||
|
EXPECT_OK(env_->CreateDir(dir)); |
||||||
|
} |
||||||
|
|
||||||
|
int CountFilesInDir(const std::string& dir) { |
||||||
|
std::vector<std::string> files_in_dir; |
||||||
|
EXPECT_OK(env_->GetChildren(dir, &files_in_dir)); |
||||||
|
// Ignore "." and ".."
|
||||||
|
return static_cast<int>(files_in_dir.size()) - 2; |
||||||
|
} |
||||||
|
|
||||||
|
std::string NewDummyFile(const std::string& file_name, uint64_t size = 1024) { |
||||||
|
std::string file_path = dummy_files_dir_ + "/" + file_name; |
||||||
|
std::unique_ptr<WritableFile> f; |
||||||
|
env_->NewWritableFile(file_path, &f, EnvOptions()); |
||||||
|
std::string data(size, 'A'); |
||||||
|
EXPECT_OK(f->Append(data)); |
||||||
|
EXPECT_OK(f->Close()); |
||||||
|
return file_path; |
||||||
|
} |
||||||
|
|
||||||
|
Env* env_; |
||||||
|
std::string dummy_files_dir_; |
||||||
|
std::string trash_dir_; |
||||||
|
int64_t rate_bytes_per_sec_; |
||||||
|
DeleteScheduler* delete_scheduler_; |
||||||
|
}; |
||||||
|
|
||||||
|
// Test the basic functionality of DeleteScheduler (Rate Limiting).
|
||||||
|
// 1- Create 100 dummy files
|
||||||
|
// 2- Delete the 100 dummy files using DeleteScheduler
|
||||||
|
// 3- Wait for DeleteScheduler to delete all files in trash
|
||||||
|
// 4- Measure time spent in step 2,3 and make sure it matches the expected
|
||||||
|
// time from a rate limited delete
|
||||||
|
// 5- Make sure that all created files were completely deleted
|
||||||
|
TEST_F(DeleteSchedulerTest, BasicRateLimiting) { |
||||||
|
int num_files = 100; // 100 files
|
||||||
|
uint64_t file_size = 1024; // every file is 1 kb
|
||||||
|
std::vector<uint64_t> delete_kbs_per_sec = {512, 200, 100, 50, 25}; |
||||||
|
|
||||||
|
for (size_t t = 0; t < delete_kbs_per_sec.size(); t++) { |
||||||
|
DestroyAndCreateDir(dummy_files_dir_); |
||||||
|
rate_bytes_per_sec_ = delete_kbs_per_sec[t] * 1024; |
||||||
|
delete_scheduler_ = |
||||||
|
NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_); |
||||||
|
|
||||||
|
// Create 100 dummy files, every file is 1 Kb
|
||||||
|
std::vector<std::string> generated_files; |
||||||
|
uint64_t total_files_size = 0; |
||||||
|
for (int i = 0; i < num_files; i++) { |
||||||
|
std::string file_name = "file" + ToString(i) + ".data"; |
||||||
|
generated_files.push_back(NewDummyFile(file_name, file_size)); |
||||||
|
total_files_size += file_size; |
||||||
|
} |
||||||
|
|
||||||
|
// Delete dummy files and measure time spent to empty trash
|
||||||
|
uint64_t delete_start_time = env_->NowMicros(); |
||||||
|
for (int i = 0; i < num_files; i++) { |
||||||
|
ASSERT_OK(delete_scheduler_->DeleteFile(generated_files[i])); |
||||||
|
} |
||||||
|
ASSERT_EQ(CountFilesInDir(dummy_files_dir_), 0); |
||||||
|
|
||||||
|
WaitForEmptyTrash(); |
||||||
|
uint64_t time_spent_deleting = env_->NowMicros() - delete_start_time; |
||||||
|
uint64_t expected_delete_time = |
||||||
|
((total_files_size * 1000000) / rate_bytes_per_sec_); |
||||||
|
ASSERT_GT(time_spent_deleting, expected_delete_time * 0.9); |
||||||
|
ASSERT_LT(time_spent_deleting, expected_delete_time * 1.1); |
||||||
|
printf("Delete time = %" PRIu64 ", Expected delete time = %" PRIu64 |
||||||
|
", Ratio %f\n", |
||||||
|
time_spent_deleting, expected_delete_time, |
||||||
|
static_cast<double>(time_spent_deleting) / expected_delete_time); |
||||||
|
|
||||||
|
ASSERT_EQ(CountFilesInDir(trash_dir_), 0); |
||||||
|
auto bg_errors = delete_scheduler_->GetBackgroundErrors(); |
||||||
|
ASSERT_EQ(bg_errors.size(), 0); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Same as the BasicRateLimiting test but delete files in multiple threads.
|
||||||
|
// 1- Create 100 dummy files
|
||||||
|
// 2- Delete the 100 dummy files using DeleteScheduler using 10 threads
|
||||||
|
// 3- Wait for DeleteScheduler to delete all files in queue
|
||||||
|
// 4- Measure time spent in step 2,3 and make sure it matches the expected
|
||||||
|
// time from a rate limited delete
|
||||||
|
// 5- Make sure that all created files were completely deleted
|
||||||
|
TEST_F(DeleteSchedulerTest, RateLimitingMultiThreaded) { |
||||||
|
int thread_cnt = 10; |
||||||
|
int num_files = 10; // 10 files per thread
|
||||||
|
uint64_t file_size = 1024; // every file is 1 kb
|
||||||
|
std::vector<uint64_t> delete_kbs_per_sec = {512, 200, 100, 50, 25}; |
||||||
|
|
||||||
|
for (size_t t = 0; t < delete_kbs_per_sec.size(); t++) { |
||||||
|
DestroyAndCreateDir(dummy_files_dir_); |
||||||
|
rate_bytes_per_sec_ = delete_kbs_per_sec[t] * 1024; |
||||||
|
delete_scheduler_ = |
||||||
|
NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_); |
||||||
|
|
||||||
|
// Create 100 dummy files, every file is 1 Kb
|
||||||
|
std::vector<std::string> generated_files; |
||||||
|
uint64_t total_files_size = 0; |
||||||
|
for (int i = 0; i < num_files * thread_cnt; i++) { |
||||||
|
std::string file_name = "file" + ToString(i) + ".data"; |
||||||
|
generated_files.push_back(NewDummyFile(file_name, file_size)); |
||||||
|
total_files_size += file_size; |
||||||
|
} |
||||||
|
|
||||||
|
// Delete dummy files using 10 threads and measure time spent to empty trash
|
||||||
|
uint64_t delete_start_time = env_->NowMicros(); |
||||||
|
std::atomic<int> thread_num(0); |
||||||
|
std::vector<std::thread> threads; |
||||||
|
for (int i = 0; i < thread_cnt; i++) { |
||||||
|
threads.emplace_back([&]() { |
||||||
|
int idx = thread_num.fetch_add(1); |
||||||
|
int range_start = idx * num_files; |
||||||
|
int range_end = range_start + num_files; |
||||||
|
for (int j = range_start; j < range_end; j++){ |
||||||
|
ASSERT_OK(delete_scheduler_->DeleteFile(generated_files[j])); |
||||||
|
} |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
for (size_t i = 0; i < threads.size(); i++) { |
||||||
|
threads[i].join(); |
||||||
|
} |
||||||
|
ASSERT_EQ(CountFilesInDir(dummy_files_dir_), 0); |
||||||
|
|
||||||
|
WaitForEmptyTrash(); |
||||||
|
uint64_t time_spent_deleting = env_->NowMicros() - delete_start_time; |
||||||
|
uint64_t expected_delete_time = |
||||||
|
((total_files_size * 1000000) / rate_bytes_per_sec_); |
||||||
|
ASSERT_GT(time_spent_deleting, expected_delete_time * 0.9); |
||||||
|
ASSERT_LT(time_spent_deleting, expected_delete_time * 1.1); |
||||||
|
printf("Delete time = %" PRIu64 ", Expected delete time = %" PRIu64 |
||||||
|
", Ratio %f\n", |
||||||
|
time_spent_deleting, expected_delete_time, |
||||||
|
static_cast<double>(time_spent_deleting) / expected_delete_time); |
||||||
|
|
||||||
|
ASSERT_EQ(CountFilesInDir(trash_dir_), 0); |
||||||
|
auto bg_errors = delete_scheduler_->GetBackgroundErrors(); |
||||||
|
ASSERT_EQ(bg_errors.size(), 0); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Disable rate limiting by setting rate_bytes_per_sec_ to 0 and make sure
|
||||||
|
// that when DeleteScheduler delete a file it delete it immediately and dont
|
||||||
|
// move it to trash
|
||||||
|
TEST_F(DeleteSchedulerTest, DisableRateLimiting) { |
||||||
|
int bg_delete_file = 0; |
||||||
|
rocksdb::SyncPoint::GetInstance()->SetCallBack( |
||||||
|
"DeleteSchedulerImpl::DeleteTrashFile:DeleteFile", |
||||||
|
[&](void* arg) { bg_delete_file++; }); |
||||||
|
|
||||||
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing(); |
||||||
|
|
||||||
|
delete_scheduler_ = NewDeleteScheduler(env_, "", 0); |
||||||
|
|
||||||
|
for (int i = 0; i < 10; i++) { |
||||||
|
// Every file we delete will be deleted immediately
|
||||||
|
std::string dummy_file = NewDummyFile("dummy.data"); |
||||||
|
ASSERT_OK(delete_scheduler_->DeleteFile(dummy_file)); |
||||||
|
ASSERT_TRUE(env_->FileExists(dummy_file).IsNotFound()); |
||||||
|
ASSERT_EQ(CountFilesInDir(dummy_files_dir_), 0); |
||||||
|
ASSERT_EQ(CountFilesInDir(trash_dir_), 0); |
||||||
|
} |
||||||
|
|
||||||
|
ASSERT_EQ(bg_delete_file, 0); |
||||||
|
|
||||||
|
rocksdb::SyncPoint::GetInstance()->DisableProcessing(); |
||||||
|
} |
||||||
|
|
||||||
|
// Testing that moving files to trash with the same name is not a problem
|
||||||
|
// 1- Create 10 files with the same name "conflict.data"
|
||||||
|
// 2- Delete the 10 files using DeleteScheduler
|
||||||
|
// 3- Make sure that trash directory contain 10 files ("conflict.data" x 10)
|
||||||
|
// --- Hold DeleteSchedulerImpl::BackgroundEmptyTrash ---
|
||||||
|
// 4- Make sure that files are deleted from trash
|
||||||
|
TEST_F(DeleteSchedulerTest, ConflictNames) { |
||||||
|
rocksdb::SyncPoint::GetInstance()->LoadDependency({ |
||||||
|
{"DeleteSchedulerTest::ConflictNames:1", |
||||||
|
"DeleteSchedulerImpl::BackgroundEmptyTrash"}, |
||||||
|
}); |
||||||
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing(); |
||||||
|
|
||||||
|
rate_bytes_per_sec_ = 1024 * 1024; // 1 Mb/sec
|
||||||
|
delete_scheduler_ = NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_); |
||||||
|
|
||||||
|
// Create "conflict.data" and move it to trash 10 times
|
||||||
|
for (int i = 0; i < 10; i++) { |
||||||
|
std::string dummy_file = NewDummyFile("conflict.data"); |
||||||
|
ASSERT_OK(delete_scheduler_->DeleteFile(dummy_file)); |
||||||
|
} |
||||||
|
ASSERT_EQ(CountFilesInDir(dummy_files_dir_), 0); |
||||||
|
// 10 files ("conflict.data" x 10) in trash
|
||||||
|
ASSERT_EQ(CountFilesInDir(trash_dir_), 10); |
||||||
|
|
||||||
|
// Hold BackgroundEmptyTrash
|
||||||
|
TEST_SYNC_POINT("DeleteSchedulerTest::ConflictNames:1"); |
||||||
|
WaitForEmptyTrash(); |
||||||
|
ASSERT_EQ(CountFilesInDir(trash_dir_), 0); |
||||||
|
|
||||||
|
auto bg_errors = delete_scheduler_->GetBackgroundErrors(); |
||||||
|
ASSERT_EQ(bg_errors.size(), 0); |
||||||
|
|
||||||
|
rocksdb::SyncPoint::GetInstance()->DisableProcessing(); |
||||||
|
} |
||||||
|
|
||||||
|
// 1- Create 10 dummy files
|
||||||
|
// 2- Delete the 10 files using DeleteScheduler (move them to trsah)
|
||||||
|
// 3- Delete the 10 files directly (using env_->DeleteFile)
|
||||||
|
// --- Hold DeleteSchedulerImpl::BackgroundEmptyTrash ---
|
||||||
|
// 4- Make sure that DeleteScheduler failed to delete the 10 files and
|
||||||
|
// reported 10 background errors
|
||||||
|
TEST_F(DeleteSchedulerTest, BackgroundError) { |
||||||
|
rocksdb::SyncPoint::GetInstance()->LoadDependency({ |
||||||
|
{"DeleteSchedulerTest::BackgroundError:1", |
||||||
|
"DeleteSchedulerImpl::BackgroundEmptyTrash"}, |
||||||
|
}); |
||||||
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing(); |
||||||
|
|
||||||
|
rate_bytes_per_sec_ = 1024 * 1024; // 1 Mb/sec
|
||||||
|
delete_scheduler_ = NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_); |
||||||
|
|
||||||
|
// Generate 10 dummy files and move them to trash
|
||||||
|
for (int i = 0; i < 10; i++) { |
||||||
|
std::string file_name = "data_" + ToString(i) + ".data"; |
||||||
|
ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile(file_name))); |
||||||
|
} |
||||||
|
ASSERT_EQ(CountFilesInDir(dummy_files_dir_), 0); |
||||||
|
ASSERT_EQ(CountFilesInDir(trash_dir_), 10); |
||||||
|
|
||||||
|
// Delete 10 files from trash, this will cause background errors in
|
||||||
|
// BackgroundEmptyTrash since we already deleted the files it was
|
||||||
|
// goind to delete
|
||||||
|
for (int i = 0; i < 10; i++) { |
||||||
|
std::string file_name = "data_" + ToString(i) + ".data"; |
||||||
|
ASSERT_OK(env_->DeleteFile(trash_dir_ + "/" + file_name)); |
||||||
|
} |
||||||
|
|
||||||
|
// Hold BackgroundEmptyTrash
|
||||||
|
TEST_SYNC_POINT("DeleteSchedulerTest::BackgroundError:1"); |
||||||
|
WaitForEmptyTrash(); |
||||||
|
auto bg_errors = delete_scheduler_->GetBackgroundErrors(); |
||||||
|
ASSERT_EQ(bg_errors.size(), 10); |
||||||
|
|
||||||
|
rocksdb::SyncPoint::GetInstance()->DisableProcessing(); |
||||||
|
} |
||||||
|
|
||||||
|
// 1- Create 10 files in trash
|
||||||
|
// 2- Create a DeleteScheduler with delete_exisitng_trash = true
|
||||||
|
// 3- Wait for DeleteScheduler to delete all files in queue
|
||||||
|
// 4- Make sure that all files in trash directory were deleted
|
||||||
|
TEST_F(DeleteSchedulerTest, TrashWithExistingFiles) { |
||||||
|
std::vector<std::string> dummy_files; |
||||||
|
for (int i = 0; i < 10; i++) { |
||||||
|
std::string file_name = "data_" + ToString(i) + ".data"; |
||||||
|
std::string trash_path = trash_dir_ + "/" + file_name; |
||||||
|
env_->RenameFile(NewDummyFile(file_name), trash_path); |
||||||
|
} |
||||||
|
ASSERT_EQ(CountFilesInDir(trash_dir_), 10); |
||||||
|
|
||||||
|
Status s; |
||||||
|
rate_bytes_per_sec_ = 1024 * 1024; // 1 Mb/sec
|
||||||
|
delete_scheduler_ = NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_, |
||||||
|
nullptr, true, &s); |
||||||
|
ASSERT_OK(s); |
||||||
|
|
||||||
|
WaitForEmptyTrash(); |
||||||
|
ASSERT_EQ(CountFilesInDir(trash_dir_), 0); |
||||||
|
|
||||||
|
auto bg_errors = delete_scheduler_->GetBackgroundErrors(); |
||||||
|
ASSERT_EQ(bg_errors.size(), 0); |
||||||
|
} |
||||||
|
|
||||||
|
// 1- Create 10 dummy files
|
||||||
|
// 2- Delete 10 dummy files using DeleteScheduler
|
||||||
|
// 3- Wait for DeleteScheduler to delete all files in queue
|
||||||
|
// 4- Make sure all files in trash directory were deleted
|
||||||
|
// 5- Repeat previous steps 5 times
|
||||||
|
TEST_F(DeleteSchedulerTest, StartBGEmptyTrashMultipleTimes) { |
||||||
|
int bg_delete_file = 0; |
||||||
|
rocksdb::SyncPoint::GetInstance()->SetCallBack( |
||||||
|
"DeleteSchedulerImpl::DeleteTrashFile:DeleteFile", |
||||||
|
[&](void* arg) { bg_delete_file++; }); |
||||||
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing(); |
||||||
|
|
||||||
|
rate_bytes_per_sec_ = 1024 * 1024; // 1 MB / sec
|
||||||
|
delete_scheduler_ = NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_); |
||||||
|
|
||||||
|
// Move files to trash, wait for empty trash, start again
|
||||||
|
for (int run = 1; run <= 5; run++) { |
||||||
|
// Generate 10 dummy files and move them to trash
|
||||||
|
for (int i = 0; i < 10; i++) { |
||||||
|
std::string file_name = "data_" + ToString(i) + ".data"; |
||||||
|
ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile(file_name))); |
||||||
|
} |
||||||
|
ASSERT_EQ(CountFilesInDir(dummy_files_dir_), 0); |
||||||
|
WaitForEmptyTrash(); |
||||||
|
ASSERT_EQ(bg_delete_file, 10 * run); |
||||||
|
ASSERT_EQ(CountFilesInDir(trash_dir_), 0); |
||||||
|
|
||||||
|
auto bg_errors = delete_scheduler_->GetBackgroundErrors(); |
||||||
|
ASSERT_EQ(bg_errors.size(), 0); |
||||||
|
} |
||||||
|
|
||||||
|
ASSERT_EQ(bg_delete_file, 50); |
||||||
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing(); |
||||||
|
} |
||||||
|
|
||||||
|
// 1- Create a DeleteScheduler with very slow rate limit (1 Byte / sec)
|
||||||
|
// 2- Delete 100 files using DeleteScheduler
|
||||||
|
// 3- Delete the DeleteScheduler (call the destructor while queue is not empty)
|
||||||
|
// 4- Make sure that not all files were deleted from trash and that
|
||||||
|
// DeleteScheduler background thread did not delete all files
|
||||||
|
TEST_F(DeleteSchedulerTest, DestructorWithNonEmptyQueue) { |
||||||
|
int bg_delete_file = 0; |
||||||
|
rocksdb::SyncPoint::GetInstance()->SetCallBack( |
||||||
|
"DeleteSchedulerImpl::DeleteTrashFile:DeleteFile", |
||||||
|
[&](void* arg) { bg_delete_file++; }); |
||||||
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing(); |
||||||
|
|
||||||
|
rate_bytes_per_sec_ = 1; // 1 Byte / sec
|
||||||
|
delete_scheduler_ = NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_); |
||||||
|
|
||||||
|
for (int i = 0; i < 100; i++) { |
||||||
|
std::string file_name = "data_" + ToString(i) + ".data"; |
||||||
|
ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile(file_name))); |
||||||
|
} |
||||||
|
|
||||||
|
// Deleting 100 files will need >28 hours to delete
|
||||||
|
// we will delete the DeleteScheduler while delete queue is not empty
|
||||||
|
delete delete_scheduler_; |
||||||
|
delete_scheduler_ = nullptr; |
||||||
|
|
||||||
|
ASSERT_LT(bg_delete_file, 100); |
||||||
|
ASSERT_GT(CountFilesInDir(trash_dir_), 0); |
||||||
|
|
||||||
|
rocksdb::SyncPoint::GetInstance()->DisableProcessing(); |
||||||
|
} |
||||||
|
|
||||||
|
// 1- Delete the trash directory
|
||||||
|
// 2- Delete 10 files using DeleteScheduler
|
||||||
|
// 3- Make sure that the 10 files were deleted immediately since DeleteScheduler
|
||||||
|
// failed to move them to trash directory
|
||||||
|
TEST_F(DeleteSchedulerTest, MoveToTrashError) { |
||||||
|
int bg_delete_file = 0; |
||||||
|
rocksdb::SyncPoint::GetInstance()->SetCallBack( |
||||||
|
"DeleteSchedulerImpl::DeleteTrashFile:DeleteFile", |
||||||
|
[&](void* arg) { bg_delete_file++; }); |
||||||
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing(); |
||||||
|
|
||||||
|
rate_bytes_per_sec_ = 1024; // 1 Kb / sec
|
||||||
|
delete_scheduler_ = NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_); |
||||||
|
|
||||||
|
// We will delete the trash directory, that mean that DeleteScheduler wont
|
||||||
|
// be able to move files to trash and will delete files them immediately.
|
||||||
|
DestroyDir(trash_dir_); |
||||||
|
for (int i = 0; i < 10; i++) { |
||||||
|
std::string file_name = "data_" + ToString(i) + ".data"; |
||||||
|
ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile(file_name))); |
||||||
|
} |
||||||
|
|
||||||
|
ASSERT_EQ(CountFilesInDir(dummy_files_dir_), 0); |
||||||
|
ASSERT_EQ(bg_delete_file, 0); |
||||||
|
|
||||||
|
rocksdb::SyncPoint::GetInstance()->DisableProcessing(); |
||||||
|
} |
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
int main(int argc, char** argv) { |
||||||
|
::testing::InitGoogleTest(&argc, argv); |
||||||
|
return RUN_ALL_TESTS(); |
||||||
|
} |
Loading…
Reference in new issue