Summary: Introduce DeleteScheduler, which allows enforcing a rate limit on file deletion. Instead of deleting files immediately, files are moved to a trash directory and deleted in a background thread that applies a sleep penalty between deletes if needed. I have updated PurgeObsoleteFiles and PurgeObsoleteWALFiles to use the delete_scheduler instead of env_->DeleteFile. Test Plan: added delete_scheduler_test; existing unit tests. Reviewers: kradhakrishnan, anthony, rven, yhchiang, sdong Reviewed By: sdong Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D43221
main
							parent
							
								
									102ac118b2
								
							
						
					
					
						commit
						c45a57b41e
					
				| @ -0,0 +1,62 @@ | ||||
| //  Copyright (c) 2015, Facebook, Inc.  All rights reserved.
 | ||||
| //  This source code is licensed under the BSD-style license found in the
 | ||||
| //  LICENSE file in the root directory of this source tree. An additional grant
 | ||||
| //  of patent rights can be found in the PATENTS file in the same directory.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <map> | ||||
| #include <string> | ||||
| 
 | ||||
| #include "rocksdb/status.h" | ||||
| 
 | ||||
| namespace rocksdb { | ||||
| 
 | ||||
| class Env; | ||||
| class Logger; | ||||
| 
 | ||||
| // DeleteScheduler allow the DB to enforce a rate limit on file deletion,
 | ||||
| // Instead of deleteing files immediately, files are moved to trash_dir
 | ||||
| // and deleted in a background thread that apply sleep penlty between deletes
 | ||||
| // if they are happening in a rate faster than rate_bytes_per_sec,
 | ||||
| //
 | ||||
| // Rate limiting can be turned off by setting rate_bytes_per_sec = 0, In this
 | ||||
| // case DeleteScheduler will delete files immediately.
 | ||||
| class DeleteScheduler { | ||||
|  public: | ||||
|   virtual ~DeleteScheduler() {} | ||||
| 
 | ||||
|   // Return delete rate limit in bytes per second
 | ||||
|   virtual int64_t GetRateBytesPerSecond() = 0; | ||||
| 
 | ||||
|   // Move file to trash directory and schedule it's deletion
 | ||||
|   virtual Status DeleteFile(const std::string& fname) = 0; | ||||
| 
 | ||||
|   // Return a map containing errors that happened in the background thread
 | ||||
|   // file_path => error status
 | ||||
|   virtual std::map<std::string, Status> GetBackgroundErrors() = 0; | ||||
| }; | ||||
| 
 | ||||
// Create a new DeleteScheduler that can be shared among multiple RocksDB
// instances to control the file deletion rate. The returned object is
// heap-allocated; the caller is responsible for deleting it.
//
// @env: Pointer to Env object, please see "rocksdb/env.h".
// @trash_dir: Path to the directory that deleted files are moved into so that
//    a background thread can delete them while applying rate limiting. If this
//    directory does not exist, it will be created. This directory should not
//    be used by any other process or any other DeleteScheduler.
// @rate_bytes_per_sec: How many bytes should be deleted per second. If this
//    value is set to 1024 (1 Kb / sec) and we deleted a file of size 4 Kb
//    in 1 second, we will wait for another 3 seconds before we delete other
//    files. Set to 0 to disable rate limiting.
// @info_log: If not nullptr, info_log will be used to log errors.
// @delete_exisitng_trash: If set to true, the newly created DeleteScheduler
//    will delete files that already exist in trash_dir.
// @status: If not nullptr, status will contain any errors that happened while
//    creating the missing trash_dir or deleting existing files in trash.
extern DeleteScheduler* NewDeleteScheduler(
    Env* env, const std::string& trash_dir, int64_t rate_bytes_per_sec,
    std::shared_ptr<Logger> info_log = nullptr,
    bool delete_exisitng_trash = true, Status* status = nullptr);
| 
 | ||||
| }  // namespace rocksdb
 | ||||
| @ -0,0 +1,228 @@ | ||||
| //  Copyright (c) 2015, Facebook, Inc.  All rights reserved.
 | ||||
| //  This source code is licensed under the BSD-style license found in the
 | ||||
| //  LICENSE file in the root directory of this source tree. An additional grant
 | ||||
| //  of patent rights can be found in the PATENTS file in the same directory.
 | ||||
| 
 | ||||
| #include <thread> | ||||
| #include <vector> | ||||
| 
 | ||||
| #include "port/port.h" | ||||
| #include "rocksdb/env.h" | ||||
| #include "util/delete_scheduler_impl.h" | ||||
| #include "util/mutexlock.h" | ||||
| #include "util/sync_point.h" | ||||
| 
 | ||||
| namespace rocksdb { | ||||
| 
 | ||||
| DeleteSchedulerImpl::DeleteSchedulerImpl(Env* env, const std::string& trash_dir, | ||||
|                                          int64_t rate_bytes_per_sec, | ||||
|                                          std::shared_ptr<Logger> info_log) | ||||
|     : env_(env), | ||||
|       trash_dir_(trash_dir), | ||||
|       rate_bytes_per_sec_(rate_bytes_per_sec), | ||||
|       pending_files_(0), | ||||
|       closing_(false), | ||||
|       cv_(&mu_), | ||||
|       info_log_(info_log) { | ||||
|   if (rate_bytes_per_sec_ == 0) { | ||||
|     // Rate limiting is disabled
 | ||||
|     bg_thread_.reset(); | ||||
|   } else { | ||||
|     bg_thread_.reset( | ||||
|         new std::thread(&DeleteSchedulerImpl::BackgroundEmptyTrash, this)); | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| DeleteSchedulerImpl::~DeleteSchedulerImpl() { | ||||
|   { | ||||
|     MutexLock l(&mu_); | ||||
|     closing_ = true; | ||||
|     cv_.SignalAll(); | ||||
|   } | ||||
|   if (bg_thread_) { | ||||
|     bg_thread_->join(); | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| Status DeleteSchedulerImpl::DeleteFile(const std::string& file_path) { | ||||
|   if (rate_bytes_per_sec_ == 0) { | ||||
|     // Rate limiting is disabled
 | ||||
|     return env_->DeleteFile(file_path); | ||||
|   } | ||||
| 
 | ||||
|   // Move file to trash
 | ||||
|   std::string path_in_trash; | ||||
|   Status s = MoveToTrash(file_path, &path_in_trash); | ||||
|   if (!s.ok()) { | ||||
|     Log(InfoLogLevel::ERROR_LEVEL, info_log_, | ||||
|         "Failed to move %s to trash directory (%s)", file_path.c_str(), | ||||
|         trash_dir_.c_str()); | ||||
|     return env_->DeleteFile(file_path); | ||||
|   } | ||||
| 
 | ||||
|   // Add file to delete queue
 | ||||
|   { | ||||
|     MutexLock l(&mu_); | ||||
|     queue_.push(path_in_trash); | ||||
|     pending_files_++; | ||||
|     if (pending_files_ == 1) { | ||||
|       cv_.SignalAll(); | ||||
|     } | ||||
|   } | ||||
|   return s; | ||||
| } | ||||
| 
 | ||||
| std::map<std::string, Status> DeleteSchedulerImpl::GetBackgroundErrors() { | ||||
|   MutexLock l(&mu_); | ||||
|   return bg_errors_; | ||||
| } | ||||
| 
 | ||||
| Status DeleteSchedulerImpl::MoveToTrash(const std::string& file_path, | ||||
|                                         std::string* path_in_trash) { | ||||
|   Status s; | ||||
|   // Figure out the name of the file in trash folder
 | ||||
|   size_t idx = file_path.rfind("/"); | ||||
|   if (idx == std::string::npos || idx == file_path.size() - 1) { | ||||
|     return Status::InvalidArgument("file_path is corrupted"); | ||||
|   } | ||||
|   *path_in_trash = trash_dir_ + file_path.substr(idx); | ||||
|   std::string unique_suffix = ""; | ||||
| 
 | ||||
|   if (*path_in_trash == file_path) { | ||||
|     // This file is already in trash
 | ||||
|     return s; | ||||
|   } | ||||
| 
 | ||||
|   // TODO(tec) : Implement Env::RenameFileIfNotExist and remove
 | ||||
|   //             file_move_mu mutex.
 | ||||
|   MutexLock l(&file_move_mu_); | ||||
|   while (true) { | ||||
|     s = env_->FileExists(*path_in_trash + unique_suffix); | ||||
|     if (s.IsNotFound()) { | ||||
|       // We found a path for our file in trash
 | ||||
|       *path_in_trash += unique_suffix; | ||||
|       s = env_->RenameFile(file_path, *path_in_trash); | ||||
|       break; | ||||
|     } else if (s.ok()) { | ||||
|       // Name conflict, generate new random suffix
 | ||||
|       unique_suffix = env_->GenerateUniqueId(); | ||||
|     } else { | ||||
|       // Error during FileExists call, we cannot continue
 | ||||
|       break; | ||||
|     } | ||||
|   } | ||||
|   return s; | ||||
| } | ||||
| 
 | ||||
// Body of the background thread: waits for queued trash files and deletes
// them one at a time, sleeping between deletes so that the bytes deleted in
// the current batch do not exceed rate_bytes_per_sec_. Returns once closing_
// is observed while waiting for work; files still queued at that point are
// left in trash.
void DeleteSchedulerImpl::BackgroundEmptyTrash() {
  TEST_SYNC_POINT("DeleteSchedulerImpl::BackgroundEmptyTrash");

  while (true) {
    // mu_ is held for the whole iteration except around the actual delete.
    MutexLock l(&mu_);
    // Sleep until there is work to do or the scheduler is shutting down.
    while (queue_.empty() && !closing_) {
      cv_.Wait();
    }

    if (closing_) {
      return;
    }

    // Delete all files in queue_. start_time and total_deleted_bytes are
    // per-batch: the penalty below is measured from the start of this batch.
    uint64_t start_time = env_->NowMicros();
    uint64_t total_deleted_bytes = 0;
    while (!queue_.empty() && !closing_) {
      std::string path_in_trash = queue_.front();
      queue_.pop();

      // We don't need to hold the lock while deleting the file
      mu_.Unlock();
      uint64_t deleted_bytes = 0;
      // Delete file from trash and accumulate the bytes that the penalty is
      // computed from
      Status s = DeleteTrashFile(path_in_trash,  &deleted_bytes);
      total_deleted_bytes += deleted_bytes;
      mu_.Lock();

      if (!s.ok()) {
        // Remember the failure so GetBackgroundErrors() can report it.
        bg_errors_[path_in_trash] = s;
      }

      // Apply penalty if necessary: total_penlty is the number of
      // microseconds the bytes deleted so far should have taken at the
      // configured rate, so start_time + total_penlty is the earliest time
      // the next delete may start.
      uint64_t total_penlty =
          ((total_deleted_bytes * kMicrosInSecond) / rate_bytes_per_sec_);
      // Keep waiting until that deadline passes or shutdown is requested.
      // NOTE(review): relies on TimedWait returning true on timeout and false
      // on an earlier wake-up -- confirm against port::CondVar.
      while (!closing_ && !cv_.TimedWait(start_time + total_penlty)) {}

      pending_files_--;
      if (pending_files_ == 0) {
        // Unblock TEST_WaitForEmptyTrash since there are no more files waiting
        // to be deleted
        cv_.SignalAll();
      }
    }
  }
}
| 
 | ||||
| Status DeleteSchedulerImpl::DeleteTrashFile(const std::string& path_in_trash, | ||||
|                                             uint64_t* deleted_bytes) { | ||||
|   uint64_t file_size; | ||||
|   Status s = env_->GetFileSize(path_in_trash, &file_size); | ||||
|   if (s.ok()) { | ||||
|     TEST_SYNC_POINT("DeleteSchedulerImpl::DeleteTrashFile:DeleteFile"); | ||||
|     s = env_->DeleteFile(path_in_trash); | ||||
|   } | ||||
| 
 | ||||
|   if (!s.ok()) { | ||||
|     // Error while getting file size or while deleting
 | ||||
|     Log(InfoLogLevel::ERROR_LEVEL, info_log_, | ||||
|         "Failed to delete %s from trash -- %s", path_in_trash.c_str(), | ||||
|         s.ToString().c_str()); | ||||
|     *deleted_bytes = 0; | ||||
|   } else { | ||||
|     *deleted_bytes = file_size; | ||||
|   } | ||||
| 
 | ||||
|   return s; | ||||
| } | ||||
| 
 | ||||
| void DeleteSchedulerImpl::TEST_WaitForEmptyTrash() { | ||||
|   MutexLock l(&mu_); | ||||
|   while (pending_files_ > 0 && !closing_) { | ||||
|     cv_.Wait(); | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| DeleteScheduler* NewDeleteScheduler(Env* env, const std::string& trash_dir, | ||||
|                                     int64_t rate_bytes_per_sec, | ||||
|                                     std::shared_ptr<Logger> info_log, | ||||
|                                     bool delete_exisitng_trash, | ||||
|                                     Status* status) { | ||||
|   DeleteScheduler* res = | ||||
|       new DeleteSchedulerImpl(env, trash_dir, rate_bytes_per_sec, info_log); | ||||
| 
 | ||||
|   Status s; | ||||
|   if (trash_dir != "") { | ||||
|     s = env->CreateDirIfMissing(trash_dir); | ||||
|     if (s.ok() && delete_exisitng_trash) { | ||||
|       std::vector<std::string> files_in_trash; | ||||
|       s = env->GetChildren(trash_dir, &files_in_trash); | ||||
|       if (s.ok()) { | ||||
|         for (const std::string& trash_file : files_in_trash) { | ||||
|           if (trash_file == "." || trash_file == "..") { | ||||
|             continue; | ||||
|           } | ||||
|           Status file_delete = res->DeleteFile(trash_dir + "/" + trash_file); | ||||
|           if (s.ok() && !file_delete.ok()) { | ||||
|             s = file_delete; | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   if (status) { | ||||
|     *status = s; | ||||
|   } | ||||
| 
 | ||||
|   return res; | ||||
| } | ||||
| 
 | ||||
| }  // namespace rocksdb
 | ||||
| @ -0,0 +1,81 @@ | ||||
| //  Copyright (c) 2015, Facebook, Inc.  All rights reserved.
 | ||||
| //  This source code is licensed under the BSD-style license found in the
 | ||||
| //  LICENSE file in the root directory of this source tree. An additional grant
 | ||||
| //  of patent rights can be found in the PATENTS file in the same directory.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <map> | ||||
| #include <queue> | ||||
| #include <string> | ||||
| #include <thread> | ||||
| 
 | ||||
| #include "port/port.h" | ||||
| 
 | ||||
| #include "rocksdb/delete_scheduler.h" | ||||
| #include "rocksdb/status.h" | ||||
| 
 | ||||
| namespace rocksdb { | ||||
| 
 | ||||
| class Env; | ||||
| class Logger; | ||||
| 
 | ||||
| class DeleteSchedulerImpl : public DeleteScheduler { | ||||
|  public: | ||||
|   DeleteSchedulerImpl(Env* env, const std::string& trash_dir, | ||||
|                       int64_t rate_bytes_per_sec, | ||||
|                       std::shared_ptr<Logger> info_log); | ||||
| 
 | ||||
|   ~DeleteSchedulerImpl(); | ||||
| 
 | ||||
|   // Return delete rate limit in bytes per second
 | ||||
|   int64_t GetRateBytesPerSecond() { return rate_bytes_per_sec_; } | ||||
| 
 | ||||
|   // Move file to trash directory and schedule it's deletion
 | ||||
|   Status DeleteFile(const std::string& fname); | ||||
| 
 | ||||
|   // Wait for all files being deleteing in the background to finish or for
 | ||||
|   // destructor to be called.
 | ||||
|   void TEST_WaitForEmptyTrash(); | ||||
| 
 | ||||
|   // Return a map containing errors that happened in BackgroundEmptyTrash
 | ||||
|   // file_path => error status
 | ||||
|   std::map<std::string, Status> GetBackgroundErrors(); | ||||
| 
 | ||||
|  private: | ||||
|   Status MoveToTrash(const std::string& file_path, std::string* path_in_trash); | ||||
| 
 | ||||
|   Status DeleteTrashFile(const std::string& path_in_trash, | ||||
|                          uint64_t* deleted_bytes); | ||||
| 
 | ||||
|   void BackgroundEmptyTrash(); | ||||
| 
 | ||||
|   Env* env_; | ||||
|   // Path to the trash directory
 | ||||
|   std::string trash_dir_; | ||||
|   // Maximum number of bytes that should be deleted per second
 | ||||
|   int64_t rate_bytes_per_sec_; | ||||
|   // Mutex to protect queue_, pending_files_, bg_errors_, closing_
 | ||||
|   port::Mutex mu_; | ||||
|   // Queue of files in trash that need to be deleted
 | ||||
|   std::queue<std::string> queue_; | ||||
|   // Number of files in trash that are waiting to be deleted
 | ||||
|   int32_t pending_files_; | ||||
|   // Errors that happened in BackgroundEmptyTrash (file_path => error)
 | ||||
|   std::map<std::string, Status> bg_errors_; | ||||
|   // Set to true in ~DeleteSchedulerImpl() to force BackgroundEmptyTrash to stop
 | ||||
|   bool closing_; | ||||
|   // Condition variable signaled in these conditions
 | ||||
|   //    - pending_files_ value change from 0 => 1
 | ||||
|   //    - pending_files_ value change from 1 => 0
 | ||||
|   //    - closing_ value is set to true
 | ||||
|   port::CondVar cv_; | ||||
|   // Background thread running BackgroundEmptyTrash
 | ||||
|   std::unique_ptr<std::thread> bg_thread_; | ||||
|   // Mutex to protect threads from file name conflicts
 | ||||
|   port::Mutex file_move_mu_; | ||||
|   std::shared_ptr<Logger> info_log_; | ||||
|   static const uint64_t kMicrosInSecond = 1000 * 1000LL; | ||||
| }; | ||||
| 
 | ||||
| }  // namespace rocksdb
 | ||||
| @ -0,0 +1,439 @@ | ||||
| //  Copyright (c) 2015, Facebook, Inc.  All rights reserved.
 | ||||
| //  This source code is licensed under the BSD-style license found in the
 | ||||
| //  LICENSE file in the root directory of this source tree. An additional grant
 | ||||
| //  of patent rights can be found in the PATENTS file in the same directory.
 | ||||
| 
 | ||||
| #ifndef __STDC_FORMAT_MACROS | ||||
| #define __STDC_FORMAT_MACROS | ||||
| #endif | ||||
| 
 | ||||
| #include <inttypes.h> | ||||
| #include <atomic> | ||||
| #include <thread> | ||||
| #include <vector> | ||||
| 
 | ||||
| #include "rocksdb/delete_scheduler.h" | ||||
| #include "rocksdb/env.h" | ||||
| #include "rocksdb/options.h" | ||||
| #include "util/delete_scheduler_impl.h" | ||||
| #include "util/string_util.h" | ||||
| #include "util/sync_point.h" | ||||
| #include "util/testharness.h" | ||||
| 
 | ||||
| namespace rocksdb { | ||||
| 
 | ||||
| class DeleteSchedulerTest : public testing::Test { | ||||
|  public: | ||||
|   DeleteSchedulerTest() : env_(Env::Default()) { | ||||
|     dummy_files_dir_ = test::TmpDir(env_) + "/dummy_data_dir"; | ||||
|     DestroyAndCreateDir(dummy_files_dir_); | ||||
|     trash_dir_ = test::TmpDir(env_) + "/trash"; | ||||
|     DestroyAndCreateDir(trash_dir_); | ||||
|   } | ||||
| 
 | ||||
|   ~DeleteSchedulerTest() { | ||||
|     rocksdb::SyncPoint::GetInstance()->DisableProcessing(); | ||||
|     rocksdb::SyncPoint::GetInstance()->LoadDependency({}); | ||||
|     rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks(); | ||||
|     DestroyDir(dummy_files_dir_); | ||||
|     if (delete_scheduler_ != nullptr) { | ||||
|       delete delete_scheduler_; | ||||
|       delete_scheduler_ = nullptr; | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   void WaitForEmptyTrash() { | ||||
|     reinterpret_cast<DeleteSchedulerImpl*>(delete_scheduler_) | ||||
|         ->TEST_WaitForEmptyTrash(); | ||||
|   } | ||||
| 
 | ||||
|   void DestroyDir(const std::string& dir) { | ||||
|     if (env_->FileExists(dir).IsNotFound()) { | ||||
|       return; | ||||
|     } | ||||
|     std::vector<std::string> files_in_dir; | ||||
|     EXPECT_OK(env_->GetChildren(dir, &files_in_dir)); | ||||
|     for (auto& file_in_dir : files_in_dir) { | ||||
|       if (file_in_dir == "." || file_in_dir == "..") { | ||||
|         continue; | ||||
|       } | ||||
|       EXPECT_OK(env_->DeleteFile(dir + "/" + file_in_dir)); | ||||
|     } | ||||
|     EXPECT_OK(env_->DeleteDir(dir)); | ||||
|   } | ||||
| 
 | ||||
|   void DestroyAndCreateDir(const std::string& dir) { | ||||
|     DestroyDir(dir); | ||||
|     EXPECT_OK(env_->CreateDir(dir)); | ||||
|   } | ||||
| 
 | ||||
|   int CountFilesInDir(const std::string& dir) { | ||||
|     std::vector<std::string> files_in_dir; | ||||
|     EXPECT_OK(env_->GetChildren(dir, &files_in_dir)); | ||||
|     // Ignore "." and ".."
 | ||||
|     return static_cast<int>(files_in_dir.size()) - 2; | ||||
|   } | ||||
| 
 | ||||
|   std::string NewDummyFile(const std::string& file_name, uint64_t size = 1024) { | ||||
|     std::string file_path = dummy_files_dir_ + "/" + file_name; | ||||
|     std::unique_ptr<WritableFile> f; | ||||
|     env_->NewWritableFile(file_path, &f, EnvOptions()); | ||||
|     std::string data(size, 'A'); | ||||
|     EXPECT_OK(f->Append(data)); | ||||
|     EXPECT_OK(f->Close()); | ||||
|     return file_path; | ||||
|   } | ||||
| 
 | ||||
|   Env* env_; | ||||
|   std::string dummy_files_dir_; | ||||
|   std::string trash_dir_; | ||||
|   int64_t rate_bytes_per_sec_; | ||||
|   DeleteScheduler* delete_scheduler_; | ||||
| }; | ||||
| 
 | ||||
| // Test the basic functionality of DeleteScheduler (Rate Limiting).
 | ||||
| // 1- Create 100 dummy files
 | ||||
| // 2- Delete the 100 dummy files using DeleteScheduler
 | ||||
| // 3- Wait for DeleteScheduler to delete all files in trash
 | ||||
| // 4- Measure time spent in step 2,3 and make sure it matches the expected
 | ||||
| //    time from a rate limited delete
 | ||||
| // 5- Make sure that all created files were completely deleted
 | ||||
| TEST_F(DeleteSchedulerTest, BasicRateLimiting) { | ||||
|   int num_files = 100;  // 100 files
 | ||||
|   uint64_t file_size = 1024;  // every file is 1 kb
 | ||||
|   std::vector<uint64_t> delete_kbs_per_sec = {512, 200, 100, 50, 25}; | ||||
| 
 | ||||
|   for (size_t t = 0; t < delete_kbs_per_sec.size(); t++) { | ||||
|     DestroyAndCreateDir(dummy_files_dir_); | ||||
|     rate_bytes_per_sec_ = delete_kbs_per_sec[t] * 1024; | ||||
|     delete_scheduler_ = | ||||
|         NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_); | ||||
| 
 | ||||
|     // Create 100 dummy files, every file is 1 Kb
 | ||||
|     std::vector<std::string> generated_files; | ||||
|     uint64_t total_files_size = 0; | ||||
|     for (int i = 0; i < num_files; i++) { | ||||
|       std::string file_name = "file" + ToString(i) + ".data"; | ||||
|       generated_files.push_back(NewDummyFile(file_name, file_size)); | ||||
|       total_files_size += file_size; | ||||
|     } | ||||
| 
 | ||||
|     // Delete dummy files and measure time spent to empty trash
 | ||||
|     uint64_t delete_start_time = env_->NowMicros(); | ||||
|     for (int i = 0; i < num_files; i++) { | ||||
|       ASSERT_OK(delete_scheduler_->DeleteFile(generated_files[i])); | ||||
|     } | ||||
|     ASSERT_EQ(CountFilesInDir(dummy_files_dir_), 0); | ||||
| 
 | ||||
|     WaitForEmptyTrash(); | ||||
|     uint64_t time_spent_deleting = env_->NowMicros() - delete_start_time; | ||||
|     uint64_t expected_delete_time = | ||||
|         ((total_files_size * 1000000) / rate_bytes_per_sec_); | ||||
|     ASSERT_GT(time_spent_deleting, expected_delete_time * 0.9); | ||||
|     ASSERT_LT(time_spent_deleting, expected_delete_time * 1.1); | ||||
|     printf("Delete time = %" PRIu64 ", Expected delete time = %" PRIu64 | ||||
|            ", Ratio %f\n", | ||||
|            time_spent_deleting, expected_delete_time, | ||||
|            static_cast<double>(time_spent_deleting) / expected_delete_time); | ||||
| 
 | ||||
|     ASSERT_EQ(CountFilesInDir(trash_dir_), 0); | ||||
|     auto bg_errors = delete_scheduler_->GetBackgroundErrors(); | ||||
|     ASSERT_EQ(bg_errors.size(), 0); | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| // Same as the BasicRateLimiting test but delete files in multiple threads.
 | ||||
| // 1- Create 100 dummy files
 | ||||
| // 2- Delete the 100 dummy files using DeleteScheduler using 10 threads
 | ||||
| // 3- Wait for DeleteScheduler to delete all files in queue
 | ||||
| // 4- Measure time spent in step 2,3 and make sure it matches the expected
 | ||||
| //    time from a rate limited delete
 | ||||
| // 5- Make sure that all created files were completely deleted
 | ||||
| TEST_F(DeleteSchedulerTest, RateLimitingMultiThreaded) { | ||||
|   int thread_cnt = 10; | ||||
|   int num_files = 10;  // 10 files per thread
 | ||||
|   uint64_t file_size = 1024;  // every file is 1 kb
 | ||||
|   std::vector<uint64_t> delete_kbs_per_sec = {512, 200, 100, 50, 25}; | ||||
| 
 | ||||
|   for (size_t t = 0; t < delete_kbs_per_sec.size(); t++) { | ||||
|     DestroyAndCreateDir(dummy_files_dir_); | ||||
|     rate_bytes_per_sec_ = delete_kbs_per_sec[t] * 1024; | ||||
|     delete_scheduler_ = | ||||
|         NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_); | ||||
| 
 | ||||
|     // Create 100 dummy files, every file is 1 Kb
 | ||||
|     std::vector<std::string> generated_files; | ||||
|     uint64_t total_files_size = 0; | ||||
|     for (int i = 0; i < num_files * thread_cnt; i++) { | ||||
|       std::string file_name = "file" + ToString(i) + ".data"; | ||||
|       generated_files.push_back(NewDummyFile(file_name, file_size)); | ||||
|       total_files_size += file_size; | ||||
|     } | ||||
| 
 | ||||
|     // Delete dummy files using 10 threads and measure time spent to empty trash
 | ||||
|     uint64_t delete_start_time = env_->NowMicros(); | ||||
|     std::atomic<int> thread_num(0); | ||||
|     std::vector<std::thread> threads; | ||||
|     for (int i = 0; i < thread_cnt; i++) { | ||||
|       threads.emplace_back([&]() { | ||||
|         int idx = thread_num.fetch_add(1); | ||||
|         int range_start = idx * num_files; | ||||
|         int range_end = range_start + num_files; | ||||
|         for (int j = range_start; j < range_end; j++){ | ||||
|           ASSERT_OK(delete_scheduler_->DeleteFile(generated_files[j])); | ||||
|         } | ||||
|       }); | ||||
|     } | ||||
| 
 | ||||
|     for (size_t i = 0; i < threads.size(); i++) { | ||||
|       threads[i].join(); | ||||
|     } | ||||
|     ASSERT_EQ(CountFilesInDir(dummy_files_dir_), 0); | ||||
| 
 | ||||
|     WaitForEmptyTrash(); | ||||
|     uint64_t time_spent_deleting = env_->NowMicros() - delete_start_time; | ||||
|     uint64_t expected_delete_time = | ||||
|         ((total_files_size * 1000000) / rate_bytes_per_sec_); | ||||
|     ASSERT_GT(time_spent_deleting, expected_delete_time * 0.9); | ||||
|     ASSERT_LT(time_spent_deleting, expected_delete_time * 1.1); | ||||
|     printf("Delete time = %" PRIu64 ", Expected delete time = %" PRIu64 | ||||
|            ", Ratio %f\n", | ||||
|            time_spent_deleting, expected_delete_time, | ||||
|            static_cast<double>(time_spent_deleting) / expected_delete_time); | ||||
| 
 | ||||
|     ASSERT_EQ(CountFilesInDir(trash_dir_), 0); | ||||
|     auto bg_errors = delete_scheduler_->GetBackgroundErrors(); | ||||
|     ASSERT_EQ(bg_errors.size(), 0); | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| // Disable rate limiting by setting rate_bytes_per_sec_ to 0 and make sure
 | ||||
| // that when DeleteScheduler delete a file it delete it immediately and dont
 | ||||
| // move it to trash
 | ||||
| TEST_F(DeleteSchedulerTest, DisableRateLimiting) { | ||||
|   int bg_delete_file = 0; | ||||
|   rocksdb::SyncPoint::GetInstance()->SetCallBack( | ||||
|       "DeleteSchedulerImpl::DeleteTrashFile:DeleteFile", | ||||
|       [&](void* arg) { bg_delete_file++; }); | ||||
| 
 | ||||
|   rocksdb::SyncPoint::GetInstance()->EnableProcessing(); | ||||
| 
 | ||||
|   delete_scheduler_ = NewDeleteScheduler(env_, "", 0); | ||||
| 
 | ||||
|   for (int i = 0; i < 10; i++) { | ||||
|     // Every file we delete will be deleted immediately
 | ||||
|     std::string dummy_file = NewDummyFile("dummy.data"); | ||||
|     ASSERT_OK(delete_scheduler_->DeleteFile(dummy_file)); | ||||
|     ASSERT_TRUE(env_->FileExists(dummy_file).IsNotFound()); | ||||
|     ASSERT_EQ(CountFilesInDir(dummy_files_dir_), 0); | ||||
|     ASSERT_EQ(CountFilesInDir(trash_dir_), 0); | ||||
|   } | ||||
| 
 | ||||
|   ASSERT_EQ(bg_delete_file, 0); | ||||
| 
 | ||||
|   rocksdb::SyncPoint::GetInstance()->DisableProcessing(); | ||||
| } | ||||
| 
 | ||||
| // Testing that moving files to trash with the same name is not a problem
 | ||||
| // 1- Create 10 files with the same name "conflict.data"
 | ||||
| // 2- Delete the 10 files using DeleteScheduler
 | ||||
| // 3- Make sure that trash directory contain 10 files ("conflict.data" x 10)
 | ||||
| // --- Hold DeleteSchedulerImpl::BackgroundEmptyTrash ---
 | ||||
| // 4- Make sure that files are deleted from trash
 | ||||
| TEST_F(DeleteSchedulerTest, ConflictNames) { | ||||
|   rocksdb::SyncPoint::GetInstance()->LoadDependency({ | ||||
|       {"DeleteSchedulerTest::ConflictNames:1", | ||||
|        "DeleteSchedulerImpl::BackgroundEmptyTrash"}, | ||||
|   }); | ||||
|   rocksdb::SyncPoint::GetInstance()->EnableProcessing(); | ||||
| 
 | ||||
|   rate_bytes_per_sec_ = 1024 * 1024;  // 1 Mb/sec
 | ||||
|   delete_scheduler_ = NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_); | ||||
| 
 | ||||
|   // Create "conflict.data" and move it to trash 10 times
 | ||||
|   for (int i = 0; i < 10; i++) { | ||||
|     std::string dummy_file = NewDummyFile("conflict.data"); | ||||
|     ASSERT_OK(delete_scheduler_->DeleteFile(dummy_file)); | ||||
|   } | ||||
|   ASSERT_EQ(CountFilesInDir(dummy_files_dir_), 0); | ||||
|   // 10 files ("conflict.data" x 10) in trash
 | ||||
|   ASSERT_EQ(CountFilesInDir(trash_dir_), 10); | ||||
| 
 | ||||
|   // Hold BackgroundEmptyTrash
 | ||||
|   TEST_SYNC_POINT("DeleteSchedulerTest::ConflictNames:1"); | ||||
|   WaitForEmptyTrash(); | ||||
|   ASSERT_EQ(CountFilesInDir(trash_dir_), 0); | ||||
| 
 | ||||
|   auto bg_errors = delete_scheduler_->GetBackgroundErrors(); | ||||
|   ASSERT_EQ(bg_errors.size(), 0); | ||||
| 
 | ||||
|   rocksdb::SyncPoint::GetInstance()->DisableProcessing(); | ||||
| } | ||||
| 
 | ||||
| // 1- Create 10 dummy files
 | ||||
| // 2- Delete the 10 files using DeleteScheduler (move them to trsah)
 | ||||
| // 3- Delete the 10 files directly (using env_->DeleteFile)
 | ||||
| // --- Hold DeleteSchedulerImpl::BackgroundEmptyTrash ---
 | ||||
| // 4- Make sure that DeleteScheduler failed to delete the 10 files and
 | ||||
| //    reported 10 background errors
 | ||||
| TEST_F(DeleteSchedulerTest, BackgroundError) { | ||||
|   rocksdb::SyncPoint::GetInstance()->LoadDependency({ | ||||
|       {"DeleteSchedulerTest::BackgroundError:1", | ||||
|        "DeleteSchedulerImpl::BackgroundEmptyTrash"}, | ||||
|   }); | ||||
|   rocksdb::SyncPoint::GetInstance()->EnableProcessing(); | ||||
| 
 | ||||
|   rate_bytes_per_sec_ = 1024 * 1024;  // 1 Mb/sec
 | ||||
|   delete_scheduler_ = NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_); | ||||
| 
 | ||||
|   // Generate 10 dummy files and move them to trash
 | ||||
|   for (int i = 0; i < 10; i++) { | ||||
|     std::string file_name = "data_" + ToString(i) + ".data"; | ||||
|     ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile(file_name))); | ||||
|   } | ||||
|   ASSERT_EQ(CountFilesInDir(dummy_files_dir_), 0); | ||||
|   ASSERT_EQ(CountFilesInDir(trash_dir_), 10); | ||||
| 
 | ||||
|   // Delete 10 files from trash, this will cause background errors in
 | ||||
|   // BackgroundEmptyTrash since we already deleted the files it was
 | ||||
|   // goind to delete
 | ||||
|   for (int i = 0; i < 10; i++) { | ||||
|     std::string file_name = "data_" + ToString(i) + ".data"; | ||||
|     ASSERT_OK(env_->DeleteFile(trash_dir_ + "/" + file_name)); | ||||
|   } | ||||
| 
 | ||||
|   // Hold BackgroundEmptyTrash
 | ||||
|   TEST_SYNC_POINT("DeleteSchedulerTest::BackgroundError:1"); | ||||
|   WaitForEmptyTrash(); | ||||
|   auto bg_errors = delete_scheduler_->GetBackgroundErrors(); | ||||
|   ASSERT_EQ(bg_errors.size(), 10); | ||||
| 
 | ||||
|   rocksdb::SyncPoint::GetInstance()->DisableProcessing(); | ||||
| } | ||||
| 
 | ||||
| // 1- Create 10 files in trash
 | ||||
| // 2- Create a DeleteScheduler with delete_exisitng_trash = true
 | ||||
| // 3- Wait for DeleteScheduler to delete all files in queue
 | ||||
| // 4- Make sure that all files in trash directory were deleted
 | ||||
| TEST_F(DeleteSchedulerTest, TrashWithExistingFiles) { | ||||
|   std::vector<std::string> dummy_files; | ||||
|   for (int i = 0; i < 10; i++) { | ||||
|     std::string file_name = "data_" + ToString(i) + ".data"; | ||||
|     std::string trash_path = trash_dir_ + "/" + file_name; | ||||
|     env_->RenameFile(NewDummyFile(file_name), trash_path); | ||||
|   } | ||||
|   ASSERT_EQ(CountFilesInDir(trash_dir_), 10); | ||||
| 
 | ||||
|   Status s; | ||||
|   rate_bytes_per_sec_ = 1024 * 1024;  // 1 Mb/sec
 | ||||
|   delete_scheduler_ = NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_, | ||||
|                                          nullptr, true, &s); | ||||
|   ASSERT_OK(s); | ||||
| 
 | ||||
|   WaitForEmptyTrash(); | ||||
|   ASSERT_EQ(CountFilesInDir(trash_dir_), 0); | ||||
| 
 | ||||
|   auto bg_errors = delete_scheduler_->GetBackgroundErrors(); | ||||
|   ASSERT_EQ(bg_errors.size(), 0); | ||||
| } | ||||
| 
 | ||||
| // 1- Create 10 dummy files
 | ||||
| // 2- Delete 10 dummy files using DeleteScheduler
 | ||||
| // 3- Wait for DeleteScheduler to delete all files in queue
 | ||||
| // 4- Make sure all files in trash directory were deleted
 | ||||
| // 5- Repeat previous steps 5 times
 | ||||
| TEST_F(DeleteSchedulerTest, StartBGEmptyTrashMultipleTimes) { | ||||
|   int bg_delete_file = 0; | ||||
|   rocksdb::SyncPoint::GetInstance()->SetCallBack( | ||||
|       "DeleteSchedulerImpl::DeleteTrashFile:DeleteFile", | ||||
|       [&](void* arg) { bg_delete_file++; }); | ||||
|   rocksdb::SyncPoint::GetInstance()->EnableProcessing(); | ||||
| 
 | ||||
|   rate_bytes_per_sec_ = 1024 * 1024;  // 1 MB / sec
 | ||||
|   delete_scheduler_ = NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_); | ||||
| 
 | ||||
|   // Move files to trash, wait for empty trash, start again
 | ||||
|   for (int run = 1; run <= 5; run++) { | ||||
|     // Generate 10 dummy files and move them to trash
 | ||||
|     for (int i = 0; i < 10; i++) { | ||||
|       std::string file_name = "data_" + ToString(i) + ".data"; | ||||
|       ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile(file_name))); | ||||
|     } | ||||
|     ASSERT_EQ(CountFilesInDir(dummy_files_dir_), 0); | ||||
|     WaitForEmptyTrash(); | ||||
|     ASSERT_EQ(bg_delete_file, 10 * run); | ||||
|     ASSERT_EQ(CountFilesInDir(trash_dir_), 0); | ||||
| 
 | ||||
|     auto bg_errors = delete_scheduler_->GetBackgroundErrors(); | ||||
|     ASSERT_EQ(bg_errors.size(), 0); | ||||
|   } | ||||
| 
 | ||||
|   ASSERT_EQ(bg_delete_file, 50); | ||||
|   rocksdb::SyncPoint::GetInstance()->EnableProcessing(); | ||||
| } | ||||
| 
 | ||||
| // 1- Create a DeleteScheduler with very slow rate limit (1 Byte / sec)
 | ||||
| // 2- Delete 100 files using DeleteScheduler
 | ||||
| // 3- Delete the DeleteScheduler (call the destructor while queue is not empty)
 | ||||
| // 4- Make sure that not all files were deleted from trash and that
 | ||||
| //    DeleteScheduler background thread did not delete all files
 | ||||
| TEST_F(DeleteSchedulerTest, DestructorWithNonEmptyQueue) { | ||||
|   int bg_delete_file = 0; | ||||
|   rocksdb::SyncPoint::GetInstance()->SetCallBack( | ||||
|       "DeleteSchedulerImpl::DeleteTrashFile:DeleteFile", | ||||
|       [&](void* arg) { bg_delete_file++; }); | ||||
|   rocksdb::SyncPoint::GetInstance()->EnableProcessing(); | ||||
| 
 | ||||
|   rate_bytes_per_sec_ = 1;  // 1 Byte / sec
 | ||||
|   delete_scheduler_ = NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_); | ||||
| 
 | ||||
|   for (int i = 0; i < 100; i++) { | ||||
|     std::string file_name = "data_" + ToString(i) + ".data"; | ||||
|     ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile(file_name))); | ||||
|   } | ||||
| 
 | ||||
|   // Deleting 100 files will need >28 hours to delete
 | ||||
|   // we will delete the DeleteScheduler while delete queue is not empty
 | ||||
|   delete delete_scheduler_; | ||||
|   delete_scheduler_ = nullptr; | ||||
| 
 | ||||
|   ASSERT_LT(bg_delete_file, 100); | ||||
|   ASSERT_GT(CountFilesInDir(trash_dir_), 0); | ||||
| 
 | ||||
|   rocksdb::SyncPoint::GetInstance()->DisableProcessing(); | ||||
| } | ||||
| 
 | ||||
| // 1- Delete the trash directory
 | ||||
| // 2- Delete 10 files using DeleteScheduler
 | ||||
| // 3- Make sure that the 10 files were deleted immediately since DeleteScheduler
 | ||||
| //    failed to move them to trash directory
 | ||||
| TEST_F(DeleteSchedulerTest, MoveToTrashError) { | ||||
|   int bg_delete_file = 0; | ||||
|   rocksdb::SyncPoint::GetInstance()->SetCallBack( | ||||
|       "DeleteSchedulerImpl::DeleteTrashFile:DeleteFile", | ||||
|       [&](void* arg) { bg_delete_file++; }); | ||||
|   rocksdb::SyncPoint::GetInstance()->EnableProcessing(); | ||||
| 
 | ||||
|   rate_bytes_per_sec_ = 1024;  // 1 Kb / sec
 | ||||
|   delete_scheduler_ = NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_); | ||||
| 
 | ||||
|   // We will delete the trash directory, that mean that DeleteScheduler wont
 | ||||
|   // be able to move files to trash and will delete files them immediately.
 | ||||
|   DestroyDir(trash_dir_); | ||||
|   for (int i = 0; i < 10; i++) { | ||||
|     std::string file_name = "data_" + ToString(i) + ".data"; | ||||
|     ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile(file_name))); | ||||
|   } | ||||
| 
 | ||||
|   ASSERT_EQ(CountFilesInDir(dummy_files_dir_), 0); | ||||
|   ASSERT_EQ(bg_delete_file, 0); | ||||
| 
 | ||||
|   rocksdb::SyncPoint::GetInstance()->DisableProcessing(); | ||||
| } | ||||
| 
 | ||||
| }  // namespace rocksdb
 | ||||
| 
 | ||||
| int main(int argc, char** argv) { | ||||
|   ::testing::InitGoogleTest(&argc, argv); | ||||
|   return RUN_ALL_TESTS(); | ||||
| } | ||||
					Loading…
					
					
				
		Reference in new issue
	
	 Islam AbdelRahman
						Islam AbdelRahman