From aa73538f2a8cddb717ea4a1d638d6cbda36bca7e Mon Sep 17 00:00:00 2001 From: Dhruba Borthakur Date: Tue, 16 Oct 2012 08:53:46 -0700 Subject: [PATCH] The deletion of obsolete files should not occur very frequently. Summary: The method DeleteObsoleteFiles is a very costly method, especially when the number of files in a system is large. It makes a list of all live-files and then scans the directory to compute the diff. By default, this method is executed after every compaction run. This patch makes it such that DeleteObsoleteFiles is never invoked twice within a configured period. Test Plan: run all unit tests Reviewers: heyongqiang, MarkCallaghan Reviewed By: MarkCallaghan Differential Revision: https://reviews.facebook.net/D6045 --- db/db_bench.cc | 10 ++++++++++ db/db_impl.cc | 15 ++++++++++++++- db/db_impl.h | 3 +++ include/leveldb/options.h | 5 +++++ util/options.cc | 3 ++- 5 files changed, 34 insertions(+), 2 deletions(-) diff --git a/db/db_bench.cc b/db/db_bench.cc index f8765638c..623c72ecb 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -167,6 +167,11 @@ static int FLAGS_readwritepercent = 90; // Option to disable compation triggered by read. static int FLAGS_disable_seek_compaction = false; +// Option to delete obsolete files periodically +// Default: 0 which means that obsolete files are +// deleted after every compaction run. 
+static uint64_t FLAGS_delete_obsolete_files_period_micros = 0; + // Algorithm to use to compress the database static enum leveldb::CompressionType FLAGS_compression_type = leveldb::kSnappyCompression; @@ -886,6 +891,8 @@ class Benchmark { FLAGS_level0_slowdown_writes_trigger; options.compression = FLAGS_compression_type; options.disable_seek_compaction = FLAGS_disable_seek_compaction; + options.delete_obsolete_files_period_micros = + FLAGS_delete_obsolete_files_period_micros; Status s = DB::Open(options, FLAGS_db, &db_); if (!s.ok()) { fprintf(stderr, "open error: %s\n", s.ToString().c_str()); @@ -1290,6 +1297,9 @@ int main(int argc, char** argv) { } else if (sscanf(argv[i], "--disable_seek_compaction=%d%c", &n, &junk) == 1 && (n == 0 || n == 1)) { FLAGS_disable_seek_compaction = n; + } else if (sscanf(argv[i], "--delete_obsolete_files_period_micros=%ld%c", + &l, &junk) == 1) { + FLAGS_delete_obsolete_files_period_micros = l; } else if (sscanf(argv[i], "--stats_interval=%d%c", &n, &junk) == 1 && n >= 0 && n < 2000000000) { FLAGS_stats_interval = n; diff --git a/db/db_impl.cc b/db/db_impl.cc index 2ed749909..07ff09688 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -143,7 +143,8 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname) bg_logstats_scheduled_(false), manual_compaction_(NULL), logger_(NULL), - disable_delete_obsolete_files_(false) { + disable_delete_obsolete_files_(false), + delete_obsolete_files_last_run_(0) { mem_->Ref(); has_imm_.Release_Store(NULL); @@ -253,6 +254,18 @@ void DBImpl::DeleteObsoleteFiles() { return; } + // This method is costly when the number of files is large. + // Do not allow it to trigger more often than once in + // delete_obsolete_files_period_micros. 
+ if (options_.delete_obsolete_files_period_micros != 0) { + const uint64_t now_micros = env_->NowMicros(); + if (delete_obsolete_files_last_run_ + + options_.delete_obsolete_files_period_micros > now_micros) { + return; + } + delete_obsolete_files_last_run_ = now_micros; + } + // Make a set of all of the live files std::set live = pending_outputs_; versions_->AddLiveFiles(&live); diff --git a/db/db_impl.h b/db/db_impl.h index eefb77deb..e08025eca 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -197,6 +197,9 @@ class DBImpl : public DB { // shall we disable deletion of obsolete files bool disable_delete_obsolete_files_; + // last time when DeleteObsoleteFiles was invoked + uint64_t delete_obsolete_files_last_run_; + // Per level compaction stats. stats_[level] stores the stats for // compactions that produced data for the specified "level". struct CompactionStats { diff --git a/include/leveldb/options.h b/include/leveldb/options.h index 95a22e6b5..da26cd440 100644 --- a/include/leveldb/options.h +++ b/include/leveldb/options.h @@ -240,6 +240,11 @@ struct Options { // (which is true if max_open_files is large). bool disable_seek_compaction; + // The periodicity when obsolete files get deleted. The default + // value is 0 which means that obsolete files get removed after + // every compaction run. + uint64_t delete_obsolete_files_period_micros; + // Create an Options object with default values for all fields. Options(); diff --git a/util/options.cc b/util/options.cc index f8c02df09..8078b4db1 100644 --- a/util/options.cc +++ b/util/options.cc @@ -41,7 +41,8 @@ Options::Options() use_fsync(false), db_stats_log_interval(1800), db_log_dir(""), - disable_seek_compaction(false) { + disable_seek_compaction(false), + delete_obsolete_files_period_micros(0) { } void