From 9bc4a26f56f4454432c02622d8449747ba90906c Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Tue, 12 Nov 2013 11:53:26 -0800 Subject: [PATCH] Small changes in Deleting obsolete files Summary: @haobo's suggestions from https://reviews.facebook.net/D13827 Renaming some variables, deprecating purge_log_after_flush, changing for loop into auto for loop. I have not implemented deleting objects outside of mutex yet because it would require a big code change - we would delete object in db_impl, which currently does not know anything about object because it's defined in version_edit.h (FileMetaData). We should do it at some point, though. Test Plan: Ran deletefile_test Reviewers: haobo Reviewed By: haobo CC: leveldb, haobo Differential Revision: https://reviews.facebook.net/D14025 --- db/db_impl.cc | 61 ++++++++++++++++++++------------------- db/db_impl.h | 17 ++++++----- db/version_set.cc | 19 +++++------- db/version_set.h | 2 +- include/rocksdb/options.h | 5 ---- util/options.cc | 3 -- 6 files changed, 51 insertions(+), 56 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 502d1a0d1..22a79efa0 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -435,8 +435,8 @@ void DBImpl::MaybeDumpStats() { } } -// Returns the list of live files in 'sstlive' and the list -// of all files in the filesystem in 'allfiles'. +// Returns the list of live files in 'sst_live' and the list +// of all files in the filesystem in 'all_files'. void DBImpl::FindObsoleteFiles(DeletionState& deletion_state, bool force) { mutex_.AssertHeld(); @@ -464,19 +464,19 @@ void DBImpl::FindObsoleteFiles(DeletionState& deletion_state, bool force) { // Make a list of all of the live files; set is slow, should not // be used. - deletion_state.sstlive.assign(pending_outputs_.begin(), - pending_outputs_.end()); - versions_->AddLiveFiles(&deletion_state.sstlive); + deletion_state.sst_live.assign(pending_outputs_.begin(), + pending_outputs_.end()); + versions_->AddLiveFiles(&deletion_state.sst_live); // set of all files in the directory - env_->GetChildren(dbname_, &deletion_state.allfiles); // Ignore errors + env_->GetChildren(dbname_, &deletion_state.all_files); // Ignore errors //Add log files in wal_dir if (options_.wal_dir != dbname_) { std::vector log_files; env_->GetChildren(options_.wal_dir, &log_files); // Ignore errors - deletion_state.allfiles.insert( - deletion_state.allfiles.end(), + deletion_state.all_files.insert( + deletion_state.all_files.end(), log_files.begin(), log_files.end() ); @@ -485,7 +485,7 @@ void DBImpl::FindObsoleteFiles(DeletionState& deletion_state, bool force) { // Diffs the files listed in filenames and those that do not // belong to live files are posibly removed. Also, removes all the -// files in sstdeletefiles and logdeletefiles. +// files in sst_delete_files and log_delete_files. // It is not necessary to hold the mutex when invoking this method. void DBImpl::PurgeObsoleteFiles(DeletionState& state) { // if deletion is disabled, do nothing @@ -499,23 +499,26 @@ void DBImpl::PurgeObsoleteFiles(DeletionState& state) { // Now, convert live list to an unordered set, WITHOUT mutex held; // set is slow. - std::unordered_set live_set(state.sstlive.begin(), - state.sstlive.end()); + std::unordered_set live_set(state.sst_live.begin(), + state.sst_live.end()); - state.allfiles.reserve(state.allfiles.size() + state.sstdeletefiles.size()); - for (auto filenum : state.sstdeletefiles) { - state.allfiles.push_back(TableFileName("", filenum)); + state.all_files.reserve(state.all_files.size() + + state.sst_delete_files.size()); + for (auto file : state.sst_delete_files) { + state.all_files.push_back(TableFileName("", file->number)); + delete file; } - state.allfiles.reserve(state.allfiles.size() + state.logdeletefiles.size()); - for (auto filenum : state.logdeletefiles) { + state.all_files.reserve(state.all_files.size() + + state.log_delete_files.size()); + for (auto filenum : state.log_delete_files) { if (filenum > 0) { - state.allfiles.push_back(LogFileName("", filenum)); + state.all_files.push_back(LogFileName("", filenum)); } } - for (size_t i = 0; i < state.allfiles.size(); i++) { - if (ParseFileName(state.allfiles[i], &number, &type)) { + for (size_t i = 0; i < state.all_files.size(); i++) { + if (ParseFileName(state.all_files[i], &number, &type)) { bool keep = true; switch (type) { case kLogFile: @@ -538,7 +541,7 @@ void DBImpl::PurgeObsoleteFiles(DeletionState& state) { case kInfoLogFile: keep = true; if (number != 0) { - old_log_files.push_back(state.allfiles[i]); + old_log_files.push_back(state.all_files[i]); } break; case kCurrentFile: @@ -559,14 +562,14 @@ void DBImpl::PurgeObsoleteFiles(DeletionState& state) { Status st; if (type == kLogFile && (options_.WAL_ttl_seconds > 0 || options_.WAL_size_limit_MB > 0)) { - st = env_->RenameFile(dbname_ + "/" + state.allfiles[i], + st = env_->RenameFile(dbname_ + "/" + state.all_files[i], ArchivedLogFileName(options_.wal_dir, number)); if (!st.ok()) { Log(options_.info_log, "RenameFile logfile #%lu FAILED", number); } } else { - st = env_->DeleteFile(dbname_ + "/" + state.allfiles[i]); + st = env_->DeleteFile(dbname_ + "/" + state.all_files[i]); if (!st.ok()) { Log(options_.info_log, "Delete type=%d #%lu FAILED\n", int(type), number); @@ -1132,12 +1135,12 @@ Status DBImpl::FlushMemTableToOutputFile(bool* madeProgress, MaybeScheduleLogDBDeployStats(); - if (options_.purge_log_after_memtable_flush && - !disable_delete_obsolete_files_) { + if (!disable_delete_obsolete_files_) { // add to deletion state - deletion_state.logdeletefiles.insert(deletion_state.logdeletefiles.end(), - logs_to_delete.begin(), - logs_to_delete.end()); + deletion_state.log_delete_files.insert( + deletion_state.log_delete_files.end(), + logs_to_delete.begin(), + logs_to_delete.end()); } } return s; @@ -1776,7 +1779,7 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress, CleanupCompaction(compact, status); versions_->ReleaseCompactionFiles(c.get(), status); c->ReleaseInputs(); - versions_->GetAndFreeObsoleteFiles(&deletion_state.sstdeletefiles); + versions_->GetObsoleteFiles(&deletion_state.sst_delete_files); *madeProgress = true; } c.reset(); @@ -3366,7 +3369,7 @@ Status DBImpl::DeleteFile(std::string name) { edit.DeleteFile(level, number); status = versions_->LogAndApply(&edit, &mutex_); if (status.ok()) { - versions_->GetAndFreeObsoleteFiles(&deletion_state.sstdeletefiles); + versions_->GetObsoleteFiles(&deletion_state.sst_delete_files); } FindObsoleteFiles(deletion_state, false); } // lock released here diff --git a/db/db_impl.h b/db/db_impl.h index f1559f21c..3daf2e348 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -14,6 +14,7 @@ #include "db/dbformat.h" #include "db/log_writer.h" #include "db/snapshot.h" +#include "db/version_edit.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/memtablerep.h" @@ -159,22 +160,24 @@ class DBImpl : public DB { struct DeletionState { inline bool HaveSomethingToDelete() const { - return allfiles.size() || sstdeletefiles.size() || logdeletefiles.size(); + return all_files.size() || + sst_delete_files.size() || + log_delete_files.size(); } // a list of all files that we'll consider deleting // (every once in a while this is filled up with all files // in the DB directory) - std::vector allfiles; + std::vector all_files; // the list of all live sst files that cannot be deleted - std::vector sstlive; + std::vector sst_live; // a list of sst files that we need to delete - std::vector sstdeletefiles; + std::vector sst_delete_files; // a list of log files that we need to delete - std::vector logdeletefiles; + std::vector log_delete_files; // the current manifest_file_number, log_number and prev_log_number // that corresponds to the set of files in 'live'. @@ -241,7 +244,7 @@ class DBImpl : public DB { void ReleaseCompactionUnusedFileNumbers(CompactionState* compact); // Returns the list of live files in 'live' and the list - // of all files in the filesystem in 'allfiles'. + // of all files in the filesystem in 'all_files'. // If force == false and the last call was less than // options_.delete_obsolete_files_period_micros microseconds ago, // it will not fill up the deletion_state @@ -249,7 +252,7 @@ class DBImpl : public DB { // Diffs the files listed in filenames and those that do not // belong to live files are posibly removed. Also, removes all the - // files in sstdeletefiles and logdeletefiles. + // files in sst_delete_files and log_delete_files. // It is not necessary to hold the mutex when invoking this method. void PurgeObsoleteFiles(DeletionState& deletion_state); diff --git a/db/version_set.cc b/db/version_set.cc index 391bbb0ad..a566a073c 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1161,7 +1161,10 @@ VersionSet::VersionSet(const std::string& dbname, VersionSet::~VersionSet() { current_->Unref(); assert(dummy_versions_.next_ == &dummy_versions_); // List must be empty - GetAndFreeObsoleteFiles(nullptr); + for (auto file : obsolete_files_) { + delete file; + } + obsolete_files_.clear(); delete[] compact_pointer_; delete[] max_file_size_; delete[] level_max_bytes_; @@ -2861,16 +2864,10 @@ void VersionSet::GetLiveFilesMetaData( } } -void VersionSet::GetAndFreeObsoleteFiles(std::vector* files) { - if (files != nullptr) { - files->reserve(files->size() + obsolete_files_.size()); - } - for (size_t i = 0; i < obsolete_files_.size(); i++) { - if (files != nullptr) { - files->push_back(obsolete_files_[i]->number); - } - delete obsolete_files_[i]; - } +void VersionSet::GetObsoleteFiles(std::vector* files) { + files->insert(files->end(), + obsolete_files_.begin(), + obsolete_files_.end()); obsolete_files_.clear(); } diff --git a/db/version_set.h b/db/version_set.h index 541bd8122..38415173c 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -431,7 +431,7 @@ class VersionSet { void GetLiveFilesMetaData( std::vector *metadata); - void GetAndFreeObsoleteFiles(std::vector* files); + void GetObsoleteFiles(std::vector* files); private: class Builder; diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index f2415fa45..5aca34065 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -609,11 +609,6 @@ struct Options { // Default: a factory that doesn't provide any object std::shared_ptr compaction_filter_factory; - // Remove the log file immediately after the corresponding memtable is flushed - // to data file. - // Default: true - bool purge_log_after_memtable_flush; - // This option allows user to to collect their own interested statistics of // the tables. // Default: emtpy vector -- no user-defined statistics collection will be diff --git a/util/options.cc b/util/options.cc index b54320391..eae03237a 100644 --- a/util/options.cc +++ b/util/options.cc @@ -101,7 +101,6 @@ Options::Options() compaction_filter_factory( std::shared_ptr( new DefaultCompactionFilterFactory())), - purge_log_after_memtable_flush(true), inplace_update_support(false), inplace_update_num_locks(10000) { assert(memtable_factory.get() != nullptr); @@ -280,8 +279,6 @@ Options::Dump(Logger* log) const Log(log,"Options.compaction_options_universal." "max_size_amplification_percent: %u", compaction_options_universal.max_size_amplification_percent); - Log(log," Options.purge_log_after_memtable_flush: %d", - purge_log_after_memtable_flush); std::string collector_names; for (auto collector : table_stats_collectors) { collector_names.append(collector->Name());