From 5010daa7a8d552f46d88a5188ab20e62a5a8ea5e Mon Sep 17 00:00:00 2001 From: heyongqiang Date: Fri, 19 Oct 2012 16:41:34 -0700 Subject: [PATCH 1/2] add "seek_compaction" to log for better debug Summary: Summary: as subject Test Plan: compile Reviewers: dhruba Reviewed By: dhruba CC: MarkCallaghan Differential Revision: https://reviews.facebook.net/D6117 --- db/version_set.cc | 16 ++++++++++------ db/version_set.h | 5 ++++- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/db/version_set.cc b/db/version_set.cc index b00dc7737..f11d62321 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1410,7 +1410,7 @@ Compaction* VersionSet::PickCompaction() { } else if (seek_compaction) { level = current_->file_to_compact_level_; c = new Compaction(level, MaxFileSizeForLevel(level), - MaxGrandParentOverlapBytes(level), NumberLevels()); + MaxGrandParentOverlapBytes(level), NumberLevels(), true); c->inputs_[0].push_back(current_->file_to_compact_); } else { return NULL; @@ -1526,7 +1526,7 @@ Compaction* VersionSet::CompactRange( } Compaction* c = new Compaction(level, limit, - MaxGrandParentOverlapBytes(level), NumberLevels()); + MaxGrandParentOverlapBytes(level), NumberLevels()); c->input_version_ = current_; c->input_version_->Ref(); c->inputs_[0] = inputs; @@ -1535,17 +1535,19 @@ Compaction* VersionSet::CompactRange( } Compaction::Compaction(int level, uint64_t target_file_size, - uint64_t max_grandparent_overlap_bytes, int number_levels) + uint64_t max_grandparent_overlap_bytes, int number_levels, + bool seek_compaction) : level_(level), max_output_file_size_(target_file_size), maxGrandParentOverlapBytes_(max_grandparent_overlap_bytes), input_version_(NULL), number_levels_(number_levels), + seek_compaction_(seek_compaction), grandparent_index_(0), seen_key_(false), overlapped_bytes_(0) { - edit_ = new VersionEdit(number_levels_); - level_ptrs_ = new size_t[number_levels_]; + edit_ = new VersionEdit(number_levels_); + level_ptrs_ = new size_t[number_levels_]; for (int i = 0; i < number_levels_; i++) { level_ptrs_[i] = 0; } @@ -1642,7 +1644,9 @@ static void InputSummary(std::vector& files, } void Compaction::Summary(char* output, int len) { - int write = snprintf(output, len, "Base level %d, inputs:", level_); + int write = snprintf(output, len, + "Base level %d, seek compaction:%d, inputs:", + level_, seek_compaction_); if(write < 0 || write > len) return; diff --git a/db/version_set.h b/db/version_set.h index c9aa2a5d4..5295bd058 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -376,7 +376,8 @@ class Compaction { friend class VersionSet; explicit Compaction(int level, uint64_t target_file_size, - uint64_t max_grandparent_overlap_bytes, int number_levels); + uint64_t max_grandparent_overlap_bytes, int number_levels, + bool seek_compaction = false); int level_; uint64_t max_output_file_size_; @@ -385,6 +386,8 @@ class Compaction { VersionEdit* edit_; int number_levels_; + bool seek_compaction_; + // Each compaction reads inputs from "level_" and "level_+1" std::vector inputs_[2]; // The two sets of inputs From 4c107587ed47af84633f8c61f65516a504d6cd98 Mon Sep 17 00:00:00 2001 From: Dhruba Borthakur Date: Sun, 21 Oct 2012 01:49:48 -0700 Subject: [PATCH 2/2] Delete files outside the mutex. Summary: The compaction process deletes a large number of files. This takes quite a bit of time and is best done outside the mutex lock. Test Plan: make check Differential Revision: https://reviews.facebook.net/D6123 --- db/db_impl.cc | 101 ++++++++++++++++++++++++++++++++++++++++---------- db/db_impl.h | 15 +++++++- 2 files changed, 96 insertions(+), 20 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 07ff09688..ae34c36eb 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -84,6 +84,22 @@ struct DBImpl::CompactionState { } }; +struct DBImpl::DeletionState { + + // the set of all live files that cannot be deleted + std::set live; + + // a list of all siles that exists in the db directory + std::vector allfiles; + + // the current filenumber, lognumber and prevlognumber + // that corresponds to the set of files in 'live'. + uint64_t filenumber, lognumber, prevlognumber; + + // the list of all files to be evicted from the table cahce + std::vector files_to_evict; +}; + // Fix user-supplied options to be reasonable template static void ClipToRange(T* ptr, V minvalue, V maxvalue) { @@ -248,7 +264,11 @@ void DBImpl::MaybeIgnoreError(Status* s) const { } } -void DBImpl::DeleteObsoleteFiles() { +// Returns the list of live files in 'live' and the list +// of all files in the filesystem in 'allfiles'. +void DBImpl::FindObsoleteFiles(DeletionState& deletion_state) { + mutex_.AssertHeld(); + // if deletion is disabled, do nothing if (disable_delete_obsolete_files_) { return; @@ -267,39 +287,51 @@ void DBImpl::DeleteObsoleteFiles() { } // Make a set of all of the live files - std::set live = pending_outputs_; - versions_->AddLiveFiles(&live); + deletion_state.live = pending_outputs_; + versions_->AddLiveFiles(&deletion_state.live); - std::vector filenames; - env_->GetChildren(dbname_, &filenames); // Ignoring errors on purpose + // set of all files in the directory + env_->GetChildren(dbname_, &deletion_state.allfiles); // Ignore errors + + // store the current filenum, lognum, etc + deletion_state.filenumber = versions_->ManifestFileNumber(); + deletion_state.lognumber = versions_->LogNumber(); + deletion_state.prevlognumber = versions_->PrevLogNumber(); +} + +// Diffs the files listed in filenames and those that do not +// belong to live files are posibly removed. If the removed file +// is a sst file, then it returns the file number in files_to_evict. +// It is not necesary to hold the mutex when invoking this method. +void DBImpl::PurgeObsoleteFiles(DeletionState& state) { uint64_t number; FileType type; std::vector old_log_files; - for (size_t i = 0; i < filenames.size(); i++) { - if (ParseFileName(filenames[i], &number, &type)) { + for (size_t i = 0; i < state.allfiles.size(); i++) { + if (ParseFileName(state.allfiles[i], &number, &type)) { bool keep = true; switch (type) { case kLogFile: - keep = ((number >= versions_->LogNumber()) || - (number == versions_->PrevLogNumber())); + keep = ((number >= state.lognumber) || + (number == state.prevlognumber)); break; case kDescriptorFile: // Keep my manifest file, and any newer incarnations' // (in case there is a race that allows other incarnations) - keep = (number >= versions_->ManifestFileNumber()); + keep = (number >= state.filenumber); break; case kTableFile: - keep = (live.find(number) != live.end()); + keep = (state.live.find(number) != state.live.end()); break; case kTempFile: // Any temp files that are currently being written to must // be recorded in pending_outputs_, which is inserted into "live" - keep = (live.find(number) != live.end()); + keep = (state.live.find(number) != state.live.end()); break; case kInfoLogFile: keep = true; if (number != 0) { - old_log_files.push_back(filenames[i]); + old_log_files.push_back(state.allfiles[i]); } break; case kCurrentFile: @@ -310,12 +342,13 @@ void DBImpl::DeleteObsoleteFiles() { if (!keep) { if (type == kTableFile) { - table_cache_->Evict(number); + // record the files to be evicted from the cache + state.files_to_evict.push_back(number); } Log(options_.info_log, "Delete type=%d #%lld\n", int(type), static_cast(number)); - Status st = env_->DeleteFile(dbname_ + "/" + filenames[i]); + Status st = env_->DeleteFile(dbname_ + "/" + state.allfiles[i]); if(!st.ok()) { Log(options_.info_log, "Delete type=%d #%lld FAILED\n", int(type), @@ -339,6 +372,25 @@ void DBImpl::DeleteObsoleteFiles() { } } +void DBImpl::EvictObsoleteFiles(DeletionState& state) { + mutex_.AssertHeld(); + for (unsigned int i = 0; i < state.files_to_evict.size(); i++) { + table_cache_->Evict(state.files_to_evict[i]); + } +} + +void DBImpl::DeleteObsoleteFiles() { + mutex_.AssertHeld(); + DeletionState deletion_state; + std::set live; + std::vector allfiles; + std::vector files_to_evict; + uint64_t filenumber, lognumber, prevlognumber; + FindObsoleteFiles(deletion_state); + PurgeObsoleteFiles(deletion_state); + EvictObsoleteFiles(deletion_state); +} + Status DBImpl::Recover(VersionEdit* edit) { mutex_.AssertHeld(); @@ -587,8 +639,10 @@ Status DBImpl::CompactMemTable() { imm_->Unref(); imm_ = NULL; has_imm_.Release_Store(NULL); - DeleteObsoleteFiles(); MaybeScheduleLogDBDeployStats(); + // we could have deleted obsolete files here, but it is not + // absolutely necessary because it could be also done as part + // of other background compaction } return s; @@ -723,10 +777,11 @@ void DBImpl::BGWork(void* db) { } void DBImpl::BackgroundCall() { + DeletionState deletion_state; MutexLock l(&mutex_); assert(bg_compaction_scheduled_); if (!shutting_down_.Acquire_Load()) { - Status s = BackgroundCompaction(); + Status s = BackgroundCompaction(deletion_state); if (!s.ok()) { // Wait a little bit before retrying background compaction in // case this is an environmental problem and we do not want to @@ -741,6 +796,14 @@ void DBImpl::BackgroundCall() { } } + // delete unnecessary files if any, this is done outside the mutex + if (!deletion_state.live.empty()) { + mutex_.Unlock(); + PurgeObsoleteFiles(deletion_state); + mutex_.Lock(); + EvictObsoleteFiles(deletion_state); + } + bg_compaction_scheduled_ = false; MaybeScheduleLogDBDeployStats(); @@ -751,7 +814,7 @@ void DBImpl::BackgroundCall() { bg_cv_.SignalAll(); } -Status DBImpl::BackgroundCompaction() { +Status DBImpl::BackgroundCompaction(DeletionState& deletion_state) { mutex_.AssertHeld(); if (imm_ != NULL) { @@ -801,7 +864,7 @@ Status DBImpl::BackgroundCompaction() { status = DoCompactionWork(compact); CleanupCompaction(compact); c->ReleaseInputs(); - DeleteObsoleteFiles(); + FindObsoleteFiles(deletion_state); } delete c; diff --git a/db/db_impl.h b/db/db_impl.h index e08025eca..1d480ecde 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -81,6 +81,7 @@ class DBImpl : public DB { friend class DB; struct CompactionState; struct Writer; + struct DeletionState; Iterator* NewInternalIterator(const ReadOptions&, SequenceNumber* latest_snapshot); @@ -123,7 +124,7 @@ class DBImpl : public DB { void MaybeScheduleCompaction(); static void BGWork(void* db); void BackgroundCall(); - Status BackgroundCompaction(); + Status BackgroundCompaction(DeletionState& deletion_state); void CleanupCompaction(CompactionState* compact); Status DoCompactionWork(CompactionState* compact); @@ -131,6 +132,18 @@ class DBImpl : public DB { Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input); Status InstallCompactionResults(CompactionState* compact); + // Returns the list of live files in 'live' and the list + // of all files in the filesystem in 'allfiles'. + void FindObsoleteFiles(DeletionState& deletion_state); + + // Diffs the files listed in filenames and those that do not + // belong to live files are posibly removed. If the removed file + // is a sst file, then it returns the file number in files_to_evict. + void PurgeObsoleteFiles(DeletionState& deletion_state); + + // Removes the file listed in files_to_evict from the table_cache + void EvictObsoleteFiles(DeletionState& deletion_state); + // Constant after construction Env* const env_; const InternalKeyComparator internal_comparator_;