From 013e9ebbf1e61ade258e6964b52e34797b34a573 Mon Sep 17 00:00:00 2001 From: Haobo Xu Date: Thu, 11 Apr 2013 16:49:53 -0700 Subject: [PATCH] [RocksDB] [Performance] Speed up FindObsoleteFiles Summary: FindObsoleteFiles was slow, holding the single big lock, resulted in bad p99 behavior. Didn't profile anything, but several things could be improved: 1. VersionSet::AddLiveFiles works with std::set, which is by itself slow (a tree). You also don't know how many dynamic allocations occur just for building up this tree. switched to std::vector, also added logic to pre-calculate total size and do just one allocation 2. Don't see why env_->GetChildren() needs to be mutex proteced, moved to PurgeObsoleteFiles where mutex could be unlocked. 3. switched std::set to std:unordered_set, the conversion from vector is also inside PurgeObsoleteFiles I have a feeling this should pretty much fix it. Test Plan: make check; db_stress Reviewers: dhruba, heyongqiang, MarkCallaghan Reviewed By: dhruba CC: leveldb, zshao Differential Revision: https://reviews.facebook.net/D10197 --- db/db_impl.cc | 22 +++++++++++++++------- db/version_set.cc | 20 ++++++++++++++++---- db/version_set.h | 3 +-- 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index fd9a0d533..ada7b3659 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -11,6 +11,7 @@ #include #include #include +#include #include "db/builder.h" #include "db/db_iter.h" @@ -91,8 +92,8 @@ struct DBImpl::CompactionState { struct DBImpl::DeletionState { - // the set of all live files that cannot be deleted - std::set live; + // the list of all live files that cannot be deleted + std::vector live; // a list of all siles that exists in the db directory std::vector allfiles; @@ -101,7 +102,7 @@ struct DBImpl::DeletionState { // that corresponds to the set of files in 'live'. uint64_t filenumber, lognumber, prevlognumber; - // the list of all files to be evicted from the table cahce + // the list of all files to be evicted from the table cache std::vector files_to_evict; }; @@ -319,8 +320,10 @@ void DBImpl::FindObsoleteFiles(DeletionState& deletion_state) { delete_obsolete_files_last_run_ = now_micros; } - // Make a set of all of the live files - deletion_state.live = pending_outputs_; + // Make a list of all of the live files; set is slow, should not + // be used. + deletion_state.live.assign(pending_outputs_.begin(), + pending_outputs_.end()); versions_->AddLiveFiles(&deletion_state.live); // set of all files in the directory @@ -341,6 +344,11 @@ void DBImpl::PurgeObsoleteFiles(DeletionState& state) { FileType type; std::vector old_log_files; + // Now, convert live list to an unordered set, WITHOUT mutex held; + // set is slow. + std::unordered_set live_set(state.live.begin(), + state.live.end()); + for (size_t i = 0; i < state.allfiles.size(); i++) { if (ParseFileName(state.allfiles[i], &number, &type)) { bool keep = true; @@ -355,12 +363,12 @@ void DBImpl::PurgeObsoleteFiles(DeletionState& state) { keep = (number >= state.filenumber); break; case kTableFile: - keep = (state.live.find(number) != state.live.end()); + keep = (live_set.find(number) != live_set.end()); break; case kTempFile: // Any temp files that are currently being written to must // be recorded in pending_outputs_, which is inserted into "live" - keep = (state.live.find(number) != state.live.end()); + keep = (live_set.find(number) != live_set.end()); break; case kInfoLogFile: keep = true; diff --git a/db/version_set.cc b/db/version_set.cc index 329fcffb2..6ce8240f8 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1643,14 +1643,26 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) { return result; } -void VersionSet::AddLiveFiles(std::set* live) { +void VersionSet::AddLiveFiles(std::vector* live_list) { + // pre-calculate space requirement + int64_t total_files = 0; for (Version* v = dummy_versions_.next_; v != &dummy_versions_; v = v->next_) { for (int level = 0; level < NumberLevels(); level++) { - const std::vector& files = v->files_[level]; - for (size_t i = 0; i < files.size(); i++) { - live->insert(files[i]->number); + total_files += v->files_[level].size(); + } + } + + // just one time extension to the right size + live_list->reserve(live_list->size() + total_files); + + for (Version* v = dummy_versions_.next_; + v != &dummy_versions_; + v = v->next_) { + for (int level = 0; level < NumberLevels(); level++) { + for (const auto& f : v->files_[level]) { + live_list->push_back(f->number); } } } diff --git a/db/version_set.h b/db/version_set.h index 7a7c814dc..88b9cd29f 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -326,8 +326,7 @@ class VersionSet { } // Add all files listed in any live version to *live. - // May also mutate some internal state. - void AddLiveFiles(std::set* live); + void AddLiveFiles(std::vector* live_list); // Add all files listed in the current version to *live. void AddLiveFilesCurrentVersion(std::set* live);