From fa430bfd04bf8e17d0cde2055cd223d13c2625d1 Mon Sep 17 00:00:00 2001 From: sdong Date: Thu, 17 Apr 2014 14:07:05 -0700 Subject: [PATCH] Minimize accessing multiple objects in Version::Get() Summary: One of our profilings shows that Version::Get() sometimes is slow when getting pointer of user comparators or other global objects. In this patch: (1) we keep pointers of immutable objects in Version to avoid accesses them though option objects or cfd objects (2) table_reader is directly cached in FileMetaData so that table cache don't have to go through handle first to fetch it (3) If level 0 has less than 3 files, skip the filtering logic based on SST tables' key range. Smallest and largest key are stored in separated memory locations, which has potential cache misses Test Plan: make all check Reviewers: haobo, ljin Reviewed By: haobo CC: igor, yhchiang, nkg-, leveldb Differential Revision: https://reviews.facebook.net/D17739 --- db/db_impl.cc | 4 +-- db/db_impl_readonly.cc | 2 +- db/table_cache.cc | 38 ++++++++++++++++------------- db/version_edit.h | 5 +++- db/version_set.cc | 55 ++++++++++++++++++++++++++---------------- db/version_set.h | 9 +++++-- 6 files changed, 69 insertions(+), 44 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 1dc8eb1dc..44f18fb48 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -3219,7 +3219,7 @@ Status DBImpl::GetImpl(const ReadOptions& options, PERF_TIMER_START(get_from_output_files_time); sv->current->Get(options, lkey, value, &s, &merge_context, &stats, - *cfd->options(), value_found); + value_found); have_stat_update = true; PERF_TIMER_STOP(get_from_output_files_time); RecordTick(options_.statistics.get(), MEMTABLE_MISS); @@ -3334,7 +3334,7 @@ std::vector DBImpl::MultiGet( // Done } else { super_version->current->Get(options, lkey, value, &s, &merge_context, - &mgd->stats, *cfd->options()); + &mgd->stats); mgd->have_stat_update = true; } diff --git a/db/db_impl_readonly.cc b/db/db_impl_readonly.cc index 435384bd3..43083746d 100644 --- a/db/db_impl_readonly.cc +++ b/db/db_impl_readonly.cc @@ -67,7 +67,7 @@ Status DBImplReadOnly::Get(const ReadOptions& options, } else { Version::GetStats stats; super_version->current->Get(options, lkey, value, &s, &merge_context, - &stats, *cfd->options()); + &stats); } return s; } diff --git a/db/table_cache.cc b/db/table_cache.cc index 36168d109..c73168185 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -106,19 +106,20 @@ Iterator* TableCache::NewIterator(const ReadOptions& options, if (table_reader_ptr != nullptr) { *table_reader_ptr = nullptr; } - Cache::Handle* handle = file_meta.table_reader_handle; + TableReader* table_reader = file_meta.table_reader; + Cache::Handle* handle = nullptr; Status s; - if (!handle) { + if (table_reader == nullptr) { s = FindTable(toptions, icomparator, file_meta.number, file_meta.file_size, &handle, nullptr, options.read_tier == kBlockCacheTier); + table_reader = GetTableReaderFromHandle(handle); } if (!s.ok()) { return NewErrorIterator(s); } - TableReader* table_reader = GetTableReaderFromHandle(handle); Iterator* result = table_reader->NewIterator(options); - if (!file_meta.table_reader_handle) { + if (handle != nullptr) { result->RegisterCleanup(&UnrefEntry, cache_, handle); } if (table_reader_ptr != nullptr) { @@ -138,17 +139,18 @@ Status TableCache::Get(const ReadOptions& options, bool (*saver)(void*, const ParsedInternalKey&, const Slice&, bool), bool* table_io, void (*mark_key_may_exist)(void*)) { - Cache::Handle* handle = file_meta.table_reader_handle; + TableReader* t = file_meta.table_reader; Status s; - if (!handle) { + Cache::Handle* handle = nullptr; + if (!t) { s = FindTable(storage_options_, internal_comparator, file_meta.number, file_meta.file_size, &handle, table_io, options.read_tier == kBlockCacheTier); + t = GetTableReaderFromHandle(handle); } if (s.ok()) { - TableReader* t = GetTableReaderFromHandle(handle); s = t->Get(options, k, arg, saver, mark_key_may_exist); - if (!file_meta.table_reader_handle) { + if (handle != nullptr) { ReleaseHandle(handle); } } else if (options.read_tier && s.IsIncomplete()) { @@ -164,15 +166,16 @@ Status TableCache::GetTableProperties( const FileMetaData& file_meta, std::shared_ptr* properties, bool no_io) { Status s; - auto table_handle = file_meta.table_reader_handle; + auto table_reader = file_meta.table_reader; // table already been pre-loaded? - if (table_handle) { - auto table = GetTableReaderFromHandle(table_handle); - *properties = table->GetTableProperties(); + if (table_reader) { + *properties = table_reader->GetTableProperties(); + return s; } bool table_io; + Cache::Handle* table_handle = nullptr; s = FindTable(toptions, internal_comparator, file_meta.number, file_meta.file_size, &table_handle, &table_io, no_io); if (!s.ok()) { @@ -190,20 +193,21 @@ bool TableCache::PrefixMayMatch(const ReadOptions& options, const FileMetaData& file_meta, const Slice& internal_prefix, bool* table_io) { bool may_match = true; - auto table_handle = file_meta.table_reader_handle; - if (table_handle == nullptr) { + auto table_reader = file_meta.table_reader; + Cache::Handle* table_handle = nullptr; + if (table_reader == nullptr) { // Need to get table handle from file number Status s = FindTable(storage_options_, icomparator, file_meta.number, file_meta.file_size, &table_handle, table_io); if (!s.ok()) { return may_match; } + table_reader = GetTableReaderFromHandle(table_handle); } - auto table = GetTableReaderFromHandle(table_handle); - may_match = table->PrefixMayMatch(internal_prefix); + may_match = table_reader->PrefixMayMatch(internal_prefix); - if (file_meta.table_reader_handle == nullptr) { + if (table_handle != nullptr) { // Need to release handle if it is generated from here. ReleaseHandle(table_handle); } diff --git a/db/version_edit.h b/db/version_edit.h index 98731cfb2..acaec8a4f 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -32,6 +32,8 @@ struct FileMetaData { // Needs to be disposed when refs becomes 0. Cache::Handle* table_reader_handle; + // Table reader in table_reader_handle + TableReader* table_reader; FileMetaData(uint64_t number, uint64_t file_size) : refs(0), @@ -39,7 +41,8 @@ struct FileMetaData { number(number), file_size(file_size), being_compacted(false), - table_reader_handle(nullptr) {} + table_reader_handle(nullptr), + table_reader(nullptr) {} FileMetaData() : FileMetaData(0, 0) {} }; diff --git a/db/version_set.cc b/db/version_set.cc index 12a8f670b..e6ece7c3e 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -483,6 +483,17 @@ bool BySmallestKey(FileMetaData* a, FileMetaData* b, Version::Version(ColumnFamilyData* cfd, VersionSet* vset, uint64_t version_number) : cfd_(cfd), + internal_comparator_((cfd == nullptr) ? nullptr + : &cfd->internal_comparator()), + user_comparator_((cfd == nullptr) + ? nullptr + : internal_comparator_->user_comparator()), + table_cache_((cfd == nullptr) ? nullptr : cfd->table_cache()), + merge_operator_((cfd == nullptr) ? nullptr + : cfd->options()->merge_operator.get()), + info_log_((cfd == nullptr) ? nullptr : cfd->options()->info_log.get()), + db_statistics_((cfd == nullptr) ? nullptr + : cfd->options()->statistics.get()), vset_(vset), next_(this), prev_(this), @@ -504,27 +515,22 @@ void Version::Get(const ReadOptions& options, Status* status, MergeContext* merge_context, GetStats* stats, - const Options& db_options, bool* value_found) { Slice ikey = k.internal_key(); Slice user_key = k.user_key(); - const Comparator* ucmp = cfd_->internal_comparator().user_comparator(); - - auto merge_operator = db_options.merge_operator.get(); - auto logger = db_options.info_log.get(); assert(status->ok() || status->IsMergeInProgress()); Saver saver; saver.state = status->ok()? kNotFound : kMerge; - saver.ucmp = ucmp; + saver.ucmp = user_comparator_; saver.user_key = user_key; saver.value_found = value_found; saver.value = value; - saver.merge_operator = merge_operator; + saver.merge_operator = merge_operator_; saver.merge_context = merge_context; - saver.logger = logger; + saver.logger = info_log_; saver.didIO = false; - saver.statistics = db_options.statistics.get(); + saver.statistics = db_statistics_; stats->seek_file = nullptr; stats->seek_file_level = -1; @@ -555,7 +561,7 @@ void Version::Get(const ReadOptions& options, // On Level-n (n>=1), files are sorted. // Binary search to find earliest index whose largest key >= ikey. // We will also stop when the file no longer overlaps ikey - start_index = FindFile(cfd_->internal_comparator(), files_[level], ikey); + start_index = FindFile(*internal_comparator_, files_[level], ikey); } // Traverse each relevant file to find the desired key @@ -564,8 +570,10 @@ void Version::Get(const ReadOptions& options, #endif for (uint32_t i = start_index; i < num_files; ++i) { FileMetaData* f = files[i]; - if (ucmp->Compare(user_key, f->smallest.user_key()) < 0 || - ucmp->Compare(user_key, f->largest.user_key()) > 0) { + // Skip key range filtering for levle 0 if there are few level 0 files. + if ((level > 0 || num_files > 2) && + (user_comparator_->Compare(user_key, f->smallest.user_key()) < 0 || + user_comparator_->Compare(user_key, f->largest.user_key()) > 0)) { // Only process overlapping files. if (level > 0) { // If on Level-n (n>=1) then the files are sorted. @@ -581,8 +589,8 @@ void Version::Get(const ReadOptions& options, // Sanity check to make sure that the files are correctly sorted if (prev_file) { if (level != 0) { - int comp_sign = cfd_->internal_comparator().Compare( - prev_file->largest, f->smallest); + int comp_sign = + internal_comparator_->Compare(prev_file->largest, f->smallest); assert(comp_sign < 0); } else { // level == 0, the current file cannot be newer than the previous one. @@ -596,9 +604,8 @@ void Version::Get(const ReadOptions& options, prev_file = f; #endif bool tableIO = false; - *status = cfd_->table_cache()->Get(options, cfd_->internal_comparator(), - *f, ikey, &saver, SaveValue, &tableIO, - MarkKeyMayExist); + *status = table_cache_->Get(options, *internal_comparator_, *f, ikey, + &saver, SaveValue, &tableIO, MarkKeyMayExist); // TODO: examine the behavior for corrupted key if (!status->ok()) { return; @@ -643,12 +650,12 @@ void Version::Get(const ReadOptions& options, if (kMerge == saver.state) { // merge_operands are in saver and we hit the beginning of the key history // do a final merge of nullptr and operands; - if (merge_operator->FullMerge(user_key, nullptr, - saver.merge_context->GetOperands(), - value, logger)) { + if (merge_operator_->FullMerge(user_key, nullptr, + saver.merge_context->GetOperands(), value, + info_log_)) { *status = Status::OK(); } else { - RecordTick(db_options.statistics.get(), NUMBER_MERGE_FAILURES); + RecordTick(db_statistics_, NUMBER_MERGE_FAILURES); *status = Status::Corruption("could not perform end-of-key merge for ", user_key); } @@ -1458,6 +1465,12 @@ class VersionSet::Builder { base_->vset_->storage_options_, cfd_->internal_comparator(), file_meta->number, file_meta->file_size, &file_meta->table_reader_handle, &table_io, false); + if (file_meta->table_reader_handle != nullptr) { + // Load table_reader + file_meta->table_reader = + cfd_->table_cache()->GetTableReaderFromHandle( + file_meta->table_reader_handle); + } } } } diff --git a/db/version_set.h b/db/version_set.h index fd3e5c893..ef616f34b 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -88,8 +88,7 @@ class Version { int seek_file_level; }; void Get(const ReadOptions&, const LookupKey& key, std::string* val, - Status* status, MergeContext* merge_context, - GetStats* stats, const Options& db_option, + Status* status, MergeContext* merge_context, GetStats* stats, bool* value_found = nullptr); // Adds "stats" into the current state. Returns true if a new @@ -230,6 +229,12 @@ class Version { void UpdateFilesBySize(); ColumnFamilyData* cfd_; // ColumnFamilyData to which this Version belongs + const InternalKeyComparator* internal_comparator_; + const Comparator* user_comparator_; + TableCache* table_cache_; + const MergeOperator* merge_operator_; + Logger* info_log_; + Statistics* db_statistics_; VersionSet* vset_; // VersionSet to which this Version belongs Version* next_; // Next version in linked list Version* prev_; // Previous version in linked list