Improve CPU Efficiency of ApproximateSize (part 1) (#5613)

Summary:
1. Avoid creating the iterator in order to call BlockBasedTable::ApproximateOffsetOf(). Instead, directly call into it.
2. Optimize BlockBasedTable::ApproximateOffsetOf() keeps the index block iterator in stack.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5613

Differential Revision: D16442660

Pulled By: elipoz

fbshipit-source-id: 9320be3e918c139b10e758cbbb684706d172e516
main
Eli Pozniansky 5 years ago committed by Facebook Github Bot
parent 3782accf7d
commit 6b7fcc0d5f
  1. 28
      db/table_cache.cc
  2. 6
      db/table_cache.h
  3. 17
      db/version_set.cc
  4. 4
      db/version_set.h
  5. 12
      table/block_based/block_based_table_reader.cc
  6. 7
      table/block_based/block_based_table_reader.h

@ -194,7 +194,7 @@ InternalIterator* TableCache::NewIterator(
if (table_reader == nullptr) { if (table_reader == nullptr) {
s = FindTable(env_options, icomparator, fd, &handle, prefix_extractor, s = FindTable(env_options, icomparator, fd, &handle, prefix_extractor,
options.read_tier == kBlockCacheTier /* no_io */, options.read_tier == kBlockCacheTier /* no_io */,
!for_compaction /* record read_stats */, file_read_hist, !for_compaction /* record_read_stats */, file_read_hist,
skip_filters, level); skip_filters, level);
if (s.ok()) { if (s.ok()) {
table_reader = GetTableReaderFromHandle(handle); table_reader = GetTableReaderFromHandle(handle);
@ -505,4 +505,30 @@ void TableCache::Evict(Cache* cache, uint64_t file_number) {
cache->Erase(GetSliceForFileNumber(&file_number)); cache->Erase(GetSliceForFileNumber(&file_number));
} }
uint64_t TableCache::ApproximateOffsetOf(
const Slice& key, const FileDescriptor& fd, TableReaderCaller caller,
const InternalKeyComparator& internal_comparator,
const SliceTransform* prefix_extractor) {
uint64_t result = 0;
TableReader* table_reader = fd.table_reader;
Cache::Handle* table_handle = nullptr;
if (table_reader == nullptr) {
const bool for_compaction = (caller == TableReaderCaller::kCompaction);
Status s = FindTable(env_options_, internal_comparator, fd, &table_handle,
prefix_extractor, false /* no_io */,
!for_compaction /* record_read_stats */);
if (s.ok()) {
table_reader = GetTableReaderFromHandle(table_handle);
}
}
if (table_reader != nullptr) {
result = table_reader->ApproximateOffsetOf(key, caller);
}
if (table_handle != nullptr) {
ReleaseHandle(table_handle);
}
return result;
}
} // namespace rocksdb } // namespace rocksdb

@ -153,6 +153,12 @@ class TableCache {
const FileDescriptor& fd, const FileDescriptor& fd,
const SliceTransform* prefix_extractor = nullptr); const SliceTransform* prefix_extractor = nullptr);
// Returns approximated offset of a key in a file represented by fd.
uint64_t ApproximateOffsetOf(
const Slice& key, const FileDescriptor& fd, TableReaderCaller caller,
const InternalKeyComparator& internal_comparator,
const SliceTransform* prefix_extractor = nullptr);
// Release the handle from a cache // Release the handle from a cache
void ReleaseHandle(Cache::Handle* handle); void ReleaseHandle(Cache::Handle* handle);

@ -4974,19 +4974,12 @@ uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f,
} else { } else {
// "key" falls in the range for this table. Add the // "key" falls in the range for this table. Add the
// approximate offset of "key" within the table. // approximate offset of "key" within the table.
TableReader* table_reader_ptr; TableCache* table_cache = v->cfd_->table_cache();
InternalIterator* iter = v->cfd_->table_cache()->NewIterator( if (table_cache != nullptr) {
ReadOptions(), v->env_options_, v->cfd_->internal_comparator(), result = table_cache->ApproximateOffsetOf(
*f.file_metadata, nullptr /* range_del_agg */, key, f.file_metadata->fd, caller, v->cfd()->internal_comparator(),
v->GetMutableCFOptions().prefix_extractor.get(), &table_reader_ptr, v->GetMutableCFOptions().prefix_extractor.get());
/*file_read_hist=*/nullptr, caller,
/*arena=*/nullptr, /*skip_filters=*/false, /*level=*/-1,
/*smallest_compaction_key=*/nullptr,
/*largest_compaction_key=*/nullptr);
if (table_reader_ptr != nullptr) {
result = table_reader_ptr->ApproximateOffsetOf(key, caller);
} }
delete iter;
} }
return result; return result;
} }

@ -655,7 +655,7 @@ class Version {
uint64_t GetSstFilesSize(); uint64_t GetSstFilesSize();
MutableCFOptions GetMutableCFOptions() { return mutable_cf_options_; } const MutableCFOptions& GetMutableCFOptions() { return mutable_cf_options_; }
private: private:
Env* env_; Env* env_;
@ -981,7 +981,7 @@ class VersionSet {
void AddLiveFiles(std::vector<FileDescriptor>* live_list); void AddLiveFiles(std::vector<FileDescriptor>* live_list);
// Return the approximate size of data to be scanned for range [start, end) // Return the approximate size of data to be scanned for range [start, end)
// in levels [start_level, end_level). If end_level == 0 it will search // in levels [start_level, end_level). If end_level == -1 it will search
// through all non-empty levels // through all non-empty levels
uint64_t ApproximateSize(Version* v, const Slice& start, const Slice& end, uint64_t ApproximateSize(Version* v, const Slice& start, const Slice& end,
int start_level, int end_level, int start_level, int end_level,

@ -4018,10 +4018,11 @@ Status BlockBasedTable::CreateIndexReader(
uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key, uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key,
TableReaderCaller caller) { TableReaderCaller caller) {
BlockCacheLookupContext context(caller); BlockCacheLookupContext context(caller);
std::unique_ptr<InternalIteratorBase<IndexValue>> index_iter( IndexBlockIter iiter_on_stack;
auto index_iter =
NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false, NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false,
/*input_iter=*/nullptr, /*get_context=*/nullptr, /*input_iter=*/&iiter_on_stack, /*get_context=*/nullptr,
/*lookup_context=*/&context)); /*lookup_context=*/&context);
index_iter->Seek(key); index_iter->Seek(key);
uint64_t result; uint64_t result;
@ -4041,6 +4042,11 @@ uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key,
result = rep_->footer.metaindex_handle().offset(); result = rep_->footer.metaindex_handle().offset();
} }
} }
if (index_iter != &iiter_on_stack) {
delete index_iter;
}
return result; return result;
} }

@ -318,8 +318,11 @@ class BlockBasedTable : public TableReader {
BlockCacheLookupContext* lookup_context) const; BlockCacheLookupContext* lookup_context) const;
// Get the iterator from the index reader. // Get the iterator from the index reader.
// If input_iter is not set, return new Iterator //
// If input_iter is set, update it and return it as Iterator // If input_iter is not set, return a new Iterator.
// If input_iter is set, try to update it and return it as Iterator.
// However note that in some cases the returned iterator may be different
// from input_iter. In such case the returned iterator should be freed.
// //
// Note: ErrorIterator with Status::Incomplete shall be returned if all the // Note: ErrorIterator with Status::Incomplete shall be returned if all the
// following conditions are met: // following conditions are met:

Loading…
Cancel
Save