@@ -340,28 +340,6 @@ class HashIndexReader : public IndexReader {
   BlockContents prefixes_contents_;
 };
 
-// CachableEntry represents the entries that *may* be fetched from block cache.
-// field `value` is the item we want to get.
-// field `cache_handle` is the cache handle to the block cache. If the value
-// was not read from cache, `cache_handle` will be nullptr.
-template <class TValue>
-struct BlockBasedTable::CachableEntry {
-  CachableEntry(TValue* _value, Cache::Handle* _cache_handle)
-      : value(_value), cache_handle(_cache_handle) {}
-  CachableEntry() : CachableEntry(nullptr, nullptr) {}
-  void Release(Cache* cache) {
-    if (cache_handle) {
-      cache->Release(cache_handle);
-      value = nullptr;
-      cache_handle = nullptr;
-    }
-  }
-  bool IsSet() const { return cache_handle != nullptr; }
-
-  TValue* value = nullptr;
-  // if the entry is from the cache, cache_handle will be populated.
-  Cache::Handle* cache_handle = nullptr;
-};
-
 struct BlockBasedTable::Rep {
   Rep(const ImmutableCFOptions& _ioptions, const EnvOptions& _env_options,
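
Annotation: the struct removed in this hunk is re-added two hunks down at its pre-feature location, minus the `IsSet()` helper the reverted feature introduced. To make its contract concrete, here is a minimal self-contained sketch; `MockCache` is a hypothetical stand-in for rocksdb's `Cache`, not the real interface:

```cpp
#include <cassert>

// Hypothetical stand-in for rocksdb::Cache, just enough to show the contract.
struct MockCache {
  struct Handle { int refs = 1; };
  void Release(Handle* h) {
    if (--h->refs == 0) delete h;  // last reference returned; entry reclaimable
  }
};

template <class TValue>
struct CachableEntry {
  CachableEntry(TValue* v, MockCache::Handle* h) : value(v), cache_handle(h) {}
  CachableEntry() = default;
  void Release(MockCache* cache) {
    if (cache_handle) {
      cache->Release(cache_handle);
      value = nullptr;
      cache_handle = nullptr;
    }
  }
  bool IsSet() const { return cache_handle != nullptr; }

  TValue* value = nullptr;
  MockCache::Handle* cache_handle = nullptr;  // null => value not from cache
};

int main() {
  MockCache cache;
  int block = 42;

  CachableEntry<int> cached(&block, new MockCache::Handle());
  assert(cached.IsSet());
  cached.Release(&cache);  // hand the reference back; entry may now be evicted
  assert(!cached.IsSet() && cached.value == nullptr);

  CachableEntry<int> owned(&block, nullptr);  // value not owned by the cache
  owned.Release(&cache);                      // no handle: deliberately a no-op
  assert(owned.value == &block);
  return 0;
}
```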
@@ -416,21 +394,34 @@ struct BlockBasedTable::Rep {
   // and compatible with existing code, we introduce a wrapper that allows
   // block to extract prefix without knowing if a key is internal or not.
   unique_ptr<SliceTransform> internal_prefix_transform;
-
-  // only used in level 0 files:
-  // when pin_l0_filter_and_index_blocks_in_cache is true, we do use the
-  // LRU cache, but we always keep the filter & index block's handle checked
-  // out here (=we don't call Release()), plus the parsed out objects;
-  // the LRU cache will never push them out, hence they're pinned
-  CachableEntry<FilterBlockReader> filter_entry;
-  CachableEntry<IndexReader> index_entry;
 };
 
 BlockBasedTable::~BlockBasedTable() {
-  Close();
   delete rep_;
 }
 
+// CachableEntry represents the entries that *may* be fetched from block cache.
+// field `value` is the item we want to get.
+// field `cache_handle` is the cache handle to the block cache. If the value
+// was not read from cache, `cache_handle` will be nullptr.
+template <class TValue>
+struct BlockBasedTable::CachableEntry {
+  CachableEntry(TValue* _value, Cache::Handle* _cache_handle)
+      : value(_value), cache_handle(_cache_handle) {}
+  CachableEntry() : CachableEntry(nullptr, nullptr) {}
+  void Release(Cache* cache) {
+    if (cache_handle) {
+      cache->Release(cache_handle);
+      value = nullptr;
+      cache_handle = nullptr;
+    }
+  }
+
+  TValue* value = nullptr;
+  // if the entry is from the cache, cache_handle will be populated.
+  Cache::Handle* cache_handle = nullptr;
+};
+
 // Helper function to setup the cache key's prefix for the Table.
 void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep, uint64_t file_size) {
   assert(kMaxCacheKeyPrefixSize >= 10);
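
Annotation: the two `Rep` members deleted above were the entire pinning mechanism. A handle that is checked out and never `Release()`d keeps the entry's reference count above zero, and an LRU cache only reclaims entries with no outstanding references. A toy model of that invariant (hypothetical, not rocksdb's cache implementation):

```cpp
#include <cassert>

// Toy model: eviction may only reclaim entries with zero outstanding refs.
struct Entry {
  int external_refs = 0;
  bool resident = true;
};

void TryEvict(Entry* e) {
  if (e->external_refs == 0) e->resident = false;  // LRU reclaims it
}

int main() {
  Entry filter_block;
  filter_block.external_refs++;   // handle parked in Rep::filter_entry
  TryEvict(&filter_block);
  assert(filter_block.resident);  // pinned: never Release()d, so never evicted

  filter_block.external_refs--;   // the destructor-time Release()
  TryEvict(&filter_block);
  assert(!filter_block.resident); // evictable again once the reader is gone
  return 0;
}
```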
@@ -507,7 +498,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
                              uint64_t file_size,
                              unique_ptr<TableReader>* table_reader,
                              const bool prefetch_index_and_filter,
-                             const bool skip_filters, const int level) {
+                             const bool skip_filters) {
   table_reader->reset();
 
   Footer footer;
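
Annotation: the dropped `level` parameter existed only so `Open()` could tell level-0 files apart. While the feature was live, callers opted in through a table option; a rough sketch of that wiring for context (real rocksdb options API, but not part of this diff):

```cpp
#include "rocksdb/options.h"
#include "rocksdb/table.h"

rocksdb::Options MakeOptions() {
  rocksdb::BlockBasedTableOptions table_options;
  // Keep index/filter blocks in the block cache rather than on the heap...
  table_options.cache_index_and_filter_blocks = true;
  // ...and pin them for L0 files: this is the option whose plumbing the
  // diff above reverts.
  table_options.pin_l0_filter_and_index_blocks_in_cache = true;

  rocksdb::Options options;
  options.table_factory.reset(
      rocksdb::NewBlockBasedTableFactory(table_options));
  return options;
}
```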
@@ -603,33 +594,14 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
       assert(table_options.block_cache != nullptr);
       // Hack: Call NewIndexIterator() to implicitly add index to the
       // block_cache
-
-      // if pin_l0_filter_and_index_blocks_in_cache is true and this is
-      // a level0 file, then we will pass a pointer to rep->index_entry
-      // to NewIndexIterator(), which will save the index block in there,
-      // else it's a nullptr and nothing special happens
-      CachableEntry<IndexReader>* index_entry = nullptr;
-      if (rep->table_options.pin_l0_filter_and_index_blocks_in_cache &&
-          level == 0) {
-        index_entry = &rep->index_entry;
-      }
       unique_ptr<InternalIterator> iter(
-          new_table->NewIndexIterator(ReadOptions(), nullptr, index_entry));
+          new_table->NewIndexIterator(ReadOptions()));
       s = iter->status();
 
       if (s.ok()) {
         // Hack: Call GetFilter() to implicitly add filter to the block_cache
         auto filter_entry = new_table->GetFilter();
-        // if pin_l0_filter_and_index_blocks_in_cache is true, and this is
-        // a level0 file, then save it in rep_->filter_entry; it will be
-        // released only in the destructor, hence it stays pinned in the
-        // cache for as long as this reader is alive
-        if (rep->table_options.pin_l0_filter_and_index_blocks_in_cache &&
-            level == 0) {
-          rep->filter_entry = filter_entry;
-        } else {
-          filter_entry.Release(table_options.block_cache.get());
-        }
+        filter_entry.Release(table_options.block_cache.get());
       }
     } else {
       // If we don't use block cache for index/filter blocks access, we'll
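
Annotation: what survives the revert is the older cache-warming "hack": build an index iterator and fetch the filter purely for the side effect of loading both blocks into the block cache, then give the references straight back. A hedged sketch of the idiom, assuming this file's headers; `WarmBlocks` is a hypothetical helper name, and `GetFilter()` is in reality internal to the table reader:

```cpp
// Sketch only, not rocksdb API. Signatures follow the ones visible in this diff.
Status WarmBlocks(BlockBasedTable* table, Cache* block_cache) {
  // Creating the iterator pulls the index block into the block cache...
  std::unique_ptr<InternalIterator> iter(
      table->NewIndexIterator(ReadOptions()));
  Status s = iter->status();
  if (s.ok()) {
    // ...and GetFilter() does the same for the filter block. The entry is
    // released immediately: the goal is residency in cache, not ownership.
    auto filter_entry = table->GetFilter();
    filter_entry.Release(block_cache);
  }
  return s;  // iter is destroyed here; its cleanup returns the index handle
}
```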
@@ -914,11 +886,6 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
     return {rep_->filter.get(), nullptr /* cache handle */};
   }
 
-  // we have a pinned filter block
-  if (rep_->filter_entry.IsSet()) {
-    return rep_->filter_entry;
-  }
-
   PERF_TIMER_GUARD(read_filter_block_nanos);
 
   Cache* block_cache = rep_->table_options.block_cache.get();
@@ -968,19 +935,12 @@
 }
 
 InternalIterator* BlockBasedTable::NewIndexIterator(
-    const ReadOptions& read_options, BlockIter* input_iter,
-    CachableEntry<IndexReader>* index_entry) {
+    const ReadOptions& read_options, BlockIter* input_iter) {
   // index reader has already been pre-populated.
   if (rep_->index_reader) {
     return rep_->index_reader->NewIterator(
         input_iter, read_options.total_order_seek);
   }
-  // we have a pinned index block
-  if (rep_->index_entry.IsSet()) {
-    return rep_->index_entry.value->NewIterator(input_iter,
-                                                read_options.total_order_seek);
-  }
 
   PERF_TIMER_GUARD(read_index_block_nanos);
 
   bool no_io = read_options.read_tier == kBlockCacheTier;
@@ -1036,15 +996,7 @@ InternalIterator* BlockBasedTable::NewIndexIterator(
   assert(cache_handle);
   auto* iter = index_reader->NewIterator(
       input_iter, read_options.total_order_seek);
-
-  // the caller would like to take ownership of the index block
-  // don't call RegisterCleanup() in this case, the caller will take care of it
-  if (index_entry != nullptr) {
-    *index_entry = {index_reader, cache_handle};
-  } else {
-    iter->RegisterCleanup(&ReleaseCachedEntry, block_cache, cache_handle);
-  }
-
+  iter->RegisterCleanup(&ReleaseCachedEntry, block_cache, cache_handle);
   return iter;
 }
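
Annotation: with the ownership-transfer branch gone, the iterator always registers a cleanup that returns the cache handle when the iterator is destroyed. For context, a callback consistent with that call site; the real `ReleaseCachedEntry` is defined elsewhere in this file, and rocksdb's `Cleanable::RegisterCleanup` passes two opaque pointers:

```cpp
// Shape of a Cleanable callback: two void* arguments, registered once,
// invoked when the iterator dies. Matches the RegisterCleanup call above.
static void ReleaseCachedEntry(void* arg, void* h) {
  Cache* cache = reinterpret_cast<Cache*>(arg);
  Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(h);
  cache->Release(handle);  // balances the lookup that produced the handle
}
```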
@@ -1272,13 +1224,7 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) {
     RecordTick(statistics, BLOOM_FILTER_PREFIX_USEFUL);
   }
 
-  // if rep_->filter_entry is not set, we should call Release(); otherwise
-  // don't call: in that case rep_->filter_entry holds the pinned copy,
-  // which is released in the destructor
-  if (!rep_->filter_entry.IsSet()) {
-    filter_entry.Release(rep_->table_options.block_cache.get());
-  }
-
+  filter_entry.Release(rep_->table_options.block_cache.get());
   return may_match;
 }
@@ -1378,12 +1324,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
       }
     }
 
-  // if rep_->filter_entry is not set, we should call Release(); otherwise
-  // don't call: in that case rep_->filter_entry holds the pinned copy,
-  // which is released in the destructor
-  if (!rep_->filter_entry.IsSet()) {
-    filter_entry.Release(rep_->table_options.block_cache.get());
-  }
-
+  filter_entry.Release(rep_->table_options.block_cache.get());
   return s;
 }
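
Annotation: this hunk and the `PrefixMayMatch` one above restore the same invariant: every `GetFilter()` is balanced by exactly one `Release()` in the same scope, since there is no longer a pinned copy handed off to `Rep`. If that pairing ever needs to be structural rather than manual, a scope guard is the natural shape; a sketch with hypothetical stand-in type names:

```cpp
// Sketch: EntryLike is any type with Release(CacheLike*), e.g. CachableEntry.
// Not rocksdb code; shown only to illustrate the paired acquire/release.
template <class EntryLike, class CacheLike>
class ScopedEntry {
 public:
  ScopedEntry(EntryLike entry, CacheLike* cache)
      : entry_(entry), cache_(cache) {}
  ~ScopedEntry() { entry_.Release(cache_); }  // balanced on every exit path
  ScopedEntry(const ScopedEntry&) = delete;
  ScopedEntry& operator=(const ScopedEntry&) = delete;
  EntryLike& get() { return entry_; }

 private:
  EntryLike entry_;
  CacheLike* cache_;
};

// Usage idea: ScopedEntry<CachableEntry<FilterBlockReader>, Cache>
//     guard(GetFilter(), rep_->table_options.block_cache.get());
```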
@@ -1671,11 +1612,6 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
   return s;
 }
 
-void BlockBasedTable::Close() {
-  rep_->filter_entry.Release(rep_->table_options.block_cache.get());
-  rep_->index_entry.Release(rep_->table_options.block_cache.get());
-}
-
 Status BlockBasedTable::DumpIndexBlock(WritableFile* out_file) {
   out_file->Append(
       "Index Details:\n"