@ -340,6 +340,28 @@ class HashIndexReader : public IndexReader {
BlockContents prefixes_contents_ ;
} ;
// CachableEntry represents the entries that *may* be fetched from block cache.
// field `value` is the item we want to get.
// field `cache_handle` is the cache handle to the block cache. If the value
// was not read from cache, `cache_handle` will be nullptr.
template < class TValue >
struct BlockBasedTable : : CachableEntry {
CachableEntry ( TValue * _value , Cache : : Handle * _cache_handle )
: value ( _value ) , cache_handle ( _cache_handle ) { }
CachableEntry ( ) : CachableEntry ( nullptr , nullptr ) { }
void Release ( Cache * cache ) {
if ( cache_handle ) {
cache - > Release ( cache_handle ) ;
value = nullptr ;
cache_handle = nullptr ;
}
}
bool IsSet ( ) const { return cache_handle ! = nullptr ; }
TValue * value = nullptr ;
// if the entry is from the cache, cache_handle will be populated.
Cache : : Handle * cache_handle = nullptr ;
} ;
struct BlockBasedTable : : Rep {
Rep ( const ImmutableCFOptions & _ioptions , const EnvOptions & _env_options ,
@ -394,34 +416,21 @@ struct BlockBasedTable::Rep {
// and compatible with existing code, we introduce a wrapper that allows
// block to extract prefix without knowing if a key is internal or not.
unique_ptr < SliceTransform > internal_prefix_transform ;
// only used in level 0 files:
// when pin_l0_filter_and_index_blocks_in_cache is true, we do use the
// LRU cache, but we always keep the filter & idndex block's handle checked
// out here (=we don't call Release()), plus the parsed out objects
// the LRU cache will never push flush them out, hence they're pinned
CachableEntry < FilterBlockReader > filter_entry ;
CachableEntry < IndexReader > index_entry ;
} ;
BlockBasedTable : : ~ BlockBasedTable ( ) {
Close ( ) ;
delete rep_ ;
}
// CachableEntry represents the entries that *may* be fetched from block cache.
// field `value` is the item we want to get.
// field `cache_handle` is the cache handle to the block cache. If the value
// was not read from cache, `cache_handle` will be nullptr.
template < class TValue >
struct BlockBasedTable : : CachableEntry {
CachableEntry ( TValue * _value , Cache : : Handle * _cache_handle )
: value ( _value ) , cache_handle ( _cache_handle ) { }
CachableEntry ( ) : CachableEntry ( nullptr , nullptr ) { }
void Release ( Cache * cache ) {
if ( cache_handle ) {
cache - > Release ( cache_handle ) ;
value = nullptr ;
cache_handle = nullptr ;
}
}
TValue * value = nullptr ;
// if the entry is from the cache, cache_handle will be populated.
Cache : : Handle * cache_handle = nullptr ;
} ;
// Helper function to setup the cache key's prefix for the Table.
void BlockBasedTable : : SetupCacheKeyPrefix ( Rep * rep , uint64_t file_size ) {
assert ( kMaxCacheKeyPrefixSize > = 10 ) ;
@ -498,7 +507,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
uint64_t file_size ,
unique_ptr < TableReader > * table_reader ,
const bool prefetch_index_and_filter ,
const bool skip_filters ) {
const bool skip_filters , const int level ) {
table_reader - > reset ( ) ;
Footer footer ;
@ -594,14 +603,33 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
assert ( table_options . block_cache ! = nullptr ) ;
// Hack: Call NewIndexIterator() to implicitly add index to the
// block_cache
// if pin_l0_filter_and_index_blocks_in_cache is true and this is
// a level0 file, then we will pass in this pointer to rep->index
// to NewIndexIterator(), which will save the index block in there
// else it's a nullptr and nothing special happens
CachableEntry < IndexReader > * index_entry = nullptr ;
if ( rep - > table_options . pin_l0_filter_and_index_blocks_in_cache & &
level = = 0 ) {
index_entry = & rep - > index_entry ;
}
unique_ptr < InternalIterator > iter (
new_table - > NewIndexIterator ( ReadOptions ( ) ) ) ;
new_table - > NewIndexIterator ( ReadOptions ( ) , nullptr , index_entry ) ) ;
s = iter - > status ( ) ;
if ( s . ok ( ) ) {
// Hack: Call GetFilter() to implicitly add filter to the block_cache
auto filter_entry = new_table - > GetFilter ( ) ;
filter_entry . Release ( table_options . block_cache . get ( ) ) ;
// if pin_l0_filter_and_index_blocks_in_cache is true, and this is
// a level0 file, then save it in rep_->filter_entry; it will be
// released in the destructor only, hence it will be pinned in the
// cache until this reader is alive
if ( rep - > table_options . pin_l0_filter_and_index_blocks_in_cache & &
level = = 0 ) {
rep - > filter_entry = filter_entry ;
} else {
filter_entry . Release ( table_options . block_cache . get ( ) ) ;
}
}
} else {
// If we don't use block cache for index/filter blocks access, we'll
@ -886,14 +914,19 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
return { rep_ - > filter . get ( ) , nullptr /* cache handle */ } ;
}
PERF_TIMER_GUARD ( read_filter_block_nanos ) ;
Cache * block_cache = rep_ - > table_options . block_cache . get ( ) ;
if ( rep_ - > filter_policy = = nullptr /* do not use filter */ | |
block_cache = = nullptr /* no block cache at all */ ) {
return { nullptr /* filter */ , nullptr /* cache handle */ } ;
}
// we have a pinned filter block
if ( rep_ - > filter_entry . IsSet ( ) ) {
return rep_ - > filter_entry ;
}
PERF_TIMER_GUARD ( read_filter_block_nanos ) ;
// Fetching from the cache
char cache_key [ kMaxCacheKeyPrefixSize + kMaxVarint64Length ] ;
auto key = GetCacheKey ( rep_ - > cache_key_prefix , rep_ - > cache_key_prefix_size ,
@ -935,12 +968,19 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
}
InternalIterator * BlockBasedTable : : NewIndexIterator (
const ReadOptions & read_options , BlockIter * input_iter ) {
const ReadOptions & read_options , BlockIter * input_iter ,
CachableEntry < IndexReader > * index_entry ) {
// index reader has already been pre-populated.
if ( rep_ - > index_reader ) {
return rep_ - > index_reader - > NewIterator (
input_iter , read_options . total_order_seek ) ;
}
// we have a pinned index block
if ( rep_ - > index_entry . IsSet ( ) ) {
return rep_ - > index_entry . value - > NewIterator ( input_iter ,
read_options . total_order_seek ) ;
}
PERF_TIMER_GUARD ( read_index_block_nanos ) ;
bool no_io = read_options . read_tier = = kBlockCacheTier ;
@ -996,7 +1036,15 @@ InternalIterator* BlockBasedTable::NewIndexIterator(
assert ( cache_handle ) ;
auto * iter = index_reader - > NewIterator (
input_iter , read_options . total_order_seek ) ;
iter - > RegisterCleanup ( & ReleaseCachedEntry , block_cache , cache_handle ) ;
// the caller would like to take ownership of the index block
// don't call RegisterCleanup() in this case, the caller will take care of it
if ( index_entry ! = nullptr ) {
* index_entry = { index_reader , cache_handle } ;
} else {
iter - > RegisterCleanup ( & ReleaseCachedEntry , block_cache , cache_handle ) ;
}
return iter ;
}
@ -1224,7 +1272,13 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) {
RecordTick ( statistics , BLOOM_FILTER_PREFIX_USEFUL ) ;
}
filter_entry . Release ( rep_ - > table_options . block_cache . get ( ) ) ;
// if rep_->filter_entry is not set, we should call Release(); otherwise
// don't call, in this case we have a local copy in rep_->filter_entry,
// it's pinned to the cache and will be released in the destructor
if ( ! rep_ - > filter_entry . IsSet ( ) ) {
filter_entry . Release ( rep_ - > table_options . block_cache . get ( ) ) ;
}
return may_match ;
}
@ -1324,7 +1378,12 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
}
}
filter_entry . Release ( rep_ - > table_options . block_cache . get ( ) ) ;
// if rep_->filter_entry is not set, we should call Release(); otherwise
// don't call, in this case we have a local copy in rep_->filter_entry,
// it's pinned to the cache and will be released in the destructor
if ( ! rep_ - > filter_entry . IsSet ( ) ) {
filter_entry . Release ( rep_ - > table_options . block_cache . get ( ) ) ;
}
return s ;
}
@ -1612,6 +1671,11 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
return s ;
}
void BlockBasedTable : : Close ( ) {
rep_ - > filter_entry . Release ( rep_ - > table_options . block_cache . get ( ) ) ;
rep_ - > index_entry . Release ( rep_ - > table_options . block_cache . get ( ) ) ;
}
Status BlockBasedTable : : DumpIndexBlock ( WritableFile * out_file ) {
out_file - > Append (
" Index Details: \n "