@ -80,12 +80,11 @@ Status ReadBlockFromFile(
std : : unique_ptr < Block > * result , const ImmutableCFOptions & ioptions ,
std : : unique_ptr < Block > * result , const ImmutableCFOptions & ioptions ,
bool do_uncompress , const Slice & compression_dict ,
bool do_uncompress , const Slice & compression_dict ,
const PersistentCacheOptions & cache_options , SequenceNumber global_seqno ,
const PersistentCacheOptions & cache_options , SequenceNumber global_seqno ,
size_t read_amp_bytes_per_bit , MemoryAllocator * allocator = nullptr ,
size_t read_amp_bytes_per_bit , MemoryAllocator * allocator = nullptr ) {
const bool immortal_file = false ) {
BlockContents contents ;
BlockContents contents ;
BlockFetcher block_fetcher (
BlockFetcher block_fetcher ( file , prefetch_buffer , footer , options , handle ,
file , prefetch_buffer , footer , options , handle , & contents , ioption s,
& contents , ioptions , do_uncompres s ,
do_uncompress , compression_dict , cache_options , allocator , immortal_file ) ;
compression_dict , cache_options , allocator ) ;
Status s = block_fetcher . ReadBlockContents ( ) ;
Status s = block_fetcher . ReadBlockContents ( ) ;
if ( s . ok ( ) ) {
if ( s . ok ( ) ) {
result - > reset ( new Block ( std : : move ( contents ) , global_seqno ,
result - > reset ( new Block ( std : : move ( contents ) , global_seqno ,
@ -245,6 +244,8 @@ class PartitionIndexReader : public IndexReader, public Cleanable {
Statistics * kNullStats = nullptr ;
Statistics * kNullStats = nullptr ;
// Filters are already checked before seeking the index
// Filters are already checked before seeking the index
if ( ! partition_map_ . empty ( ) ) {
if ( ! partition_map_ . empty ( ) ) {
// We don't return pinned data from index blocks, so no need
// to set `block_contents_pinned`.
return NewTwoLevelIterator (
return NewTwoLevelIterator (
new BlockBasedTable : : PartitionedIndexIteratorState (
new BlockBasedTable : : PartitionedIndexIteratorState (
table_ , & partition_map_ , index_key_includes_seq_ ,
table_ , & partition_map_ , index_key_includes_seq_ ,
@ -256,6 +257,8 @@ class PartitionIndexReader : public IndexReader, public Cleanable {
auto ro = ReadOptions ( ) ;
auto ro = ReadOptions ( ) ;
ro . fill_cache = fill_cache ;
ro . fill_cache = fill_cache ;
bool kIsIndex = true ;
bool kIsIndex = true ;
// We don't return pinned data from index blocks, so no need
// to set `block_contents_pinned`.
return new BlockBasedTableIterator < IndexBlockIter , BlockHandle > (
return new BlockBasedTableIterator < IndexBlockIter , BlockHandle > (
table_ , ro , * icomparator_ ,
table_ , ro , * icomparator_ ,
index_block_ - > NewIterator < IndexBlockIter > (
index_block_ - > NewIterator < IndexBlockIter > (
@ -276,6 +279,8 @@ class PartitionIndexReader : public IndexReader, public Cleanable {
IndexBlockIter biter ;
IndexBlockIter biter ;
BlockHandle handle ;
BlockHandle handle ;
Statistics * kNullStats = nullptr ;
Statistics * kNullStats = nullptr ;
// We don't return pinned data from index blocks, so no need
// to set `block_contents_pinned`.
index_block_ - > NewIterator < IndexBlockIter > (
index_block_ - > NewIterator < IndexBlockIter > (
icomparator_ , icomparator_ - > user_comparator ( ) , & biter , kNullStats , true ,
icomparator_ , icomparator_ - > user_comparator ( ) , & biter , kNullStats , true ,
index_key_includes_seq_ , index_value_is_full_ ) ;
index_key_includes_seq_ , index_value_is_full_ ) ;
@ -318,7 +323,7 @@ class PartitionIndexReader : public IndexReader, public Cleanable {
const bool is_index = true ;
const bool is_index = true ;
// TODO: Support counter batch update for partitioned index and
// TODO: Support counter batch update for partitioned index and
// filter blocks
// filter blocks
s = table_ - > MaybeLoadDataBlock ToCache (
s = table_ - > MaybeReadBlockAndLoad ToCache (
prefetch_buffer . get ( ) , rep , ro , handle , compression_dict , & block ,
prefetch_buffer . get ( ) , rep , ro , handle , compression_dict , & block ,
is_index , nullptr /* get_context */ ) ;
is_index , nullptr /* get_context */ ) ;
@ -415,6 +420,8 @@ class BinarySearchIndexReader : public IndexReader {
IndexBlockIter * iter = nullptr , bool /*dont_care*/ = true ,
IndexBlockIter * iter = nullptr , bool /*dont_care*/ = true ,
bool /*dont_care*/ = true ) override {
bool /*dont_care*/ = true ) override {
Statistics * kNullStats = nullptr ;
Statistics * kNullStats = nullptr ;
// We don't return pinned data from index blocks, so no need
// to set `block_contents_pinned`.
return index_block_ - > NewIterator < IndexBlockIter > (
return index_block_ - > NewIterator < IndexBlockIter > (
icomparator_ , icomparator_ - > user_comparator ( ) , iter , kNullStats , true ,
icomparator_ , icomparator_ - > user_comparator ( ) , iter , kNullStats , true ,
index_key_includes_seq_ , index_value_is_full_ ) ;
index_key_includes_seq_ , index_value_is_full_ ) ;
@ -540,10 +547,12 @@ class HashIndexReader : public IndexReader {
IndexBlockIter * iter = nullptr , bool total_order_seek = true ,
IndexBlockIter * iter = nullptr , bool total_order_seek = true ,
bool /*dont_care*/ = true ) override {
bool /*dont_care*/ = true ) override {
Statistics * kNullStats = nullptr ;
Statistics * kNullStats = nullptr ;
// We don't return pinned data from index blocks, so no need
// to set `block_contents_pinned`.
return index_block_ - > NewIterator < IndexBlockIter > (
return index_block_ - > NewIterator < IndexBlockIter > (
icomparator_ , icomparator_ - > user_comparator ( ) , iter , kNullStats ,
icomparator_ , icomparator_ - > user_comparator ( ) , iter , kNullStats ,
total_order_seek , index_key_includes_seq_ , index_value_is_full_ ,
total_order_seek , index_key_includes_seq_ , index_value_is_full_ ,
prefix_index_ . get ( ) ) ;
false /* block_contents_pinned */ , prefix_index_ . get ( ) ) ;
}
}
virtual size_t size ( ) const override { return index_block_ - > size ( ) ; }
virtual size_t size ( ) const override { return index_block_ - > size ( ) ; }
@ -578,8 +587,7 @@ class HashIndexReader : public IndexReader {
assert ( index_block_ ! = nullptr ) ;
assert ( index_block_ ! = nullptr ) ;
}
}
~ HashIndexReader ( ) {
~ HashIndexReader ( ) { }
}
std : : unique_ptr < Block > index_block_ ;
std : : unique_ptr < Block > index_block_ ;
std : : unique_ptr < BlockPrefixIndex > prefix_index_ ;
std : : unique_ptr < BlockPrefixIndex > prefix_index_ ;
@ -972,7 +980,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
s . ToString ( ) . c_str ( ) ) ;
s . ToString ( ) . c_str ( ) ) ;
} else if ( found_range_del_block & & ! rep - > range_del_handle . IsNull ( ) ) {
} else if ( found_range_del_block & & ! rep - > range_del_handle . IsNull ( ) ) {
ReadOptions read_options ;
ReadOptions read_options ;
s = MaybeLoadDataBlock ToCache (
s = MaybeReadBlockAndLoad ToCache (
prefetch_buffer . get ( ) , rep , read_options , rep - > range_del_handle ,
prefetch_buffer . get ( ) , rep , read_options , rep - > range_del_handle ,
Slice ( ) /* compression_dict */ , & rep - > range_del_entry ,
Slice ( ) /* compression_dict */ , & rep - > range_del_entry ,
false /* is_index */ , nullptr /* get_context */ ) ;
false /* is_index */ , nullptr /* get_context */ ) ;
@ -1177,15 +1185,14 @@ Status BlockBasedTable::ReadMetaBlock(Rep* rep,
Status BlockBasedTable : : GetDataBlockFromCache (
Status BlockBasedTable : : GetDataBlockFromCache (
const Slice & block_cache_key , const Slice & compressed_block_cache_key ,
const Slice & block_cache_key , const Slice & compressed_block_cache_key ,
Cache * block_cache , Cache * block_cache_compressed ,
Cache * block_cache , Cache * block_cache_compressed , Rep * rep ,
const ImmutableCFOptions & ioptions , const ReadOptions & read_options ,
const ReadOptions & read_options ,
BlockBasedTable : : CachableEntry < Block > * block , uint32_t format_version ,
BlockBasedTable : : CachableEntry < Block > * block , const Slice & compression_dict ,
const Slice & compression_dict , size_t read_amp_bytes_per_bit , bool is_index ,
size_t read_amp_bytes_per_bit , bool is_index , GetContext * get_context ) {
GetContext * get_context , MemoryAllocator * allocator ) {
Status s ;
Status s ;
Block * compressed_block = nullptr ;
BlockContents * compressed_block = nullptr ;
Cache : : Handle * block_cache_compressed_handle = nullptr ;
Cache : : Handle * block_cache_compressed_handle = nullptr ;
Statistics * statistics = ioptions . statistics ;
Statistics * statistics = rep - > ioptions . statistics ;
// Lookup uncompressed cache first
// Lookup uncompressed cache first
if ( block_cache ! = nullptr ) {
if ( block_cache ! = nullptr ) {
@ -1228,32 +1235,34 @@ Status BlockBasedTable::GetDataBlockFromCache(
// found compressed block
// found compressed block
RecordTick ( statistics , BLOCK_CACHE_COMPRESSED_HIT ) ;
RecordTick ( statistics , BLOCK_CACHE_COMPRESSED_HIT ) ;
compressed_block = reinterpret_cast < Block * > (
compressed_block = reinterpret_cast < BlockContents * > (
block_cache_compressed - > Value ( block_cache_compressed_handle ) ) ;
block_cache_compressed - > Value ( block_cache_compressed_handle ) ) ;
assert ( compressed_block - > compression_type ( ) ! = kNoCompression ) ;
CompressionType compression_type = compressed_block - > get_compression_type ( ) ;
assert ( compression_type ! = kNoCompression ) ;
// Retrieve the uncompressed contents into a new buffer
// Retrieve the uncompressed contents into a new buffer
BlockContents contents ;
BlockContents contents ;
UncompressionContext uncompresssion_ctx ( compressed_block - > compression_type ( ) ,
UncompressionContext uncompresssion_ctx ( compression_type , compression_dict ) ;
compression_dict ) ;
s = UncompressBlockContents ( uncompresssion_ctx , compressed_block - > data . data ( ) ,
s = UncompressBlockContents ( uncompresssion_ctx , compressed_block - > data ( ) ,
compressed_block - > data . size ( ) , & contents ,
compressed_block - > size ( ) , & content s,
rep - > table_options . format_version , rep - > ioption s,
format_version , ioptions , allocator ) ;
GetMemoryAllocator ( rep - > table_options ) ) ;
// Insert uncompressed block into block cache
// Insert uncompressed block into block cache
if ( s . ok ( ) ) {
if ( s . ok ( ) ) {
block - > value =
block - > value =
new Block ( std : : move ( contents ) , compressed_block - > global_seqno ( ) ,
new Block ( std : : move ( contents ) , rep - > get_g lobal_seqno ( is_index ) ,
read_amp_bytes_per_bit ,
read_amp_bytes_per_bit ,
statistics ) ; // uncompressed block
statistics ) ; // uncompressed block
assert ( block - > value - > compression_type ( ) = = kNoCompression ) ;
if ( block_cache ! = nullptr & & block - > value - > own_bytes ( ) & &
if ( block_cache ! = nullptr & & block - > value - > cachable ( ) & &
read_options . fill_cache ) {
read_options . fill_cache ) {
size_t charge = block - > value - > ApproximateMemoryUsage ( ) ;
size_t charge = block - > value - > ApproximateMemoryUsage ( ) ;
s = block_cache - > Insert ( block_cache_key , block - > value , charge ,
s = block_cache - > Insert ( block_cache_key , block - > value , charge ,
& DeleteCachedEntry < Block > ,
& DeleteCachedEntry < Block > ,
& ( block - > cache_handle ) ) ;
& ( block - > cache_handle ) ) ;
# ifndef NDEBUG
block_cache - > TEST_mark_as_data_block ( block_cache_key , charge ) ;
block_cache - > TEST_mark_as_data_block ( block_cache_key , charge ) ;
# endif // NDEBUG
if ( s . ok ( ) ) {
if ( s . ok ( ) ) {
if ( get_context ! = nullptr ) {
if ( get_context ! = nullptr ) {
get_context - > get_context_stats_ . num_cache_add + + ;
get_context - > get_context_stats_ . num_cache_add + + ;
@ -1298,65 +1307,77 @@ Status BlockBasedTable::PutDataBlockToCache(
const Slice & block_cache_key , const Slice & compressed_block_cache_key ,
const Slice & block_cache_key , const Slice & compressed_block_cache_key ,
Cache * block_cache , Cache * block_cache_compressed ,
Cache * block_cache , Cache * block_cache_compressed ,
const ReadOptions & /*read_options*/ , const ImmutableCFOptions & ioptions ,
const ReadOptions & /*read_options*/ , const ImmutableCFOptions & ioptions ,
CachableEntry < Block > * block , Block * raw_block , uint32_t format_version ,
CachableEntry < Block > * cached_block , BlockContents * raw_block_contents ,
const Slice & compression_dict , size_t read_amp_bytes_per_bit , bool is_index ,
CompressionType raw_block_comp_type , uint32_t format_version ,
Cache : : Priority priority , GetContext * get_context ,
const Slice & compression_dict , SequenceNumber seq_no ,
MemoryAllocator * allocator ) {
size_t read_amp_bytes_per_bit , bool is_index , Cache : : Priority priority ,
assert ( raw_block - > compression_type ( ) = = kNoCompression | |
GetContext * get_context , MemoryAllocator * allocator ) {
assert ( raw_block_comp_type = = kNoCompression | |
block_cache_compressed ! = nullptr ) ;
block_cache_compressed ! = nullptr ) ;
Status s ;
Status s ;
// Retrieve the uncompressed contents into a new buffer
// Retrieve the uncompressed contents into a new buffer
BlockContents contents ;
BlockContents uncompressed_block_ contents;
Statistics * statistics = ioptions . statistics ;
Statistics * statistics = ioptions . statistics ;
if ( raw_block - > compression_type ( ) ! = kNoCompression ) {
if ( raw_block_comp_type ! = kNoCompression ) {
UncompressionContext uncompression_ctx ( raw_block - > compression_type ( ) ,
UncompressionContext uncompression_ctx ( raw_block_comp_type ,
compression_dict ) ;
compression_dict ) ;
s = UncompressBlockContents ( uncompression_ctx , raw_block - > data ( ) ,
s = UncompressBlockContents (
raw_block - > size ( ) , & contents , format_version ,
uncompression_ctx , raw_block_contents - > data . data ( ) ,
ioptions , allocator ) ;
raw_block_contents - > data . size ( ) , & uncompressed_block_contents ,
format_version , ioptions , allocator ) ;
}
}
if ( ! s . ok ( ) ) {
if ( ! s . ok ( ) ) {
delete raw_block ;
return s ;
return s ;
}
}
if ( raw_block - > compression_type ( ) ! = kNoCompression ) {
if ( raw_block_comp_type ! = kNoCompression ) {
block - > value = new Block ( std : : move ( contents ) , raw_block - > global_seqno ( ) ,
cached_ block- > value = new Block ( std : : move ( uncompressed_block_contents ) ,
read_amp_bytes_per_bit ,
seq_no , read_amp_bytes_per_bit ,
statistics ) ; // uncompressed block
statistics ) ; // uncompressed block
} else {
} else {
block - > value = raw_block ;
cached_block - > value =
raw_block = nullptr ;
new Block ( std : : move ( * raw_block_contents ) , seq_no ,
read_amp_bytes_per_bit , ioptions . statistics ) ;
}
}
// Insert compressed block into compressed block cache.
// Insert compressed block into compressed block cache.
// Release the hold on the compressed cache entry immediately.
// Release the hold on the compressed cache entry immediately.
if ( block_cache_compressed ! = nullptr & & raw_block ! = nullptr & &
if ( block_cache_compressed ! = nullptr & &
raw_block - > cachable ( ) ) {
raw_block_comp_type ! = kNoCompression & & raw_block_contents ! = nullptr & &
s = block_cache_compressed - > Insert ( compressed_block_cache_key , raw_block ,
raw_block_contents - > own_bytes ( ) ) {
raw_block - > ApproximateMemoryUsage ( ) ,
# ifndef NDEBUG
& DeleteCachedEntry < Block > ) ;
assert ( raw_block_contents - > is_raw_block ) ;
# endif // NDEBUG
// We cannot directly put raw_block_contents because this could point to
// an object in the stack.
BlockContents * block_cont_for_comp_cache =
new BlockContents ( std : : move ( * raw_block_contents ) ) ;
s = block_cache_compressed - > Insert (
compressed_block_cache_key , block_cont_for_comp_cache ,
block_cont_for_comp_cache - > ApproximateMemoryUsage ( ) ,
& DeleteCachedEntry < BlockContents > ) ;
if ( s . ok ( ) ) {
if ( s . ok ( ) ) {
// Avoid the following code to delete this cached block.
// Avoid the following code to delete this cached block.
raw_block = nullptr ;
RecordTick ( statistics , BLOCK_CACHE_COMPRESSED_ADD ) ;
RecordTick ( statistics , BLOCK_CACHE_COMPRESSED_ADD ) ;
} else {
} else {
RecordTick ( statistics , BLOCK_CACHE_COMPRESSED_ADD_FAILURES ) ;
RecordTick ( statistics , BLOCK_CACHE_COMPRESSED_ADD_FAILURES ) ;
delete block_cont_for_comp_cache ;
}
}
}
}
delete raw_block ;
// insert into uncompressed block cache
// insert into uncompressed block cache
assert ( ( block - > value - > compression_type ( ) = = kNoCompression ) ) ;
if ( block_cache ! = nullptr & & cached_block - > value - > own_bytes ( ) ) {
if ( block_cache ! = nullptr & & block - > value - > cachable ( ) ) {
size_t charge = cached_ block - > value - > ApproximateMemoryUsage ( ) ;
size_t charge = block - > value - > ApproximateMemoryUsage ( ) ;
s = block_cache - > Insert ( block_cache_key , cached_block - > value , charge ,
s = block_cache - > Insert ( block_cache_key , block - > value , charge ,
& DeleteCachedEntry < Block > ,
& DeleteCachedEntry < Block > , & ( block - > cache_handle ) ,
& ( cached_ block- > cache_handle ) , priority ) ;
priority ) ;
# ifndef NDEBUG
block_cache - > TEST_mark_as_data_block ( block_cache_key , charge ) ;
block_cache - > TEST_mark_as_data_block ( block_cache_key , charge ) ;
# endif // NDEBUG
if ( s . ok ( ) ) {
if ( s . ok ( ) ) {
assert ( block - > cache_handle ! = nullptr ) ;
assert ( cached_ block- > cache_handle ! = nullptr ) ;
if ( get_context ! = nullptr ) {
if ( get_context ! = nullptr ) {
get_context - > get_context_stats_ . num_cache_add + + ;
get_context - > get_context_stats_ . num_cache_add + + ;
get_context - > get_context_stats_ . num_cache_bytes_write + = charge ;
get_context - > get_context_stats_ . num_cache_bytes_write + = charge ;
@ -1382,12 +1403,12 @@ Status BlockBasedTable::PutDataBlockToCache(
RecordTick ( statistics , BLOCK_CACHE_DATA_BYTES_INSERT , charge ) ;
RecordTick ( statistics , BLOCK_CACHE_DATA_BYTES_INSERT , charge ) ;
}
}
}
}
assert ( reinterpret_cast < Block * > (
assert ( reinterpret_cast < Block * > ( block_cache - > Value (
block_cache - > Value ( block - > cache_handle ) ) = = block - > value ) ;
cached_ block- > cache_handle ) ) = = cached_ block- > value ) ;
} else {
} else {
RecordTick ( statistics , BLOCK_CACHE_ADD_FAILURES ) ;
RecordTick ( statistics , BLOCK_CACHE_ADD_FAILURES ) ;
delete block - > value ;
delete cached_ block- > value ;
block - > value = nullptr ;
cached_ block- > value = nullptr ;
}
}
}
}
@ -1561,12 +1582,16 @@ InternalIteratorBase<BlockHandle>* BlockBasedTable::NewIndexIterator(
GetContext * get_context ) {
GetContext * get_context ) {
// index reader has already been pre-populated.
// index reader has already been pre-populated.
if ( rep_ - > index_reader ) {
if ( rep_ - > index_reader ) {
// We don't return pinned data from index blocks, so no need
// to set `block_contents_pinned`.
return rep_ - > index_reader - > NewIterator (
return rep_ - > index_reader - > NewIterator (
input_iter , read_options . total_order_seek | | disable_prefix_seek ,
input_iter , read_options . total_order_seek | | disable_prefix_seek ,
read_options . fill_cache ) ;
read_options . fill_cache ) ;
}
}
// we have a pinned index block
// we have a pinned index block
if ( rep_ - > index_entry . IsSet ( ) ) {
if ( rep_ - > index_entry . IsSet ( ) ) {
// We don't return pinned data from index blocks, so no need
// to set `block_contents_pinned`.
return rep_ - > index_entry . value - > NewIterator (
return rep_ - > index_entry . value - > NewIterator (
input_iter , read_options . total_order_seek | | disable_prefix_seek ,
input_iter , read_options . total_order_seek | | disable_prefix_seek ,
read_options . fill_cache ) ;
read_options . fill_cache ) ;
@ -1649,6 +1674,8 @@ InternalIteratorBase<BlockHandle>* BlockBasedTable::NewIndexIterator(
}
}
assert ( cache_handle ) ;
assert ( cache_handle ) ;
// We don't return pinned data from index blocks, so no need
// to set `block_contents_pinned`.
auto * iter = index_reader - > NewIterator (
auto * iter = index_reader - > NewIterator (
input_iter , read_options . total_order_seek | | disable_prefix_seek ) ;
input_iter , read_options . total_order_seek | | disable_prefix_seek ) ;
@ -1683,7 +1710,7 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
if ( rep - > compression_dict_block ) {
if ( rep - > compression_dict_block ) {
compression_dict = rep - > compression_dict_block - > data ;
compression_dict = rep - > compression_dict_block - > data ;
}
}
s = MaybeLoadDataBlock ToCache ( prefetch_buffer , rep , ro , handle ,
s = MaybeReadBlockAndLoad ToCache ( prefetch_buffer , rep , ro , handle ,
compression_dict , & block , is_index ,
compression_dict , & block , is_index ,
get_context ) ;
get_context ) ;
}
}
@ -1711,7 +1738,7 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
compression_dict , rep - > persistent_cache_options ,
compression_dict , rep - > persistent_cache_options ,
is_index ? kDisableGlobalSequenceNumber : rep - > global_seqno ,
is_index ? kDisableGlobalSequenceNumber : rep - > global_seqno ,
rep - > table_options . read_amp_bytes_per_bit ,
rep - > table_options . read_amp_bytes_per_bit ,
GetMemoryAllocator ( rep - > table_options ) , rep - > immortal_table ) ;
GetMemoryAllocator ( rep - > table_options ) ) ;
}
}
if ( s . ok ( ) ) {
if ( s . ok ( ) ) {
block . value = block_value . release ( ) ;
block . value = block_value . release ( ) ;
@ -1721,10 +1748,20 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
if ( s . ok ( ) ) {
if ( s . ok ( ) ) {
assert ( block . value ! = nullptr ) ;
assert ( block . value ! = nullptr ) ;
const bool kTotalOrderSeek = true ;
const bool kTotalOrderSeek = true ;
// Block contents are pinned, and remain pinned after the iterator
// is destroyed as long as cleanup functions are moved to another object,
// when:
// 1. block cache handle is set to be released in cleanup function, or
// 2. it's pointing to an immortal source. If own_bytes is true then we are
// not reading data from the original source, whether immortal or not.
// Otherwise, the block is pinned iff the source is immortal.
bool block_contents_pinned =
( block . cache_handle ! = nullptr | |
( ! block . value - > own_bytes ( ) & & rep - > immortal_table ) ) ;
iter = block . value - > NewIterator < TBlockIter > (
iter = block . value - > NewIterator < TBlockIter > (
& rep - > internal_comparator , rep - > internal_comparator . user_comparator ( ) ,
& rep - > internal_comparator , rep - > internal_comparator . user_comparator ( ) ,
iter , rep - > ioptions . statistics , kTotalOrderSeek , key_includes_seq ,
iter , rep - > ioptions . statistics , kTotalOrderSeek , key_includes_seq ,
index_key_is_full ) ;
index_key_is_full , block_contents_pinned ) ;
if ( block . cache_handle ! = nullptr ) {
if ( block . cache_handle ! = nullptr ) {
iter - > RegisterCleanup ( & ReleaseCachedEntry , block_cache ,
iter - > RegisterCleanup ( & ReleaseCachedEntry , block_cache ,
block . cache_handle ) ;
block . cache_handle ) ;
@ -1733,7 +1770,7 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
// insert a dummy record to block cache to track the memory usage
// insert a dummy record to block cache to track the memory usage
Cache : : Handle * cache_handle ;
Cache : : Handle * cache_handle ;
// There are two other types of cache keys: 1) SST cache key added in
// There are two other types of cache keys: 1) SST cache key added in
// `MaybeLoadDataBlock ToCache` 2) dummy cache key added in
// `MaybeReadBlockAndLoad ToCache` 2) dummy cache key added in
// `write_buffer_manager`. Use longer prefix (41 bytes) to differentiate
// `write_buffer_manager`. Use longer prefix (41 bytes) to differentiate
// from SST cache key(31 bytes), and use non-zero prefix to
// from SST cache key(31 bytes), and use non-zero prefix to
// differentiate from `write_buffer_manager`
// differentiate from `write_buffer_manager`
@ -1769,25 +1806,28 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
return iter ;
return iter ;
}
}
Status BlockBasedTable : : MaybeLoadDataBlock ToCache (
Status BlockBasedTable : : MaybeReadBlockAndLoad ToCache (
FilePrefetchBuffer * prefetch_buffer , Rep * rep , const ReadOptions & ro ,
FilePrefetchBuffer * prefetch_buffer , Rep * rep , const ReadOptions & ro ,
const BlockHandle & handle , Slice compression_dict ,
const BlockHandle & handle , Slice compression_dict ,
CachableEntry < Block > * block_entry , bool is_index , GetContext * get_context ) {
CachableEntry < Block > * block_entry , bool is_index , GetContext * get_context ) {
assert ( block_entry ! = nullptr ) ;
assert ( block_entry ! = nullptr ) ;
const bool no_io = ( ro . read_tier = = kBlockCacheTier ) ;
const bool no_io = ( ro . read_tier = = kBlockCacheTier ) ;
Cache * block_cache = rep - > table_options . block_cache . get ( ) ;
Cache * block_cache = rep - > table_options . block_cache . get ( ) ;
// No point to cache compressed blocks if it never goes away
Cache * block_cache_compressed =
Cache * block_cache_compressed =
rep - > table_options . block_cache_compressed . get ( ) ;
rep - > immortal_table ? nullptr
: rep - > table_options . block_cache_compressed . get ( ) ;
// First, try to get the block from the cache
//
// If either block cache is enabled, we'll try to read from it.
// If either block cache is enabled, we'll try to read from it.
Status s ;
Status s ;
if ( block_cache ! = nullptr | | block_cache_compressed ! = nullptr ) {
Statistics * statistics = rep - > ioptions . statistics ;
char cache_key [ kMaxCacheKeyPrefixSize + kMaxVarint64Length ] ;
char cache_key [ kMaxCacheKeyPrefixSize + kMaxVarint64Length ] ;
char compressed_cache_key [ kMaxCacheKeyPrefixSize + kMaxVarint64Length ] ;
char compressed_cache_key [ kMaxCacheKeyPrefixSize + kMaxVarint64Length ] ;
Slice key , /* key to the block cache */
Slice key /* key to the block cache */ ;
ckey /* key to the compressed block cache */ ;
Slice ckey /* key to the compressed block cache */ ;
if ( block_cache ! = nullptr | | block_cache_compressed ! = nullptr ) {
// create key for block cache
// create key for block cache
if ( block_cache ! = nullptr ) {
if ( block_cache ! = nullptr ) {
key = GetCacheKey ( rep - > cache_key_prefix , rep - > cache_key_prefix_size ,
key = GetCacheKey ( rep - > cache_key_prefix , rep - > cache_key_prefix_size ,
@ -1800,32 +1840,41 @@ Status BlockBasedTable::MaybeLoadDataBlockToCache(
compressed_cache_key ) ;
compressed_cache_key ) ;
}
}
s = GetDataBlockFromCache (
s = GetDataBlockFromCache ( key , ckey , block_cache , block_cache_compressed ,
key , ckey , block_cache , block_cache_compressed , rep - > ioptions , ro ,
rep , ro , block_entry , compression_dict ,
block_entry , rep - > table_options . format_version , compression_dict ,
rep - > table_options . read_amp_bytes_per_bit ,
rep - > table_options . read_amp_bytes_per_bit , is_index , get_context ,
is_index , get_context ) ;
GetMemoryAllocator ( rep - > table_options ) ) ;
// Can't find the block from the cache. If I/O is allowed, read from the
// file.
if ( block_entry - > value = = nullptr & & ! no_io & & ro . fill_cache ) {
if ( block_entry - > value = = nullptr & & ! no_io & & ro . fill_cache ) {
std : : unique_ptr < Block > raw_block ;
Statistics * statistics = rep - > ioptions . statistics ;
bool do_decompress =
block_cache_compressed = = nullptr & & rep - > blocks_maybe_compressed ;
CompressionType raw_block_comp_type ;
BlockContents raw_block_contents ;
{
{
StopWatch sw ( rep - > ioptions . env , statistics , READ_BLOCK_GET_MICROS ) ;
StopWatch sw ( rep - > ioptions . env , statistics , READ_BLOCK_GET_MICROS ) ;
s = ReadBlockFromFile (
BlockFetcher block_fetcher (
rep - > file . get ( ) , prefetch_buffer , rep - > footer , ro , handle ,
rep - > file . get ( ) , prefetch_buffer , rep - > footer , ro , handle ,
& raw_block , rep - > ioptions ,
& raw_block_contents , rep - > ioptions ,
block_cache_compressed = = nullptr & & rep - > blocks_maybe_compressed ,
do_decompress /* do uncompress */ , compression_dict ,
compression_dict , rep - > persistent_cache_options ,
rep - > persistent_cache_options ,
is_index ? kDisableGlobalSequenceNumber : rep - > global_seqno ,
GetMemoryAllocator ( rep - > table_options ) ) ;
rep - > table_options . read_amp_bytes_per_bit ,
s = block_fetcher . ReadBlockContents ( ) ;
GetMemoryAllocator ( rep - > table_options ) , rep - > immortal_table ) ;
raw_block_comp_type = block_fetcher . get_compression_type ( ) ;
}
}
if ( s . ok ( ) ) {
if ( s . ok ( ) ) {
SequenceNumber seq_no = rep - > get_global_seqno ( is_index ) ;
// If filling cache is allowed and a cache is configured, try to put the
// block to the cache.
s = PutDataBlockToCache (
s = PutDataBlockToCache (
key , ckey , block_cache , block_cache_compressed , ro , rep - > ioptions ,
key , ckey , block_cache , block_cache_compressed , ro , rep - > ioptions ,
block_entry , raw_block . release ( ) , rep - > table_options . format_version ,
block_entry , & raw_block_contents , raw_block_comp_type ,
compression_dict , rep - > table_options . read_amp_bytes_per_bit ,
rep - > table_options . format_version , compression_dict , seq_no ,
is_index ,
rep - > table_options . read_amp_bytes_per_bit , is_index ,
is_index & & rep - > table_options
is_index & & rep - > table_options
. cache_index_and_filter_blocks_with_high_priority
. cache_index_and_filter_blocks_with_high_priority
? Cache : : Priority : : HIGH
? Cache : : Priority : : HIGH
@ -1868,6 +1917,8 @@ BlockBasedTable::PartitionedIndexIteratorState::NewSecondaryIterator(
RecordTick ( rep - > ioptions . statistics , BLOCK_CACHE_BYTES_READ ,
RecordTick ( rep - > ioptions . statistics , BLOCK_CACHE_BYTES_READ ,
block_cache - > GetUsage ( block - > second . cache_handle ) ) ;
block_cache - > GetUsage ( block - > second . cache_handle ) ) ;
Statistics * kNullStats = nullptr ;
Statistics * kNullStats = nullptr ;
// We don't return pinned data from index blocks, so no need
// to set `block_contents_pinned`.
return block - > second . value - > NewIterator < IndexBlockIter > (
return block - > second . value - > NewIterator < IndexBlockIter > (
& rep - > internal_comparator , rep - > internal_comparator . user_comparator ( ) ,
& rep - > internal_comparator , rep - > internal_comparator . user_comparator ( ) ,
nullptr , kNullStats , true , index_key_includes_seq_ , index_key_is_full_ ) ;
nullptr , kNullStats , true , index_key_includes_seq_ , index_key_is_full_ ) ;
@ -2612,8 +2663,7 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options,
Status s ;
Status s ;
s = GetDataBlockFromCache (
s = GetDataBlockFromCache (
cache_key , ckey , block_cache , nullptr , rep_ - > ioptions , options , & block ,
cache_key , ckey , block_cache , nullptr , rep_ , options , & block ,
rep_ - > table_options . format_version ,
rep_ - > compression_dict_block ? rep_ - > compression_dict_block - > data
rep_ - > compression_dict_block ? rep_ - > compression_dict_block - > data
: Slice ( ) ,
: Slice ( ) ,
0 /* read_amp_bytes_per_bit */ ) ;
0 /* read_amp_bytes_per_bit */ ) ;