@ -371,7 +371,7 @@ struct BlockBasedTable::Rep {
filter_type ( FilterType : : kNoFilter ) ,
filter_type ( FilterType : : kNoFilter ) ,
whole_key_filtering ( _table_opt . whole_key_filtering ) ,
whole_key_filtering ( _table_opt . whole_key_filtering ) ,
prefix_filtering ( true ) ,
prefix_filtering ( true ) ,
range_del_block ( nullptr ) ,
range_del_handle ( BlockHandle : : NullBlockHandle ( ) ) ,
global_seqno ( kDisableGlobalSequenceNumber ) { }
global_seqno ( kDisableGlobalSequenceNumber ) { }
const ImmutableCFOptions & ioptions ;
const ImmutableCFOptions & ioptions ;
@ -430,7 +430,10 @@ struct BlockBasedTable::Rep {
// the LRU cache will never flush them out, hence they're pinned
// the LRU cache will never flush them out, hence they're pinned
CachableEntry < FilterBlockReader > filter_entry ;
CachableEntry < FilterBlockReader > filter_entry ;
CachableEntry < IndexReader > index_entry ;
CachableEntry < IndexReader > index_entry ;
unique_ptr < Block > range_del_block ;
// range deletion meta-block is pinned through reader's lifetime when LRU
// cache is enabled.
CachableEntry < Block > range_del_entry ;
BlockHandle range_del_handle ;
// If global_seqno is used, all Keys in this file will have the same
// If global_seqno is used, all Keys in this file will have the same
// seqno with value `global_seqno`.
// seqno with value `global_seqno`.
@ -702,29 +705,23 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
}
}
// Read the range del meta block
// Read the range del meta block
// TODO(wanning&andrewkr): cache range delete tombstone block
bool found_range_del_block ;
bool found_range_del_block ;
BlockHandle range_del_handle ;
s = SeekToRangeDelBlock ( meta_iter . get ( ) , & found_range_del_block ,
s = SeekToRangeDelBlock ( meta_iter . get ( ) , & found_range_del_block ,
& range_del_handle ) ;
& rep - > range_del_handle ) ;
if ( ! s . ok ( ) ) {
if ( ! s . ok ( ) ) {
Log ( InfoLogLevel : : WARN_LEVEL , rep - > ioptions . info_log ,
Log ( InfoLogLevel : : WARN_LEVEL , rep - > ioptions . info_log ,
" Error when seeking to range delete tombstones block from file: %s " ,
" Error when seeking to range delete tombstones block from file: %s " ,
s . ToString ( ) . c_str ( ) ) ;
s . ToString ( ) . c_str ( ) ) ;
} else {
} else {
if ( found_range_del_block & & ! range_del_handle . IsNull ( ) ) {
if ( found_range_del_block & & ! rep - > range_del_handle . IsNull ( ) ) {
BlockContents range_del_block_contents ;
ReadOptions read_options ;
ReadOptions read_options ;
s = ReadBlockContents ( rep - > file . get ( ) , rep - > footer , read_options ,
s = MaybeLoadDataBlockToCache ( rep , read_options , rep - > range_del_handle ,
range_del_handle , & range_del_block_contents ,
Slice ( ) /* compression_dict */ ,
rep - > ioptions , false /* decompressed */ ) ;
& rep - > range_del_entry ) ;
if ( ! s . ok ( ) ) {
if ( ! s . ok ( ) ) {
Log ( InfoLogLevel : : WARN_LEVEL , rep - > ioptions . info_log ,
Log ( InfoLogLevel : : WARN_LEVEL , rep - > ioptions . info_log ,
" Encountered error while reading data from range del block %s " ,
" Encountered error while reading data from range del block %s " ,
s . ToString ( ) . c_str ( ) ) ;
s . ToString ( ) . c_str ( ) ) ;
} else {
rep - > range_del_block . reset ( new Block (
std : : move ( range_del_block_contents ) , kDisableGlobalSequenceNumber ) ) ;
}
}
}
}
}
}
@ -1241,30 +1238,74 @@ InternalIterator* BlockBasedTable::NewDataBlockIterator(
const bool no_io = ( ro . read_tier = = kBlockCacheTier ) ;
const bool no_io = ( ro . read_tier = = kBlockCacheTier ) ;
Cache * block_cache = rep - > table_options . block_cache . get ( ) ;
Cache * block_cache = rep - > table_options . block_cache . get ( ) ;
Cache * block_cache_compressed =
rep - > table_options . block_cache_compressed . get ( ) ;
CachableEntry < Block > block ;
CachableEntry < Block > block ;
BlockHandle handle ;
BlockHandle handle ;
Slice input = index_value ;
Slice input = index_value ;
// We intentionally allow extra stuff in index_value so that we
// We intentionally allow extra stuff in index_value so that we
// can add more features in the future.
// can add more features in the future.
Status s = handle . DecodeFrom ( & input ) ;
Status s = handle . DecodeFrom ( & input ) ;
Slice compression_dict ;
if ( s . ok ( ) ) {
if ( rep - > compression_dict_block ) {
compression_dict = rep - > compression_dict_block - > data ;
}
s = MaybeLoadDataBlockToCache ( rep , ro , handle , compression_dict , & block ) ;
}
if ( ! s . ok ( ) ) {
// Didn't get any data from block caches.
if ( s . ok ( ) & & block . value = = nullptr ) {
if ( no_io ) {
// Could not read from block_cache and can't do IO
if ( input_iter ! = nullptr ) {
if ( input_iter ! = nullptr ) {
input_iter - > SetStatus ( s ) ;
input_iter - > SetStatus ( Status : : Incomplete ( " no blocking io " ) ) ;
return input_iter ;
return input_iter ;
} else {
} else {
return NewErrorInternalIterator ( s ) ;
return NewErrorInternalIterator ( Status : : Incomplete ( " no blocking io " ) ) ;
}
}
std : : unique_ptr < Block > block_value ;
s = ReadBlockFromFile (
rep - > file . get ( ) , rep - > footer , ro , handle , & block_value , rep - > ioptions ,
true /* compress */ , compression_dict , rep - > persistent_cache_options ,
rep - > global_seqno , rep - > table_options . read_amp_bytes_per_bit ) ;
if ( s . ok ( ) ) {
block . value = block_value . release ( ) ;
}
}
}
}
Slice compression_dict ;
InternalIterator * iter ;
if ( rep - > compression_dict_block ) {
if ( s . ok ( ) ) {
compression_dict = rep - > compression_dict_block - > data ;
assert ( block . value ! = nullptr ) ;
iter = block . value - > NewIterator ( & rep - > internal_comparator , input_iter , true ,
rep - > ioptions . statistics ) ;
if ( block . cache_handle ! = nullptr ) {
iter - > RegisterCleanup ( & ReleaseCachedEntry , block_cache ,
block . cache_handle ) ;
} else {
iter - > RegisterCleanup ( & DeleteHeldResource < Block > , block . value , nullptr ) ;
}
} else {
assert ( block . value = = nullptr ) ;
if ( input_iter ! = nullptr ) {
input_iter - > SetStatus ( s ) ;
iter = input_iter ;
} else {
iter = NewErrorInternalIterator ( s ) ;
}
}
return iter ;
}
}
Status BlockBasedTable : : MaybeLoadDataBlockToCache (
Rep * rep , const ReadOptions & ro , const BlockHandle & handle ,
Slice compression_dict , CachableEntry < Block > * block_entry ) {
const bool no_io = ( ro . read_tier = = kBlockCacheTier ) ;
Cache * block_cache = rep - > table_options . block_cache . get ( ) ;
Cache * block_cache_compressed =
rep - > table_options . block_cache_compressed . get ( ) ;
// If either block cache is enabled, we'll try to read from it.
// If either block cache is enabled, we'll try to read from it.
Status s ;
if ( block_cache ! = nullptr | | block_cache_compressed ! = nullptr ) {
if ( block_cache ! = nullptr | | block_cache_compressed ! = nullptr ) {
Statistics * statistics = rep - > ioptions . statistics ;
Statistics * statistics = rep - > ioptions . statistics ;
char cache_key [ kMaxCacheKeyPrefixSize + kMaxVarint64Length ] ;
char cache_key [ kMaxCacheKeyPrefixSize + kMaxVarint64Length ] ;
@ -1286,10 +1327,10 @@ InternalIterator* BlockBasedTable::NewDataBlockIterator(
s = GetDataBlockFromCache (
s = GetDataBlockFromCache (
key , ckey , block_cache , block_cache_compressed , rep - > ioptions , ro ,
key , ckey , block_cache , block_cache_compressed , rep - > ioptions , ro ,
& block , rep - > table_options . format_version , compression_dict ,
block_entry , rep - > table_options . format_version , compression_dict ,
rep - > table_options . read_amp_bytes_per_bit ) ;
rep - > table_options . read_amp_bytes_per_bit ) ;
if ( block . value = = nullptr & & ! no_io & & ro . fill_cache ) {
if ( block_entry - > value = = nullptr & & ! no_io & & ro . fill_cache ) {
std : : unique_ptr < Block > raw_block ;
std : : unique_ptr < Block > raw_block ;
{
{
StopWatch sw ( rep - > ioptions . env , statistics , READ_BLOCK_GET_MICROS ) ;
StopWatch sw ( rep - > ioptions . env , statistics , READ_BLOCK_GET_MICROS ) ;
@ -1303,54 +1344,12 @@ InternalIterator* BlockBasedTable::NewDataBlockIterator(
if ( s . ok ( ) ) {
if ( s . ok ( ) ) {
s = PutDataBlockToCache (
s = PutDataBlockToCache (
key , ckey , block_cache , block_cache_compressed , ro , rep - > ioptions ,
key , ckey , block_cache , block_cache_compressed , ro , rep - > ioptions ,
& block , raw_block . release ( ) , rep - > table_options . format_version ,
block_entry , raw_block . release ( ) , rep - > table_options . format_version ,
compression_dict , rep - > table_options . read_amp_bytes_per_bit ) ;
compression_dict , rep - > table_options . read_amp_bytes_per_bit ) ;
}
}
}
}
}
}
return s ;
// Didn't get any data from block caches.
if ( s . ok ( ) & & block . value = = nullptr ) {
if ( no_io ) {
// Could not read from block_cache and can't do IO
if ( input_iter ! = nullptr ) {
input_iter - > SetStatus ( Status : : Incomplete ( " no blocking io " ) ) ;
return input_iter ;
} else {
return NewErrorInternalIterator ( Status : : Incomplete ( " no blocking io " ) ) ;
}
}
std : : unique_ptr < Block > block_value ;
s = ReadBlockFromFile (
rep - > file . get ( ) , rep - > footer , ro , handle , & block_value , rep - > ioptions ,
true /* compress */ , compression_dict , rep - > persistent_cache_options ,
rep - > global_seqno , rep - > table_options . read_amp_bytes_per_bit ) ;
if ( s . ok ( ) ) {
block . value = block_value . release ( ) ;
}
}
InternalIterator * iter ;
if ( s . ok ( ) ) {
assert ( block . value ! = nullptr ) ;
iter = block . value - > NewIterator ( & rep - > internal_comparator , input_iter , true ,
rep - > ioptions . statistics ) ;
if ( block . cache_handle ! = nullptr ) {
iter - > RegisterCleanup ( & ReleaseCachedEntry , block_cache ,
block . cache_handle ) ;
} else {
iter - > RegisterCleanup ( & DeleteHeldResource < Block > , block . value , nullptr ) ;
}
} else {
assert ( block . value = = nullptr ) ;
if ( input_iter ! = nullptr ) {
input_iter - > SetStatus ( s ) ;
iter = input_iter ;
} else {
iter = NewErrorInternalIterator ( s ) ;
}
}
return iter ;
}
}
class BlockBasedTable : : BlockEntryIteratorState : public TwoLevelIteratorState {
class BlockBasedTable : : BlockEntryIteratorState : public TwoLevelIteratorState {
@ -1489,13 +1488,16 @@ InternalIterator* BlockBasedTable::NewIterator(const ReadOptions& read_options,
// NewRangeTombstoneIterator: builds an InternalIterator for the table's range
// deletion block, taking the caller's ReadOptions.
//
// NOTE(review): this span reads like a flattened diff hunk -- unchanged lines
// appear twice, and lines that use rep_->range_del_block are interleaved with
// lines that use rep_->range_del_handle. It is not compilable as written;
// confirm which variant is intended before relying on it.
InternalIterator * BlockBasedTable : : NewRangeTombstoneIterator (
InternalIterator * BlockBasedTable : : NewRangeTombstoneIterator (
const ReadOptions & read_options ) {
const ReadOptions & read_options ) {
// Variant A (range_del_block): when a Block object is held, iterate it
// directly with the table's internal comparator.
if ( rep_ - > range_del_block . get ( ) ! = nullptr ) {
// Variant B (range_del_handle): a null handle is checked here; presumably it
// is this check that leads to the empty iterator returned below -- TODO
// confirm against the un-flattened source.
if ( rep_ - > range_del_handle . IsNull ( ) ) {
auto iter =
rep_ - > range_del_block - > NewIterator ( & ( rep_ - > internal_comparator ) ) ;
return iter ;
}
return NewEmptyInternalIterator ( ) ;
return NewEmptyInternalIterator ( ) ;
}
}
// Encode the handle into a string so it can be passed as the Slice argument
// of NewDataBlockIterator.
std : : string str ;
rep_ - > range_del_handle . EncodeTo ( & str ) ;
// Even though range_del_entry already references the meta-block when block
// cache is enabled, we still call the below function to get another reference
// since the caller may need the iterator beyond this table reader's lifetime.
return NewDataBlockIterator ( rep_ , read_options , Slice ( str ) ) ;
}
bool BlockBasedTable : : FullFilterKeyMayMatch ( const ReadOptions & read_options ,
bool BlockBasedTable : : FullFilterKeyMayMatch ( const ReadOptions & read_options ,
FilterBlockReader * filter ,
FilterBlockReader * filter ,
@ -1968,6 +1970,7 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
void BlockBasedTable : : Close ( ) {
void BlockBasedTable : : Close ( ) {
rep_ - > filter_entry . Release ( rep_ - > table_options . block_cache . get ( ) ) ;
rep_ - > filter_entry . Release ( rep_ - > table_options . block_cache . get ( ) ) ;
rep_ - > index_entry . Release ( rep_ - > table_options . block_cache . get ( ) ) ;
rep_ - > index_entry . Release ( rep_ - > table_options . block_cache . get ( ) ) ;
rep_ - > range_del_entry . Release ( rep_ - > table_options . block_cache . get ( ) ) ;
// cleanup index and filter blocks to avoid accessing dangling pointer
// cleanup index and filter blocks to avoid accessing dangling pointer
if ( ! rep_ - > table_options . no_block_cache ) {
if ( ! rep_ - > table_options . no_block_cache ) {
char cache_key [ kMaxCacheKeyPrefixSize + kMaxVarint64Length ] ;
char cache_key [ kMaxCacheKeyPrefixSize + kMaxVarint64Length ] ;