@ -168,8 +168,8 @@ class Block {
BlockBasedTableOptions : : DataBlockIndexType IndexType ( ) const ;
BlockBasedTableOptions : : DataBlockIndexType IndexType ( ) const ;
// If comparator is InternalKeyComparator, user_comparator is its user
// ucmp is a raw (i.e., not wrapped by `UserComparatorWrapper`) user key
// comparator; they are equal otherwise .
// comparator.
//
//
// If iter is null, return new Iterator
// If iter is null, return new Iterator
// If iter is not null, update this one and return it as Iterator*
// If iter is not null, update this one and return it as Iterator*
@ -187,13 +187,15 @@ class Block {
// NOTE: for the hash based lookup, if a key prefix doesn't match any key,
// NOTE: for the hash based lookup, if a key prefix doesn't match any key,
// the iterator will simply be set as "invalid", rather than returning
// the iterator will simply be set as "invalid", rather than returning
// the key that is just past the target key.
// the key that is just past the target key.
DataBlockIter * NewDataIterator ( const Comparator * comparator ,
DataBlockIter * NewDataIterator ( const Comparator * ucmp ,
const Comparator * user_comparator ,
SequenceNumber global_seqno ,
SequenceNumber global_seqno ,
DataBlockIter * iter = nullptr ,
DataBlockIter * iter = nullptr ,
Statistics * stats = nullptr ,
Statistics * stats = nullptr ,
bool block_contents_pinned = false ) ;
bool block_contents_pinned = false ) ;
// ucmp is a raw (i.e., not wrapped by `UserComparatorWrapper`) user key
// comparator.
//
// key_includes_seq, default true, means that the keys are in internal key
// key_includes_seq, default true, means that the keys are in internal key
// format.
// format.
// value_is_full, default true, means that no delta encoding is
// value_is_full, default true, means that no delta encoding is
@ -206,8 +208,7 @@ class Block {
// first_internal_key. It affects data serialization format, so the same value
// first_internal_key. It affects data serialization format, so the same value
// have_first_key must be used when writing and reading index.
// have_first_key must be used when writing and reading index.
// It is determined by IndexType property of the table.
// It is determined by IndexType property of the table.
IndexBlockIter * NewIndexIterator ( const Comparator * comparator ,
IndexBlockIter * NewIndexIterator ( const Comparator * ucmp ,
const Comparator * user_comparator ,
SequenceNumber global_seqno ,
SequenceNumber global_seqno ,
IndexBlockIter * iter , Statistics * stats ,
IndexBlockIter * iter , Statistics * stats ,
bool total_order_seek , bool have_first_key ,
bool total_order_seek , bool have_first_key ,
@ -228,61 +229,36 @@ class Block {
DataBlockHashIndex data_block_hash_index_ ;
DataBlockHashIndex data_block_hash_index_ ;
} ;
} ;
// A GlobalSeqnoAppliedKey exposes a key with global sequence number applied
// A `BlockIter` iterates over the entries in a `Block`'s data buffer. The
// if configured with `global_seqno != kDisableGlobalSequenceNumber`. It may
// format of this data buffer is an uncompressed, sorted sequence of key-value
// hold a user key or an internal key since `format_version>=3` index blocks
// pairs (see `Block` API for more details).
// contain user keys. In case it holds user keys, it must be configured with
//
// `global_seqno == kDisableGlobalSequenceNumber`.
// Notably, the keys may either be in internal key format or user key format.
class GlobalSeqnoAppliedKey {
// Subclasses are responsible for configuring the key format.
public :
//
void Initialize ( IterKey * key , SequenceNumber global_seqno ) {
// `BlockIter` intends to provide final overrides for all of
key_ = key ;
// `InternalIteratorBase` functions that can move the iterator. It does
global_seqno_ = global_seqno ;
// this to guarantee `UpdateKey()` is called exactly once after each key
# ifndef NDEBUG
// movement potentially visible to users. In this step, the key is prepared
init_ = true ;
// (e.g., serialized if global seqno is in effect) so it can be returned
# endif // NDEBUG
// immediately when the user asks for it via calling `key() const`.
}
//
// For its subclasses, it provides protected variants of the above-mentioned
Slice UpdateAndGetKey ( ) {
// final-overridden methods. They are named with the "Impl" suffix, e.g.,
assert ( init_ ) ;
// `Seek()` logic would be implemented by subclasses in `SeekImpl()`. These
if ( global_seqno_ = = kDisableGlobalSequenceNumber ) {
// "Impl" functions are responsible for positioning `raw_key_` but not
return key_ - > GetKey ( ) ;
// invoking `UpdateKey()`.
}
ParsedInternalKey parsed ( Slice ( ) , 0 , kTypeValue ) ;
if ( ! ParseInternalKey ( key_ - > GetInternalKey ( ) , & parsed ) ) {
assert ( false ) ; // error not handled in optimized builds
return Slice ( ) ;
}
parsed . sequence = global_seqno_ ;
scratch_ . SetInternalKey ( parsed ) ;
return scratch_ . GetInternalKey ( ) ;
}
bool IsKeyPinned ( ) const {
return global_seqno_ = = kDisableGlobalSequenceNumber & & key_ - > IsKeyPinned ( ) ;
}
private :
const IterKey * key_ ;
SequenceNumber global_seqno_ ;
IterKey scratch_ ;
# ifndef NDEBUG
bool init_ = false ;
# endif // NDEBUG
} ;
template < class TValue >
template < class TValue >
class BlockIter : public InternalIteratorBase < TValue > {
class BlockIter : public InternalIteratorBase < TValue > {
public :
public :
void InitializeBase ( const Comparator * co mparator , const char * data ,
void InitializeBase ( const Comparator * ucmp , const char * data ,
uint32_t restarts , uint32_t num_restarts ,
uint32_t restarts , uint32_t num_restarts ,
SequenceNumber global_seqno , bool block_contents_pinned ) {
SequenceNumber global_seqno , bool block_contents_pinned ) {
assert ( data_ = = nullptr ) ; // Ensure it is called only once
assert ( data_ = = nullptr ) ; // Ensure it is called only once
assert ( num_restarts > 0 ) ; // Ensure the param is valid
assert ( num_restarts > 0 ) ; // Ensure the param is valid
applied_key_ . Initialize ( & raw_key_ , global_seqno ) ;
ucmp_wrapper_ = UserComparatorWrapper ( ucmp ) ;
icmp_ = InternalKeyComparator ( ucmp , false /* named */ ) ;
comparator_ = comparator ;
data_ = data ;
data_ = data ;
restarts_ = restarts ;
restarts_ = restarts ;
num_restarts_ = num_restarts ;
num_restarts_ = num_restarts ;
@ -309,6 +285,43 @@ class BlockIter : public InternalIteratorBase<TValue> {
}
}
bool Valid ( ) const override { return current_ < restarts_ ; }
bool Valid ( ) const override { return current_ < restarts_ ; }
// Final override: subclasses cannot replace this; they supply the positioning
// logic through `SeekToFirstImpl()` instead.
virtual void SeekToFirst ( ) override final {
// Position the iterator first, then refresh the user-visible key state.
SeekToFirstImpl ( ) ;
UpdateKey ( ) ;
}
// Final override: subclasses cannot replace this; they supply the positioning
// logic through `SeekToLastImpl()` instead.
virtual void SeekToLast ( ) override final {
// Position the iterator first, then refresh the user-visible key state.
SeekToLastImpl ( ) ;
UpdateKey ( ) ;
}
// Final override: forwards `target` to the subclass-provided `SeekImpl()` and
// then calls `UpdateKey()`.
virtual void Seek ( const Slice & target ) override final {
// Position the iterator first, then refresh the user-visible key state.
SeekImpl ( target ) ;
UpdateKey ( ) ;
}
// Final override: forwards `target` to the subclass-provided
// `SeekForPrevImpl()` and then calls `UpdateKey()`.
virtual void SeekForPrev ( const Slice & target ) override final {
// Position the iterator first, then refresh the user-visible key state.
SeekForPrevImpl ( target ) ;
UpdateKey ( ) ;
}
// Final override: runs the subclass-provided `NextImpl()` and then calls
// `UpdateKey()`.
virtual void Next ( ) override final {
// Position the iterator first, then refresh the user-visible key state.
NextImpl ( ) ;
UpdateKey ( ) ;
}
// Final override that delegates unchanged to
// `InternalIteratorBase<TValue>::NextAndGetResult()` and returns its result.
virtual bool NextAndGetResult ( IterateResult * result ) override final {
// This does not need to call `UpdateKey()` as the parent class only has
// access to the `UpdateKey()`-invoking functions.
return InternalIteratorBase < TValue > : : NextAndGetResult ( result ) ;
}
// Final override: runs the subclass-provided `PrevImpl()` and then calls
// `UpdateKey()`.
virtual void Prev ( ) override final {
// Position the iterator first, then refresh the user-visible key state.
PrevImpl ( ) ;
UpdateKey ( ) ;
}
Status status ( ) const override { return status_ ; }
Status status ( ) const override { return status_ ; }
Slice key ( ) const override {
Slice key ( ) const override {
assert ( Valid ( ) ) ;
assert ( Valid ( ) ) ;
@ -343,12 +356,10 @@ class BlockIter : public InternalIteratorBase<TValue> {
Cache : : Handle * cache_handle ( ) { return cache_handle_ ; }
Cache : : Handle * cache_handle ( ) { return cache_handle_ ; }
virtual void Next ( ) override = 0 ;
protected :
protected :
// Note: The type could be changed to InternalKeyComparator but we see a weird
UserComparatorWrapper ucmp_wrapper_ ;
// performance drop by that.
InternalKeyComparator icmp_ ;
const Comparator * comparator_ ;
const char * data_ ; // underlying block contents
const char * data_ ; // underlying block contents
uint32_t num_restarts_ ; // Number of uint32_t entries in restart array
uint32_t num_restarts_ ; // Number of uint32_t entries in restart array
@ -359,13 +370,12 @@ class BlockIter : public InternalIteratorBase<TValue> {
uint32_t current_ ;
uint32_t current_ ;
// Raw key from block.
// Raw key from block.
IterKey raw_key_ ;
IterKey raw_key_ ;
// raw_key_ with global seqno applied if necessary. Use this one for
// Buffer for key data when global seqno assignment is enabled.
// comparisons.
IterKey key_buf_ ;
GlobalSeqnoAppliedKey applied_key_ ;
// Key to be exposed to users.
Slice key_ ;
Slice value_ ;
Slice value_ ;
Status status_ ;
Status status_ ;
// Key to be exposed to users.
Slice key_ ;
bool key_pinned_ ;
bool key_pinned_ ;
// Whether the block data is guaranteed to outlive this iterator, and
// Whether the block data is guaranteed to outlive this iterator, and
// as long as the cleanup functions are transferred to another class,
// as long as the cleanup functions are transferred to another class,
@ -373,6 +383,50 @@ class BlockIter : public InternalIteratorBase<TValue> {
bool block_contents_pinned_ ;
bool block_contents_pinned_ ;
SequenceNumber global_seqno_ ;
SequenceNumber global_seqno_ ;
// Pure-virtual positioning hooks that subclasses must implement. Each one is
// invoked by the same-named (minus the "Impl" suffix) final override in this
// class, which calls `UpdateKey()` right after the hook returns.
virtual void SeekToFirstImpl ( ) = 0 ;
virtual void SeekToLastImpl ( ) = 0 ;
virtual void SeekImpl ( const Slice & target ) = 0 ;
virtual void SeekForPrevImpl ( const Slice & target ) = 0 ;
virtual void NextImpl ( ) = 0 ;
virtual void PrevImpl ( ) = 0 ;
// Refreshes the key state exposed to users: `key_`, `key_buf_` and
// `key_pinned_`. Must run after every movement that lands on a key which may
// be returned to the user; calling it when the iterator is not valid is
// allowed and only clears `key_buf_`.
//
// - When `raw_key_` holds a user key, or no global seqno is configured,
//   `key_` is taken straight from `raw_key_` and `key_pinned_` mirrors
//   `raw_key_.IsKeyPinned()`.
// - Otherwise the key is rebuilt in `key_buf_` using `global_seqno_`, `key_`
//   is taken from `key_buf_`, and `key_pinned_` is set to false.
void UpdateKey() {
  key_buf_.Clear();
  if (!Valid()) {
    return;
  }

  const bool holds_user_key = raw_key_.IsUserKey();

  // Internal key with a global seqno in effect: rebuild it in `key_buf_`.
  if (!holds_user_key && global_seqno_ != kDisableGlobalSequenceNumber) {
    key_buf_.SetInternalKey(raw_key_.GetUserKey(), global_seqno_,
                            ExtractValueType(raw_key_.GetInternalKey()));
    key_ = key_buf_.GetInternalKey();
    key_pinned_ = false;
    return;
  }

  // User keys must never be combined with a global seqno.
  assert(!holds_user_key || global_seqno_ == kDisableGlobalSequenceNumber);
  key_ = holds_user_key ? raw_key_.GetUserKey() : raw_key_.GetInternalKey();
  key_pinned_ = raw_key_.IsKeyPinned();
}
// Returns the result of `Comparator::Compare()`, where the appropriate
// comparator is used for the block contents, the LHS argument is the current
// key with global seqno applied, and the RHS argument is `other`.
int CompareCurrentKey ( const Slice & other ) {
if ( raw_key_ . IsUserKey ( ) ) {
assert ( global_seqno_ = = kDisableGlobalSequenceNumber ) ;
return ucmp_wrapper_ . Compare ( raw_key_ . GetUserKey ( ) , other ) ;
} else if ( global_seqno_ = = kDisableGlobalSequenceNumber ) {
return icmp_ . Compare ( raw_key_ . GetInternalKey ( ) , other ) ;
}
return icmp_ . Compare ( raw_key_ . GetInternalKey ( ) , global_seqno_ , other ,
kDisableGlobalSequenceNumber ) ;
}
private :
private :
// Store the cache handle, if the block is cached. We need this since the
// Store the cache handle, if the block is cached. We need this since the
// only other place the handle is stored is as an argument to the Cleanable
// only other place the handle is stored is as an argument to the Cleanable
@ -408,37 +462,31 @@ class BlockIter : public InternalIteratorBase<TValue> {
protected :
protected :
template < typename DecodeKeyFunc >
template < typename DecodeKeyFunc >
inline bool BinarySeek ( const Slice & target , uint32_t left , uint32_t right ,
inline bool BinarySeek ( const Slice & target , uint32_t left , uint32_t right ,
uint32_t * index , bool * is_index_key_result ,
uint32_t * index , bool * is_index_key_result ) ;
const Comparator * comp ) ;
void FindKeyAfterBinarySeek ( const Slice & target , uint32_t index ,
void FindKeyAfterBinarySeek ( const Slice & target , uint32_t index ,
bool is_index_key_result , const Comparator * comp ) ;
bool is_index_key_result ) ;
} ;
} ;
class DataBlockIter final : public BlockIter < Slice > {
class DataBlockIter final : public BlockIter < Slice > {
public :
public :
DataBlockIter ( )
DataBlockIter ( )
: BlockIter ( ) , read_amp_bitmap_ ( nullptr ) , last_bitmap_offset_ ( 0 ) { }
: BlockIter ( ) , read_amp_bitmap_ ( nullptr ) , last_bitmap_offset_ ( 0 ) { }
DataBlockIter ( const Comparator * comparator , const Comparator * user_comparator ,
DataBlockIter ( const Comparator * ucmp , const char * data , uint32_t restarts ,
const char * data , uint32_t restarts , uint32_t num_restarts ,
uint32_t num_restarts , SequenceNumber global_seqno ,
SequenceNumber global_seqno ,
BlockReadAmpBitmap * read_amp_bitmap , bool block_contents_pinned ,
BlockReadAmpBitmap * read_amp_bitmap , bool block_contents_pinned ,
DataBlockHashIndex * data_block_hash_index )
DataBlockHashIndex * data_block_hash_index )
: DataBlockIter ( ) {
: DataBlockIter ( ) {
Initialize ( comparator , user_comparator , data , restarts , num_restarts ,
Initialize ( ucmp , data , restarts , num_restarts , global_seqno ,
global_seqno , read_amp_bitmap , block_contents_pinned ,
read_amp_bitmap , block_contents_pinned , data_block_hash_index ) ;
data_block_hash_index ) ;
}
}
void Initialize ( const Comparator * ucmp , const char * data , uint32_t restarts ,
void Initialize ( const Comparator * comparator ,
uint32_t num_restarts , SequenceNumber global_seqno ,
const Comparator * user_comparator , const char * data ,
uint32_t restarts , uint32_t num_restarts ,
SequenceNumber global_seqno ,
BlockReadAmpBitmap * read_amp_bitmap ,
BlockReadAmpBitmap * read_amp_bitmap ,
bool block_contents_pinned ,
bool block_contents_pinned ,
DataBlockHashIndex * data_block_hash_index ) {
DataBlockHashIndex * data_block_hash_index ) {
InitializeBase ( co mparator , data , restarts , num_restarts , global_seqno ,
InitializeBase ( u cmp, data , restarts , num_restarts , global_seqno ,
block_contents_pinned ) ;
block_contents_pinned ) ;
user_comparator_ = user_comparator ;
raw_key_ . SetIsUserKey ( false ) ;
raw_key_ . SetIsUserKey ( false ) ;
read_amp_bitmap_ = read_amp_bitmap ;
read_amp_bitmap_ = read_amp_bitmap ;
last_bitmap_offset_ = current_ + 1 ;
last_bitmap_offset_ = current_ + 1 ;
@ -456,36 +504,32 @@ class DataBlockIter final : public BlockIter<Slice> {
return value_ ;
return value_ ;
}
}
void Seek ( const Slice & target ) override ;
inline bool SeekForGet ( const Slice & target ) {
inline bool SeekForGet ( const Slice & target ) {
if ( ! data_block_hash_index_ ) {
if ( ! data_block_hash_index_ ) {
Seek ( target ) ;
SeekImpl ( target ) ;
UpdateKey ( ) ;
return true ;
return true ;
}
}
bool res = SeekForGetImpl ( target ) ;
return SeekForGetImpl ( target ) ;
UpdateKey ( ) ;
return res ;
}
}
void SeekForPrev ( const Slice & target ) override ;
void Prev ( ) override ;
void Next ( ) final override ;
// Try to advance to the next entry in the block. If there is data corruption
// Try to advance to the next entry in the block. If there is data corruption
// or error, report it to the caller instead of aborting the process. May
// or error, report it to the caller instead of aborting the process. May
// incur higher CPU overhead because we need to perform check on every entry.
// incur higher CPU overhead because we need to perform check on every entry.
void NextOrReport ( ) ;
void NextOrReport ( ) {
NextOrReportImpl ( ) ;
void SeekToFirst ( ) override ;
UpdateKey ( ) ;
}
// Try to seek to the first entry in the block. If there is data corruption
// Try to seek to the first entry in the block. If there is data corruption
// or error, report it to caller instead of aborting the process. May incur
// or error, report it to caller instead of aborting the process. May incur
// higher CPU overhead because we need to perform check on every entry.
// higher CPU overhead because we need to perform check on every entry.
void SeekToFirstOrReport ( ) ;
void SeekToFirstOrReport ( ) {
SeekToFirstOrReportImpl ( ) ;
void SeekToLast ( ) override ;
UpdateKey ( ) ;
}
void Invalidate ( Status s ) {
void Invalidate ( Status s ) {
InvalidateBase ( s ) ;
InvalidateBase ( s ) ;
@ -495,6 +539,14 @@ class DataBlockIter final : public BlockIter<Slice> {
prev_entries_idx_ = - 1 ;
prev_entries_idx_ = - 1 ;
}
}
protected :
// Overrides of the `*Impl` positioning hooks declared by `BlockIter`. Only the
// declarations appear here; the definitions are not visible in this section.
virtual void SeekToFirstImpl ( ) override ;
virtual void SeekToLastImpl ( ) override ;
virtual void SeekImpl ( const Slice & target ) override ;
virtual void SeekForPrevImpl ( const Slice & target ) override ;
virtual void NextImpl ( ) override ;
virtual void PrevImpl ( ) override ;
private :
private :
// read-amp bitmap
// read-amp bitmap
BlockReadAmpBitmap * read_amp_bitmap_ ;
BlockReadAmpBitmap * read_amp_bitmap_ ;
@ -525,12 +577,13 @@ class DataBlockIter final : public BlockIter<Slice> {
int32_t prev_entries_idx_ = - 1 ;
int32_t prev_entries_idx_ = - 1 ;
DataBlockHashIndex * data_block_hash_index_ ;
DataBlockHashIndex * data_block_hash_index_ ;
const Comparator * user_comparator_ ;
template < typename DecodeEntryFunc >
template < typename DecodeEntryFunc >
inline bool ParseNextDataKey ( const char * limit = nullptr ) ;
inline bool ParseNextDataKey ( const char * limit = nullptr ) ;
bool SeekForGetImpl ( const Slice & target ) ;
bool SeekForGetImpl ( const Slice & target ) ;
void NextOrReportImpl ( ) ;
void SeekToFirstOrReportImpl ( ) ;
} ;
} ;
class IndexBlockIter final : public BlockIter < IndexValue > {
class IndexBlockIter final : public BlockIter < IndexValue > {
@ -541,22 +594,14 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
// format.
// format.
// value_is_full, default true, means that no delta encoding is
// value_is_full, default true, means that no delta encoding is
// applied to values.
// applied to values.
void Initialize ( const Comparator * comparator ,
void Initialize ( const Comparator * ucmp , const char * data , uint32_t restarts ,
const Comparator * user_comparator , const char * data ,
uint32_t num_restarts , SequenceNumber global_seqno ,
uint32_t restarts , uint32_t num_restarts ,
BlockPrefixIndex * prefix_index , bool have_first_key ,
SequenceNumber global_seqno , BlockPrefixIndex * prefix_index ,
bool key_includes_seq , bool value_is_full ,
bool have_first_key , bool key_includes_seq ,
bool block_contents_pinned ) {
bool value_is_full , bool block_contents_pinned ) {
InitializeBase ( ucmp , data , restarts , num_restarts ,
if ( ! key_includes_seq ) {
kDisableGlobalSequenceNumber , block_contents_pinned ) ;
user_comparator_wrapper_ = std : : unique_ptr < UserComparatorWrapper > (
raw_key_ . SetIsUserKey ( ! key_includes_seq ) ;
new UserComparatorWrapper ( user_comparator ) ) ;
}
InitializeBase (
key_includes_seq ? comparator : user_comparator_wrapper_ . get ( ) , data ,
restarts , num_restarts , kDisableGlobalSequenceNumber ,
block_contents_pinned ) ;
key_includes_seq_ = key_includes_seq ;
raw_key_ . SetIsUserKey ( ! key_includes_seq_ ) ;
prefix_index_ = prefix_index ;
prefix_index_ = prefix_index ;
value_delta_encoded_ = ! value_is_full ;
value_delta_encoded_ = ! value_is_full ;
have_first_key_ = have_first_key ;
have_first_key_ = have_first_key ;
@ -568,10 +613,8 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
}
}
Slice user_key ( ) const override {
Slice user_key ( ) const override {
if ( key_includes_seq_ ) {
assert ( Valid ( ) ) ;
return ExtractUserKey ( key ( ) ) ;
return raw_key_ . GetUserKey ( ) ;
}
return key ( ) ;
}
}
IndexValue value ( ) const override {
IndexValue value ( ) const override {
@ -588,6 +631,13 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
}
}
}
}
void Invalidate ( Status s ) { InvalidateBase ( s ) ; }
bool IsValuePinned ( ) const override {
return global_seqno_state_ ! = nullptr ? false : BlockIter : : IsValuePinned ( ) ;
}
protected :
// IndexBlockIter follows a different contract for prefix iterator
// IndexBlockIter follows a different contract for prefix iterator
// from data iterators.
// from data iterators.
// If prefix of the seek key `target` exists in the file, it must
// If prefix of the seek key `target` exists in the file, it must
@ -595,9 +645,9 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
// If the prefix of `target` doesn't exist in the file, it can either
// If the prefix of `target` doesn't exist in the file, it can either
// return the result of total order seek, or set both of Valid() = false
// return the result of total order seek, or set both of Valid() = false
// and status() = NotFound().
// and status() = NotFound().
void Seek ( const Slice & target ) override ;
void SeekImpl ( const Slice & target ) override ;
void SeekForPrev ( const Slice & ) override {
void SeekForPrevImpl ( const Slice & ) override {
assert ( false ) ;
assert ( false ) ;
current_ = restarts_ ;
current_ = restarts_ ;
restart_index_ = num_restarts_ ;
restart_index_ = num_restarts_ ;
@ -608,24 +658,15 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
value_ . clear ( ) ;
value_ . clear ( ) ;
}
}
void Prev ( ) override ;
void PrevImpl ( ) override ;
void Next ( ) override ;
void NextImpl ( ) override ;
void SeekToFirst ( ) override ;
void SeekToFirstImpl ( ) override ;
void SeekToLast ( ) override ;
void SeekToLastImpl ( ) override ;
void Invalidate ( Status s ) { InvalidateBase ( s ) ; }
bool IsValuePinned ( ) const override {
return global_seqno_state_ ! = nullptr ? false : BlockIter : : IsValuePinned ( ) ;
}
private :
private :
std : : unique_ptr < UserComparatorWrapper > user_comparator_wrapper_ ;
// Key is in InternalKey format
bool key_includes_seq_ ;
bool value_delta_encoded_ ;
bool value_delta_encoded_ ;
bool have_first_key_ ; // value includes first_internal_key
bool have_first_key_ ; // value includes first_internal_key
BlockPrefixIndex * prefix_index_ ;
BlockPrefixIndex * prefix_index_ ;