@ -126,10 +126,24 @@ struct DecodeKeyV4 {
}
}
} ;
} ;
void DataBlockIter : : NextImpl ( ) { ParseNextDataKey < DecodeEntry > ( ) ; }
struct DecodeEntryV4 {
inline const char * operator ( ) ( const char * p , const char * limit ,
uint32_t * shared , uint32_t * non_shared ,
uint32_t * value_length ) {
assert ( value_length ) ;
* value_length = 0 ;
return DecodeKeyV4 ( ) ( p , limit , shared , non_shared ) ;
}
} ;
void DataBlockIter : : NextImpl ( ) {
bool is_shared = false ;
ParseNextDataKey ( & is_shared ) ;
}
void DataBlockIter : : NextOrReportImpl ( ) {
void MetaBlockIter : : NextImpl ( ) {
ParseNextDataKey < CheckAndDecodeEntry > ( ) ;
bool is_shared = false ;
ParseNextKey < CheckAndDecodeEntry > ( & is_shared ) ;
}
}
void IndexBlockIter : : NextImpl ( ) { ParseNextIndexKey ( ) ; }
void IndexBlockIter : : NextImpl ( ) { ParseNextIndexKey ( ) ; }
@ -153,6 +167,27 @@ void IndexBlockIter::PrevImpl() {
}
}
}
}
void MetaBlockIter : : PrevImpl ( ) {
assert ( Valid ( ) ) ;
// Scan backwards to a restart point before current_
const uint32_t original = current_ ;
while ( GetRestartPoint ( restart_index_ ) > = original ) {
if ( restart_index_ = = 0 ) {
// No more entries
current_ = restarts_ ;
restart_index_ = num_restarts_ ;
return ;
}
restart_index_ - - ;
}
SeekToRestartPoint ( restart_index_ ) ;
bool is_shared = false ;
// Loop until end of current entry hits the start of original entry
while ( ParseNextKey < CheckAndDecodeEntry > ( & is_shared ) & &
NextEntryOffset ( ) < original ) {
}
}
// Similar to IndexBlockIter::PrevImpl but also caches the prev entries
// Similar to IndexBlockIter::PrevImpl but also caches the prev entries
void DataBlockIter : : PrevImpl ( ) {
void DataBlockIter : : PrevImpl ( ) {
assert ( Valid ( ) ) ;
assert ( Valid ( ) ) ;
@ -212,7 +247,8 @@ void DataBlockIter::PrevImpl() {
SeekToRestartPoint ( restart_index_ ) ;
SeekToRestartPoint ( restart_index_ ) ;
do {
do {
if ( ! ParseNextDataKey < DecodeEntry > ( ) ) {
bool is_shared = false ;
if ( ! ParseNextDataKey ( & is_shared ) ) {
break ;
break ;
}
}
Slice current_key = raw_key_ . GetKey ( ) ;
Slice current_key = raw_key_ . GetKey ( ) ;
@ -250,6 +286,22 @@ void DataBlockIter::SeekImpl(const Slice& target) {
FindKeyAfterBinarySeek ( seek_key , index , skip_linear_scan ) ;
FindKeyAfterBinarySeek ( seek_key , index , skip_linear_scan ) ;
}
}
void MetaBlockIter : : SeekImpl ( const Slice & target ) {
Slice seek_key = target ;
PERF_TIMER_GUARD ( block_seek_nanos ) ;
if ( data_ = = nullptr ) { // Not init yet
return ;
}
uint32_t index = 0 ;
bool skip_linear_scan = false ;
bool ok = BinarySeek < DecodeKey > ( seek_key , & index , & skip_linear_scan ) ;
if ( ! ok ) {
return ;
}
FindKeyAfterBinarySeek ( seek_key , index , skip_linear_scan ) ;
}
// Optimized Seek for point lookup for an internal key `target`
// Optimized Seek for point lookup for an internal key `target`
// target = "seek_user_key @ type | seqno".
// target = "seek_user_key @ type | seqno".
//
//
@ -309,23 +361,21 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) {
// check if the key is in the restart_interval
// check if the key is in the restart_interval
assert ( restart_index < num_restarts_ ) ;
assert ( restart_index < num_restarts_ ) ;
SeekToRestartPoint ( restart_index ) ;
SeekToRestartPoint ( restart_index ) ;
current_ = GetRestartPoint ( restart_index ) ;
const char * limit = nullptr ;
uint32_t limit = restarts_ ;
if ( restart_index_ + 1 < num_restarts_ ) {
if ( restart_index + 1 < num_restarts_ ) {
limit = data_ + GetRestartPoint ( restart_index_ + 1 ) ;
limit = GetRestartPoint ( restart_index + 1 ) ;
} else {
limit = data_ + restarts_ ;
}
}
while ( current_ < limit ) {
while ( true ) {
bool shared ;
// Here we only linear seek the target key inside the restart interval.
// Here we only linear seek the target key inside the restart interval.
// If a key does not exist inside a restart interval, we avoid
// If a key does not exist inside a restart interval, we avoid
// further searching the block content across restart interval boundary.
// further searching the block content across restart interval boundary.
//
//
// TODO(fwu): check the left and right boundary of the restart interval
// TODO(fwu): check the left and right boundary of the restart interval
// to avoid linear seek a target key that is out of range.
// to avoid linear seek a target key that is out of range.
if ( ! ParseNextDataKey < DecodeEntry > ( limit ) | |
if ( ! ParseNextDataKey ( & shared ) | | CompareCurrentKey ( target ) > = 0 ) {
CompareCurrentKey ( target ) > = 0 ) {
// we stop at the first potential matching user key.
// we stop at the first potential matching user key.
break ;
break ;
}
}
@ -336,7 +386,7 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) {
// 1) there is only one user_key match in the block (otherwise collision).
// 1) there is only one user_key match in the block (otherwise collision).
// the matching user_key resides in the last restart interval, and it
// the matching user_key resides in the last restart interval, and it
// is the last key of the restart interval and of the block as well.
// is the last key of the restart interval and of the block as well.
// ParseNextDataKey() skipped it as its [ type | seqno ] is smaller.
// ParseNextKey() skipped it as its [ type | seqno ] is smaller.
//
//
// 2) The seek_key is not found in the HashIndex Lookup(), i.e. kNoEntry,
// 2) The seek_key is not found in the HashIndex Lookup(), i.e. kNoEntry,
// AND all existing user_keys in the restart interval are smaller than
// AND all existing user_keys in the restart interval are smaller than
@ -432,20 +482,46 @@ void DataBlockIter::SeekForPrevImpl(const Slice& target) {
}
}
}
}
void MetaBlockIter : : SeekForPrevImpl ( const Slice & target ) {
PERF_TIMER_GUARD ( block_seek_nanos ) ;
Slice seek_key = target ;
if ( data_ = = nullptr ) { // Not init yet
return ;
}
uint32_t index = 0 ;
bool skip_linear_scan = false ;
bool ok = BinarySeek < DecodeKey > ( seek_key , & index , & skip_linear_scan ) ;
if ( ! ok ) {
return ;
}
FindKeyAfterBinarySeek ( seek_key , index , skip_linear_scan ) ;
if ( ! Valid ( ) ) {
SeekToLastImpl ( ) ;
} else {
while ( Valid ( ) & & CompareCurrentKey ( seek_key ) > 0 ) {
PrevImpl ( ) ;
}
}
}
void DataBlockIter : : SeekToFirstImpl ( ) {
void DataBlockIter : : SeekToFirstImpl ( ) {
if ( data_ = = nullptr ) { // Not init yet
if ( data_ = = nullptr ) { // Not init yet
return ;
return ;
}
}
SeekToRestartPoint ( 0 ) ;
SeekToRestartPoint ( 0 ) ;
ParseNextDataKey < DecodeEntry > ( ) ;
bool is_shared = false ;
ParseNextDataKey ( & is_shared ) ;
}
}
void DataBlockIter : : SeekToFirstOrReportImpl ( ) {
void Me taBlockIter: : SeekToFirstImpl ( ) {
if ( data_ = = nullptr ) { // Not init yet
if ( data_ = = nullptr ) { // Not init yet
return ;
return ;
}
}
SeekToRestartPoint ( 0 ) ;
SeekToRestartPoint ( 0 ) ;
ParseNextDataKey < CheckAndDecodeEntry > ( ) ;
bool is_shared = false ;
ParseNextKey < CheckAndDecodeEntry > ( & is_shared ) ;
}
}
void IndexBlockIter : : SeekToFirstImpl ( ) {
void IndexBlockIter : : SeekToFirstImpl ( ) {
@ -462,7 +538,20 @@ void DataBlockIter::SeekToLastImpl() {
return ;
return ;
}
}
SeekToRestartPoint ( num_restarts_ - 1 ) ;
SeekToRestartPoint ( num_restarts_ - 1 ) ;
while ( ParseNextDataKey < DecodeEntry > ( ) & & NextEntryOffset ( ) < restarts_ ) {
bool is_shared = false ;
while ( ParseNextDataKey ( & is_shared ) & & NextEntryOffset ( ) < restarts_ ) {
// Keep skipping
}
}
void MetaBlockIter : : SeekToLastImpl ( ) {
if ( data_ = = nullptr ) { // Not init yet
return ;
}
SeekToRestartPoint ( num_restarts_ - 1 ) ;
bool is_shared = false ;
while ( ParseNextKey < CheckAndDecodeEntry > ( & is_shared ) & &
NextEntryOffset ( ) < restarts_ ) {
// Keep skipping
// Keep skipping
}
}
}
}
@ -487,13 +576,12 @@ void BlockIter<TValue>::CorruptionError() {
value_ . clear ( ) ;
value_ . clear ( ) ;
}
}
template < class TValue >
template < typename DecodeEntryFunc >
template < typename DecodeEntryFunc >
bool DataBlockIter : : ParseNextDataKey ( const char * limit ) {
bool BlockIter < TValue > : : ParseNextKey ( bool * is_shared ) {
current_ = NextEntryOffset ( ) ;
current_ = NextEntryOffset ( ) ;
const char * p = data_ + current_ ;
const char * p = data_ + current_ ;
if ( ! limit ) {
const char * limit = data_ + restarts_ ; // Restarts come right after data
limit = data_ + restarts_ ; // Restarts come right after data
}
if ( p > = limit ) {
if ( p > = limit ) {
// No more entries to return. Mark as invalid.
// No more entries to return. Mark as invalid.
@ -501,7 +589,6 @@ bool DataBlockIter::ParseNextDataKey(const char* limit) {
restart_index_ = num_restarts_ ;
restart_index_ = num_restarts_ ;
return false ;
return false ;
}
}
// Decode next entry
// Decode next entry
uint32_t shared , non_shared , value_length ;
uint32_t shared , non_shared , value_length ;
p = DecodeEntryFunc ( ) ( p , limit , & shared , & non_shared , & value_length ) ;
p = DecodeEntryFunc ( ) ( p , limit , & shared , & non_shared , & value_length ) ;
@ -510,14 +597,30 @@ bool DataBlockIter::ParseNextDataKey(const char* limit) {
return false ;
return false ;
} else {
} else {
if ( shared = = 0 ) {
if ( shared = = 0 ) {
* is_shared = false ;
// If this key doesn't share any bytes with prev key then we don't need
// If this key doesn't share any bytes with prev key then we don't need
// to decode it and can use its address in the block directly.
// to decode it and can use its address in the block directly.
raw_key_ . SetKey ( Slice ( p , non_shared ) , false /* copy */ ) ;
raw_key_ . SetKey ( Slice ( p , non_shared ) , false /* copy */ ) ;
} else {
} else {
// This key share `shared` bytes with prev key, we need to decode it
// This key share `shared` bytes with prev key, we need to decode it
* is_shared = true ;
raw_key_ . TrimAppend ( shared , p , non_shared ) ;
raw_key_ . TrimAppend ( shared , p , non_shared ) ;
}
}
value_ = Slice ( p + non_shared , value_length ) ;
if ( shared = = 0 ) {
while ( restart_index_ + 1 < num_restarts_ & &
GetRestartPoint ( restart_index_ + 1 ) < current_ ) {
+ + restart_index_ ;
}
}
// else we are in the middle of a restart interval and the restart_index_
// thus has not changed
return true ;
}
}
bool DataBlockIter : : ParseNextDataKey ( bool * is_shared ) {
if ( ParseNextKey < DecodeEntry > ( is_shared ) ) {
# ifndef NDEBUG
# ifndef NDEBUG
if ( global_seqno_ ! = kDisableGlobalSequenceNumber ) {
if ( global_seqno_ ! = kDisableGlobalSequenceNumber ) {
// If we are reading a file with a global sequence number we should
// If we are reading a file with a global sequence number we should
@ -536,64 +639,22 @@ bool DataBlockIter::ParseNextDataKey(const char* limit) {
assert ( seqno = = 0 ) ;
assert ( seqno = = 0 ) ;
}
}
# endif // NDEBUG
# endif // NDEBUG
value_ = Slice ( p + non_shared , value_length ) ;
if ( shared = = 0 ) {
while ( restart_index_ + 1 < num_restarts_ & &
GetRestartPoint ( restart_index_ + 1 ) < current_ ) {
+ + restart_index_ ;
}
}
// else we are in the middle of a restart interval and the restart_index_
// thus has not changed
return true ;
return true ;
} else {
return false ;
}
}
}
}
bool IndexBlockIter : : ParseNextIndexKey ( ) {
bool IndexBlockIter : : ParseNextIndexKey ( ) {
current_ = NextEntryOffset ( ) ;
bool is_shared = false ;
const char * p = data_ + current_ ;
bool ok = ( value_delta_encoded_ ) ? ParseNextKey < DecodeEntryV4 > ( & is_shared )
const char * limit = data_ + restarts_ ; // Restarts come right after data
: ParseNextKey < DecodeEntry > ( & is_shared ) ;
if ( p > = limit ) {
if ( ok ) {
// No more entries to return. Mark as invalid.
current_ = restarts_ ;
restart_index_ = num_restarts_ ;
return false ;
}
// Decode next entry
uint32_t shared , non_shared , value_length ;
if ( value_delta_encoded_ ) {
p = DecodeKeyV4 ( ) ( p , limit , & shared , & non_shared ) ;
value_length = 0 ;
} else {
p = DecodeEntry ( ) ( p , limit , & shared , & non_shared , & value_length ) ;
}
if ( p = = nullptr | | raw_key_ . Size ( ) < shared ) {
CorruptionError ( ) ;
return false ;
}
if ( shared = = 0 ) {
// If this key doesn't share any bytes with prev key then we don't need
// to decode it and can use its address in the block directly.
raw_key_ . SetKey ( Slice ( p , non_shared ) , false /* copy */ ) ;
} else {
// This key share `shared` bytes with prev key, we need to decode it
raw_key_ . TrimAppend ( shared , p , non_shared ) ;
}
value_ = Slice ( p + non_shared , value_length ) ;
if ( shared = = 0 ) {
while ( restart_index_ + 1 < num_restarts_ & &
GetRestartPoint ( restart_index_ + 1 ) < current_ ) {
+ + restart_index_ ;
}
}
// else we are in the middle of a restart interval and the restart_index_
// thus has not changed
if ( value_delta_encoded_ | | global_seqno_state_ ! = nullptr ) {
if ( value_delta_encoded_ | | global_seqno_state_ ! = nullptr ) {
DecodeCurrentValue ( shared ) ;
DecodeCurrentValue ( is_shared ) ;
}
}
return true ;
}
return ok ;
}
}
// The format:
// The format:
@ -604,15 +665,15 @@ bool IndexBlockIter::ParseNextIndexKey() {
// where, k is key, v is value, and its encoding is in parenthesis.
// where, k is key, v is value, and its encoding is in parenthesis.
// The format of each key is (shared_size, non_shared_size, shared, non_shared)
// The format of each key is (shared_size, non_shared_size, shared, non_shared)
// The format of each value, i.e., block handle, is (offset, size) whenever the
// The format of each value, i.e., block handle, is (offset, size) whenever the
// shared_size is 0, which includes the first entry in each restart point.
// is_shared is false, which includes the first entry in each restart point.
// Otherwise the format is delta-size = block handle size - size of last block
// Otherwise the format is delta-size = block handle size - size of last block
// handle.
// handle.
void IndexBlockIter : : DecodeCurrentValue ( uint32_t shared ) {
void IndexBlockIter : : DecodeCurrentValue ( bool is_ shared) {
Slice v ( value_ . data ( ) , data_ + restarts_ - value_ . data ( ) ) ;
Slice v ( value_ . data ( ) , data_ + restarts_ - value_ . data ( ) ) ;
// Delta encoding is used if `shared` != 0.
// Delta encoding is used only if value_delta_encoded_ is set and `is_shared` is true.
Status decode_s __attribute__ ( ( __unused__ ) ) = decoded_value_ . DecodeFrom (
Status decode_s __attribute__ ( ( __unused__ ) ) = decoded_value_ . DecodeFrom (
& v , have_first_key_ ,
& v , have_first_key_ ,
( value_delta_encoded_ & & shared ) ? & decoded_value_ . handle : nullptr ) ;
( value_delta_encoded_ & & is_ shared) ? & decoded_value_ . handle : nullptr ) ;
assert ( decode_s . ok ( ) ) ;
assert ( decode_s . ok ( ) ) ;
value_ = Slice ( value_ . data ( ) , v . data ( ) - value_ . data ( ) ) ;
value_ = Slice ( value_ . data ( ) , v . data ( ) - value_ . data ( ) ) ;
@ -970,6 +1031,21 @@ Block::Block(BlockContents&& contents, size_t read_amp_bytes_per_bit,
}
}
}
}
// Allocates a MetaBlockIter with `new` and returns it; nothing in this
// function releases it.
//
// The returned iterator is in one of three states:
//   * invalidated with a Corruption status when size_ is smaller than two
//     uint32_t values,
//   * invalidated with an OK status when the block has no restart points
//     (empty block),
//   * otherwise initialized over this block's data_, restart_offset_ and
//     num_restarts_, with `block_contents_pinned` passed through.
MetaBlockIter* Block::NewMetaIterator(bool block_contents_pinned) {
  MetaBlockIter* const meta_iter = new MetaBlockIter();

  const bool too_small = size_ < 2 * sizeof(uint32_t);
  if (too_small) {
    meta_iter->Invalidate(Status::Corruption(" bad block contents "));
  } else if (num_restarts_ == 0) {
    // Empty block.
    meta_iter->Invalidate(Status::OK());
  } else {
    meta_iter->Initialize(data_, restart_offset_, num_restarts_,
                          block_contents_pinned);
  }
  return meta_iter;
}
DataBlockIter * Block : : NewDataIterator ( const Comparator * raw_ucmp ,
DataBlockIter * Block : : NewDataIterator ( const Comparator * raw_ucmp ,
SequenceNumber global_seqno ,
SequenceNumber global_seqno ,
DataBlockIter * iter , Statistics * stats ,
DataBlockIter * iter , Statistics * stats ,