@ -99,13 +99,13 @@ PlainTableReader::PlainTableReader(const ImmutableCFOptions& ioptions,
: internal_comparator_ ( icomparator ) ,
: internal_comparator_ ( icomparator ) ,
encoding_type_ ( encoding_type ) ,
encoding_type_ ( encoding_type ) ,
full_scan_mode_ ( false ) ,
full_scan_mode_ ( false ) ,
data_end_offset_ ( static_cast < uint32_t > ( table_properties - > data_size ) ) ,
user_key_len_ ( static_cast < uint32_t > ( table_properties - > fixed_key_len ) ) ,
user_key_len_ ( static_cast < uint32_t > ( table_properties - > fixed_key_len ) ) ,
prefix_extractor_ ( ioptions . prefix_extractor ) ,
prefix_extractor_ ( ioptions . prefix_extractor ) ,
enable_bloom_ ( false ) ,
enable_bloom_ ( false ) ,
bloom_ ( 6 , nullptr ) ,
bloom_ ( 6 , nullptr ) ,
file_info_ ( std : : move ( file ) , storage_options ,
static_cast < uint32_t > ( table_properties - > data_size ) ) ,
ioptions_ ( ioptions ) ,
ioptions_ ( ioptions ) ,
file_ ( std : : move ( file ) ) ,
file_size_ ( file_size ) ,
file_size_ ( file_size ) ,
table_properties_ ( nullptr ) { }
table_properties_ ( nullptr ) { }
@ -121,7 +121,6 @@ Status PlainTableReader::Open(const ImmutableCFOptions& ioptions,
const int bloom_bits_per_key ,
const int bloom_bits_per_key ,
double hash_table_ratio , size_t index_sparseness ,
double hash_table_ratio , size_t index_sparseness ,
size_t huge_page_tlb_size , bool full_scan_mode ) {
size_t huge_page_tlb_size , bool full_scan_mode ) {
assert ( ioptions . allow_mmap_reads ) ;
if ( file_size > PlainTableIndex : : kMaxFileSize ) {
if ( file_size > PlainTableIndex : : kMaxFileSize ) {
return Status : : NotSupported ( " File is too large for PlainTableReader! " ) ;
return Status : : NotSupported ( " File is too large for PlainTableReader! " ) ;
}
}
@ -163,7 +162,7 @@ Status PlainTableReader::Open(const ImmutableCFOptions& ioptions,
ioptions , std : : move ( file ) , env_options , internal_comparator ,
ioptions , std : : move ( file ) , env_options , internal_comparator ,
encoding_type , file_size , props ) ) ;
encoding_type , file_size , props ) ) ;
s = new_reader - > MmapDataFile ( ) ;
s = new_reader - > MmapDataIfNeeded ( ) ;
if ( ! s . ok ( ) ) {
if ( ! s . ok ( ) ) {
return s ;
return s ;
}
}
@ -204,13 +203,14 @@ Iterator* PlainTableReader::NewIterator(const ReadOptions& options,
Status PlainTableReader : : PopulateIndexRecordList (
Status PlainTableReader : : PopulateIndexRecordList (
PlainTableIndexBuilder * index_builder , vector < uint32_t > * prefix_hashes ) {
PlainTableIndexBuilder * index_builder , vector < uint32_t > * prefix_hashes ) {
Slice prev_key_prefix_slice ;
Slice prev_key_prefix_slice ;
std : : string prev_key_prefix_buf ;
uint32_t pos = data_start_offset_ ;
uint32_t pos = data_start_offset_ ;
bool is_first_record = true ;
bool is_first_record = true ;
Slice key_prefix_slice ;
Slice key_prefix_slice ;
PlainTableKeyDecoder decoder ( encoding_type_ , user_key_len_ ,
PlainTableKeyDecoder decoder ( & file_info_ , encoding_type_ , user_key_len_ ,
ioptions_ . prefix_extractor ) ;
ioptions_ . prefix_extractor ) ;
while ( pos < data_end_offset_ ) {
while ( pos < file_info_ . data_end_offset ) {
uint32_t key_offset = pos ;
uint32_t key_offset = pos ;
ParsedInternalKey key ;
ParsedInternalKey key ;
Slice value_slice ;
Slice value_slice ;
@ -228,7 +228,12 @@ Status PlainTableReader::PopulateIndexRecordList(
if ( ! is_first_record ) {
if ( ! is_first_record ) {
prefix_hashes - > push_back ( GetSliceHash ( prev_key_prefix_slice ) ) ;
prefix_hashes - > push_back ( GetSliceHash ( prev_key_prefix_slice ) ) ;
}
}
if ( file_info_ . is_mmap_mode ) {
prev_key_prefix_slice = key_prefix_slice ;
prev_key_prefix_slice = key_prefix_slice ;
} else {
prev_key_prefix_buf = key_prefix_slice . ToString ( ) ;
prev_key_prefix_slice = prev_key_prefix_buf ;
}
}
}
}
}
@ -268,9 +273,12 @@ void PlainTableReader::FillBloom(vector<uint32_t>* prefix_hashes) {
}
}
}
}
Status PlainTableReader : : MmapDataFile ( ) {
Status PlainTableReader : : MmapDataIfNeeded ( ) {
// Get mmapped memory to file_data_.
if ( file_info_ . is_mmap_mode ) {
return file_ - > Read ( 0 , file_size_ , & file_data_ , nullptr ) ;
// Get mmapped memory.
return file_info_ . file - > Read ( 0 , file_size_ , & file_info_ . file_data , nullptr ) ;
}
return Status : : OK ( ) ;
}
}
Status PlainTableReader : : PopulateIndex ( TableProperties * props ,
Status PlainTableReader : : PopulateIndex ( TableProperties * props ,
@ -282,31 +290,37 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
table_properties_ . reset ( props ) ;
table_properties_ . reset ( props ) ;
BlockContents bloom_block_contents ;
BlockContents bloom_block_contents ;
auto s = ReadMetaBlock ( file_ . get ( ) , file_size_ , kPlainTableMagicNumber ,
auto s = ReadMetaBlock ( file_info_ . file . get ( ) , file_size_ ,
ioptions_ . env , BloomBlockBuilder : : kBloomBlock ,
kPlainTableMagicNumber , ioptions_ . env ,
& bloom_block_contents ) ;
BloomBlockBuilder : : kBloomBlock , & bloom_block_contents ) ;
bool index_in_file = s . ok ( ) ;
bool index_in_file = s . ok ( ) ;
BlockContents index_block_contents ;
BlockContents index_block_contents ;
s = ReadMetaBlock ( file_ . get ( ) , file_size_ , kPlainTableMagicNumber ,
s = ReadMetaBlock (
ioptions_ . env , PlainTableIndexBuilder : : kPlainTableIndexBlock ,
file_info_ . file . get ( ) , file_size_ , kPlainTableMagicNumber , ioptions_ . env ,
& index_block_contents ) ;
PlainTableIndexBuilder : : kPlainTableIndexBlock , & index_block_contents ) ;
index_in_file & = s . ok ( ) ;
index_in_file & = s . ok ( ) ;
Slice * bloom_block ;
Slice * bloom_block ;
if ( index_in_file ) {
if ( index_in_file ) {
// If bloom_block_contents.allocation is not empty (which will be the case
// for non-mmap mode), it holds the alloated memory for the bloom block.
// It needs to be kept alive to keep `bloom_block` valid.
bloom_block_alloc_ = std : : move ( bloom_block_contents . allocation ) ;
bloom_block = & bloom_block_contents . data ;
bloom_block = & bloom_block_contents . data ;
} else {
} else {
bloom_block = nullptr ;
bloom_block = nullptr ;
}
}
// index_in_file == true only if there are kBloomBlock and
// index_in_file == true only if there are kBloomBlock and
// kPlainTableIndexBlock
// kPlainTableIndexBlock in file
// in file
Slice * index_block ;
Slice * index_block ;
if ( index_in_file ) {
if ( index_in_file ) {
// If index_block_contents.allocation is not empty (which will be the case
// for non-mmap mode), it holds the alloated memory for the index block.
// It needs to be kept alive to keep `index_block` valid.
index_block_alloc_ = std : : move ( index_block_contents . allocation ) ;
index_block = & index_block_contents . data ;
index_block = & index_block_contents . data ;
} else {
} else {
index_block = nullptr ;
index_block = nullptr ;
@ -401,7 +415,7 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix,
uint32_t prefix_index_offset ;
uint32_t prefix_index_offset ;
auto res = index_ . GetOffset ( prefix_hash , & prefix_index_offset ) ;
auto res = index_ . GetOffset ( prefix_hash , & prefix_index_offset ) ;
if ( res = = PlainTableIndex : : kNoPrefixForBucket ) {
if ( res = = PlainTableIndex : : kNoPrefixForBucket ) {
* offset = data_end_offset_ ;
* offset = file_info_ . data_end_offset ;
return Status : : OK ( ) ;
return Status : : OK ( ) ;
} else if ( res = = PlainTableIndex : : kDirectToFile ) {
} else if ( res = = PlainTableIndex : : kDirectToFile ) {
* offset = prefix_index_offset ;
* offset = prefix_index_offset ;
@ -420,16 +434,15 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix,
return Status : : Corruption ( Slice ( ) ) ;
return Status : : Corruption ( Slice ( ) ) ;
}
}
PlainTableKeyDecoder decoder ( & file_info_ , encoding_type_ , user_key_len_ ,
ioptions_ . prefix_extractor ) ;
// The key is between [low, high). Do a binary search between it.
// The key is between [low, high). Do a binary search between it.
while ( high - low > 1 ) {
while ( high - low > 1 ) {
uint32_t mid = ( high + low ) / 2 ;
uint32_t mid = ( high + low ) / 2 ;
uint32_t file_offset = GetFixed32Element ( base_ptr , mid ) ;
uint32_t file_offset = GetFixed32Element ( base_ptr , mid ) ;
size_t tmp ;
uint32_t tmp ;
Status s = PlainTableKeyDecoder ( encoding_type_ , user_key_len_ ,
Status s = decoder . NextKeyNoValue ( file_offset , & mid_key , nullptr , & tmp ) ;
ioptions_ . prefix_extractor )
. NextKey ( file_data_ . data ( ) + file_offset ,
file_data_ . data ( ) + data_end_offset_ , & mid_key ,
nullptr , & tmp ) ;
if ( ! s . ok ( ) ) {
if ( ! s . ok ( ) ) {
return s ;
return s ;
}
}
@ -452,13 +465,9 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix,
// prefix as target. We need to rule out one of them to avoid to go
// prefix as target. We need to rule out one of them to avoid to go
// to the wrong prefix.
// to the wrong prefix.
ParsedInternalKey low_key ;
ParsedInternalKey low_key ;
size _t tmp ;
uint32 _t tmp ;
uint32_t low_key_offset = GetFixed32Element ( base_ptr , low ) ;
uint32_t low_key_offset = GetFixed32Element ( base_ptr , low ) ;
Status s = PlainTableKeyDecoder ( encoding_type_ , user_key_len_ ,
Status s = decoder . NextKeyNoValue ( low_key_offset , & low_key , nullptr , & tmp ) ;
ioptions_ . prefix_extractor )
. NextKey ( file_data_ . data ( ) + low_key_offset ,
file_data_ . data ( ) + data_end_offset_ , & low_key ,
nullptr , & tmp ) ;
if ( ! s . ok ( ) ) {
if ( ! s . ok ( ) ) {
return s ;
return s ;
}
}
@ -473,7 +482,7 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix,
} else {
} else {
// target is larger than a key of the last prefix in this bucket
// target is larger than a key of the last prefix in this bucket
// but with a different prefix. Key does not exist.
// but with a different prefix. Key does not exist.
* offset = data_end_offset_ ;
* offset = file_info_ . data_end_offset ;
}
}
return Status : : OK ( ) ;
return Status : : OK ( ) ;
}
}
@ -482,41 +491,26 @@ bool PlainTableReader::MatchBloom(uint32_t hash) const {
return ! enable_bloom_ | | bloom_ . MayContainHash ( hash ) ;
return ! enable_bloom_ | | bloom_ . MayContainHash ( hash ) ;
}
}
Status PlainTableReader : : Next ( PlainTableKeyDecoder * decoder , uint32_t * offset ,
Status PlainTableReader : : Next ( PlainTableKeyDecoder * decoder , uint32_t * offset ,
ParsedInternalKey * parsed_key ,
ParsedInternalKey * parsed_key ,
Slice * internal_key , Slice * value ,
Slice * internal_key , Slice * value ,
bool * seekable ) const {
bool * seekable ) const {
if ( * offset = = data_end_offset_ ) {
if ( * offset = = file_info_ . data_end_offset ) {
* offset = data_end_offset_ ;
* offset = file_info_ . data_end_offset ;
return Status : : OK ( ) ;
return Status : : OK ( ) ;
}
}
if ( * offset > data_end_offset_ ) {
if ( * offset > file_info_ . data_end_offset ) {
return Status : : Corruption ( " Offset is out of file size " ) ;
return Status : : Corruption ( " Offset is out of file size " ) ;
}
}
const char * start = file_data_ . data ( ) + * offset ;
uint32_t bytes_read ;
size_t bytes_for_key ;
Status s = decoder - > NextKey ( * offset , parsed_key , internal_key , value ,
Status s =
& bytes_read , seekable ) ;
decoder - > NextKey ( start , file_data_ . data ( ) + data_end_offset_ , parsed_key ,
internal_key , & bytes_for_key , seekable ) ;
if ( ! s . ok ( ) ) {
if ( ! s . ok ( ) ) {
return s ;
return s ;
}
}
uint32_t value_size ;
* offset = * offset + bytes_read ;
const char * value_ptr = GetVarint32Ptr (
start + bytes_for_key , file_data_ . data ( ) + data_end_offset_ , & value_size ) ;
if ( value_ptr = = nullptr ) {
return Status : : Corruption (
" Unexpected EOF when reading the next value's size. " ) ;
}
* offset = * offset + static_cast < uint32_t > ( value_ptr - start ) + value_size ;
if ( * offset > data_end_offset_ ) {
return Status : : Corruption ( " Unexpected EOF when reading the next value. " ) ;
}
* value = Slice ( value_ptr , value_size ) ;
return Status : : OK ( ) ;
return Status : : OK ( ) ;
}
}
@ -556,6 +550,7 @@ Status PlainTableReader::Get(const ReadOptions& ro, const Slice& target,
bool prefix_match ;
bool prefix_match ;
Status s =
Status s =
GetOffset ( target , prefix_slice , prefix_hash , prefix_match , & offset ) ;
GetOffset ( target , prefix_slice , prefix_hash , prefix_match , & offset ) ;
if ( ! s . ok ( ) ) {
if ( ! s . ok ( ) ) {
return s ;
return s ;
}
}
@ -565,9 +560,9 @@ Status PlainTableReader::Get(const ReadOptions& ro, const Slice& target,
return Status : : Corruption ( Slice ( ) ) ;
return Status : : Corruption ( Slice ( ) ) ;
}
}
Slice found_value ;
Slice found_value ;
PlainTableKeyDecoder decoder ( encoding_type_ , user_key_len_ ,
PlainTableKeyDecoder decoder ( & file_info_ , encoding_type_ , user_key_len_ ,
ioptions_ . prefix_extractor ) ;
ioptions_ . prefix_extractor ) ;
while ( offset < data_end_offset_ ) {
while ( offset < file_info_ . data_end_offset ) {
s = Next ( & decoder , & offset , & found_key , nullptr , & found_value ) ;
s = Next ( & decoder , & offset , & found_key , nullptr , & found_value ) ;
if ( ! s . ok ( ) ) {
if ( ! s . ok ( ) ) {
return s ;
return s ;
@ -598,24 +593,24 @@ uint64_t PlainTableReader::ApproximateOffsetOf(const Slice& key) {
PlainTableIterator : : PlainTableIterator ( PlainTableReader * table ,
PlainTableIterator : : PlainTableIterator ( PlainTableReader * table ,
bool use_prefix_seek )
bool use_prefix_seek )
: table_ ( table ) ,
: table_ ( table ) ,
decoder_ ( table_ - > encoding_type _, table_ - > user_key_len _,
decoder_ ( & table_ - > file_info _, table_ - > encoding_type _,
table_ - > prefix_extractor_ ) ,
table_ - > user_key_len_ , table_ - > prefix_extractor_ ) ,
use_prefix_seek_ ( use_prefix_seek ) {
use_prefix_seek_ ( use_prefix_seek ) {
next_offset_ = offset_ = table_ - > data_end_offset_ ;
next_offset_ = offset_ = table_ - > file_info_ . data_end_offset ;
}
}
PlainTableIterator : : ~ PlainTableIterator ( ) {
PlainTableIterator : : ~ PlainTableIterator ( ) {
}
}
bool PlainTableIterator : : Valid ( ) const {
bool PlainTableIterator : : Valid ( ) const {
return offset_ < table_ - > data_end_offset_
return offset_ < table_ - > file_info_ . data_end_offset & &
& & offset_ > = table_ - > data_start_offset_ ;
offset_ > = table_ - > data_start_offset_ ;
}
}
void PlainTableIterator : : SeekToFirst ( ) {
void PlainTableIterator : : SeekToFirst ( ) {
next_offset_ = table_ - > data_start_offset_ ;
next_offset_ = table_ - > data_start_offset_ ;
if ( next_offset_ > = table_ - > data_end_offset_ ) {
if ( next_offset_ > = table_ - > file_info_ . data_end_offset ) {
next_offset_ = offset_ = table_ - > data_end_offset_ ;
next_offset_ = offset_ = table_ - > file_info_ . data_end_offset ;
} else {
} else {
Next ( ) ;
Next ( ) ;
}
}
@ -633,14 +628,14 @@ void PlainTableIterator::Seek(const Slice& target) {
if ( table_ - > full_scan_mode_ ) {
if ( table_ - > full_scan_mode_ ) {
status_ =
status_ =
Status : : InvalidArgument ( " Seek() is not allowed in full scan mode. " ) ;
Status : : InvalidArgument ( " Seek() is not allowed in full scan mode. " ) ;
offset_ = next_offset_ = table_ - > data_end_offset_ ;
offset_ = next_offset_ = table_ - > file_info_ . data_end_offset ;
return ;
return ;
} else if ( table_ - > GetIndexSize ( ) > 1 ) {
} else if ( table_ - > GetIndexSize ( ) > 1 ) {
assert ( false ) ;
assert ( false ) ;
status_ = Status : : NotSupported (
status_ = Status : : NotSupported (
" PlainTable cannot issue non-prefix seek unless in total order "
" PlainTable cannot issue non-prefix seek unless in total order "
" mode. " ) ;
" mode. " ) ;
offset_ = next_offset_ = table_ - > data_end_offset_ ;
offset_ = next_offset_ = table_ - > file_info_ . data_end_offset ;
return ;
return ;
}
}
}
}
@ -651,7 +646,7 @@ void PlainTableIterator::Seek(const Slice& target) {
if ( ! table_ - > IsTotalOrderMode ( ) ) {
if ( ! table_ - > IsTotalOrderMode ( ) ) {
prefix_hash = GetSliceHash ( prefix_slice ) ;
prefix_hash = GetSliceHash ( prefix_slice ) ;
if ( ! table_ - > MatchBloom ( prefix_hash ) ) {
if ( ! table_ - > MatchBloom ( prefix_hash ) ) {
offset_ = next_offset_ = table_ - > data_end_offset_ ;
offset_ = next_offset_ = table_ - > file_info_ . data_end_offset ;
return ;
return ;
}
}
}
}
@ -659,16 +654,16 @@ void PlainTableIterator::Seek(const Slice& target) {
status_ = table_ - > GetOffset ( target , prefix_slice , prefix_hash , prefix_match ,
status_ = table_ - > GetOffset ( target , prefix_slice , prefix_hash , prefix_match ,
& next_offset_ ) ;
& next_offset_ ) ;
if ( ! status_ . ok ( ) ) {
if ( ! status_ . ok ( ) ) {
offset_ = next_offset_ = table_ - > data_end_offset_ ;
offset_ = next_offset_ = table_ - > file_info_ . data_end_offset ;
return ;
return ;
}
}
if ( next_offset_ < table_ - > data_end_offset_ ) {
if ( next_offset_ < table_ - > file_info_ . data_end_offset ) {
for ( Next ( ) ; status_ . ok ( ) & & Valid ( ) ; Next ( ) ) {
for ( Next ( ) ; status_ . ok ( ) & & Valid ( ) ; Next ( ) ) {
if ( ! prefix_match ) {
if ( ! prefix_match ) {
// Need to verify the first key's prefix
// Need to verify the first key's prefix
if ( table_ - > GetPrefix ( key ( ) ) ! = prefix_slice ) {
if ( table_ - > GetPrefix ( key ( ) ) ! = prefix_slice ) {
offset_ = next_offset_ = table_ - > data_end_offset_ ;
offset_ = next_offset_ = table_ - > file_info_ . data_end_offset ;
break ;
break ;
}
}
prefix_match = true ;
prefix_match = true ;
@ -678,19 +673,19 @@ void PlainTableIterator::Seek(const Slice& target) {
}
}
}
}
} else {
} else {
offset_ = table_ - > data_end_offset_ ;
offset_ = table_ - > file_info_ . data_end_offset ;
}
}
}
}
void PlainTableIterator : : Next ( ) {
void PlainTableIterator : : Next ( ) {
offset_ = next_offset_ ;
offset_ = next_offset_ ;
if ( offset_ < table_ - > data_end_offset_ ) {
if ( offset_ < table_ - > file_info_ . data_end_offset ) {
Slice tmp_slice ;
Slice tmp_slice ;
ParsedInternalKey parsed_key ;
ParsedInternalKey parsed_key ;
status_ =
status_ =
table_ - > Next ( & decoder_ , & next_offset_ , & parsed_key , & key_ , & value_ ) ;
table_ - > Next ( & decoder_ , & next_offset_ , & parsed_key , & key_ , & value_ ) ;
if ( ! status_ . ok ( ) ) {
if ( ! status_ . ok ( ) ) {
offset_ = next_offset_ = table_ - > data_end_offset_ ;
offset_ = next_offset_ = table_ - > file_info_ . data_end_offset ;
}
}
}
}
}
}