@@ -68,6 +68,9 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
       table_(ioptions.memtable_factory->CreateMemTableRep(
           comparator_, &allocator_, ioptions.prefix_extractor,
           ioptions.info_log)),
+      range_del_table_(ioptions.memtable_factory->CreateMemTableRep(
+          comparator_, &allocator_, nullptr /* transform */,
+          ioptions.info_log)),
       data_size_(0),
       num_entries_(0),
       num_deletes_(0),
@@ -101,6 +104,7 @@ MemTable::~MemTable() { assert(refs_ == 0); }
 size_t MemTable::ApproximateMemoryUsage() {
   size_t arena_usage = arena_.ApproximateMemoryUsage();
   size_t table_usage = table_->ApproximateMemoryUsage();
+  table_usage += range_del_table_->ApproximateMemoryUsage();
   // let MAX_USAGE = std::numeric_limits<size_t>::max()
   // then if arena_usage + total_usage >= MAX_USAGE, return MAX_USAGE.
   // the following variation is to avoid numeric overflow.
@@ -122,8 +126,9 @@ bool MemTable::ShouldFlushNow() const {
   // If arena still have room for new block allocation, we can safely say it
   // shouldn't flush.
-  auto allocated_memory =
-      table_->ApproximateMemoryUsage() + arena_.MemoryAllocatedBytes();
+  auto allocated_memory = table_->ApproximateMemoryUsage() +
+                          range_del_table_->ApproximateMemoryUsage() +
+                          arena_.MemoryAllocatedBytes();
 
   // if we can still allocate one more block without exceeding the
   // over-allocation ratio, then we should not flush.
@@ -219,14 +224,16 @@ const char* EncodeKey(std::string* scratch, const Slice& target) {
 class MemTableIterator : public InternalIterator {
  public:
   MemTableIterator(const MemTable& mem, const ReadOptions& read_options,
-                   Arena* arena)
+                   Arena* arena, bool use_range_del_table = false)
       : bloom_(nullptr),
         prefix_extractor_(mem.prefix_extractor_),
         comparator_(mem.comparator_),
         valid_(false),
         arena_mode_(arena != nullptr),
         value_pinned_(!mem.GetMemTableOptions()->inplace_update_support) {
-    if (prefix_extractor_ != nullptr && !read_options.total_order_seek) {
+    if (use_range_del_table) {
+      iter_ = mem.range_del_table_->GetIterator(arena);
+    } else if (prefix_extractor_ != nullptr && !read_options.total_order_seek) {
       bloom_ = mem.prefix_bloom_.get();
       iter_ = mem.table_->GetDynamicPrefixIterator(arena);
     } else {
@@ -356,6 +363,14 @@ InternalIterator* MemTable::NewIterator(const ReadOptions& read_options,
   return new (mem) MemTableIterator(*this, read_options, arena);
 }
 
+InternalIterator* MemTable::NewRangeTombstoneIterator(
+    const ReadOptions& read_options, Arena* arena) {
+  assert(arena != nullptr);
+  auto mem = arena->AllocateAligned(sizeof(MemTableIterator));
+  return new (mem) MemTableIterator(*this, read_options, arena,
+                                    true /* use_range_del_table */);
+}
+
 port::RWMutex* MemTable::GetLock(const Slice& key) {
   static murmur_hash hash;
   return &locks_[hash(key) % locks_.size()];
@@ -364,6 +379,7 @@ port::RWMutex* MemTable::GetLock(const Slice& key) {
 uint64_t MemTable::ApproximateSize(const Slice& start_ikey,
                                    const Slice& end_ikey) {
   uint64_t entry_count = table_->ApproximateNumEntries(start_ikey, end_ikey);
+  entry_count += range_del_table_->ApproximateNumEntries(start_ikey, end_ikey);
   if (entry_count == 0) {
     return 0;
   }
@@ -372,9 +388,9 @@ uint64_t MemTable::ApproximateSize(const Slice& start_ikey,
     return 0;
   }
   if (entry_count > n) {
-    // table_->ApproximateNumEntries() is just an estimate so it can be larger
-    // than actual entries we have. Cap it to entries we have to limit the
-    // inaccuracy.
+    // (range_del_)table_->ApproximateNumEntries() is just an estimate so it can
+    // be larger than actual entries we have. Cap it to entries we have to limit
+    // the inaccuracy.
     entry_count = n;
   }
   uint64_t data_size = data_size_.load(std::memory_order_relaxed);
@@ -397,7 +413,9 @@ void MemTable::Add(SequenceNumber s, ValueType type,
                                internal_key_size + VarintLength(val_size) +
                                val_size;
   char* buf = nullptr;
-  KeyHandle handle = table_->Allocate(encoded_len, &buf);
+  std::unique_ptr<MemTableRep>& table =
+      type == kTypeRangeDeletion ? range_del_table_ : table_;
+  KeyHandle handle = table->Allocate(encoded_len, &buf);
   char* p = EncodeVarint32(buf, internal_key_size);
   memcpy(p, key.data(), key_size);
@@ -409,7 +427,7 @@ void MemTable::Add(SequenceNumber s, ValueType type,
   memcpy(p, value.data(), val_size);
   assert((unsigned)(p + val_size - buf) == (unsigned)encoded_len);
   if (!allow_concurrent) {
-    table_->Insert(handle);
+    table->Insert(handle);
     // this is a bit ugly, but is the way to avoid locked instructions
     // when incrementing an atomic
@@ -441,7 +459,7 @@ void MemTable::Add(SequenceNumber s, ValueType type,
     assert(post_process_info == nullptr);
     UpdateFlushState();
   } else {
-    table->InsertConcurrently(handle);
+    table->InsertConcurrently(handle);
     assert(post_process_info != nullptr);
     post_process_info->num_entries++;
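
Note (not part of the patch): the new NewRangeTombstoneIterator() is consumed the same way as NewIterator(). The sketch below shows one possible caller; it assumes it is compiled inside the RocksDB source tree with the internal headers available, and the function name DumpRangeTombstones is purely hypothetical.

// Illustrative sketch only, assuming RocksDB internal headers and an
// already-constructed MemTable*; DumpRangeTombstones is a hypothetical helper.
#include "db/memtable.h"
#include "rocksdb/options.h"
#include "table/internal_iterator.h"
#include "util/arena.h"

namespace rocksdb {

void DumpRangeTombstones(MemTable* mem) {
  ReadOptions read_options;
  Arena arena;  // NewRangeTombstoneIterator() asserts arena != nullptr
  InternalIterator* iter = mem->NewRangeTombstoneIterator(read_options, &arena);
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    // Entries here were routed into range_del_table_ by Add() when
    // type == kTypeRangeDeletion; key() is the internal key, value() is the
    // value that was passed to Add() for that entry.
    Slice start = iter->key();
    Slice value = iter->value();
    (void)start;
    (void)value;
  }
  // Arena-allocated iterators are destroyed in place rather than deleted.
  iter->~InternalIterator();
}

}  // namespace rocksdb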