@ -60,6 +60,9 @@ CuckooTableBuilder::CuckooTableBuilder(
hash_table_size_ ( use_module_hash ? 0 : 2 ) ,
is_last_level_file_ ( false ) ,
has_seen_first_key_ ( false ) ,
key_size_ ( 0 ) ,
value_size_ ( 0 ) ,
num_entries_ ( 0 ) ,
ucomp_ ( user_comparator ) ,
use_module_hash_ ( use_module_hash ) ,
identity_as_first_hash_ ( identity_as_first_hash ) ,
@ -72,7 +75,7 @@ CuckooTableBuilder::CuckooTableBuilder(
}
void CuckooTableBuilder : : Add ( const Slice & key , const Slice & value ) {
if ( kvs_ . size ( ) > = kMaxVectorIdx - 1 ) {
if ( num_entries_ > = kMaxVectorIdx - 1 ) {
status_ = Status : : NotSupported ( " Number of keys in a file must be < 2^32-1 " ) ;
return ;
}
@ -90,15 +93,18 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
has_seen_first_key_ = true ;
smallest_user_key_ . assign ( ikey . user_key . data ( ) , ikey . user_key . size ( ) ) ;
largest_user_key_ . assign ( ikey . user_key . data ( ) , ikey . user_key . size ( ) ) ;
key_size_ = is_last_level_file_ ? ikey . user_key . size ( ) : key . size ( ) ;
value_size_ = value . size ( ) ;
}
// Even if one sequence number is non-zero, then it is not last level.
assert ( ! is_last_level_file_ | | ikey . sequence = = 0 ) ;
if ( is_last_level_file_ ) {
kvs_ . emplace_back ( std : : make_pair (
ikey . user_key . ToString ( ) , value . ToString ( ) ) ) ;
kvs_ . append ( ikey . user_key . data ( ) , ikey . user_key . size ( ) ) ;
} else {
kvs_ . emplace_back ( std : : make_pair ( key . ToString ( ) , value . ToString ( ) ) ) ;
kvs_ . append ( key . data ( ) , key . size ( ) ) ;
}
kvs_ . append ( value . data ( ) , value . size ( ) ) ;
+ + num_entries_ ;
// In order to fill the empty buckets in the hash table, we identify a
// key which is not used so far (unused_user_key). We determine this by
@ -111,21 +117,32 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
largest_user_key_ . assign ( ikey . user_key . data ( ) , ikey . user_key . size ( ) ) ;
}
if ( ! use_module_hash_ ) {
if ( hash_table_size_ < kvs_ . size ( ) / max_hash_table_ratio_ ) {
if ( hash_table_size_ < num_entries_ / max_hash_table_ratio_ ) {
hash_table_size_ * = 2 ;
}
}
}
Slice CuckooTableBuilder : : GetKey ( uint64_t idx ) const {
return Slice ( & kvs_ [ idx * ( key_size_ + value_size_ ) ] , key_size_ ) ;
}
Slice CuckooTableBuilder : : GetUserKey ( uint64_t idx ) const {
return is_last_level_file_ ? GetKey ( idx ) : ExtractUserKey ( GetKey ( idx ) ) ;
}
Slice CuckooTableBuilder : : GetValue ( uint64_t idx ) const {
return Slice ( & kvs_ [ idx * ( key_size_ + value_size_ ) + key_size_ ] , value_size_ ) ;
}
Status CuckooTableBuilder : : MakeHashTable ( std : : vector < CuckooBucket > * buckets ) {
buckets - > resize ( hash_table_size_ + cuckoo_block_size_ - 1 ) ;
uint64_t make_space_for_key_call_id = 0 ;
for ( uint32_t vector_idx = 0 ; vector_idx < kvs_ . size ( ) ; vector_idx + + ) {
for ( uint32_t vector_idx = 0 ; vector_idx < num_entries_ ; vector_idx + + ) {
uint64_t bucket_id ;
bool bucket_found = false ;
autovector < uint64_t > hash_vals ;
Slice user_key = is_last_level_file_ ? kvs_ [ vector_idx ] . first :
ExtractUserKey ( kvs_ [ vector_idx ] . first ) ;
Slice user_key = GetUserKey ( vector_idx ) ;
for ( uint32_t hash_cnt = 0 ; hash_cnt < num_hash_func_ & & ! bucket_found ;
+ + hash_cnt ) {
uint64_t hash_val = CuckooHash ( user_key , hash_cnt , use_module_hash_ ,
@ -140,10 +157,8 @@ Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) {
bucket_found = true ;
break ;
} else {
if ( ucomp_ - > Compare ( user_key , is_last_level_file_
? Slice ( kvs_ [ ( * buckets ) [ hash_val ] . vector_idx ] . first )
: ExtractUserKey (
kvs_ [ ( * buckets ) [ hash_val ] . vector_idx ] . first ) ) = = 0 ) {
if ( ucomp_ - > Compare ( user_key ,
GetUserKey ( ( * buckets ) [ hash_val ] . vector_idx ) ) = = 0 ) {
return Status : : NotSupported ( " Same key is being inserted again. " ) ;
}
hash_vals . push_back ( hash_val ) ;
@ -183,10 +198,10 @@ Status CuckooTableBuilder::Finish() {
std : : vector < CuckooBucket > buckets ;
Status s ;
std : : string unused_bucket ;
if ( ! kvs_ . empty ( ) ) {
if ( num_entries_ > 0 ) {
// Calculate the real hash size if module hash is enabled.
if ( use_module_hash_ ) {
hash_table_size_ = kvs_ . size ( ) / max_hash_table_ratio_ ;
hash_table_size_ = num_entries_ / max_hash_table_ratio_ ;
}
s = MakeHashTable ( & buckets ) ;
if ( ! s . ok ( ) ) {
@ -224,14 +239,13 @@ Status CuckooTableBuilder::Finish() {
AppendInternalKey ( & unused_bucket , ikey ) ;
}
}
properties_ . num_entries = kvs_ . size ( ) ;
properties_ . fixed_key_len = unused_bucket . size ( ) ;
uint32_t value_length = kvs_ . empty ( ) ? 0 : kvs_ [ 0 ] . second . size ( ) ;
uint32_t bucket_size = value_length + properties_ . fixed_key_len ;
properties_ . num_entries = num_entries_ ;
properties_ . fixed_key_len = key_size_ ;
properties_ . user_collected_properties [
CuckooTablePropertyNames : : kValueLength ] . assign (
reinterpret_cast < const char * > ( & value_length ) , sizeof ( value_length ) ) ;
reinterpret_cast < const char * > ( & value_size_ ) , sizeof ( value_size_ ) ) ;
uint64_t bucket_size = key_size_ + value_size_ ;
unused_bucket . resize ( bucket_size , ' a ' ) ;
// Write the table.
uint32_t num_added = 0 ;
@ -240,9 +254,9 @@ Status CuckooTableBuilder::Finish() {
s = file_ - > Append ( Slice ( unused_bucket ) ) ;
} else {
+ + num_added ;
s = file_ - > Append ( kvs_ [ bucket . vector_idx ] . first ) ;
s = file_ - > Append ( GetKey ( bucket . vector_idx ) ) ;
if ( s . ok ( ) ) {
s = file_ - > Append ( kvs_ [ bucket . vector_idx ] . second ) ;
s = file_ - > Append ( GetValue ( bucket . vector_idx ) ) ;
}
}
if ( ! s . ok ( ) ) {
@ -251,7 +265,7 @@ Status CuckooTableBuilder::Finish() {
}
assert ( num_added = = NumEntries ( ) ) ;
properties_ . raw_key_size = num_added * properties_ . fixed_key_len ;
properties_ . raw_value_size = num_added * value_length ;
properties_ . raw_value_size = num_added * value_size_ ;
uint64_t offset = buckets . size ( ) * bucket_size ;
properties_ . data_size = offset ;
@ -330,19 +344,18 @@ void CuckooTableBuilder::Abandon() {
}
uint64_t CuckooTableBuilder : : NumEntries ( ) const {
return kvs_ . size ( ) ;
return num_entries_ ;
}
uint64_t CuckooTableBuilder : : FileSize ( ) const {
if ( closed_ ) {
return file_ - > GetFileSize ( ) ;
} else if ( kvs_ . size ( ) = = 0 ) {
} else if ( num_entries_ = = 0 ) {
return 0 ;
}
if ( use_module_hash_ ) {
return ( kvs_ [ 0 ] . first . size ( ) + kvs_ [ 0 ] . second . size ( ) ) * kvs_ . size ( ) /
max_hash_table_ratio_ ;
return ( key_size_ + value_size_ ) * num_entries_ / max_hash_table_ratio_ ;
} else {
// Account for buckets being a power of two.
// As elements are added, file size remains constant for a while and
@ -350,11 +363,10 @@ uint64_t CuckooTableBuilder::FileSize() const {
// only after it exceeds the file limit, we account for the extra element
// being added here.
uint64_t expected_hash_table_size = hash_table_size_ ;
if ( expected_hash_table_size < ( kvs_ . size ( ) + 1 ) / max_hash_table_ratio_ ) {
if ( expected_hash_table_size < ( num_entries_ + 1 ) / max_hash_table_ratio_ ) {
expected_hash_table_size * = 2 ;
}
return ( kvs_ [ 0 ] . first . size ( ) + kvs_ [ 0 ] . second . size ( ) ) *
expected_hash_table_size - 1 ;
return ( key_size_ + value_size_ ) * expected_hash_table_size - 1 ;
}
}
@ -390,7 +402,7 @@ bool CuckooTableBuilder::MakeSpaceForKey(
// of the method. We store this number into the nodes that we explore in
// current method call.
// It is unlikely for the increment operation to overflow because the maximum
// no. of times this will be called is <= max_num_hash_func_ + kvs_.size() .
// no. of times this will be called is <= max_num_hash_func_ + num_entries_ .
for ( uint32_t hash_cnt = 0 ; hash_cnt < num_hash_func_ ; + + hash_cnt ) {
uint64_t bucket_id = hash_vals [ hash_cnt ] ;
( * buckets ) [ bucket_id ] . make_space_for_key_call_id =
@ -408,9 +420,7 @@ bool CuckooTableBuilder::MakeSpaceForKey(
CuckooBucket & curr_bucket = ( * buckets ) [ curr_node . bucket_id ] ;
for ( uint32_t hash_cnt = 0 ;
hash_cnt < num_hash_func_ & & ! null_found ; + + hash_cnt ) {
uint64_t child_bucket_id = CuckooHash (
( is_last_level_file_ ? kvs_ [ curr_bucket . vector_idx ] . first :
ExtractUserKey ( Slice ( kvs_ [ curr_bucket . vector_idx ] . first ) ) ) ,
uint64_t child_bucket_id = CuckooHash ( GetUserKey ( curr_bucket . vector_idx ) ,
hash_cnt , use_module_hash_ , hash_table_size_ , identity_as_first_hash_ ,
get_slice_hash_ ) ;
// Iterate inside Cuckoo Block.