@ -1005,6 +1005,22 @@ DEFINE_uint64(
" is the global rate in bytes/second. " ) ;
// the parameters of mix_graph
// Coefficients of the two-term exponential model that weights the key-ranges
// (prefixes): f(x) = a*exp(b*x) + c*exp(d*x). All four default to 0.0;
// MixGraph switches to prefix-based key generation as soon as at least one
// of them is non-zero.
DEFINE_double ( keyrange_dist_a , 0.0 ,
" The parameter 'a' of prefix average access distribution "
" f(x)=a*exp(b*x)+c*exp(d*x) " ) ;
DEFINE_double ( keyrange_dist_b , 0.0 ,
" The parameter 'b' of prefix average access distribution "
" f(x)=a*exp(b*x)+c*exp(d*x) " ) ;
DEFINE_double ( keyrange_dist_c , 0.0 ,
" The parameter 'c' of prefix average access distribution "
" f(x)=a*exp(b*x)+c*exp(d*x) " ) ;
DEFINE_double ( keyrange_dist_d , 0.0 ,
" The parameter 'd' of prefix average access distribution "
" f(x)=a*exp(b*x)+c*exp(d*x) " ) ;
// Number of key-ranges the key space is divided into for prefix modeling.
// GenerateTwoTermExpKeys treats values <= 0 as 1.
DEFINE_int64(keyrange_num, 1,
             " The number of key ranges that are in the same prefix "
             " group, each prefix range will have its key access "
             " distribution ");
// Coefficient 'a' of the power-law key access model f(x)=a*x^b. MixGraph
// forwards it to PowerCdfInversion, or to
// GenerateTwoTermExpKeys::DistGetKeyID when prefix modeling is enabled.
DEFINE_double ( key_dist_a , 0.0 ,
" The parameter 'a' of key access distribution model "
" f(x)=a*x^b " ) ;
@ -4962,7 +4978,7 @@ class Benchmark {
thread - > stats . AddMessage ( msg ) ;
}
// The reverse function of Pareto function
// The inverse function of Pareto distribution
int64_t ParetoCdfInversion ( double u , double theta , double k , double sigma ) {
double ret ;
if ( k = = 0.0 ) {
@ -4972,7 +4988,7 @@ class Benchmark {
}
return static_cast < int64_t > ( ceil ( ret ) ) ;
}
// inversion of y=ax^b
// The inverse function of power distribution (y=ax^b)
int64_t PowerCdfInversion ( double u , double a , double b ) {
double ret ;
ret = std : : pow ( ( u / a ) , ( 1 / b ) ) ;
@ -4993,7 +5009,7 @@ class Benchmark {
}
}
// decide the query type
// Decide the ratio of different query types
// 0 Get, 1 Put, 2 Seek, 3 SeekForPrev, 4 Delete, 5 SingleDelete, 6 merge
class QueryDecider {
public :
@ -5034,7 +5050,157 @@ class Benchmark {
}
} ;
// The graph workload mixed with Get, Put, Iterator
// KeyrangeUnit describes a single key-range. The units are kept in a vector
// that is used to map a random value to one key-range according to its
// hotness. Every field defaults to zero so that a default-constructed unit
// never carries indeterminate values.
struct KeyrangeUnit {
  // First integer of the slice that this key-range occupies in the random
  // number range the key-ranges are laid out on.
  int64_t keyrange_start = 0;
  // Width of that slice; derived from the access probability of the
  // key-range.
  int64_t keyrange_access = 0;
  // Number of keys that belong to this key-range.
  int64_t keyrange_keys = 0;
};
// From our observations, the prefix hotness (key-range hotness) follows
// the two-term-exponential distribution: f(x) = a*exp(b*x) + c*exp(d*x).
// However, we cannot directly use the inverse function to decide a
// key-range from a random distribution. To achieve it, we create a list of
// KeyrangeUnit, each KeyrangeUnit occupies a range of integers whose size is
// decided based on the hotness of the key-range. When a random value is
// generated based on uniform distribution, we map it to the KeyrangeUnit Vec
// and one KeyrangeUnit is selected. The probability of a KeyrangeUnit being
// selected is the same as the hotness of this KeyrangeUnit. After that, the
// key can be randomly allocated to the key-range of this KeyrangeUnit, or we
// can use the power distribution (y=ax^b) to generate the offset of
// the key in the selected key-range. In this way, we generate the keyID
// based on the hotness of the prefix and also the key hotness distribution.
class GenerateTwoTermExpKeys {
public :
int64_t keyrange_rand_max_ ;
int64_t keyrange_size_ ;
int64_t keyrange_num_ ;
bool initiated_ ;
std : : vector < KeyrangeUnit > keyrange_set_ ;
GenerateTwoTermExpKeys ( ) {
keyrange_rand_max_ = FLAGS_num ;
initiated_ = false ;
}
~ GenerateTwoTermExpKeys ( ) { }
// Initiate the KeyrangeUnit vector and calculate the size of each
// KeyrangeUnit.
Status InitiateExpDistribution ( int64_t total_keys , double prefix_a ,
double prefix_b , double prefix_c ,
double prefix_d ) {
int64_t amplify = 0 ;
int64_t keyrange_start = 0 ;
initiated_ = true ;
if ( FLAGS_keyrange_num < = 0 ) {
keyrange_num_ = 1 ;
} else {
keyrange_num_ = FLAGS_keyrange_num ;
}
keyrange_size_ = total_keys / keyrange_num_ ;
// Calculate the key-range shares size based on the input parameters
for ( int64_t pfx = keyrange_num_ ; pfx > = 1 ; pfx - - ) {
// Step 1. Calculate the probability that this key range will be
// accessed in a query. It is based on the two-term expoential
// distribution
double keyrange_p = prefix_a * std : : exp ( prefix_b * pfx ) +
prefix_c * std : : exp ( prefix_d * pfx ) ;
if ( keyrange_p < std : : pow ( 10.0 , - 16.0 ) ) {
keyrange_p = 0.0 ;
}
// Step 2. Calculate the amplify
// In order to allocate a query to a key-range based on the random
// number generated for this query, we need to extend the probability
// of each key range from [0,1] to [0, amplify]. Amplify is calculated
// by 1/(smallest key-range probability). In this way, we ensure that
// all key-ranges are assigned with an Integer that >=0
if ( amplify = = 0 & & keyrange_p > 0 ) {
amplify = static_cast < int64_t > ( std : : floor ( 1 / keyrange_p ) ) + 1 ;
}
// Step 3. For each key-range, we calculate its position in the
// [0, amplify] range, including the start, the size (keyrange_access)
KeyrangeUnit p_unit ;
p_unit . keyrange_start = keyrange_start ;
if ( 0.0 > = keyrange_p ) {
p_unit . keyrange_access = 0 ;
} else {
p_unit . keyrange_access =
static_cast < int64_t > ( std : : floor ( amplify * keyrange_p ) ) ;
}
p_unit . keyrange_keys = keyrange_size_ ;
keyrange_set_ . push_back ( p_unit ) ;
keyrange_start + = p_unit . keyrange_access ;
}
keyrange_rand_max_ = keyrange_start ;
// Step 4. Shuffle the key-ranges randomly
// Since the access probability is calculated from small to large,
// If we do not re-allocate them, hot key-ranges are always at the end
// and cold key-ranges are at the begin of the key space. Therefore, the
// key-ranges are shuffled and the rand seed is only decide by the
// key-range hotness distribution. With the same distribution parameters
// the shuffle results are the same.
Random64 rand_loca ( keyrange_rand_max_ ) ;
for ( int64_t i = 0 ; i < FLAGS_keyrange_num ; i + + ) {
int64_t pos = rand_loca . Next ( ) % FLAGS_keyrange_num ;
assert ( i > = 0 & & i < static_cast < int64_t > ( keyrange_set_ . size ( ) ) & &
pos > = 0 & & pos < static_cast < int64_t > ( keyrange_set_ . size ( ) ) ) ;
std : : swap ( keyrange_set_ [ i ] , keyrange_set_ [ pos ] ) ;
}
// Step 5. Recalculate the prefix start postion after shuffling
int64_t offset = 0 ;
for ( auto & p_unit : keyrange_set_ ) {
p_unit . keyrange_start = offset ;
offset + = p_unit . keyrange_access ;
}
return Status : : OK ( ) ;
}
// Generate the Key ID according to the input ini_rand and key distribution
int64_t DistGetKeyID ( int64_t ini_rand , double key_dist_a ,
double key_dist_b ) {
int64_t keyrange_rand = ini_rand % keyrange_rand_max_ ;
// Calculate and select one key-range that contains the new key
int64_t start = 0 , end = static_cast < int64_t > ( keyrange_set_ . size ( ) ) ;
while ( start + 1 < end ) {
int64_t mid = start + ( end - start ) / 2 ;
assert ( mid > = 0 & & mid < static_cast < int64_t > ( keyrange_set_ . size ( ) ) ) ;
if ( keyrange_rand < keyrange_set_ [ mid ] . keyrange_start ) {
end = mid ;
} else {
start = mid ;
}
}
int64_t keyrange_id = start ;
// Select one key in the key-range and compose the keyID
int64_t key_offset = 0 , key_seed ;
if ( key_dist_a = = 0.0 & & key_dist_b = = 0.0 ) {
key_offset = ini_rand % keyrange_size_ ;
} else {
key_seed = static_cast < int64_t > (
ceil ( std : : pow ( ( ini_rand / key_dist_a ) , ( 1 / key_dist_b ) ) ) ) ;
Random64 rand_key ( key_seed ) ;
key_offset = static_cast < int64_t > ( rand_key . Next ( ) ) % keyrange_size_ ;
}
return keyrange_size_ * keyrange_id + key_offset ;
}
} ;
// The social graph workload mixed with Get, Put, Iterator queries.
// The value size and iterator length follow Pareto distribution.
// The overall key access follows power distribution. If user models the
// workload based on different key-ranges (or different prefixes), user
// can use two-term-exponential distribution to fit the workload. User
// needs to decide the ratio between Get, Put, Iterator queries before
// starting the benchmark.
void MixGraph ( ThreadState * thread ) {
int64_t read = 0 ; // including single gets and Next of iterators
int64_t gets = 0 ;
@ -5048,6 +5214,8 @@ class Benchmark {
int64_t scan_len_max = FLAGS_mix_max_scan_len ;
double write_rate = 1000000.0 ;
double read_rate = 1000000.0 ;
bool use_prefix_modeling = false ;
GenerateTwoTermExpKeys gen_exp ;
std : : vector < double > ratio { FLAGS_mix_get_ratio , FLAGS_mix_put_ratio ,
FLAGS_mix_seek_ratio } ;
char value_buffer [ default_value_max ] ;
@ -5073,15 +5241,32 @@ class Benchmark {
NewGenericRateLimiter ( static_cast < int64_t > ( write_rate ) ) ) ;
}
// Decide if user wants to use prefix based key generation
if ( FLAGS_keyrange_dist_a ! = 0.0 | | FLAGS_keyrange_dist_b ! = 0.0 | |
FLAGS_keyrange_dist_c ! = 0.0 | | FLAGS_keyrange_dist_d ! = 0.0 ) {
use_prefix_modeling = true ;
gen_exp . InitiateExpDistribution (
FLAGS_num , FLAGS_keyrange_dist_a , FLAGS_keyrange_dist_b ,
FLAGS_keyrange_dist_c , FLAGS_keyrange_dist_d ) ;
}
Duration duration ( FLAGS_duration , reads_ ) ;
while ( ! duration . Done ( 1 ) ) {
DBWithColumnFamilies * db_with_cfh = SelectDBWithCfh ( thread ) ;
int64_t rand_v , key_rand , key_seed ;
rand_v = GetRandomKey ( & thread - > rand ) % FLAGS_num ;
int64_t ini_rand , rand_v , key_rand , key_seed ;
ini_rand = GetRandomKey ( & thread - > rand ) ;
rand_v = ini_rand % FLAGS_num ;
double u = static_cast < double > ( rand_v ) / FLAGS_num ;
key_seed = PowerCdfInversion ( u , FLAGS_key_dist_a , FLAGS_key_dist_b ) ;
Random64 rand ( key_seed ) ;
key_rand = static_cast < int64_t > ( rand . Next ( ) ) % FLAGS_num ;
// Generate the keyID based on the key hotness and prefix hotness
if ( use_prefix_modeling ) {
key_rand =
gen_exp . DistGetKeyID ( ini_rand , FLAGS_key_dist_a , FLAGS_key_dist_b ) ;
} else {
key_seed = PowerCdfInversion ( u , FLAGS_key_dist_a , FLAGS_key_dist_b ) ;
Random64 rand ( key_seed ) ;
key_rand = static_cast < int64_t > ( rand . Next ( ) ) % FLAGS_num ;
}
GenerateKeyFromInt ( key_rand , FLAGS_num , & key ) ;
int query_type = query . GetType ( rand_v ) ;