@ -3,19 +3,23 @@
// LICENSE file in the root directory of this source tree. An additional grant
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// of patent rights can be found in the PATENTS file in the same directory.
# include <algorithm>
# include <gflags/gflags.h>
# include <gflags/gflags.h>
# include "dynamic_bloom.h"
# include "dynamic_bloom.h"
# include "port/port.h"
# include "util/logging.h"
# include "util/logging.h"
# include "util/testharness.h"
# include "util/testharness.h"
# include "util/testutil.h"
# include "util/testutil.h"
# include "util/stop_watch.h"
// Command-line flags for this test binary (gflags). Each flag is defined
// exactly once; a second DEFINE_* for the same name is a redefinition.

// Multiplied by the number of keys to size the filter in VaryingLengths.
DEFINE_int32(bits_per_key, 10, " ");
// Probe count handed to every DynamicBloom built from flags; also bounds the
// cache-lines-per-block sweep in the VaryingLengths and perf tests.
DEFINE_int32(num_probes, 6, " ");
// When false (the default) the perf test returns immediately.
DEFINE_bool(enable_perf, false, " ");
namespace rocksdb {
namespace rocksdb {
// Builds a fixed-width test key from an integer.
//
// Copies the in-memory (host byte order) representation of `i` into `buffer`
// and returns a Slice constructed over `buffer` with length sizeof(i).
//
// `buffer` must have room for at least sizeof(uint64_t) bytes. The returned
// Slice is built from the `buffer` pointer, so the caller must keep `buffer`
// alive, and unmodified, for as long as the Slice is used.
static Slice Key(uint64_t i, char* buffer) {
  memcpy(buffer, &i, sizeof(i));
  return Slice(buffer, sizeof(i));
}
@ -24,32 +28,44 @@ class DynamicBloomTest {
} ;
} ;
// A filter that has had nothing added must not report any key as present.
// Checked for two constructions: one with 0 and one with 1 as the second
// constructor argument (named cl_per_block at the other call sites in this
// file).
TEST(DynamicBloomTest, EmptyFilter) {
  DynamicBloom bloom1(100, 0, 2);
  ASSERT_TRUE(!bloom1.MayContain(" hello "));
  ASSERT_TRUE(!bloom1.MayContain(" world "));

  // Bit count deliberately one short of two cache lines' worth of bits.
  DynamicBloom bloom2(CACHE_LINE_SIZE * 8 * 2 - 1, 1, 2);
  ASSERT_TRUE(!bloom2.MayContain(" hello "));
  ASSERT_TRUE(!bloom2.MayContain(" world "));
}
// Adds two keys and checks that both are reported as possibly present while
// two keys that were never added are not. Run for the same two filter
// constructions used by EmptyFilter (second constructor argument 0 and 1).
TEST(DynamicBloomTest, Small) {
  DynamicBloom bloom1(100, 0, 2);
  bloom1.Add(" hello ");
  bloom1.Add(" world ");
  ASSERT_TRUE(bloom1.MayContain(" hello "));
  ASSERT_TRUE(bloom1.MayContain(" world "));
  ASSERT_TRUE(!bloom1.MayContain(" x "));
  ASSERT_TRUE(!bloom1.MayContain(" foo "));

  // Bit count deliberately one short of two cache lines' worth of bits.
  DynamicBloom bloom2(CACHE_LINE_SIZE * 8 * 2 - 1, 1, 2);
  bloom2.Add(" hello ");
  bloom2.Add(" world ");
  ASSERT_TRUE(bloom2.MayContain(" hello "));
  ASSERT_TRUE(bloom2.MayContain(" world "));
  ASSERT_TRUE(!bloom2.MayContain(" x "));
  ASSERT_TRUE(!bloom2.MayContain(" foo "));
}
// Returns the next key count to try after `num`, stepping by the magnitude
// of the current value: +1 below 10, +10 below 100, +100 below 1000, and
// +1000 from there on. Starting from 1 this yields
// 1, 2, ..., 10, 20, ..., 100, 200, ..., 1000, 2000, ...
static uint32_t NextNum(uint32_t num) {
  if (num < 10) {
    num += 1;
  } else if (num < 100) {
    num += 10;
  } else if (num < 1000) {
    num += 100;
  } else {
    num += 1000;
  }
  return num;
}
TEST ( DynamicBloomTest , VaryingLengths ) {
TEST ( DynamicBloomTest , VaryingLengths ) {
@ -62,34 +78,41 @@ TEST(DynamicBloomTest, VaryingLengths) {
fprintf ( stderr , " bits_per_key: %d num_probes: %d \n " ,
fprintf ( stderr , " bits_per_key: %d num_probes: %d \n " ,
FLAGS_bits_per_key , FLAGS_num_probes ) ;
FLAGS_bits_per_key , FLAGS_num_probes ) ;
for ( int length = 1 ; length < = 10000 ; length = NextLength ( length ) ) {
for ( uint32_t cl_per_block = 0 ; cl_per_block < FLAGS_num_probes ;
uint32_t bloom_bits = std : : max ( length * FLAGS_bits_per_key , 64 ) ;
+ + cl_per_block ) {
DynamicBloom bloom ( bloom_bits , FLAGS_num_probes ) ;
for ( uint32_t num = 1 ; num < = 10000 ; num = NextNum ( num ) ) {
for ( int i = 0 ; i < length ; i + + ) {
uint32_t bloom_bits = 0 ;
if ( cl_per_block = = 0 ) {
bloom_bits = std : : max ( num * FLAGS_bits_per_key , 64U ) ;
} else {
bloom_bits = std : : max ( num * FLAGS_bits_per_key ,
cl_per_block * CACHE_LINE_SIZE * 8 ) ;
}
DynamicBloom bloom ( bloom_bits , cl_per_block , FLAGS_num_probes ) ;
for ( uint64_t i = 0 ; i < num ; i + + ) {
bloom . Add ( Key ( i , buffer ) ) ;
bloom . Add ( Key ( i , buffer ) ) ;
ASSERT_TRUE ( bloom . MayContain ( Key ( i , buffer ) ) ) ;
ASSERT_TRUE ( bloom . MayContain ( Key ( i , buffer ) ) ) ;
}
}
// All added keys must match
// All added keys must match
for ( int i = 0 ; i < length ; i + + ) {
for ( u int64_ t i = 0 ; i < num ; i + + ) {
ASSERT_TRUE ( bloom . MayContain ( Key ( i , buffer ) ) )
ASSERT_TRUE ( bloom . MayContain ( Key ( i , buffer ) ) )
< < " Length " < < length < < " ; key " < < i ;
< < " Num " < < num < < " ; key " < < i ;
}
}
// Check false positive rate
// Check false positive rate
int result = 0 ;
int result = 0 ;
for ( int i = 0 ; i < 10000 ; i + + ) {
for ( u int64_ t i = 0 ; i < 10000 ; i + + ) {
if ( bloom . MayContain ( Key ( i + 1000000000 , buffer ) ) ) {
if ( bloom . MayContain ( Key ( i + 1000000000 , buffer ) ) ) {
result + + ;
result + + ;
}
}
}
}
double rate = result / 10000.0 ;
double rate = result / 10000.0 ;
fprintf ( stderr , " False positives: %5.2f%% @ length = %6d ; \n " ,
fprintf ( stderr , " False positives: %5.2f%% @ num = %6u, bloom_bits = %6u, "
rate * 100.0 , length ) ;
" cl per block = %u \n " , rate * 100.0 , num , bloom_bits , cl_per_block ) ;
//ASSERT_LE(rate, 0.02); // Must not be over 2%
if ( rate > 0.0125 )
if ( rate > 0.0125 )
mediocre_filters + + ; // Allowed, but not too often
mediocre_filters + + ; // Allowed, but not too often
else
else
@ -98,11 +121,73 @@ TEST(DynamicBloomTest, VaryingLengths) {
fprintf ( stderr , " Filters: %d good, %d mediocre \n " ,
fprintf ( stderr , " Filters: %d good, %d mediocre \n " ,
good_filters , mediocre_filters ) ;
good_filters , mediocre_filters ) ;
ASSERT_LE ( mediocre_filters , good_filters / 5 ) ;
ASSERT_LE ( mediocre_filters , good_filters / 5 ) ;
}
}
}
// Optional micro-benchmark; does nothing unless --enable_perf is set.
//
// For key counts of 8M, 16M, ..., 64M it times Add() and MayContain() over
// every key, first for a filter built with 0 as the second constructor
// argument ("standard bloom") and then for cl_per_block = 1..num_probes
// ("blocked bloom"). Average per-key latencies are written to stderr.
// Every inserted key must be reported as possibly present.
TEST(DynamicBloomTest, perf) {
  if (!FLAGS_enable_perf) {
    return;
  }

  StopWatchNano timer(Env::Default());

  for (uint64_t m = 1; m <= 8; ++m) {
    const uint64_t num_keys = m * 8 * 1024 * 1024;
    // uint64_t is not `unsigned long` on every platform, so cast explicitly
    // to match the %llu conversion.
    fprintf(stderr, " testing %lluM keys \n ",
            static_cast<unsigned long long>(m * 8));

    DynamicBloom std_bloom(num_keys * 10, 0, FLAGS_num_probes);

    timer.Start();
    for (uint64_t i = 1; i <= num_keys; ++i) {
      std_bloom.Add(Slice(reinterpret_cast<const char*>(&i), sizeof(i)));
    }
    uint64_t elapsed = timer.ElapsedNanos();
    fprintf(stderr, " standard bloom, avg add latency %llu \n ",
            static_cast<unsigned long long>(elapsed / num_keys));

    uint64_t count = 0;
    timer.Start();
    for (uint64_t i = 1; i <= num_keys; ++i) {
      if (std_bloom.MayContain(
              Slice(reinterpret_cast<const char*>(&i), sizeof(i)))) {
        ++count;
      }
    }
    elapsed = timer.ElapsedNanos();
    // Verify before averaging: if the filter matched nothing, dividing by
    // `count` would be a division by zero. Once the assertion holds, count
    // equals num_keys, so the printed average is the same.
    ASSERT_TRUE(count == num_keys);
    fprintf(stderr, " standard bloom, avg query latency %llu \n ",
            static_cast<unsigned long long>(elapsed / num_keys));

    for (int cl_per_block = 1; cl_per_block <= FLAGS_num_probes;
         ++cl_per_block) {
      DynamicBloom blocked_bloom(num_keys * 10, cl_per_block,
                                 FLAGS_num_probes);

      timer.Start();
      for (uint64_t i = 1; i <= num_keys; ++i) {
        blocked_bloom.Add(
            Slice(reinterpret_cast<const char*>(&i), sizeof(i)));
      }
      // Distinct names: do not shadow the outer `elapsed` / `count`.
      uint64_t blocked_elapsed = timer.ElapsedNanos();
      fprintf(stderr, " blocked bloom(%d), avg add latency %llu \n ",
              cl_per_block,
              static_cast<unsigned long long>(blocked_elapsed / num_keys));

      uint64_t blocked_count = 0;
      timer.Start();
      for (uint64_t i = 1; i <= num_keys; ++i) {
        if (blocked_bloom.MayContain(
                Slice(reinterpret_cast<const char*>(&i), sizeof(i)))) {
          ++blocked_count;
        }
      }
      blocked_elapsed = timer.ElapsedNanos();
      // Same ordering as above: assert first, then average over num_keys.
      ASSERT_TRUE(blocked_count == num_keys);
      fprintf(stderr, " blocked bloom(%d), avg query latency %llu \n ",
              cl_per_block,
              static_cast<unsigned long long>(blocked_elapsed / num_keys));
    }
  }
}
} // namespace rocksdb
} // namespace rocksdb