Optimize MayContainHash()

Summary:
In the latest leaf's, MayContainHash() consistently accounts for 5%~7% of CPU usage.

I checked the code and did an experiment with/without inlining this method.

In release mode, with `1024 * 1024 * 256` bits and `1024 * 512` entries, both variants call MayContainHash() 2^30 times with distinct parameters.

As the results show, this patch reduces the running time from 9.127 sec to 7.891 sec.

Test Plan: make check

Reviewers: sdong, haobo

CC: leveldb

Differential Revision: https://reviews.facebook.net/D15177
main
Kai Liu 11 years ago
parent 9ea8bf90f1
commit cd535c2280
  1. 47
      util/dynamic_bloom.cc
  2. 33
      util/dynamic_bloom.h

@ -20,50 +20,17 @@ DynamicBloom::DynamicBloom(uint32_t total_bits,
uint32_t (*hash_func)(const Slice& key),
uint32_t num_probes)
: hash_func_(hash_func),
total_bits_((total_bits + 7) / 8 * 8),
num_probes_(num_probes) {
kTotalBits((total_bits + 7) / 8 * 8),
kNumProbes(num_probes) {
assert(hash_func_);
assert(num_probes_ > 0);
assert(total_bits_ > 0);
data_.reset(new unsigned char[total_bits_ / 8]());
assert(kNumProbes > 0);
assert(kTotalBits > 0);
data_.reset(new unsigned char[kTotalBits / 8]());
}
// Two-argument constructor: builds a filter that hashes keys with BloomHash.
// NOTE(review): this span appears to carry two versions of the constructor
// back to back (diff markers were lost) -- an explicit initializer list plus
// body using total_bits_/num_probes_, followed by a delegating form. Confirm
// which one is live before editing.
DynamicBloom::DynamicBloom(uint32_t total_bits,
uint32_t num_probes)
: hash_func_(&BloomHash),
// Round the requested bit count up to a multiple of 8 (whole bytes).
total_bits_((total_bits + 7) / 8 * 8),
num_probes_(num_probes) {
assert(num_probes_ > 0);
assert(total_bits_ > 0);
// Trailing "()" value-initializes the array, so every bit starts cleared.
data_.reset(new unsigned char[total_bits_ / 8]());
// Delegating form: forwards to the three-argument constructor, passing
// &BloomHash as the hash function.
: DynamicBloom(total_bits, &BloomHash, num_probes) {
}
// Hashes `key` with hash_func_ and records the resulting value via AddHash().
void DynamicBloom::Add(const Slice& key) {
  const uint32_t key_hash = hash_func_(key);
  AddHash(key_hash);
}
// Sets num_probes_ bits in data_ for the hash value `h`.
// Each probe targets bit (h % total_bits_); after every probe `h` advances
// by a fixed step (with 32-bit wraparound), the step being the original `h`
// rotated right by 17 bits.
void DynamicBloom::AddHash(uint32_t h) {
  const uint32_t rotated = (h << 15) | (h >> 17);
  uint32_t remaining = num_probes_;
  while (remaining-- > 0) {
    const uint32_t pos = h % total_bits_;
    // Byte pos / 8 holds the bit; pos % 8 selects it within that byte.
    data_[pos / 8] |= (1 << (pos % 8));
    h += rotated;
  }
}
// Hashes `key` with hash_func_ and returns MayContainHash() for that hash.
bool DynamicBloom::MayContain(const Slice& key) {
  const uint32_t key_hash = hash_func_(key);
  return MayContainHash(key_hash);
}
// Checks the num_probes_ bits that AddHash() would set for hash value `h`.
// Returns false as soon as one probed bit is clear; returns true only when
// every probed bit is set.
bool DynamicBloom::MayContainHash(uint32_t h) {
  // Probe step: `h` rotated right by 17 bits.
  const uint32_t rotated = (h << 15) | (h >> 17);
  uint32_t probes_left = num_probes_;
  while (probes_left > 0) {
    const uint32_t pos = h % total_bits_;
    const int bit_mask = 1 << (pos % 8);
    if ((data_[pos / 8] & bit_mask) == 0) {
      return false;
    }
    h += rotated;
    --probes_left;
  }
  return true;
}
}
} // rocksdb

@ -13,9 +13,7 @@ namespace rocksdb {
class Slice;
class DynamicBloom {
public:
// total_bits: fixed total bits for the bloom
// hash_func: customized hash function
// num_probes: number of hash probes for a single key
@ -26,7 +24,7 @@ class DynamicBloom {
explicit DynamicBloom(uint32_t total_bits, uint32_t num_probes = 6);
// Assuming single threaded access to Add
void Add(const Slice& key);
void Add(const Slice& key) { AddHash(hash_func_(key)); }
// Assuming single threaded access to Add
void AddHash(uint32_t hash);
@ -39,9 +37,34 @@ class DynamicBloom {
private:
uint32_t (*hash_func_)(const Slice& key);
uint32_t total_bits_;
uint32_t num_probes_;
const uint32_t kTotalBits;
const uint32_t kNumProbes;
std::unique_ptr<unsigned char[]> data_;
};
// Hashes `key` with hash_func_ and returns MayContainHash() for that hash.
inline bool DynamicBloom::MayContain(const Slice& key) {
  const uint32_t key_hash = hash_func_(key);
  return MayContainHash(key_hash);
}
// Tests the kNumProbes bits selected by hash value `h`.
// Probing starts at bit (h % kTotalBits); after each probe the running hash
// advances by `h` rotated right by 17 bits (32-bit wraparound).
// Returns false at the first clear bit, true if all probed bits are set.
inline bool DynamicBloom::MayContainHash(uint32_t h) {
  const uint32_t step = (h << 15) | (h >> 17);
  uint32_t probe = h;
  for (uint32_t n = 0; n != kNumProbes; ++n, probe += step) {
    const uint32_t bit_index = probe % kTotalBits;
    const bool bit_set =
        (data_[bit_index / 8] & (1 << (bit_index % 8))) != 0;
    if (!bit_set) {
      return false;
    }
  }
  return true;
}
// Sets the kNumProbes bits selected by hash value `h`.
// Probing starts at bit (h % kTotalBits); after each probe the running hash
// advances by `h` rotated right by 17 bits (32-bit wraparound).
inline void DynamicBloom::AddHash(uint32_t h) {
  const uint32_t step = (h << 15) | (h >> 17);
  uint32_t probe = h;
  for (uint32_t n = kNumProbes; n > 0; --n) {
    const uint32_t bit_index = probe % kTotalBits;
    const uint32_t byte_index = bit_index / 8;
    const int mask = 1 << (bit_index % 8);
    data_[byte_index] |= mask;
    probe += step;
  }
}
} // rocksdb

Loading…
Cancel
Save