[For Performance Branch] Bloom filter in PlainTableIterator::Seek() - Update 1

Summary:
Address @haobo's comments in D14277

Test Plan: ./indexed_table_db_test

Reviewers: haobo

CC:

Task ID: #

Blame Rev:
main
Siying Dong 11 years ago
parent 718488abc5
commit dfa1460d88
  1. 4
      include/rocksdb/plain_table_factory.h
  2. 10
      table/plain_table_reader.cc
  3. 4
      table/table_reader_bench.cc

@ -48,8 +48,8 @@ public:
~PlainTableFactory() { ~PlainTableFactory() {
} }
// user_key_size is the length of the user key. key_prefix_len is the // user_key_size is the length of the user key. key_prefix_len is the
// length of the prefix used for im-memory indexes. bloom_num_bits is // length of the prefix used for in-memory indexes. bloom_num_bits is
// number of bits is used for bloom filter per key. hash_table_ratio is // number of bits used for bloom filter per key. hash_table_ratio is
// the desired utilization of the hash table used for prefix hashing. // the desired utilization of the hash table used for prefix hashing.
// hash_table_ratio = number of prefixes / #buckets in the hash table // hash_table_ratio = number of prefixes / #buckets in the hash table
PlainTableFactory(int user_key_size, int key_prefix_len, PlainTableFactory(int user_key_size, int key_prefix_len,

@ -162,10 +162,10 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) {
// Make the hash table 3/5 full // Make the hash table 3/5 full
std::vector<Slice> filter_entries(0); // for creating bloom filter; std::vector<Slice> filter_entries(0); // for creating bloom filter;
if (filter_policy_ != nullptr) { if (filter_policy_ != nullptr) {
filter_entries.resize(tmp_index.size()); filter_entries.reserve(tmp_index.size());
} }
double hash_table_size_multipier = double hash_table_size_multipier =
(hash_table_ratio_ < 1.0) ? 1.0 : 1.0 / hash_table_ratio_; (hash_table_ratio_ > 1.0) ? 1.0 : 1.0 / hash_table_ratio_;
hash_table_size_ = tmp_index.size() * hash_table_size_multipier + 1; hash_table_size_ = tmp_index.size() * hash_table_size_multipier + 1;
hash_table_ = new char[GetHashTableRecordLen() * hash_table_size_]; hash_table_ = new char[GetHashTableRecordLen() * hash_table_size_];
for (int i = 0; i < hash_table_size_; i++) { for (int i = 0; i < hash_table_size_; i++) {
@ -173,10 +173,9 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) {
kOffsetLen); kOffsetLen);
} }
size_t count = 0;
for (auto it = tmp_index.begin(); it != tmp_index.end(); ++it) { for (auto it = tmp_index.begin(); it != tmp_index.end(); ++it) {
if (filter_policy_ != nullptr) { if (filter_policy_ != nullptr) {
filter_entries[count++] = it->first; filter_entries.push_back(it->first);
} }
int bucket = GetHashTableBucket(it->first); int bucket = GetHashTableBucket(it->first);
@ -194,7 +193,8 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) {
memcpy(bucket_ptr + key_prefix_len_, &it->second, kOffsetLen); memcpy(bucket_ptr + key_prefix_len_, &it->second, kOffsetLen);
} }
if (filter_policy_ != nullptr) { if (filter_policy_ != nullptr) {
filter_policy_->CreateFilter(&filter_entries[0], count, &filter_str_); filter_policy_->CreateFilter(&filter_entries[0], filter_entries.size(),
&filter_str_);
filter_slice_ = Slice(filter_str_.data(), filter_str_.size()); filter_slice_ = Slice(filter_str_.data(), filter_str_.size());
} }

@ -242,7 +242,9 @@ int main(int argc, char** argv) {
if (FLAGS_plain_table) { if (FLAGS_plain_table) {
options.allow_mmap_reads = true; options.allow_mmap_reads = true;
env_options.use_mmap_reads = true; env_options.use_mmap_reads = true;
tf = new rocksdb::PlainTableFactory(16, FLAGS_prefix_len, FLAGS_prefix_len); tf = new rocksdb::PlainTableFactory(16, FLAGS_prefix_len,
(FLAGS_prefix_len == 16) ? 0 : 8,
0.75);
} else { } else {
tf = new rocksdb::BlockBasedTableFactory(); tf = new rocksdb::BlockBasedTableFactory();
} }

Loading…
Cancel
Save