From dfa1460d88535bf09aaeffdb6d2bdca513b1dbc2 Mon Sep 17 00:00:00 2001 From: Siying Dong Date: Thu, 21 Nov 2013 23:33:45 -0800 Subject: [PATCH] [For Performance Branch] Bloom filter in PlainTableIterator::Seek() - Update 1 Summary: Address @haobo's comments in D14277 Test Plan: ./indexed_table_db_test Reviewers: haobo CC: Task ID: # Blame Rev: --- include/rocksdb/plain_table_factory.h | 4 ++-- table/plain_table_reader.cc | 10 +++++----- table/table_reader_bench.cc | 4 +++- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/rocksdb/plain_table_factory.h b/include/rocksdb/plain_table_factory.h index 0d843e2c2..3d26c6e4e 100644 --- a/include/rocksdb/plain_table_factory.h +++ b/include/rocksdb/plain_table_factory.h @@ -48,8 +48,8 @@ public: ~PlainTableFactory() { } // user_key_size is the length of the user key. key_prefix_len is the - // length of the prefix used for im-memory indexes. bloom_num_bits is - // number of bits is used for bloom filer per key. hash_table_ratio is + // length of the prefix used for in-memory indexes. bloom_num_bits is + // number of bits used for bloom filter per key. hash_table_ratio is // the desired ultilization of the hash table used for prefix hashing. // hash_table_ratio = number of prefixes / #buckets in the hash table PlainTableFactory(int user_key_size, int key_prefix_len, diff --git a/table/plain_table_reader.cc b/table/plain_table_reader.cc index 657dc031e..9d0283b22 100644 --- a/table/plain_table_reader.cc +++ b/table/plain_table_reader.cc @@ -162,10 +162,10 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) { // Make the hash table 3/5 full std::vector filter_entries(0); // for creating bloom filter; if (filter_policy_ != nullptr) { - filter_entries.resize(tmp_index.size()); + filter_entries.reserve(tmp_index.size()); } double hash_table_size_multipier = - (hash_table_ratio_ < 1.0) ? 1.0 : 1.0 / hash_table_ratio_; + (hash_table_ratio_ > 1.0) ? 
1.0 : 1.0 / hash_table_ratio_; hash_table_size_ = tmp_index.size() * hash_table_size_multipier + 1; hash_table_ = new char[GetHashTableRecordLen() * hash_table_size_]; for (int i = 0; i < hash_table_size_; i++) { @@ -173,10 +173,9 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) { kOffsetLen); } - size_t count = 0; for (auto it = tmp_index.begin(); it != tmp_index.end(); ++it) { if (filter_policy_ != nullptr) { - filter_entries[count++] = it->first; + filter_entries.push_back(it->first); } int bucket = GetHashTableBucket(it->first); @@ -194,7 +193,8 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) { memcpy(bucket_ptr + key_prefix_len_, &it->second, kOffsetLen); } if (filter_policy_ != nullptr) { - filter_policy_->CreateFilter(&filter_entries[0], count, &filter_str_); + filter_policy_->CreateFilter(&filter_entries[0], filter_entries.size(), + &filter_str_); filter_slice_ = Slice(filter_str_.data(), filter_str_.size()); } diff --git a/table/table_reader_bench.cc b/table/table_reader_bench.cc index a8a344434..7e7e6b7da 100644 --- a/table/table_reader_bench.cc +++ b/table/table_reader_bench.cc @@ -242,7 +242,9 @@ int main(int argc, char** argv) { if (FLAGS_plain_table) { options.allow_mmap_reads = true; env_options.use_mmap_reads = true; - tf = new rocksdb::PlainTableFactory(16, FLAGS_prefix_len, FLAGS_prefix_len); + tf = new rocksdb::PlainTableFactory(16, FLAGS_prefix_len, + (FLAGS_prefix_len == 16) ? 0 : 8, + 0.75); } else { tf = new rocksdb::BlockBasedTableFactory(); }