diff --git a/db/plain_table_db_test.cc b/db/plain_table_db_test.cc
index 3697b4c45..0baf56ecd 100644
--- a/db/plain_table_db_test.cc
+++ b/db/plain_table_db_test.cc
@@ -59,7 +59,7 @@ public:
   // Return the current option configuration.
   Options CurrentOptions() {
     Options options;
-    options.table_factory.reset(new PlainTableFactory(16, 8));
+    options.table_factory.reset(new PlainTableFactory(16, 8, 2, 0.8));
     options.allow_mmap_reads = true;
     return options;
   }
diff --git a/include/rocksdb/plain_table_factory.h b/include/rocksdb/plain_table_factory.h
index f8a0cb9a9..0d843e2c2 100644
--- a/include/rocksdb/plain_table_factory.h
+++ b/include/rocksdb/plain_table_factory.h
@@ -47,8 +47,15 @@ class PlainTableFactory: public TableFactory {
 public:
   ~PlainTableFactory() {
   }
-  PlainTableFactory(int user_key_size, int key_prefix_len) :
-      user_key_size_(user_key_size), key_prefix_len_(key_prefix_len) {
+  // user_key_size is the length of the user key. key_prefix_len is the
+  // length of the prefix used for in-memory indexes. bloom_num_bits is the
+  // number of bloom filter bits per prefix; 0 disables the bloom filter.
+  // hash_table_ratio is the desired utilization of the prefix hash table:
+  // hash_table_ratio = number of prefixes / #buckets in the hash table
+  PlainTableFactory(int user_key_size, int key_prefix_len,
+                    int bloom_num_bits = 0, double hash_table_ratio = 0.75) :
+      user_key_size_(user_key_size), key_prefix_len_(key_prefix_len),
+      bloom_num_bits_(bloom_num_bits), hash_table_ratio_(hash_table_ratio) {
   }
   const char* Name() const override {
     return "PlainTable";
@@ -64,6 +71,8 @@ public:
 private:
   int user_key_size_;
   int key_prefix_len_;
+  int bloom_num_bits_;
+  double hash_table_ratio_;
 };
 
 }  // namespace rocksdb
diff --git a/table/plain_table_factory.cc b/table/plain_table_factory.cc
index 10393501d..08e75c4ec 100644
--- a/table/plain_table_factory.cc
+++ b/table/plain_table_factory.cc
@@ -19,13 +19,13 @@ Status PlainTableFactory::GetTableReader(const Options& options,
                                          unique_ptr<TableReader>* table)
     const {
   return PlainTableReader::Open(options, soptions, std::move(file), file_size,
-                                table, user_key_size_, key_prefix_len_);
+                                table, user_key_size_, key_prefix_len_,
+                                bloom_num_bits_, hash_table_ratio_);
 }
 
 TableBuilder* PlainTableFactory::GetTableBuilder(
     const Options& options, WritableFile* file,
     CompressionType compression_type) const {
-  return new PlainTableBuilder(options, file, user_key_size_,
-                               key_prefix_len_);
+  return new PlainTableBuilder(options, file, user_key_size_, key_prefix_len_);
 }
 }  // namespace rocksdb
diff --git a/table/plain_table_reader.cc b/table/plain_table_reader.cc
index 5577c4eca..657dc031e 100644
--- a/table/plain_table_reader.cc
+++ b/table/plain_table_reader.cc
@@ -40,9 +40,16 @@ namespace rocksdb {
 
 PlainTableReader::PlainTableReader(const EnvOptions& storage_options,
                                    uint64_t file_size, int user_key_size,
-                                   int key_prefix_len) :
-    soptions_(storage_options), file_size_(file_size),
-    user_key_size_(user_key_size), key_prefix_len_(key_prefix_len) {
+                                   int key_prefix_len, int bloom_bits_per_key,
+                                   double hash_table_ratio) :
+    hash_table_size_(0), soptions_(storage_options), file_size_(file_size),
+    user_key_size_(user_key_size), key_prefix_len_(key_prefix_len),
+    hash_table_ratio_(hash_table_ratio) {
+  if (bloom_bits_per_key > 0) {
+    filter_policy_ = NewBloomFilterPolicy(bloom_bits_per_key);
+  } else {
+    filter_policy_ = nullptr;
+  }
   hash_table_ = nullptr;
 }
 
@@ -50,6 +57,9 @@ PlainTableReader::~PlainTableReader() {
   if (hash_table_ != nullptr) {
     delete[] hash_table_;
   }
+  if (filter_policy_ != nullptr) {
+    delete filter_policy_;
+  }
 }
 
 Status PlainTableReader::Open(const Options& options,
@@ -58,12 +68,16 @@ Status PlainTableReader::Open(const Options& options,
                               uint64_t file_size,
                               unique_ptr<TableReader>* table_reader,
                               const int user_key_size,
-                              const int key_prefix_len) {
+                              const int key_prefix_len,
+                              const int bloom_num_bits,
+                              double hash_table_ratio) {
   assert(options.allow_mmap_reads);
 
   PlainTableReader* t = new PlainTableReader(soptions, file_size,
                                              user_key_size,
-                                             key_prefix_len);
+                                             key_prefix_len,
+                                             bloom_num_bits,
+                                             hash_table_ratio);
   t->file_ = std::move(file);
   t->options_ = options;
   Status s = t->PopulateIndex(file_size);
@@ -146,14 +160,25 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) {
     delete[] hash_table_;
   }
-  // Make the hash table 3/5 full
-  hash_table_size_ = tmp_index.size() * 1.66;
+  std::vector<Slice> filter_entries(0);  // prefixes fed to the bloom filter
+  if (filter_policy_ != nullptr) {
+    filter_entries.resize(tmp_index.size());
+  }
+  // Size the table so #prefixes / #buckets ~= hash_table_ratio_ (capped at 1)
+  double hash_table_size_multiplier =
+    (hash_table_ratio_ > 1.0) ? 1.0 : 1.0 / hash_table_ratio_;
+  hash_table_size_ = tmp_index.size() * hash_table_size_multiplier + 1;
   hash_table_ = new char[GetHashTableRecordLen() * hash_table_size_];
   for (int i = 0; i < hash_table_size_; i++) {
     memcpy(GetHashTableBucketPtr(i) + key_prefix_len_, &file_size_,
            kOffsetLen);
   }
 
+  size_t count = 0;
   for (auto it = tmp_index.begin(); it != tmp_index.end(); ++it) {
+    if (filter_policy_ != nullptr) {
+      filter_entries[count++] = it->first;
+    }
+
     int bucket = GetHashTableBucket(it->first);
     uint64_t* hash_value;
     while (true) {
@@ -168,6 +193,10 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) {
     memcpy(bucket_ptr, it->first.data(), key_prefix_len_);
     memcpy(bucket_ptr + key_prefix_len_, &it->second, kOffsetLen);
   }
+  if (filter_policy_ != nullptr) {
+    filter_policy_->CreateFilter(filter_entries.data(), count, &filter_str_);
+    filter_slice_ = Slice(filter_str_.data(), filter_str_.size());
+  }
 
   Log(options_.info_log, "Number of prefixes: %d, suffix_map length %ld",
       hash_table_size_, sub_index_.length());
@@ -187,7 +216,6 @@ inline void PlainTableReader::GetHashValue(int bucket, uint64_t** ret_value) {
 
 Status PlainTableReader::GetOffset(const Slice& target, uint64_t* offset) {
   Status s;
-
   int bucket = GetHashTableBucket(target);
   uint64_t* found_value;
   Slice hash_key;
@@ -248,6 +276,12 @@ Status PlainTableReader::GetOffset(const Slice& target, uint64_t* offset) {
   return s;
 }
 
+bool PlainTableReader::MayHavePrefix(const Slice& target_prefix) {
+  return filter_policy_ == nullptr
+    || filter_policy_->KeyMayMatch(target_prefix, filter_slice_);
+}
+
+
 uint64_t PlainTableReader::Next(uint64_t offset, Slice* key, Slice* value,
                                 Slice* tmp_slice) {
   if (offset >= file_size_) {
@@ -321,6 +355,11 @@ void PlainTableIterator::SeekToLast() {
 }
 
 void PlainTableIterator::Seek(const Slice& target) {
+  if (!table_->MayHavePrefix(Slice(target.data(), table_->key_prefix_len_))) {
+    offset_ = next_offset_ = table_->file_size_;
+    return;
+  }
+
   Status s = table_->GetOffset(target, &next_offset_);
   if (!s.ok()) {
     status_ = s;
diff --git a/table/plain_table_reader.h b/table/plain_table_reader.h
index 44b545833..d9ac34326 100644
--- a/table/plain_table_reader.h
+++ b/table/plain_table_reader.h
@@ -57,7 +57,8 @@ public:
   static Status Open(const Options& options, const EnvOptions& soptions,
                      unique_ptr<RandomAccessFile> && file, uint64_t file_size,
                      unique_ptr<TableReader>* table, const int user_key_size,
-                     const int key_prefix_len);
+                     const int key_prefix_len, const int bloom_num_bits,
+                     double hash_table_ratio);
 
   bool PrefixMayMatch(const Slice& internal_prefix);
 
@@ -79,7 +80,8 @@ public:
   }
 
   PlainTableReader(const EnvOptions& storage_options, uint64_t file_size,
-                   int user_key_size, int key_prefix_len);
+                   int user_key_size, int key_prefix_len, int bloom_num_bits,
+                   double hash_table_ratio);
   ~PlainTableReader();
 
 private:
@@ -95,6 +97,10 @@ private:
   uint64_t file_size_;
   const size_t user_key_size_;
   const size_t key_prefix_len_;
+  const double hash_table_ratio_;
+  const FilterPolicy* filter_policy_;
+  std::string filter_str_;
+  Slice filter_slice_;
 
   TableProperties tbl_props;
 
@@ -123,6 +129,7 @@ private:
   Status PopulateIndex(uint64_t file_size);
   uint64_t Next(uint64_t offset, Slice* key, Slice* value, Slice* tmp_slice);
   Status GetOffset(const Slice& target, uint64_t* offset);
+  bool MayHavePrefix(const Slice& target_prefix);
 
   // No copying allowed
   explicit PlainTableReader(const TableReader&) = delete;
diff --git a/table/table_reader_bench.cc b/table/table_reader_bench.cc
index 8d3fd2412..a8a344434 100644
--- a/table/table_reader_bench.cc
+++ b/table/table_reader_bench.cc
@@ -242,7 +242,7 @@ int main(int argc, char** argv) {
   if (FLAGS_plain_table) {
     options.allow_mmap_reads = true;
     env_options.use_mmap_reads = true;
-    tf = new rocksdb::PlainTableFactory(16, FLAGS_prefix_len);
+    tf = new rocksdb::PlainTableFactory(16, FLAGS_prefix_len, FLAGS_prefix_len);
   } else {
     tf = new rocksdb::BlockBasedTableFactory();
   }