BlockBasedTable::PrefixMayMatch() to skip index checking if we can't find a filter block.

Summary: When we can't find a filter block, there is not much benefit in doing the binary search to see whether the index key has the prefix. With this change, we blindly return true if we can't get the filter. It also fixes missing-row cases for the reverse comparator with full bloom. Test Plan: Add a test case that used to fail. Reviewers: yhchiang, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: kradhakrishnan, yiwu, hermanlee4, yoshinorim, leveldb, andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D56697
9 years ago · 535af525d6
parent 09be5cad5b
commit 535af525d6
2 changed files with 63 additions and 38 deletions
--- a/db/db_test2.cc
+++ b/db/db_test2.cc
@ -23,12 +23,28 @@ class DBTest2 : public DBTestBase {
  DBTest2() : DBTestBase("/db_test2") {}
 };

-TEST_F(DBTest2, PrefixFullBloomWithReverseComparator) {
+class PrefixFullBloomWithReverseComparator
+    : public DBTestBase,
+      public ::testing::WithParamInterface<bool> {
+ public:
+  PrefixFullBloomWithReverseComparator()
+      : DBTestBase("/prefix_bloom_reverse") {}
+  virtual void SetUp() override { if_cache_filter_ = GetParam(); }
+  bool if_cache_filter_;
+};
+
+TEST_P(PrefixFullBloomWithReverseComparator,
+       PrefixFullBloomWithReverseComparator) {
  Options options = last_options_;
  options.comparator = ReverseBytewiseComparator();
  options.prefix_extractor.reset(NewCappedPrefixTransform(3));
  options.statistics = rocksdb::CreateDBStatistics();
  BlockBasedTableOptions bbto;
+  if (if_cache_filter_) {
+    bbto.no_block_cache = false;
+    bbto.cache_index_and_filter_blocks = true;
+    bbto.block_cache = NewLRUCache(1);
+  }
  bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
  bbto.whole_key_filtering = false;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
@ -40,6 +56,10 @@ TEST_F(DBTest2, PrefixFullBloomWithReverseComparator) {

  dbfull()->Flush(FlushOptions());

+  if (bbto.block_cache) {
+    bbto.block_cache->EraseUnRefEntries();
+  }
+
  unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
  iter->Seek("bar345");
  ASSERT_OK(iter->status());
@ -62,6 +82,9 @@ TEST_F(DBTest2, PrefixFullBloomWithReverseComparator) {
  ASSERT_TRUE(!iter->Valid());
 }

+INSTANTIATE_TEST_CASE_P(PrefixFullBloomWithReverseComparator,
+                        PrefixFullBloomWithReverseComparator, testing::Bool());
+
 TEST_F(DBTest2, IteratorPropertyVersionNumber) {
  Put("", "");
  Iterator* iter1 = db_->NewIterator(ReadOptions());
--- a/table/block_based_table_reader.cc
+++ b/table/block_based_table_reader.cc
@ -1224,43 +1224,45 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) {
  // First, try check with full filter
  auto filter_entry = GetFilter(true /* no io */);
  FilterBlockReader* filter = filter_entry.value;
-  if (filter != nullptr && !filter->IsBlockBased()) {
-    may_match = filter->PrefixMayMatch(prefix);
-  } else {
-    // Then, try find it within each block
-    unique_ptr<InternalIterator> iiter(NewIndexIterator(no_io_read_options));
-    iiter->Seek(internal_prefix);
-
-    if (!iiter->Valid()) {
-      // we're past end of file
-      // if it's incomplete, it means that we avoided I/O
-      // and we're not really sure that we're past the end
-      // of the file
-      may_match = iiter->status().IsIncomplete();
-    } else if (ExtractUserKey(iiter->key()).starts_with(
-                ExtractUserKey(internal_prefix))) {
-      // we need to check for this subtle case because our only
-      // guarantee is that "the key is a string >= last key in that data
-      // block" according to the doc/table_format.txt spec.
-      //
-      // Suppose iiter->key() starts with the desired prefix; it is not
-      // necessarily the case that the corresponding data block will
-      // contain the prefix, since iiter->key() need not be in the
-      // block.  However, the next data block may contain the prefix, so
-      // we return true to play it safe.
-      may_match = true;
-    } else if (filter != nullptr && filter->IsBlockBased()) {
-      // iiter->key() does NOT start with the desired prefix.  Because
-      // Seek() finds the first key that is >= the seek target, this
-      // means that iiter->key() > prefix.  Thus, any data blocks coming
-      // after the data block corresponding to iiter->key() cannot
-      // possibly contain the key.  Thus, the corresponding data block
-      // is the only on could potentially contain the prefix.
-      Slice handle_value = iiter->value();
-      BlockHandle handle;
-      s = handle.DecodeFrom(&handle_value);
-      assert(s.ok());
-      may_match = filter->PrefixMayMatch(prefix, handle.offset());
+  if (filter != nullptr) {
+    if (!filter->IsBlockBased()) {
+      may_match = filter->PrefixMayMatch(prefix);
+    } else {
+      // Then, try find it within each block
+      unique_ptr<InternalIterator> iiter(NewIndexIterator(no_io_read_options));
+      iiter->Seek(internal_prefix);
+
+      if (!iiter->Valid()) {
+        // we're past end of file
+        // if it's incomplete, it means that we avoided I/O
+        // and we're not really sure that we're past the end
+        // of the file
+        may_match = iiter->status().IsIncomplete();
+      } else if (ExtractUserKey(iiter->key())
+                     .starts_with(ExtractUserKey(internal_prefix))) {
+        // we need to check for this subtle case because our only
+        // guarantee is that "the key is a string >= last key in that data
+        // block" according to the doc/table_format.txt spec.
+        //
+        // Suppose iiter->key() starts with the desired prefix; it is not
+        // necessarily the case that the corresponding data block will
+        // contain the prefix, since iiter->key() need not be in the
+        // block.  However, the next data block may contain the prefix, so
+        // we return true to play it safe.
+        may_match = true;
+      } else if (filter->IsBlockBased()) {
+        // iiter->key() does NOT start with the desired prefix.  Because
+        // Seek() finds the first key that is >= the seek target, this
+        // means that iiter->key() > prefix.  Thus, any data blocks coming
+        // after the data block corresponding to iiter->key() cannot
+        // possibly contain the key.  Thus, the corresponding data block
+        // is the only on could potentially contain the prefix.
+        Slice handle_value = iiter->value();
+        BlockHandle handle;
+        s = handle.DecodeFrom(&handle_value);
+        assert(s.ok());
+        may_match = filter->PrefixMayMatch(prefix, handle.offset());
+      }
    }
  }