use user_key and iterate_upper_bound to determine compatibility of bloom filters (#3899)

Summary:
Previously, in https://github.com/facebook/rocksdb/pull/3601, the bloom filter would only be checked if the `prefix_extractor` in the mutable_cf_options matched the one recorded in the SST file.
This PR relaxes that requirement: the bloom filter is considered compatible, and will still be consulted, if all keys in the range [user_key, iterate_upper_bound) share the same prefix after being transformed by the prefix extractor recorded in the SST file.
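
For orientation, here is a minimal, self-contained sketch of the relaxed compatibility rule (illustrative only; FilterCompatible and SameLengthImmediateSuccessor are hypothetical stand-ins for the logic this PR adds in FullFilterBlockReader::IsFilterCompatible and BytewiseComparatorImpl::IsSameLengthImmediateSuccessor). The filter built with the SST's prefix extractor stays usable when the seek key and the upper bound transform to the same prefix, or when the upper bound has exactly the full prefix length and is the same-length immediate successor of the seek key's prefix.

#include <cassert>
#include <cstdint>
#include <string>

// t is s's same-length immediate successor: the first differing byte is one
// greater in t, and every later byte rolls over from 0xff (in s) to 0x00 (in t).
static bool SameLengthImmediateSuccessor(const std::string& s,
                                         const std::string& t) {
  if (s.size() != t.size() || s.empty()) return false;
  size_t i = 0;
  while (i < s.size() && s[i] == t[i]) ++i;
  if (i == s.size()) return false;  // identical strings
  uint8_t byte_s = static_cast<uint8_t>(s[i]);
  uint8_t byte_t = static_cast<uint8_t>(t[i]);
  if (byte_s == 0xff || byte_s + 1 != byte_t) return false;
  for (++i; i < s.size(); ++i) {
    if (static_cast<uint8_t>(s[i]) != 0xff ||
        static_cast<uint8_t>(t[i]) != 0x00) {
      return false;
    }
  }
  return true;
}

// The SST's bloom filter is usable for the range [seek_key, upper_bound) when
// both ends map to the same prefix, or when upper_bound has exactly the full
// prefix length and immediately follows the seek key's prefix.
static bool FilterCompatible(size_t full_prefix_len,  // e.g. 4 for capped:4
                             const std::string& seek_key,
                             const std::string& upper_bound) {
  std::string prefix = seek_key.substr(0, full_prefix_len);
  std::string ub_prefix = upper_bound.substr(0, full_prefix_len);
  if (prefix == ub_prefix) return true;
  return upper_bound.size() == full_prefix_len &&
         SameLengthImmediateSuccessor(prefix, upper_bound);
}

int main() {
  // Bounds mirroring the DynamicBloomFilterUpperBound test below,
  // with the SST's filter built using capped:4.
  assert(FilterCompatible(4, "abcd0000", "abcdzzzz"));  // same prefix "abcd"
  assert(FilterCompatible(4, "abcdxx00", "abce"));      // immediate successor
  assert(!FilterCompatible(4, "abcdxx01", "abcey"));    // upper bound too long
  assert(!FilterCompatible(4, "aaaaaaaa", "abce"));     // prefixes not adjacent
  return 0;
}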
Closes https://github.com/facebook/rocksdb/pull/3899

Differential Revision: D8157459

Pulled By: miasantreble

fbshipit-source-id: 18d17cba56a1005162f8d5db7a27aba277089c41
Zhongyi Xie (committed by Facebook GitHub Bot)
parent 967aa8157a
commit 408205a36b
Changed files:
  1. db/comparator_db_test.cc (64)
  2. db/db_bloom_filter_test.cc (407)
  3. db/db_test.cc (174)
  4. db/prefix_test.cc (4)
  5. include/rocksdb/comparator.h (6)
  6. include/rocksdb/options.h (2)
  7. include/rocksdb/slice_transform.h (5)
  8. options/options_helper.cc (2)
  9. options/options_helper.h (4)
  10. table/block_based_table_reader.cc (121)
  11. table/block_based_table_reader.h (24)
  12. table/filter_block.h (11)
  13. table/full_filter_block.cc (57)
  14. table/full_filter_block.h (11)
  15. table/table_test.cc (3)
  16. util/comparator.cc (26)
  17. util/slice.cc (10)

@ -449,6 +449,70 @@ TEST_P(ComparatorDBTest, TwoStrComparator) {
}
}
TEST_P(ComparatorDBTest, IsSameLengthImmediateSuccessor) {
{
// different length
Slice s("abcxy");
Slice t("abcxyz");
ASSERT_FALSE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t));
}
{
Slice s("abcxyz");
Slice t("abcxy");
ASSERT_FALSE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t));
}
{
// not last byte different
Slice s("abc1xyz");
Slice t("abc2xyz");
ASSERT_FALSE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t));
}
{
// same string
Slice s("abcxyz");
Slice t("abcxyz");
ASSERT_FALSE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t));
}
{
Slice s("abcxy");
Slice t("abcxz");
ASSERT_TRUE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t));
}
{
Slice s("abcxz");
Slice t("abcxy");
ASSERT_FALSE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t));
}
{
const char s_array[] = "\x50\x8a\xac";
const char t_array[] = "\x50\x8a\xad";
Slice s(s_array);
Slice t(t_array);
ASSERT_TRUE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t));
}
{
const char s_array[] = "\x50\x8a\xff";
const char t_array[] = "\x50\x8b\x00";
Slice s(s_array, 3);
Slice t(t_array, 3);
ASSERT_TRUE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t));
}
{
const char s_array[] = "\x50\x8a\xff\xff";
const char t_array[] = "\x50\x8b\x00\x00";
Slice s(s_array, 4);
Slice t(t_array, 4);
ASSERT_TRUE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t));
}
{
const char s_array[] = "\x50\x8a\xff\xff";
const char t_array[] = "\x50\x8b\x00\x01";
Slice s(s_array, 4);
Slice t(t_array, 4);
ASSERT_FALSE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t));
}
}
TEST_P(ComparatorDBTest, FindShortestSeparator) {
std::string s1 = "abc1xyz";
std::string s2 = "abc3xy";

@ -1097,6 +1097,413 @@ TEST_F(DBBloomFilterTest, OptimizeFiltersForHits) {
TestGetTickerCount(options, BLOCK_CACHE_ADD));
}
int CountIter(std::unique_ptr<Iterator>& iter, const Slice& key) {
int count = 0;
for (iter->Seek(key); iter->Valid() && iter->status() == Status::OK();
iter->Next()) {
count++;
}
return count;
}
// use iterate_upper_bound to hint compatibility of existing bloom filters.
// The BF is considered compatible if 1) upper bound and seek key transform
// into the same string, or 2) the transformed seek key is of the same length
// as the upper bound and the two keys are adjacent according to the comparator.
TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
int iteration = 0;
for (bool use_block_based_builder : {true, false}) {
Options options;
options.create_if_missing = true;
options.prefix_extractor.reset(NewCappedPrefixTransform(4));
options.disable_auto_compactions = true;
options.statistics = CreateDBStatistics();
// Enable prefix bloom for SST files
BlockBasedTableOptions table_options;
table_options.cache_index_and_filter_blocks = true;
table_options.filter_policy.reset(
NewBloomFilterPolicy(10, use_block_based_builder));
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
DestroyAndReopen(options);
ASSERT_OK(Put("abcdxxx0", "val1"));
ASSERT_OK(Put("abcdxxx1", "val2"));
ASSERT_OK(Put("abcdxxx2", "val3"));
ASSERT_OK(Put("abcdxxx3", "val4"));
dbfull()->Flush(FlushOptions());
{
// prefix_extractor has not changed, BF will always be read
Slice upper_bound("abce");
ReadOptions read_options;
read_options.prefix_same_as_start = true;
read_options.iterate_upper_bound = &upper_bound;
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter, "abcd0000"), 4);
}
{
Slice upper_bound("abcdzzzz");
ReadOptions read_options;
read_options.prefix_same_as_start = true;
read_options.iterate_upper_bound = &upper_bound;
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter, "abcd0000"), 4);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
}
ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:5"}}));
ASSERT_EQ(0, strcmp(dbfull()->GetOptions().prefix_extractor->Name(),
"rocksdb.FixedPrefix.5"));
{
// BF changed, [abcdxx00, abce) is a valid bound, will trigger BF read
Slice upper_bound("abce");
ReadOptions read_options;
read_options.prefix_same_as_start = true;
read_options.iterate_upper_bound = &upper_bound;
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter, "abcdxx00"), 4);
// should check bloom filter since upper bound meets requirement
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
2 + iteration);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
}
{
// [abcdxx01, abcey) is not a valid bound since the upper bound is too long
// for the BF in the SST (capped:4)
Slice upper_bound("abcey");
ReadOptions read_options;
read_options.prefix_same_as_start = true;
read_options.iterate_upper_bound = &upper_bound;
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter, "abcdxx01"), 4);
// should skip bloom filter since upper bound is too long
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
2 + iteration);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
}
{
// [abcdxx02, abcdy) is a valid bound since the prefix is the same
Slice upper_bound("abcdy");
ReadOptions read_options;
read_options.prefix_same_as_start = true;
read_options.iterate_upper_bound = &upper_bound;
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter, "abcdxx02"), 4);
// should check bloom filter since upper bound matches transformed seek
// key
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
2 + iteration * 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
}
{
// [aaaaaaaa, abce) is not a valid bound since 1) they don't share the
// same prefix, and 2) the prefixes are not consecutive
Slice upper_bound("abce");
ReadOptions read_options;
read_options.prefix_same_as_start = true;
read_options.iterate_upper_bound = &upper_bound;
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter, "aaaaaaaa"), 0);
// should skip bloom filter since mismatch is found
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
2 + iteration * 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
}
ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:3"}}));
{
// [abc, abd) is not a valid bound since the upper bound is too short
// for BF (capped:4)
Slice upper_bound("abd");
ReadOptions read_options;
read_options.prefix_same_as_start = true;
read_options.iterate_upper_bound = &upper_bound;
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter, "abc"), 4);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
2 + iteration * 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
}
ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:4"}}));
{
// set back to capped:4 and verify BF is always read
Slice upper_bound("abd");
ReadOptions read_options;
read_options.prefix_same_as_start = true;
read_options.iterate_upper_bound = &upper_bound;
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter, "abc"), 0);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
3 + iteration * 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1);
}
iteration++;
}
}
// Create multiple SST files each with a different prefix_extractor config,
// verify iterators can read all SST files using the latest config.
TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
int iteration = 0;
for (bool use_block_based_builder : {true, false}) {
Options options;
options.create_if_missing = true;
options.prefix_extractor.reset(NewFixedPrefixTransform(1));
options.disable_auto_compactions = true;
options.statistics = CreateDBStatistics();
// Enable prefix bloom for SST files
BlockBasedTableOptions table_options;
table_options.filter_policy.reset(
NewBloomFilterPolicy(10, use_block_based_builder));
table_options.cache_index_and_filter_blocks = true;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
DestroyAndReopen(options);
Slice upper_bound("foz90000");
ReadOptions read_options;
read_options.prefix_same_as_start = true;
// first SST with fixed:1 BF
ASSERT_OK(Put("foo2", "bar2"));
ASSERT_OK(Put("foo", "bar"));
ASSERT_OK(Put("foq1", "bar1"));
ASSERT_OK(Put("fpa", "0"));
dbfull()->Flush(FlushOptions());
std::unique_ptr<Iterator> iter_old(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter_old, "foo"), 4);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 1);
ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}}));
ASSERT_EQ(0, strcmp(dbfull()->GetOptions().prefix_extractor->Name(),
"rocksdb.CappedPrefix.3"));
read_options.iterate_upper_bound = &upper_bound;
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter, "foo"), 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
1 + iteration);
ASSERT_EQ(CountIter(iter, "gpk"), 0);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
1 + iteration);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
// second SST with capped:3 BF
ASSERT_OK(Put("foo3", "bar3"));
ASSERT_OK(Put("foo4", "bar4"));
ASSERT_OK(Put("foq5", "bar5"));
ASSERT_OK(Put("fpb", "1"));
dbfull()->Flush(FlushOptions());
{
// BF is capped:3 now
std::unique_ptr<Iterator> iter_tmp(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter_tmp, "foo"), 4);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
2 + iteration * 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0);
// both counters are incremented because BF is "not changed" for 1 of the
// 2 SST files, so filter is checked once and found no match.
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
3 + iteration * 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1);
}
ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:2"}}));
ASSERT_EQ(0, strcmp(dbfull()->GetOptions().prefix_extractor->Name(),
"rocksdb.FixedPrefix.2"));
// third SST with fixed:2 BF
ASSERT_OK(Put("foo6", "bar6"));
ASSERT_OK(Put("foo7", "bar7"));
ASSERT_OK(Put("foq8", "bar8"));
ASSERT_OK(Put("fpc", "2"));
dbfull()->Flush(FlushOptions());
{
// BF is fixed:2 now
std::unique_ptr<Iterator> iter_tmp(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter_tmp, "foo"), 9);
// the first and last BF are checked
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
4 + iteration * 3);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1);
ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0);
// only last BF is checked and not found
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
5 + iteration * 3);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2);
}
// iter_old can only see the first SST, so checked plus 1
ASSERT_EQ(CountIter(iter_old, "foo"), 4);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
6 + iteration * 3);
// iter was created after the first setoptions call so only full filter
// will check the filter
ASSERT_EQ(CountIter(iter, "foo"), 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
6 + iteration * 4);
{
// keys in all three SSTs are visible to iterator
// The range of [foo, foz90000] is compatible with (fixed:1) and (fixed:2)
// so +2 for checked counter
std::unique_ptr<Iterator> iter_all(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter_all, "foo"), 9);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
7 + iteration * 5);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2);
ASSERT_EQ(CountIter(iter_all, "gpk"), 0);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
8 + iteration * 5);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3);
}
ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}}));
ASSERT_EQ(0, strcmp(dbfull()->GetOptions().prefix_extractor->Name(),
"rocksdb.CappedPrefix.3"));
{
std::unique_ptr<Iterator> iter_all(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter_all, "foo"), 6);
// all three SSTs are checked because the current options use the same
// prefix_extractor as the remaining SST (capped:3)
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
9 + iteration * 7);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3);
ASSERT_EQ(CountIter(iter_all, "gpk"), 0);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
10 + iteration * 7);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 4);
}
// TODO(Zhongyi): Maybe also need to add Get calls to test point look up?
iteration++;
}
}
// Create a new column family in a running DB, change prefix_extractor
// dynamically, verify the iterator created on the new column family behaves
// as expected
TEST_F(DBBloomFilterTest, DynamicBloomFilterNewColumnFamily) {
int iteration = 0;
for (bool use_block_based_builder : {true, false}) {
Options options = CurrentOptions();
options.create_if_missing = true;
options.prefix_extractor.reset(NewFixedPrefixTransform(1));
options.disable_auto_compactions = true;
options.statistics = CreateDBStatistics();
// Enable prefix bloom for SST files
BlockBasedTableOptions table_options;
table_options.cache_index_and_filter_blocks = true;
table_options.filter_policy.reset(
NewBloomFilterPolicy(10, use_block_based_builder));
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
CreateAndReopenWithCF({"pikachu" + std::to_string(iteration)}, options);
ReadOptions read_options;
read_options.prefix_same_as_start = true;
// create a new CF and set prefix_extractor dynamically
options.prefix_extractor.reset(NewCappedPrefixTransform(3));
CreateColumnFamilies({"ramen_dojo_" + std::to_string(iteration)}, options);
ASSERT_EQ(0,
strcmp(dbfull()->GetOptions(handles_[2]).prefix_extractor->Name(),
"rocksdb.CappedPrefix.3"));
ASSERT_OK(Put(2, "foo3", "bar3"));
ASSERT_OK(Put(2, "foo4", "bar4"));
ASSERT_OK(Put(2, "foo5", "bar5"));
ASSERT_OK(Put(2, "foq6", "bar6"));
ASSERT_OK(Put(2, "fpq7", "bar7"));
dbfull()->Flush(FlushOptions());
{
std::unique_ptr<Iterator> iter(
db_->NewIterator(read_options, handles_[2]));
ASSERT_EQ(CountIter(iter, "foo"), 3);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 0);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
}
ASSERT_OK(
dbfull()->SetOptions(handles_[2], {{"prefix_extractor", "fixed:2"}}));
ASSERT_EQ(0,
strcmp(dbfull()->GetOptions(handles_[2]).prefix_extractor->Name(),
"rocksdb.FixedPrefix.2"));
{
std::unique_ptr<Iterator> iter(
db_->NewIterator(read_options, handles_[2]));
ASSERT_EQ(CountIter(iter, "foo"), 4);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 0);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
}
ASSERT_OK(dbfull()->DropColumnFamily(handles_[2]));
dbfull()->DestroyColumnFamilyHandle(handles_[2]);
handles_[2] = nullptr;
ASSERT_OK(dbfull()->DropColumnFamily(handles_[1]));
dbfull()->DestroyColumnFamilyHandle(handles_[1]);
handles_[1] = nullptr;
iteration++;
}
}
// Verify it's possible to change prefix_extractor at runtime and that
// iterators behave as expected
TEST_F(DBBloomFilterTest, DynamicBloomFilterOptions) {
int iteration = 0;
for (bool use_block_based_builder : {true, false}) {
Options options;
options.create_if_missing = true;
options.prefix_extractor.reset(NewFixedPrefixTransform(1));
options.disable_auto_compactions = true;
options.statistics = CreateDBStatistics();
// Enable prefix bloom for SST files
BlockBasedTableOptions table_options;
table_options.cache_index_and_filter_blocks = true;
table_options.filter_policy.reset(
NewBloomFilterPolicy(10, use_block_based_builder));
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
DestroyAndReopen(options);
ASSERT_OK(Put("foo2", "bar2"));
ASSERT_OK(Put("foo", "bar"));
ASSERT_OK(Put("foo1", "bar1"));
ASSERT_OK(Put("fpa", "0"));
dbfull()->Flush(FlushOptions());
ASSERT_OK(Put("foo3", "bar3"));
ASSERT_OK(Put("foo4", "bar4"));
ASSERT_OK(Put("foo5", "bar5"));
ASSERT_OK(Put("fpb", "1"));
dbfull()->Flush(FlushOptions());
ASSERT_OK(Put("foo6", "bar6"));
ASSERT_OK(Put("foo7", "bar7"));
ASSERT_OK(Put("foo8", "bar8"));
ASSERT_OK(Put("fpc", "2"));
dbfull()->Flush(FlushOptions());
ReadOptions read_options;
read_options.prefix_same_as_start = true;
{
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter, "foo"), 12);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 3);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
}
std::unique_ptr<Iterator> iter_old(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter_old, "foo"), 12);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 6);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}}));
ASSERT_EQ(0, strcmp(dbfull()->GetOptions().prefix_extractor->Name(),
"rocksdb.CappedPrefix.3"));
{
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
// "fp*" should be skipped
ASSERT_EQ(CountIter(iter, "foo"), 9);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 6);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
}
// iterator created before should not be affected and see all keys
ASSERT_EQ(CountIter(iter_old, "foo"), 12);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 9);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
ASSERT_EQ(CountIter(iter_old, "abc"), 0);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 12);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3);
iteration++;
}
}
#endif // ROCKSDB_LITE
} // namespace rocksdb

@ -4607,180 +4607,6 @@ TEST_F(DBTest, FileCreationRandomFailure) {
}
#ifndef ROCKSDB_LITE
int CountIter(Iterator* iter, const Slice& key) {
int count = 0;
for (iter->Seek(key); iter->Valid() && iter->status() == Status::OK();
iter->Next()) {
count++;
}
return count;
}
// Create multiple SST files each with a different prefix_extractor config,
// verify iterators can read all SST files using the latest config.
TEST_F(DBTest, DynamicBloomFilterMultipleSST) {
Options options;
options.create_if_missing = true;
options.prefix_extractor.reset(NewFixedPrefixTransform(1));
options.disable_auto_compactions = true;
// Enable prefix bloom for SST files
BlockBasedTableOptions table_options;
table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
DestroyAndReopen(options);
ReadOptions read_options;
read_options.prefix_same_as_start = true;
// first SST with fixed:1 BF
ASSERT_OK(Put("foo2", "bar2"));
ASSERT_OK(Put("foo", "bar"));
ASSERT_OK(Put("foq1", "bar1"));
ASSERT_OK(Put("fpa", "0"));
dbfull()->Flush(FlushOptions());
Iterator* iter_old = db_->NewIterator(read_options);
ASSERT_EQ(CountIter(iter_old, "foo"), 4);
ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}}));
ASSERT_EQ(0, strcmp(dbfull()->GetOptions().prefix_extractor->Name(),
"rocksdb.CappedPrefix.3"));
Iterator* iter = db_->NewIterator(read_options);
ASSERT_EQ(CountIter(iter, "foo"), 2);
// second SST with capped:3 BF
ASSERT_OK(Put("foo3", "bar3"));
ASSERT_OK(Put("foo4", "bar4"));
ASSERT_OK(Put("foq5", "bar5"));
ASSERT_OK(Put("fpb", "1"));
dbfull()->Flush(FlushOptions());
// BF is capped:3 now
Iterator* iter_tmp = db_->NewIterator(read_options);
ASSERT_EQ(CountIter(iter_tmp, "foo"), 4);
delete iter_tmp;
ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:2"}}));
ASSERT_EQ(0, strcmp(dbfull()->GetOptions().prefix_extractor->Name(),
"rocksdb.FixedPrefix.2"));
// third SST with fixed:2 BF
ASSERT_OK(Put("foo6", "bar6"));
ASSERT_OK(Put("foo7", "bar7"));
ASSERT_OK(Put("foq8", "bar8"));
ASSERT_OK(Put("fpc", "2"));
dbfull()->Flush(FlushOptions());
// BF is fixed:2 now
iter_tmp = db_->NewIterator(read_options);
ASSERT_EQ(CountIter(iter_tmp, "foo"), 9);
delete iter_tmp;
// TODO(Zhongyi): verify existing iterator cannot see newly inserted keys
ASSERT_EQ(CountIter(iter_old, "foo"), 4);
ASSERT_EQ(CountIter(iter, "foo"), 2);
delete iter;
delete iter_old;
// keys in all three SSTs are visible to iterator
Iterator* iter_all = db_->NewIterator(read_options);
ASSERT_EQ(CountIter(iter_all, "foo"), 9);
delete iter_all;
ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}}));
ASSERT_EQ(0, strcmp(dbfull()->GetOptions().prefix_extractor->Name(),
"rocksdb.CappedPrefix.3"));
iter_all = db_->NewIterator(read_options);
ASSERT_EQ(CountIter(iter_all, "foo"), 6);
delete iter_all;
// TODO(Zhongyi): add test for cases where certain SST are skipped
// Also verify BF related counters like BLOOM_FILTER_USEFUL
}
// Create a new column family in a running DB, change prefix_extractor
// dynamically, verify the iterator created on the new column family behaves
// as expected
TEST_F(DBTest, DynamicBloomFilterNewColumnFamily) {
Options options = CurrentOptions();
options.create_if_missing = true;
options.prefix_extractor.reset(NewFixedPrefixTransform(1));
options.disable_auto_compactions = true;
// Enable prefix bloom for SST files
BlockBasedTableOptions table_options;
table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
CreateAndReopenWithCF({"pikachu"}, options);
ReadOptions read_options;
read_options.prefix_same_as_start = true;
// create a new CF and set prefix_extractor dynamically
options.prefix_extractor.reset(NewCappedPrefixTransform(3));
CreateColumnFamilies({"ramen_dojo"}, options);
ASSERT_EQ(0,
strcmp(dbfull()->GetOptions(handles_[2]).prefix_extractor->Name(),
"rocksdb.CappedPrefix.3"));
ASSERT_OK(Put(2, "foo3", "bar3"));
ASSERT_OK(Put(2, "foo4", "bar4"));
ASSERT_OK(Put(2, "foo5", "bar5"));
ASSERT_OK(Put(2, "foq6", "bar6"));
ASSERT_OK(Put(2, "fpq7", "bar7"));
dbfull()->Flush(FlushOptions());
Iterator* iter = db_->NewIterator(read_options, handles_[2]);
ASSERT_EQ(CountIter(iter, "foo"), 3);
delete iter;
ASSERT_OK(
dbfull()->SetOptions(handles_[2], {{"prefix_extractor", "fixed:2"}}));
ASSERT_EQ(0,
strcmp(dbfull()->GetOptions(handles_[2]).prefix_extractor->Name(),
"rocksdb.FixedPrefix.2"));
iter = db_->NewIterator(read_options, handles_[2]);
ASSERT_EQ(CountIter(iter, "foo"), 4);
delete iter;
}
// Verify it's possible to change prefix_extractor at runtime and iterators
// behaves as expected
TEST_F(DBTest, DynamicBloomFilterOptions) {
Options options;
options.create_if_missing = true;
options.prefix_extractor.reset(NewFixedPrefixTransform(1));
options.disable_auto_compactions = true;
// Enable prefix bloom for SST files
BlockBasedTableOptions table_options;
table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
DestroyAndReopen(options);
ASSERT_OK(Put("foo2", "bar2"));
ASSERT_OK(Put("foo", "bar"));
ASSERT_OK(Put("foo1", "bar1"));
ASSERT_OK(Put("fpa", "0"));
dbfull()->Flush(FlushOptions());
ASSERT_OK(Put("foo3", "bar3"));
ASSERT_OK(Put("foo4", "bar4"));
ASSERT_OK(Put("foo5", "bar5"));
ASSERT_OK(Put("fpb", "1"));
dbfull()->Flush(FlushOptions());
ASSERT_OK(Put("foo6", "bar6"));
ASSERT_OK(Put("foo7", "bar7"));
ASSERT_OK(Put("foo8", "bar8"));
ASSERT_OK(Put("fpc", "2"));
dbfull()->Flush(FlushOptions());
ReadOptions read_options;
read_options.prefix_same_as_start = true;
Iterator* iter = db_->NewIterator(read_options);
ASSERT_EQ(CountIter(iter, "foo"), 12);
delete iter;
Iterator* iter_old = db_->NewIterator(read_options);
ASSERT_EQ(CountIter(iter_old, "foo"), 12);
ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}}));
ASSERT_EQ(0, strcmp(dbfull()->GetOptions().prefix_extractor->Name(),
"rocksdb.CappedPrefix.3"));
iter = db_->NewIterator(read_options);
// "fp*" should be skipped
ASSERT_EQ(CountIter(iter, "foo"), 9);
delete iter;
// iterator created before should not be affected and see all keys
ASSERT_EQ(CountIter(iter_old, "foo"), 12);
delete iter_old;
}
TEST_F(DBTest, DynamicMiscOptions) {
// Test max_sequential_skip_in_iterations

@ -212,6 +212,10 @@ class SamePrefixTransform : public SliceTransform {
virtual bool InRange(const Slice& dst) const override {
return dst == prefix_;
}
virtual bool FullLengthEnabled(size_t* /*len*/) const override {
return false;
}
};
} // namespace

@ -68,6 +68,12 @@ class Comparator {
// if it is a wrapped comparator, may return the root one.
// return itself it is not wrapped.
virtual const Comparator* GetRootComparator() const { return this; }
// given two keys, determine whether t is the same-length immediate
// successor of s
virtual bool IsSameLengthImmediateSuccessor(const Slice& /*s*/,
const Slice& /*t*/) const {
return false;
}
};
// Return a builtin comparator that uses lexicographic byte-wise

@ -1015,7 +1015,7 @@ struct ReadOptions {
// can return entries. Once the bound is reached, Valid() will be false.
// "iterate_upper_bound" is exclusive ie the bound value is
// not a valid entry. If iterator_extractor is not null, the Seek target
// and iterator_upper_bound need to have the same prefix.
// and iterate_upper_bound need to have the same prefix.
// This is because ordering is not guaranteed outside of prefix domain.
//
// Default: nullptr

@ -60,6 +60,11 @@ class SliceTransform {
// This is currently not used and remains here for backward compatibility.
virtual bool InRange(const Slice& /*dst*/) const { return false; }
// Some SliceTransform will have a full length which can be used to
// determine if two keys are consecutive. Can be disabled by always
// returning 0
virtual bool FullLengthEnabled(size_t* /*len*/) const { return false; }
// Transform(s)=Transform(`prefix`) for any s with `prefix` as a prefix.
//
// This function is not used by RocksDB, but for users. If users pass

@ -379,6 +379,7 @@ bool ParseStructOptions(
}
return true;
}
} // anonymous namespace
bool ParseSliceTransformHelper(
const std::string& kFixedPrefixName, const std::string& kCappedPrefixName,
@ -434,7 +435,6 @@ bool ParseSliceTransform(
// SliceTransforms here.
return false;
}
} // anonymous namespace
bool ParseOptionHelper(char* opt_address, const OptionType& opt_type,
const std::string& value) {

@ -130,6 +130,10 @@ Status GetColumnFamilyOptionsFromMapInternal(
std::vector<std::string>* unsupported_options_names = nullptr,
bool ignore_unknown_options = false);
bool ParseSliceTransform(
const std::string& value,
std::shared_ptr<const SliceTransform>* slice_transform);
extern Status StringToMap(
const std::string& opts_str,
std::unordered_map<std::string, std::string>* opts_map);

@ -249,8 +249,8 @@ class PartitionIndexReader : public IndexReader, public Cleanable {
index_block_->NewIterator(icomparator_,
icomparator_->user_comparator(), nullptr,
true, nullptr, index_key_includes_seq_),
false,
/* prefix_extractor */ nullptr, kIsIndex, index_key_includes_seq_);
false, true, /* prefix_extractor */ nullptr, kIsIndex,
index_key_includes_seq_);
}
// TODO(myabandeh): Update TwoLevelIterator to be able to make use of
// on-stack BlockIter while the state is on heap. Currently it assumes
@ -824,6 +824,12 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
ROCKS_LOG_ERROR(rep->ioptions.info_log,
"Cannot find Properties block from file.");
}
#ifndef ROCKSDB_LITE
if (rep->table_properties) {
ParseSliceTransform(rep->table_properties->prefix_extractor_name,
&(rep->table_prefix_extractor));
}
#endif // ROCKSDB_LITE
// Read the compression dictionary meta block
bool found_compression_dict;
@ -898,6 +904,9 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
rep->ioptions.info_log);
}
bool need_upper_bound_check =
PrefixExtractorChanged(rep->table_properties.get(), prefix_extractor);
// prefetch both index and filters, down to all partitions
const bool prefetch_all = prefetch_index_and_filter_in_cache || level == 0;
BlockBasedTableOptions::IndexType index_type = new_table->UpdateIndexType();
@ -932,14 +941,12 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
// Hack: Call NewIndexIterator() to implicitly add index to the
// block_cache
CachableEntry<IndexReader> index_entry;
bool prefix_extractor_changed = false;
// check prefix_extractor match only if hash based index is used
if (rep->index_type == BlockBasedTableOptions::kHashSearch) {
prefix_extractor_changed = PrefixExtractorChanged(
rep->table_properties.get(), prefix_extractor);
}
bool disable_prefix_seek =
rep->index_type == BlockBasedTableOptions::kHashSearch &&
need_upper_bound_check;
unique_ptr<InternalIterator> iter(new_table->NewIndexIterator(
ReadOptions(), prefix_extractor_changed, nullptr, &index_entry));
ReadOptions(), disable_prefix_seek, nullptr, &index_entry));
s = iter->status();
if (s.ok()) {
// This is the first call to NewIndexIterator() since we're in Open().
@ -958,9 +965,11 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
}
if (s.ok() && prefetch_filter) {
// Hack: Call GetFilter() to implicitly add filter to the block_cache
auto filter_entry = new_table->GetFilter(prefix_extractor);
auto filter_entry =
new_table->GetFilter(rep->table_prefix_extractor.get());
if (filter_entry.value != nullptr && prefetch_all) {
filter_entry.value->CacheDependencies(pin_all, prefix_extractor);
filter_entry.value->CacheDependencies(
pin_all, rep->table_prefix_extractor.get());
}
// if pin_filter is true then save it in rep_->filter_entry; it will be
// released in the destructor only, hence it will be pinned in the
@ -990,14 +999,14 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
// Set filter block
if (rep->filter_policy) {
const bool is_a_filter_partition = true;
auto filter =
new_table->ReadFilter(prefetch_buffer.get(), rep->filter_handle,
!is_a_filter_partition, prefix_extractor);
auto filter = new_table->ReadFilter(
prefetch_buffer.get(), rep->filter_handle, !is_a_filter_partition,
rep->table_prefix_extractor.get());
rep->filter.reset(filter);
// Refer to the comment above about partitioned indexes always being
// cached
if (filter && (prefetch_index_and_filter_in_cache || level == 0)) {
filter->CacheDependencies(pin_all, prefix_extractor);
filter->CacheDependencies(pin_all, rep->table_prefix_extractor.get());
}
}
} else {
@ -1784,20 +1793,28 @@ BlockBasedTable::PartitionedIndexIteratorState::NewSecondaryIterator(
// Otherwise, this method guarantees no I/O will be incurred.
//
// REQUIRES: this method shouldn't be called while the DB lock is held.
bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key,
const SliceTransform* prefix_extractor) {
bool BlockBasedTable::PrefixMayMatch(
const Slice& internal_key, const ReadOptions& read_options,
const SliceTransform* options_prefix_extractor,
const bool need_upper_bound_check) {
if (!rep_->filter_policy) {
return true;
}
assert(prefix_extractor != nullptr);
const SliceTransform* prefix_extractor;
if (rep_->table_prefix_extractor == nullptr) {
if (need_upper_bound_check) {
return true;
}
prefix_extractor = options_prefix_extractor;
} else {
prefix_extractor = rep_->table_prefix_extractor.get();
}
auto user_key = ExtractUserKey(internal_key);
if (!prefix_extractor->InDomain(user_key)) {
return true;
}
assert(rep_->table_properties->prefix_extractor_name.compare(
prefix_extractor->Name()) == 0);
auto prefix = prefix_extractor->Transform(user_key);
bool may_match = true;
Status s;
@ -1805,12 +1822,23 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key,
// First, try check with full filter
auto filter_entry = GetFilter(prefix_extractor);
FilterBlockReader* filter = filter_entry.value;
bool filter_checked = true;
if (filter != nullptr) {
if (!filter->IsBlockBased()) {
const Slice* const const_ikey_ptr = &internal_key;
may_match = filter->PrefixMayMatch(prefix, prefix_extractor, kNotValid,
false, const_ikey_ptr);
may_match = filter->RangeMayExist(
read_options.iterate_upper_bound, user_key, prefix_extractor,
rep_->internal_comparator.user_comparator(), const_ikey_ptr,
&filter_checked, need_upper_bound_check);
} else {
// if prefix_extractor changed for block based filter, skip filter
if (need_upper_bound_check) {
if (!rep_->filter_entry.IsSet()) {
filter_entry.Release(rep_->table_options.block_cache.get());
}
return true;
}
auto prefix = prefix_extractor->Transform(user_key);
InternalKey internal_key_prefix(prefix, kMaxSequenceNumber, kTypeValue);
auto internal_prefix = internal_key_prefix.Encode();
@ -1823,9 +1851,9 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key,
// Then, try find it within each block
// we already know prefix_extractor and prefix_extractor_name must match
// because `CheckPrefixMayMatch` first checks `check_filter_ == true`
bool prefix_extractor_changed = false;
unique_ptr<InternalIterator> iiter(
NewIndexIterator(no_io_read_options, prefix_extractor_changed));
NewIndexIterator(no_io_read_options,
/* need_upper_bound_check */ false));
iiter->Seek(internal_prefix);
if (!iiter->Valid()) {
@ -1866,10 +1894,12 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key,
}
}
Statistics* statistics = rep_->ioptions.statistics;
RecordTick(statistics, BLOOM_FILTER_PREFIX_CHECKED);
if (!may_match) {
RecordTick(statistics, BLOOM_FILTER_PREFIX_USEFUL);
if (filter_checked) {
Statistics* statistics = rep_->ioptions.statistics;
RecordTick(statistics, BLOOM_FILTER_PREFIX_CHECKED);
if (!may_match) {
RecordTick(statistics, BLOOM_FILTER_PREFIX_USEFUL);
}
}
// if rep_->filter_entry is not set, we should call Release(); otherwise
@ -1878,12 +1908,11 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key,
if (!rep_->filter_entry.IsSet()) {
filter_entry.Release(rep_->table_options.block_cache.get());
}
return may_match;
}
void BlockBasedTableIterator::Seek(const Slice& target) {
if (!CheckPrefixMayMatch(target, prefix_extractor_)) {
if (!CheckPrefixMayMatch(target)) {
ResetDataIter();
return;
}
@ -1911,7 +1940,7 @@ void BlockBasedTableIterator::Seek(const Slice& target) {
}
void BlockBasedTableIterator::SeekForPrev(const Slice& target) {
if (!CheckPrefixMayMatch(target, prefix_extractor_)) {
if (!CheckPrefixMayMatch(target)) {
ResetDataIter();
return;
}
@ -2101,7 +2130,7 @@ void BlockBasedTableIterator::FindKeyBackward() {
InternalIterator* BlockBasedTable::NewIterator(
const ReadOptions& read_options, const SliceTransform* prefix_extractor,
Arena* arena, bool skip_filters, bool for_compaction) {
bool prefix_extractor_changed =
bool need_upper_bound_check =
PrefixExtractorChanged(rep_->table_properties.get(), prefix_extractor);
const bool kIsNotIndex = false;
if (arena == nullptr) {
@ -2109,21 +2138,21 @@ InternalIterator* BlockBasedTable::NewIterator(
this, read_options, rep_->internal_comparator,
NewIndexIterator(
read_options,
prefix_extractor_changed &&
need_upper_bound_check &&
rep_->index_type == BlockBasedTableOptions::kHashSearch),
!skip_filters && !read_options.total_order_seek &&
prefix_extractor != nullptr && !prefix_extractor_changed,
prefix_extractor, kIsNotIndex, true /*key_includes_seq*/,
for_compaction);
prefix_extractor != nullptr,
need_upper_bound_check, prefix_extractor, kIsNotIndex,
true /*key_includes_seq*/, for_compaction);
} else {
auto* mem = arena->AllocateAligned(sizeof(BlockBasedTableIterator));
return new (mem) BlockBasedTableIterator(
this, read_options, rep_->internal_comparator,
NewIndexIterator(read_options, prefix_extractor_changed),
NewIndexIterator(read_options, need_upper_bound_check),
!skip_filters && !read_options.total_order_seek &&
prefix_extractor != nullptr && !prefix_extractor_changed,
prefix_extractor, kIsNotIndex, true /*key_includes_seq*/,
for_compaction);
prefix_extractor != nullptr,
need_upper_bound_check, prefix_extractor, kIsNotIndex,
true /*key_includes_seq*/, for_compaction);
}
}
@ -2210,14 +2239,14 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
BlockIter iiter_on_stack;
// if prefix_extractor found in block differs from options, disable
// BlockPrefixIndex. Only do this check when index_type is kHashSearch.
bool prefix_extractor_changed = false;
bool need_upper_bound_check = false;
if (rep_->index_type == BlockBasedTableOptions::kHashSearch) {
prefix_extractor_changed = PrefixExtractorChanged(
need_upper_bound_check = PrefixExtractorChanged(
rep_->table_properties.get(), prefix_extractor);
}
auto iiter = NewIndexIterator(read_options, prefix_extractor_changed,
&iiter_on_stack, /* index_entry */ nullptr,
get_context);
auto iiter =
NewIndexIterator(read_options, need_upper_bound_check, &iiter_on_stack,
/* index_entry */ nullptr, get_context);
std::unique_ptr<InternalIterator> iiter_unique_ptr;
if (iiter != &iiter_on_stack) {
iiter_unique_ptr.reset(iiter);
@ -2479,7 +2508,7 @@ Status BlockBasedTable::CreateIndexReader(
// kHashSearch requires non-empty prefix_extractor but bypass checking
// prefix_extractor here since we have no access to MutableCFOptions.
// Add prefix_extractor_changed flag in BlockBasedTable::NewIndexIterator.
// Add need_upper_bound_check flag in BlockBasedTable::NewIndexIterator.
// If prefix_extractor does not match prefix_extractor_name from table
// properties, turn off Hash Index by setting total_order_seek to true

@ -95,7 +95,9 @@ class BlockBasedTable : public TableReader {
bool skip_filters = false, int level = -1);
bool PrefixMayMatch(const Slice& internal_key,
const SliceTransform* prefix_extractor = nullptr);
const ReadOptions& read_options,
const SliceTransform* options_prefix_extractor,
const bool need_upper_bound_check);
// Returns a new iterator over the table contents.
// The result of NewIterator() is initially invalid (caller must
@ -277,7 +279,7 @@ class BlockBasedTable : public TableReader {
// 3. We disallowed any io to be performed, that is, read_options ==
// kBlockCacheTier
InternalIterator* NewIndexIterator(
const ReadOptions& read_options, bool prefix_extractor_changed = false,
const ReadOptions& read_options, bool need_upper_bound_check = false,
BlockIter* input_iter = nullptr,
CachableEntry<IndexReader>* index_entry = nullptr,
GetContext* get_context = nullptr);
@ -481,6 +483,7 @@ struct BlockBasedTable::Rep {
// and compatible with existing code, we introduce a wrapper that allows
// block to extract prefix without knowing if a key is internal or not.
unique_ptr<SliceTransform> internal_prefix_transform;
std::shared_ptr<const SliceTransform> table_prefix_extractor;
// only used in level 0 files when pin_l0_filter_and_index_blocks_in_cache is
// true or in all levels when pin_top_level_index_and_filter is set in
@ -515,6 +518,7 @@ class BlockBasedTableIterator : public InternalIterator {
const ReadOptions& read_options,
const InternalKeyComparator& icomp,
InternalIterator* index_iter, bool check_filter,
bool need_upper_bound_check,
const SliceTransform* prefix_extractor, bool is_index,
bool key_includes_seq = true,
bool for_compaction = false)
@ -525,10 +529,11 @@ class BlockBasedTableIterator : public InternalIterator {
pinned_iters_mgr_(nullptr),
block_iter_points_to_real_block_(false),
check_filter_(check_filter),
need_upper_bound_check_(need_upper_bound_check),
prefix_extractor_(prefix_extractor),
is_index_(is_index),
key_includes_seq_(key_includes_seq),
for_compaction_(for_compaction),
prefix_extractor_(prefix_extractor) {}
for_compaction_(for_compaction) {}
~BlockBasedTableIterator() { delete index_iter_; }
@ -575,9 +580,10 @@ class BlockBasedTableIterator : public InternalIterator {
block_iter_points_to_real_block_;
}
bool CheckPrefixMayMatch(const Slice& ikey,
const SliceTransform* prefix_extractor = nullptr) {
if (check_filter_ && !table_->PrefixMayMatch(ikey, prefix_extractor)) {
bool CheckPrefixMayMatch(const Slice& ikey) {
if (check_filter_ &&
!table_->PrefixMayMatch(ikey, read_options_, prefix_extractor_,
need_upper_bound_check_)) {
// TODO remember the iterator is invalidated because of prefix
// match. This can prevent the upper level file iterator from falsely
// believing the position is the end of the SST file and moving to
@ -621,6 +627,9 @@ class BlockBasedTableIterator : public InternalIterator {
bool block_iter_points_to_real_block_;
bool is_out_of_bound_ = false;
bool check_filter_;
// TODO(Zhongyi): pick a better name
bool need_upper_bound_check_;
const SliceTransform* prefix_extractor_;
// If the blocks over which we iterate are index blocks
bool is_index_;
// If the keys in the blocks over which we iterate include 8 byte sequence
@ -629,7 +638,6 @@ class BlockBasedTableIterator : public InternalIterator {
bool for_compaction_;
// TODO use block offset instead
std::string prev_index_value_;
const SliceTransform* prefix_extractor_;
static const size_t kInitReadaheadSize = 8 * 1024;
// Found that 256 KB readahead size provides the best performance, based on

@ -123,6 +123,17 @@ class FilterBlockReader {
virtual void CacheDependencies(bool /*pin*/,
const SliceTransform* /*prefix_extractor*/) {}
virtual bool RangeMayExist(
const Slice* /*iterate_upper_bound*/, const Slice& user_key,
const SliceTransform* prefix_extractor,
const Comparator* /*comparator*/, const Slice* const const_ikey_ptr,
bool* filter_checked, bool /*need_upper_bound_check*/) {
*filter_checked = true;
Slice prefix = prefix_extractor->Transform(user_key);
return PrefixMayMatch(prefix, prefix_extractor, kNotValid, false,
const_ikey_ptr);
}
protected:
bool whole_key_filtering_;

@ -98,6 +98,10 @@ FullFilterBlockReader::FullFilterBlockReader(
contents_(contents) {
assert(filter_bits_reader != nullptr);
filter_bits_reader_.reset(filter_bits_reader);
if (prefix_extractor_ != nullptr) {
full_length_enabled_ =
prefix_extractor_->FullLengthEnabled(&prefix_extractor_full_length_);
}
}
FullFilterBlockReader::FullFilterBlockReader(
@ -153,4 +157,57 @@ bool FullFilterBlockReader::MayMatch(const Slice& entry) {
size_t FullFilterBlockReader::ApproximateMemoryUsage() const {
return contents_.size();
}
bool FullFilterBlockReader::RangeMayExist(const Slice* iterate_upper_bound,
const Slice& user_key, const SliceTransform* prefix_extractor,
const Comparator* comparator, const Slice* const const_ikey_ptr,
bool* filter_checked, bool need_upper_bound_check) {
if (!prefix_extractor || !prefix_extractor->InDomain(user_key)) {
*filter_checked = false;
return true;
}
Slice prefix = prefix_extractor->Transform(user_key);
if (need_upper_bound_check &&
!IsFilterCompatible(iterate_upper_bound, prefix, comparator)) {
*filter_checked = false;
return true;
} else {
*filter_checked = true;
return PrefixMayMatch(prefix, prefix_extractor, kNotValid, false,
const_ikey_ptr);
}
}
bool FullFilterBlockReader::IsFilterCompatible(
const Slice* iterate_upper_bound, const Slice& prefix,
const Comparator* comparator) {
// Try to reuse the bloom filter in the SST table even if the prefix_extractor
// in mutable_cf_options has changed. If all keys in the range
// [user_key, upper_bound) share the same prefix then we may still be able to
// use the bloom filter.
if (iterate_upper_bound != nullptr && prefix_extractor_) {
if (!prefix_extractor_->InDomain(*iterate_upper_bound)) {
return false;
}
Slice upper_bound_xform =
prefix_extractor_->Transform(*iterate_upper_bound);
// first check if user_key and upper_bound all share the same prefix
if (!comparator->Equal(prefix, upper_bound_xform)) {
// second check if user_key's prefix is the immediate predecessor of
// upper_bound and has the same length. If so, we know for sure all
// keys in the range [user_key, upper_bound) share the same prefix.
// Also need to make sure upper_bound is full length to ensure
// correctness
if (!full_length_enabled_ ||
iterate_upper_bound->size() != prefix_extractor_full_length_ ||
!comparator->IsSameLengthImmediateSuccessor(prefix,
*iterate_upper_bound)) {
return false;
}
}
return true;
} else {
return false;
}
}
} // namespace rocksdb

@ -108,18 +108,27 @@ class FullFilterBlockReader : public FilterBlockReader {
uint64_t block_offset = kNotValid, const bool no_io = false,
const Slice* const const_ikey_ptr = nullptr) override;
virtual size_t ApproximateMemoryUsage() const override;
virtual bool RangeMayExist(const Slice* iterate_upper_bound, const Slice& user_key,
const SliceTransform* prefix_extractor,
const Comparator* comparator,
const Slice* const const_ikey_ptr, bool* filter_checked,
bool need_upper_bound_check) override;
private:
const SliceTransform* prefix_extractor_;
Slice contents_;
std::unique_ptr<FilterBitsReader> filter_bits_reader_;
BlockContents block_contents_;
std::unique_ptr<const char[]> filter_data_;
bool full_length_enabled_;
size_t prefix_extractor_full_length_;
// No copying allowed
FullFilterBlockReader(const FullFilterBlockReader&);
bool MayMatch(const Slice& entry);
void operator=(const FullFilterBlockReader&);
bool IsFilterCompatible(const Slice* iterate_upper_bound,
const Slice& prefix, const Comparator* comparator);
};
} // namespace rocksdb

@ -692,6 +692,9 @@ class FixedOrLessPrefixTransform : public SliceTransform {
virtual bool InRange(const Slice& dst) const override {
return (dst.size() <= prefix_len_);
}
virtual bool FullLengthEnabled(size_t* /*len*/) const override {
return false;
}
};
class HarnessTest : public testing::Test {

@ -98,6 +98,32 @@ class BytewiseComparatorImpl : public Comparator {
}
// *key is a run of 0xffs. Leave it alone.
}
virtual bool IsSameLengthImmediateSuccessor(const Slice& s,
const Slice& t) const override {
if (s.size() != t.size() || s.size() == 0) {
return false;
}
size_t diff_ind = s.difference_offset(t);
// same slice
if (diff_ind >= s.size()) return false;
uint8_t byte_s = static_cast<uint8_t>(s[diff_ind]);
uint8_t byte_t = static_cast<uint8_t>(t[diff_ind]);
// the first differing bytes must be consecutive values, and the remaining
// bytes must be 0xff for s and 0x00 for t
if (byte_s != uint8_t{0xff} && byte_s + 1 == byte_t) {
for (size_t i = diff_ind + 1; i < s.size(); ++i) {
byte_s = static_cast<uint8_t>(s[i]);
byte_t = static_cast<uint8_t>(t[i]);
if (byte_s != uint8_t{0xff} || byte_t != uint8_t{0x00}) {
return false;
}
}
return true;
} else {
return false;
}
}
};
class ReverseBytewiseComparatorImpl : public BytewiseComparatorImpl {

@ -47,6 +47,11 @@ class FixedPrefixTransform : public SliceTransform {
return (dst.size() == prefix_len_);
}
virtual bool FullLengthEnabled(size_t* len) const override {
*len = prefix_len_;
return true;
}
virtual bool SameResultWhenAppended(const Slice& prefix) const override {
return InDomain(prefix);
}
@ -80,6 +85,11 @@ class CappedPrefixTransform : public SliceTransform {
return (dst.size() <= cap_len_);
}
virtual bool FullLengthEnabled(size_t* len) const override {
*len = cap_len_;
return true;
}
virtual bool SameResultWhenAppended(const Slice& prefix) const override {
return prefix.size() >= cap_len_;
}

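As the slice.cc hunk above shows, FixedPrefixTransform and CappedPrefixTransform now report their full prefix length via FullLengthEnabled(), which is what allows IsFilterCompatible to trust the same-length immediate-successor check. A user-defined transform can opt in the same way; the sketch below is illustrative only (the class name and fixed-length policy are hypothetical), not part of this change:

#include <cstddef>

#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"

namespace {

class MyFixedPrefixTransform : public rocksdb::SliceTransform {
 public:
  explicit MyFixedPrefixTransform(size_t len) : len_(len) {}

  virtual const char* Name() const override { return "MyFixedPrefixTransform"; }

  virtual rocksdb::Slice Transform(const rocksdb::Slice& src) const override {
    return rocksdb::Slice(src.data(), len_);
  }

  virtual bool InDomain(const rocksdb::Slice& src) const override {
    return src.size() >= len_;
  }

  // Every prefix this transform emits is exactly len_ bytes long, so report
  // that length; full filters built with this transform can then accept the
  // upper-bound compatibility check added in this PR.
  virtual bool FullLengthEnabled(size_t* len) const override {
    *len = len_;
    return true;
  }

 private:
  const size_t len_;
};

}  // namespace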