From 17af09fcce881e2773a20086eea5c8117ad67204 Mon Sep 17 00:00:00 2001 From: Siying Dong Date: Thu, 17 May 2018 18:24:20 -0700 Subject: [PATCH] Implement key shortening functions in ReverseBytewiseComparator Summary: Right now ReverseBytewiseComparator::FindShortestSeparator() doesn't really shorten key, and ReverseBytewiseComparator::FindShortestSuccessor() seems to return wrong results. The code is confusing too as it uses BytewiseComparatorImpl::FindShortestSeparator() but the function actually won't do anything if the the first key is larger than the second. Implement ReverseBytewiseComparator::FindShortestSeparator() and override ReverseBytewiseComparator::FindShortestSuccessor() to be empty. Closes https://github.com/facebook/rocksdb/pull/3836 Differential Revision: D7959762 Pulled By: siying fbshipit-source-id: 93acb621c16ce6f23e087ae4e19f7d84d1254683 --- HISTORY.md | 1 + db/comparator_db_test.cc | 140 +++++++++++++++++++++++++++++++++++++++ util/comparator.cc | 52 ++++++++++++++- 3 files changed, 192 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 66f9287fe..e9698ab3b 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -26,6 +26,7 @@ * Fix `BackupableDBOptions::max_valid_backups_to_open` to not delete backup files when refcount cannot be accurately determined. * Fix memory leak when pin_l0_filter_and_index_blocks_in_cache is used with partitioned filters * Disable rollback of merge operands in WritePrepared transactions to work around an issue in MyRocks. It can be enabled back by setting TransactionDBOptions::rollback_merge_operands to true. +* Fix wrong results by ReverseBytewiseComparator::FindShortSuccessor() ### Java API Changes * Add `BlockBasedTableConfig.setBlockCache` to allow sharing a block cache across DB instances. diff --git a/db/comparator_db_test.cc b/db/comparator_db_test.cc index 83740ffda..cf2abe78b 100644 --- a/db/comparator_db_test.cc +++ b/db/comparator_db_test.cc @@ -2,6 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). +#include #include #include @@ -433,6 +434,145 @@ TEST_F(ComparatorDBTest, TwoStrComparator) { } } +TEST_F(ComparatorDBTest, FindShortestSeparator) { + std::string s1 = "abc1xyz"; + std::string s2 = "abc3xy"; + + BytewiseComparator()->FindShortestSeparator(&s1, s2); + ASSERT_EQ("abc2", s1); + + s1 = "abc5xyztt"; + + ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2); + ASSERT_EQ("abc5", s1); + + s1 = "abc3"; + s2 = "abc2xy"; + ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2); + ASSERT_EQ("abc3", s1); + + s1 = "abc3xyz"; + s2 = "abc2xy"; + ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2); + ASSERT_EQ("abc3", s1); + + s1 = "abc3xyz"; + s2 = "abc2"; + ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2); + ASSERT_EQ("abc3", s1); + + std::string old_s1 = s1 = "abc2xy"; + s2 = "abc2"; + ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2); + ASSERT_TRUE(old_s1 >= s1); + ASSERT_TRUE(s1 > s2); +} + +TEST_F(ComparatorDBTest, SeparatorSuccessorRandomizeTest) { + // Char list for boundary cases. + std::array char_list{{0, 1, 2, 253, 254, 255}}; + Random rnd(301); + + for (int attempts = 0; attempts < 1000; attempts++) { + uint32_t size1 = rnd.Skewed(4); + uint32_t size2; + + if (rnd.OneIn(2)) { + // size2 to be random size + size2 = rnd.Skewed(4); + } else { + // size1 is within [-2, +2] of size1 + int diff = static_cast(rnd.Uniform(5)) - 2; + int tmp_size2 = static_cast(size1) + diff; + if (tmp_size2 < 0) { + tmp_size2 = 0; + } + size2 = static_cast(tmp_size2); + } + + std::string s1; + std::string s2; + for (uint32_t i = 0; i < size1; i++) { + if (rnd.OneIn(2)) { + // Use random byte + s1 += static_cast(rnd.Uniform(256)); + } else { + // Use one byte in char_list + char c = static_cast(char_list[rnd.Uniform(sizeof(char_list))]); + s1 += c; + } + } + + // First set s2 to be the same as s1, and then modify s2. + s2 = s1; + s2.resize(size2); + // We start from the back of the string + if (size2 > 0) { + uint32_t pos = size2 - 1; + do { + if (pos >= size1 || rnd.OneIn(4)) { + // For 1/4 chance, use random byte + s2[pos] = static_cast(rnd.Uniform(256)); + } else if (rnd.OneIn(4)) { + // In 1/4 chance, stop here. + break; + } else { + // Create a char within [-2, +2] of the matching char of s1. + int diff = static_cast(rnd.Uniform(5)) - 2; + // char may be signed or unsigned based on platform. + int s1_char = static_cast(static_cast(s1[pos])); + int s2_char = s1_char + diff; + if (s2_char < 0) { + s2_char = 0; + } + if (s2_char > 255) { + s2_char = 255; + } + s2[pos] = static_cast(s2_char); + } + } while (pos-- != 0); + } + + // Test separators + for (int rev = 0; rev < 2; rev++) { + if (rev == 1) { + // switch s1 and s2 + std::string t = s1; + s1 = s2; + s2 = t; + } + std::string separator = s1; + BytewiseComparator()->FindShortestSeparator(&separator, s2); + std::string rev_separator = s1; + ReverseBytewiseComparator()->FindShortestSeparator(&rev_separator, s2); + + if (s1 == s2) { + ASSERT_EQ(s1, separator); + ASSERT_EQ(s2, rev_separator); + } else if (s1 < s2) { + ASSERT_TRUE(s1 <= separator); + ASSERT_TRUE(s2 > separator); + ASSERT_LE(separator.size(), std::max(s1.size(), s2.size())); + ASSERT_EQ(s1, rev_separator); + } else { + ASSERT_TRUE(s1 >= rev_separator); + ASSERT_TRUE(s2 < rev_separator); + ASSERT_LE(rev_separator.size(), std::max(s1.size(), s2.size())); + ASSERT_EQ(s1, separator); + } + } + + // Test successors + std::string succ = s1; + BytewiseComparator()->FindShortSuccessor(&succ); + ASSERT_TRUE(succ >= s1); + + succ = s1; + ReverseBytewiseComparator()->FindShortSuccessor(&succ); + ASSERT_TRUE(succ <= s1); + } +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/util/comparator.cc b/util/comparator.cc index 274b9080a..007f449a8 100644 --- a/util/comparator.cc +++ b/util/comparator.cc @@ -111,8 +111,58 @@ class ReverseBytewiseComparatorImpl : public BytewiseComparatorImpl { virtual int Compare(const Slice& a, const Slice& b) const override { return -a.compare(b); } -}; + void FindShortestSeparator(std::string* start, + const Slice& limit) const override { + // Find length of common prefix + size_t min_length = std::min(start->size(), limit.size()); + size_t diff_index = 0; + while ((diff_index < min_length) && + ((*start)[diff_index] == limit[diff_index])) { + diff_index++; + } + + assert(diff_index <= min_length); + if (diff_index == min_length) { + // Do not shorten if one string is a prefix of the other + // + // We could handle cases like: + // V + // A A 2 X Y + // A A 2 + // in a similar way as BytewiseComparator::FindShortestSeparator(). + // We keep it simple by not implementing it. We can come back to it + // later when needed. + } else { + uint8_t start_byte = static_cast((*start)[diff_index]); + uint8_t limit_byte = static_cast(limit[diff_index]); + if (start_byte > limit_byte && diff_index < start->size() - 1) { + // Case like + // V + // A A 3 A A + // A A 1 B B + // + // or + // v + // A A 2 A A + // A A 1 B B + // In this case "AA2" will be good. +#ifndef NDEBUG + std::string old_start = *start; +#endif + start->resize(diff_index + 1); +#ifndef NDEBUG + assert(old_start >= *start); +#endif + assert(Slice(*start).compare(limit) > 0); + } + } + } + + void FindShortSuccessor(std::string* /*key*/) const override { + // Don't do anything for simplicity. + } +}; }// namespace const Comparator* BytewiseComparator() {