diff --git a/HISTORY.md b/HISTORY.md index b3c2ef14a..028ddcf82 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -5,6 +5,7 @@ * Partitions of partitioned indexes no longer affect the read amplification statistics. * Due to a refactoring, block cache eviction statistics for indexes are temporarily broken. We plan to reintroduce them in a later phase. * options.keep_log_file_num will be enforced strictly all the time. File names of all log files will be tracked, which may take significantly amount of memory if options.keep_log_file_num is large and either of options.max_log_file_size or options.log_file_time_to_roll is set. +* Add initial support for Get/Put with user timestamps. Users can specify timestamps via ReadOptions and WriteOptions when calling DB::Get and DB::Put. ### New Features * Add an option `snap_refresh_nanos` (default to 0.1s) to periodically refresh the snapshot list in compaction jobs. Assign to 0 to disable the feature. diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index 45524b250..1aec864dd 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -1284,6 +1284,157 @@ TEST_F(DBBasicTest, MultiGetBatchedMultiLevel) { } } } + +class DBBasicTestWithTimestampWithParam + : public DBTestBase, + public testing::WithParamInterface { + public: + DBBasicTestWithTimestampWithParam() + : DBTestBase("/db_basic_test_with_timestamp") {} + + protected: + class TestComparator : public Comparator { + private: + const Comparator* cmp_without_ts_; + + public: + explicit TestComparator(size_t ts_sz) + : Comparator(ts_sz), cmp_without_ts_(nullptr) { + cmp_without_ts_ = BytewiseComparator(); + } + + const char* Name() const override { return "TestComparator"; } + + void FindShortSuccessor(std::string*) const override {} + + void FindShortestSeparator(std::string*, const Slice&) const override {} + + int Compare(const Slice& a, const Slice& b) const override { + int r = CompareWithoutTimestamp(a, b); + if (r != 0 || 0 == timestamp_size()) { + return r; + } + return CompareTimestamp( + Slice(a.data() + a.size() - timestamp_size(), timestamp_size()), + Slice(b.data() + b.size() - timestamp_size(), timestamp_size())); + } + + int CompareWithoutTimestamp(const Slice& a, const Slice& b) const override { + assert(a.size() >= timestamp_size()); + assert(b.size() >= timestamp_size()); + Slice k1 = StripTimestampFromUserKey(a, timestamp_size()); + Slice k2 = StripTimestampFromUserKey(b, timestamp_size()); + + return cmp_without_ts_->Compare(k1, k2); + } + + int CompareTimestamp(const Slice& ts1, const Slice& ts2) const override { + if (!ts1.data() && !ts2.data()) { + return 0; + } else if (ts1.data() && !ts2.data()) { + return 1; + } else if (!ts1.data() && ts2.data()) { + return -1; + } + assert(ts1.size() == ts2.size()); + uint64_t low1 = 0; + uint64_t low2 = 0; + uint64_t high1 = 0; + uint64_t high2 = 0; + auto* ptr1 = const_cast(&ts1); + auto* ptr2 = const_cast(&ts2); + if (!GetFixed64(ptr1, &low1) || !GetFixed64(ptr1, &high1) || + !GetFixed64(ptr2, &low2) || !GetFixed64(ptr2, &high2)) { + assert(false); + } + if (high1 < high2) { + return 1; + } else if (high1 > high2) { + return -1; + } + if (low1 < low2) { + return 1; + } else if (low1 > low2) { + return -1; + } + return 0; + } + }; + + Slice EncodeTimestamp(uint64_t low, uint64_t high, std::string* ts) { + assert(nullptr != ts); + ts->clear(); + PutFixed64(ts, low); + PutFixed64(ts, high); + assert(ts->size() == sizeof(low) + sizeof(high)); + return Slice(*ts); + } +}; + +TEST_P(DBBasicTestWithTimestampWithParam, PutAndGet) { + const int kNumKeysPerFile = 8192; + const size_t kNumTimestamps = 6; + bool memtable_only = GetParam(); + Options options = CurrentOptions(); + options.create_if_missing = true; + options.env = env_; + options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); + std::string tmp; + size_t ts_sz = EncodeTimestamp(0, 0, &tmp).size(); + TestComparator test_cmp(ts_sz); + options.comparator = &test_cmp; + BlockBasedTableOptions bbto; + bbto.filter_policy.reset(NewBloomFilterPolicy( + 10 /*bits_per_key*/, false /*use_block_based_builder*/)); + bbto.whole_key_filtering = true; + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + DestroyAndReopen(options); + CreateAndReopenWithCF({"pikachu"}, options); + size_t num_cfs = handles_.size(); + ASSERT_EQ(2, num_cfs); + std::vector write_ts_strs(kNumTimestamps); + std::vector read_ts_strs(kNumTimestamps); + std::vector write_ts_list; + std::vector read_ts_list; + + for (size_t i = 0; i != kNumTimestamps; ++i) { + write_ts_list.emplace_back(EncodeTimestamp(i * 2, 0, &write_ts_strs[i])); + read_ts_list.emplace_back(EncodeTimestamp(1 + i * 2, 0, &read_ts_strs[i])); + const Slice& write_ts = write_ts_list.back(); + WriteOptions wopts; + wopts.timestamp = &write_ts; + for (int cf = 0; cf != static_cast(num_cfs); ++cf) { + for (size_t j = 0; j != (kNumKeysPerFile - 1) / kNumTimestamps; ++j) { + ASSERT_OK(Put(cf, "key" + std::to_string(j), + "value_" + std::to_string(j) + "_" + std::to_string(i), + wopts)); + } + if (!memtable_only) { + ASSERT_OK(Flush(cf)); + } + } + } + const auto& verify_db_func = [&]() { + for (size_t i = 0; i != kNumTimestamps; ++i) { + ReadOptions ropts; + ropts.timestamp = &read_ts_list[i]; + for (int cf = 0; cf != static_cast(num_cfs); ++cf) { + ColumnFamilyHandle* cfh = handles_[cf]; + for (size_t j = 0; j != (kNumKeysPerFile - 1) / kNumTimestamps; ++j) { + std::string value; + ASSERT_OK(db_->Get(ropts, cfh, "key" + std::to_string(j), &value)); + ASSERT_EQ("value_" + std::to_string(j) + "_" + std::to_string(i), + value); + } + } + } + }; + verify_db_func(); +} + +INSTANTIATE_TEST_CASE_P(Timestamp, DBBasicTestWithTimestampWithParam, + ::testing::Bool()); + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index ba76abc28..96b911a6d 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -1376,7 +1376,16 @@ ColumnFamilyHandle* DBImpl::DefaultColumnFamily() const { Status DBImpl::Get(const ReadOptions& read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) { - return GetImpl(read_options, column_family, key, value); + if (nullptr == read_options.timestamp) { + return GetImpl(read_options, column_family, key, value); + } + Slice akey; + std::string buf; + Status s = AppendTimestamp(key, *(read_options.timestamp), &akey, &buf); + if (s.ok()) { + s = GetImpl(read_options, column_family, akey, value); + } + return s; } Status DBImpl::GetImpl(const ReadOptions& read_options, diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index 02e23e269..947194ace 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -1677,11 +1677,25 @@ size_t DBImpl::GetWalPreallocateBlockSize(uint64_t write_buffer_size) const { // can call if they wish Status DB::Put(const WriteOptions& opt, ColumnFamilyHandle* column_family, const Slice& key, const Slice& value) { - // Pre-allocate size of write batch conservatively. - // 8 bytes are taken by header, 4 bytes for count, 1 byte for type, - // and we allocate 11 extra bytes for key length, as well as value length. - WriteBatch batch(key.size() + value.size() + 24); - Status s = batch.Put(column_family, key, value); + if (nullptr == opt.timestamp) { + // Pre-allocate size of write batch conservatively. + // 8 bytes are taken by header, 4 bytes for count, 1 byte for type, + // and we allocate 11 extra bytes for key length, as well as value length. + WriteBatch batch(key.size() + value.size() + 24); + Status s = batch.Put(column_family, key, value); + if (!s.ok()) { + return s; + } + return Write(opt, &batch); + } + Slice akey; + std::string buf; + Status s = AppendTimestamp(key, *(opt.timestamp), &akey, &buf); + if (!s.ok()) { + return s; + } + WriteBatch batch(akey.size() + value.size() + 24); + s = batch.Put(column_family, akey, value); if (!s.ok()) { return s; } diff --git a/db/dbformat.h b/db/dbformat.h index dbf6ea6f3..c6ee5677c 100644 --- a/db/dbformat.h +++ b/db/dbformat.h @@ -151,6 +151,17 @@ inline Slice ExtractUserKey(const Slice& internal_key) { return Slice(internal_key.data(), internal_key.size() - 8); } +inline Slice ExtractUserKeyAndStripTimestamp(const Slice& internal_key, + size_t ts_sz) { + assert(internal_key.size() >= 8 + ts_sz); + return Slice(internal_key.data(), internal_key.size() - 8 - ts_sz); +} + +inline Slice StripTimestampFromUserKey(const Slice& user_key, size_t ts_sz) { + assert(user_key.size() >= ts_sz); + return Slice(user_key.data(), user_key.size() - ts_sz); +} + inline uint64_t ExtractInternalKeyFooter(const Slice& internal_key) { assert(internal_key.size() >= 8); const size_t n = internal_key.size(); @@ -658,4 +669,20 @@ struct ParsedInternalKeyComparator { const InternalKeyComparator* cmp; }; +// TODO (yanqin): this causes extra memory allocation and copy. Should be +// addressed in the future. +inline Status AppendTimestamp(const Slice& key, const Slice& timestamp, + Slice* ret_key, std::string* ret_buf) { + assert(ret_key != nullptr); + assert(ret_buf != nullptr); + if (key.data() + key.size() == timestamp.data()) { + *ret_key = Slice(key.data(), key.size() + timestamp.size()); + } else { + ret_buf->assign(key.data(), key.size()); + ret_buf->append(timestamp.data(), timestamp.size()); + *ret_key = Slice(*ret_buf); + } + return Status::OK(); +} + } // namespace rocksdb diff --git a/db/memtable.cc b/db/memtable.cc index 46acbbfa6..fdd1a577a 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -493,6 +493,8 @@ bool MemTable::Add(SequenceNumber s, ValueType type, p = EncodeVarint32(p, val_size); memcpy(p, value.data(), val_size); assert((unsigned)(p + val_size - buf) == (unsigned)encoded_len); + size_t ts_sz = GetInternalKeyComparator().user_comparator()->timestamp_size(); + if (!allow_concurrent) { // Extract prefix for insert with hint. if (insert_with_hint_prefix_extractor_ != nullptr && @@ -525,7 +527,7 @@ bool MemTable::Add(SequenceNumber s, ValueType type, bloom_filter_->Add(prefix_extractor_->Transform(key)); } if (bloom_filter_ && moptions_.memtable_whole_key_filtering) { - bloom_filter_->Add(key); + bloom_filter_->Add(StripTimestampFromUserKey(key, ts_sz)); } // The first sequence number inserted into the memtable @@ -559,7 +561,7 @@ bool MemTable::Add(SequenceNumber s, ValueType type, bloom_filter_->AddConcurrently(prefix_extractor_->Transform(key)); } if (bloom_filter_ && moptions_.memtable_whole_key_filtering) { - bloom_filter_->AddConcurrently(key); + bloom_filter_->AddConcurrently(StripTimestampFromUserKey(key, ts_sz)); } // atomically update first_seqno_ and earliest_seqno_. @@ -632,8 +634,10 @@ static bool SaveValue(void* arg, const char* entry) { // all entries with overly large sequence numbers. uint32_t key_length; const char* key_ptr = GetVarint32Ptr(entry, entry + 5, &key_length); - if (s->mem->GetInternalKeyComparator().user_comparator()->Equal( - Slice(key_ptr, key_length - 8), s->key->user_key())) { + Slice user_key_slice = Slice(key_ptr, key_length - 8); + if (s->mem->GetInternalKeyComparator() + .user_comparator() + ->CompareWithoutTimestamp(user_key_slice, s->key->user_key()) == 0) { // Correct user key const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8); ValueType type; @@ -767,11 +771,13 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s, bool found_final_value = false; bool merge_in_progress = s->IsMergeInProgress(); bool may_contain = true; + size_t ts_sz = GetInternalKeyComparator().user_comparator()->timestamp_size(); if (bloom_filter_) { // when both memtable_whole_key_filtering and prefix_extractor_ are set, // only do whole key filtering for Get() to save CPU if (moptions_.memtable_whole_key_filtering) { - may_contain = bloom_filter_->MayContain(user_key); + may_contain = + bloom_filter_->MayContain(StripTimestampFromUserKey(user_key, ts_sz)); } else { assert(prefix_extractor_); may_contain = diff --git a/db/version_set.cc b/db/version_set.cc index a60a4e87c..ed9a316ac 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -93,7 +93,8 @@ Status OverlapWithIterator(const Comparator* ucmp, return Status::Corruption("DB have corrupted keys"); } - if (ucmp->Compare(seek_result.user_key, largest_user_key) <= 0) { + if (ucmp->CompareWithoutTimestamp(seek_result.user_key, largest_user_key) <= + 0) { *overlap = true; } } @@ -171,17 +172,16 @@ class FilePicker { // Check if key is within a file's range. If search left bound and // right bound point to the same find, we are sure key falls in // range. - assert( - curr_level_ == 0 || - curr_index_in_curr_level_ == start_index_in_curr_level_ || - user_comparator_->Compare(user_key_, - ExtractUserKey(f->smallest_key)) <= 0); - - int cmp_smallest = user_comparator_->Compare(user_key_, - ExtractUserKey(f->smallest_key)); + assert(curr_level_ == 0 || + curr_index_in_curr_level_ == start_index_in_curr_level_ || + user_comparator_->CompareWithoutTimestamp( + user_key_, ExtractUserKey(f->smallest_key)) <= 0); + + int cmp_smallest = user_comparator_->CompareWithoutTimestamp( + user_key_, ExtractUserKey(f->smallest_key)); if (cmp_smallest >= 0) { - cmp_largest = user_comparator_->Compare(user_key_, - ExtractUserKey(f->largest_key)); + cmp_largest = user_comparator_->CompareWithoutTimestamp( + user_key_, ExtractUserKey(f->largest_key)); } // Setup file search bound for the next level based on the @@ -799,14 +799,16 @@ static bool AfterFile(const Comparator* ucmp, const Slice* user_key, const FdWithKeyRange* f) { // nullptr user_key occurs before all keys and is therefore never after *f return (user_key != nullptr && - ucmp->Compare(*user_key, ExtractUserKey(f->largest_key)) > 0); + ucmp->CompareWithoutTimestamp(*user_key, + ExtractUserKey(f->largest_key)) > 0); } static bool BeforeFile(const Comparator* ucmp, const Slice* user_key, const FdWithKeyRange* f) { // nullptr user_key occurs after all keys and is therefore never before *f return (user_key != nullptr && - ucmp->Compare(*user_key, ExtractUserKey(f->smallest_key)) < 0); + ucmp->CompareWithoutTimestamp(*user_key, + ExtractUserKey(f->smallest_key)) < 0); } bool SomeFileOverlapsRange( @@ -952,8 +954,9 @@ class LevelIterator final : public InternalIterator { bool KeyReachedUpperBound(const Slice& internal_key) { return read_options_.iterate_upper_bound != nullptr && - user_comparator_.Compare(ExtractUserKey(internal_key), - *read_options_.iterate_upper_bound) >= 0; + user_comparator_.CompareWithoutTimestamp( + ExtractUserKey(internal_key), + *read_options_.iterate_upper_bound) >= 0; } InternalIterator* NewFileIterator() { @@ -2774,11 +2777,12 @@ void VersionStorageInfo::GetOverlappingInputs( FdWithKeyRange* f = &(level_files_brief_[level].files[*iter]); const Slice file_start = ExtractUserKey(f->smallest_key); const Slice file_limit = ExtractUserKey(f->largest_key); - if (begin != nullptr && user_cmp->Compare(file_limit, user_begin) < 0) { + if (begin != nullptr && + user_cmp->CompareWithoutTimestamp(file_limit, user_begin) < 0) { // "f" is completely before specified range; skip it iter++; } else if (end != nullptr && - user_cmp->Compare(file_start, user_end) > 0) { + user_cmp->CompareWithoutTimestamp(file_start, user_end) > 0) { // "f" is completely after specified range; skip it iter++; } else { @@ -2793,10 +2797,11 @@ void VersionStorageInfo::GetOverlappingInputs( iter = index.erase(iter); if (expand_range) { if (begin != nullptr && - user_cmp->Compare(file_start, user_begin) < 0) { + user_cmp->CompareWithoutTimestamp(file_start, user_begin) < 0) { user_begin = file_start; } - if (end != nullptr && user_cmp->Compare(file_limit, user_end) > 0) { + if (end != nullptr && + user_cmp->CompareWithoutTimestamp(file_limit, user_end) > 0) { user_end = file_limit; } } diff --git a/include/rocksdb/comparator.h b/include/rocksdb/comparator.h index 46279f9a6..9f262367d 100644 --- a/include/rocksdb/comparator.h +++ b/include/rocksdb/comparator.h @@ -20,6 +20,19 @@ class Slice; // from multiple threads. class Comparator { public: + Comparator() : timestamp_size_(0) {} + + Comparator(size_t ts_sz) : timestamp_size_(ts_sz) {} + + Comparator(const Comparator& orig) : timestamp_size_(orig.timestamp_size_) {} + + Comparator& operator=(const Comparator& rhs) { + if (this != &rhs) { + timestamp_size_ = rhs.timestamp_size_; + } + return *this; + } + virtual ~Comparator() {} // Three-way comparison. Returns value: @@ -78,6 +91,20 @@ class Comparator { // The major use case is to determine if DataBlockHashIndex is compatible // with the customized comparator. virtual bool CanKeysWithDifferentByteContentsBeEqual() const { return true; } + + inline size_t timestamp_size() const { return timestamp_size_; } + + virtual int CompareWithoutTimestamp(const Slice& a, const Slice& b) const { + return Compare(a, b); + } + + virtual int CompareTimestamp(const Slice& /*ts1*/, + const Slice& /*ts2*/) const { + return 0; + } + + private: + size_t timestamp_size_; }; // Return a builtin comparator that uses lexicographic byte-wise diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index cc7119410..307582fe6 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1255,6 +1255,14 @@ struct ReadOptions { // Default: 0 (don't filter by seqnum, return user keys) SequenceNumber iter_start_seqnum; + // Timestamp of operation. Read should return the latest data visible to the + // specified timestamp. All timestamps of the same database must be of the + // same length and format. The user is responsible for providing a customized + // compare function via Comparator to order tuples. + // The user-specified timestamp feature is still under active development, + // and the API is subject to change. + const Slice* timestamp; + ReadOptions(); ReadOptions(bool cksum, bool cache); }; @@ -1307,12 +1315,24 @@ struct WriteOptions { // Default: false bool low_pri; + // Timestamp of write operation, e.g. Put. All timestamps of the same + // database must share the same length and format. The user is also + // responsible for providing a customized compare function via Comparator to + // order tuples. If the user wants to enable timestamp, then + // all write operations must be associated with timestamp because RocksDB, as + // a single-node storage engine currently has no knowledge of global time, + // thus has to rely on the application. + // The user-specified timestamp feature is still under active development, + // and the API is subject to change. + const Slice* timestamp; + WriteOptions() : sync(false), disableWAL(false), ignore_missing_column_families(false), no_slowdown(false), - low_pri(false) {} + low_pri(false), + timestamp(nullptr) {} }; // Options that control flush operations diff --git a/options/options.cc b/options/options.cc index a5037ee78..8977b5890 100644 --- a/options/options.cc +++ b/options/options.cc @@ -600,7 +600,8 @@ ReadOptions::ReadOptions() pin_data(false), background_purge_on_iterator_cleanup(false), ignore_range_deletions(false), - iter_start_seqnum(0) {} + iter_start_seqnum(0), + timestamp(nullptr) {} ReadOptions::ReadOptions(bool cksum, bool cache) : snapshot(nullptr), @@ -618,6 +619,7 @@ ReadOptions::ReadOptions(bool cksum, bool cache) pin_data(false), background_purge_on_iterator_cleanup(false), ignore_range_deletions(false), - iter_start_seqnum(0) {} + iter_start_seqnum(0), + timestamp(nullptr) {} } // namespace rocksdb diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index 9769e394f..cae93f7f2 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -531,7 +531,8 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) { // Note: PartitionedFilterBlockBuilder requires key being added to filter // builder after being added to index builder. if (r->state == Rep::State::kUnbuffered && r->filter_builder != nullptr) { - r->filter_builder->Add(ExtractUserKey(key)); + size_t ts_sz = r->internal_comparator.user_comparator()->timestamp_size(); + r->filter_builder->Add(ExtractUserKeyAndStripTimestamp(key, ts_sz)); } r->last_key.assign(key.data(), key.size()); diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 2fdaf2afd..37bbc3b52 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -2672,8 +2672,11 @@ bool BlockBasedTable::FullFilterKeyMayMatch( const Slice* const const_ikey_ptr = &internal_key; bool may_match = true; if (filter->whole_key_filtering()) { - may_match = filter->KeyMayMatch(user_key, prefix_extractor, kNotValid, - no_io, const_ikey_ptr); + size_t ts_sz = + rep_->internal_comparator.user_comparator()->timestamp_size(); + Slice user_key_without_ts = StripTimestampFromUserKey(user_key, ts_sz); + may_match = filter->KeyMayMatch(user_key_without_ts, prefix_extractor, + kNotValid, no_io, const_ikey_ptr); } else if (!read_options.total_order_seek && prefix_extractor && rep_->table_properties->prefix_extractor_name.compare( prefix_extractor->Name()) == 0 && @@ -2755,6 +2758,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, iiter_unique_ptr.reset(iiter); } + size_t ts_sz = + rep_->internal_comparator.user_comparator()->timestamp_size(); bool matched = false; // if such user key mathced a key in SST bool done = false; for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) { @@ -2762,8 +2767,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, bool not_exist_in_filter = filter != nullptr && filter->IsBlockBased() == true && - !filter->KeyMayMatch(ExtractUserKey(key), prefix_extractor, - handle.offset(), no_io); + !filter->KeyMayMatch(ExtractUserKeyAndStripTimestamp(key, ts_sz), + prefix_extractor, handle.offset(), no_io); if (not_exist_in_filter) { // Not found @@ -2793,7 +2798,9 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, } bool may_exist = biter.SeekForGet(key); - if (!may_exist) { + // If user-specified timestamp is supported, we cannot end the search + // just because hash index lookup indicates the key+ts does not exist. + if (!may_exist && ts_sz == 0) { // HashSeek cannot find the key this block and the the iter is not // the end of the block, i.e. cannot be in the following blocks // either. In this case, the seek_key cannot be found, so we break diff --git a/table/get_context.cc b/table/get_context.cc index 24c9ba7d5..9be16b062 100644 --- a/table/get_context.cc +++ b/table/get_context.cc @@ -182,7 +182,7 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key, assert(matched); assert((state_ != kMerge && parsed_key.type != kTypeMerge) || merge_context_ != nullptr); - if (ucmp_->Equal(parsed_key.user_key, user_key_)) { + if (ucmp_->CompareWithoutTimestamp(parsed_key.user_key, user_key_) == 0) { *matched = true; // If the value is not in the snapshot, skip it if (!CheckCallback(parsed_key.sequence)) { diff --git a/util/comparator.cc b/util/comparator.cc index eab17ebcc..717ebb523 100644 --- a/util/comparator.cc +++ b/util/comparator.cc @@ -124,6 +124,10 @@ class BytewiseComparatorImpl : public Comparator { bool CanKeysWithDifferentByteContentsBeEqual() const override { return false; } + + int CompareWithoutTimestamp(const Slice& a, const Slice& b) const override { + return a.compare(b); + } }; class ReverseBytewiseComparatorImpl : public BytewiseComparatorImpl { @@ -192,6 +196,10 @@ class ReverseBytewiseComparatorImpl : public BytewiseComparatorImpl { bool CanKeysWithDifferentByteContentsBeEqual() const override { return false; } + + int CompareWithoutTimestamp(const Slice& a, const Slice& b) const override { + return -a.compare(b); + } }; }// namespace