Make InternalKeyComparator not configurable (#10342)

Summary:
InternalKeyComparator is an internal class which is a simple wrapper of Comparator. https://github.com/facebook/rocksdb/pull/8336 made Comparator customizeable. As a side effect, internal key comparator was made configurable too. This introduces overhead to this simple wrapper. For example, every InternalKeyComparator will have an std::vector attached to it, which consumes memory and possible allocation overhead too.
We remove InternalKeyComparator from being customizable by making InternalKeyComparator not a subclass of Comparator.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10342

Test Plan: Run existing CI tests and make sure it doesn't fail

Reviewed By: riversand963

Differential Revision: D37771351

fbshipit-source-id: 917256ee04b2796ed82974549c734fb6c4d8ccee
main
sdong 2 years ago committed by Facebook GitHub Bot
parent 6ce0b2ca34
commit c8b20d469d
  1. 1
      HISTORY.md
  2. 5
      db/compaction/clipping_iterator.h
  3. 38
      db/dbformat.cc
  4. 23
      db/dbformat.h
  5. 7
      db/dbformat_test.cc
  6. 7
      db/forward_iterator.h
  7. 53
      include/rocksdb/comparator.h
  8. 37
      table/block_based/index_builder.cc
  9. 17
      table/block_based/index_builder.h
  10. 2
      table/internal_iterator.h
  11. 7
      table/sst_file_writer.cc
  12. 7
      util/vector_iterator.h

@ -24,6 +24,7 @@
* Fix a bug in which backup/checkpoint can include a WAL deleted by RocksDB.
* Fix a bug where concurrent compactions might cause unnecessary further write stalling. In some cases, this might cause write rate to drop to minimum.
* Fix a bug in Logger where if dbname and db_log_dir are on different filesystems, dbname creation would fail wrt to db_log_dir path returning an error and fails to open the DB.
* Fix a CPU and memory efficiency issue introduce by https://github.com/facebook/rocksdb/pull/8336 which made InternalKeyComparator configurable as an unintended side effect.
## Behavior Change
* In leveled compaction with dynamic levelling, level multiplier is not anymore adjusted due to oversized L0. Instead, compaction score is adjusted by increasing size level target by adding incoming bytes from upper levels. This would deprioritize compactions from upper levels if more data from L0 is coming. This is to fix some unnecessary full stalling due to drastic change of level targets, while not wasting write bandwidth for compaction while writes are overloaded.

@ -7,6 +7,7 @@
#include <cassert>
#include "rocksdb/comparator.h"
#include "table/internal_iterator.h"
namespace ROCKSDB_NAMESPACE {
@ -19,7 +20,7 @@ namespace ROCKSDB_NAMESPACE {
class ClippingIterator : public InternalIterator {
public:
ClippingIterator(InternalIterator* iter, const Slice* start, const Slice* end,
const Comparator* cmp)
const CompareInterface* cmp)
: iter_(iter), start_(start), end_(end), cmp_(cmp), valid_(false) {
assert(iter_);
assert(cmp_);
@ -268,7 +269,7 @@ class ClippingIterator : public InternalIterator {
InternalIterator* iter_;
const Slice* start_;
const Slice* end_;
const Comparator* cmp_;
const CompareInterface* cmp_;
bool valid_;
};

@ -116,10 +116,6 @@ std::string InternalKey::DebugString(bool hex) const {
return result;
}
const char* InternalKeyComparator::Name() const {
return "rocksdb.anonymous.InternalKeyComparator";
}
int InternalKeyComparator::Compare(const ParsedInternalKey& a,
const ParsedInternalKey& b) const {
// Order by:
@ -141,40 +137,6 @@ int InternalKeyComparator::Compare(const ParsedInternalKey& a,
return r;
}
void InternalKeyComparator::FindShortestSeparator(std::string* start,
const Slice& limit) const {
// Attempt to shorten the user portion of the key
Slice user_start = ExtractUserKey(*start);
Slice user_limit = ExtractUserKey(limit);
std::string tmp(user_start.data(), user_start.size());
user_comparator_.FindShortestSeparator(&tmp, user_limit);
if (tmp.size() <= user_start.size() &&
user_comparator_.Compare(user_start, tmp) < 0) {
// User key has become shorter physically, but larger logically.
// Tack on the earliest possible number to the shortened user key.
PutFixed64(&tmp,
PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek));
assert(this->Compare(*start, tmp) < 0);
assert(this->Compare(tmp, limit) < 0);
start->swap(tmp);
}
}
void InternalKeyComparator::FindShortSuccessor(std::string* key) const {
Slice user_key = ExtractUserKey(*key);
std::string tmp(user_key.data(), user_key.size());
user_comparator_.FindShortSuccessor(&tmp);
if (tmp.size() <= user_key.size() &&
user_comparator_.Compare(user_key, tmp) < 0) {
// User key has become shorter physically, but larger logically.
// Tack on the earliest possible number to the shortened user key.
PutFixed64(&tmp,
PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek));
assert(this->Compare(*key, tmp) < 0);
key->swap(tmp);
}
}
LookupKey::LookupKey(const Slice& _user_key, SequenceNumber s,
const Slice* ts) {
size_t usize = _user_key.size();

@ -235,7 +235,7 @@ class InternalKeyComparator
#ifdef NDEBUG
final
#endif
: public Comparator {
: public CompareInterface {
private:
UserComparatorWrapper user_comparator_;
@ -249,17 +249,19 @@ class InternalKeyComparator
// this constructor to precompute the result of `Name()`. To avoid this
// overhead, set `named` to false. In that case, `Name()` will return a
// generic name that is non-specific to the underlying comparator.
explicit InternalKeyComparator(const Comparator* c)
: Comparator(c->timestamp_size()), user_comparator_(c) {}
explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c) {}
virtual ~InternalKeyComparator() {}
virtual const char* Name() const override;
virtual int Compare(const Slice& a, const Slice& b) const override;
int Compare(const Slice& a, const Slice& b) const override;
bool Equal(const Slice& a, const Slice& b) const {
// TODO Use user_comparator_.Equal(). Perhaps compare seqno before
// comparing the user key too.
return Compare(a, b) == 0;
}
// Same as Compare except that it excludes the value type from comparison
virtual int CompareKeySeq(const Slice& a, const Slice& b) const;
virtual void FindShortestSeparator(std::string* start,
const Slice& limit) const override;
virtual void FindShortSuccessor(std::string* key) const override;
int CompareKeySeq(const Slice& a, const Slice& b) const;
const Comparator* user_comparator() const {
return user_comparator_.user_comparator();
@ -273,9 +275,6 @@ class InternalKeyComparator
// value `kDisableGlobalSequenceNumber`.
int Compare(const Slice& a, SequenceNumber a_global_seqno, const Slice& b,
SequenceNumber b_global_seqno) const;
virtual const Comparator* GetRootComparator() const override {
return user_comparator_.GetRootComparator();
}
};
// The class represent the internal key in encoded form.

@ -9,6 +9,7 @@
#include "db/dbformat.h"
#include "table/block_based/index_builder.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
@ -24,13 +25,15 @@ static std::string IKey(const std::string& user_key,
static std::string Shorten(const std::string& s, const std::string& l) {
std::string result = s;
InternalKeyComparator(BytewiseComparator()).FindShortestSeparator(&result, l);
ShortenedIndexBuilder::FindShortestInternalKeySeparator(*BytewiseComparator(),
&result, l);
return result;
}
static std::string ShortSuccessor(const std::string& s) {
std::string result = s;
InternalKeyComparator(BytewiseComparator()).FindShortSuccessor(&result);
ShortenedIndexBuilder::FindShortInternalKeySuccessor(*BytewiseComparator(),
&result);
return result;
}

@ -4,6 +4,7 @@
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include "rocksdb/comparator.h"
#ifndef ROCKSDB_LITE
#include <string>
@ -28,14 +29,14 @@ struct FileMetaData;
class MinIterComparator {
public:
explicit MinIterComparator(const Comparator* comparator) :
comparator_(comparator) {}
explicit MinIterComparator(const CompareInterface* comparator)
: comparator_(comparator) {}
bool operator()(InternalIterator* a, InternalIterator* b) {
return comparator_->Compare(a->key(), b->key()) > 0;
}
private:
const Comparator* comparator_;
const CompareInterface* comparator_;
};
using MinIterHeap =

@ -17,6 +17,22 @@ namespace ROCKSDB_NAMESPACE {
class Slice;
// The general interface for comparing two Slices are defined for both of
// Comparator and some internal data structures.
class CompareInterface {
public:
virtual ~CompareInterface() {}
// Three-way comparison. Returns value:
// < 0 iff "a" < "b",
// == 0 iff "a" == "b",
// > 0 iff "a" > "b"
// Note that Compare(a, b) also compares timestamp if timestamp size is
// non-zero. For the same user key with different timestamps, larger (newer)
// timestamp comes first.
virtual int Compare(const Slice& a, const Slice& b) const = 0;
};
// A Comparator object provides a total order across slices that are
// used as keys in an sstable or a database. A Comparator implementation
// must be thread-safe since rocksdb may invoke its methods concurrently
@ -25,7 +41,7 @@ class Slice;
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class Comparator : public Customizable {
class Comparator : public Customizable, public CompareInterface {
public:
Comparator() : timestamp_size_(0) {}
@ -47,24 +63,6 @@ class Comparator : public Customizable {
const Comparator** comp);
static const char* Type() { return "Comparator"; }
// Three-way comparison. Returns value:
// < 0 iff "a" < "b",
// == 0 iff "a" == "b",
// > 0 iff "a" > "b"
// Note that Compare(a, b) also compares timestamp if timestamp size is
// non-zero. For the same user key with different timestamps, larger (newer)
// timestamp comes first.
virtual int Compare(const Slice& a, const Slice& b) const = 0;
// Compares two slices for equality. The following invariant should always
// hold (and is the default implementation):
// Equal(a, b) iff Compare(a, b) == 0
// Overwrite only if equality comparisons can be done more efficiently than
// three-way comparisons.
virtual bool Equal(const Slice& a, const Slice& b) const {
return Compare(a, b) == 0;
}
// The name of the comparator. Used to check for comparator
// mismatches (i.e., a DB created with one comparator is
// accessed using a different comparator.
@ -77,6 +75,15 @@ class Comparator : public Customizable {
// by any clients of this package.
const char* Name() const override = 0;
// Compares two slices for equality. The following invariant should always
// hold (and is the default implementation):
// Equal(a, b) iff Compare(a, b) == 0
// Overwrite only if equality comparisons can be done more efficiently than
// three-way comparisons.
virtual bool Equal(const Slice& a, const Slice& b) const {
return Compare(a, b) == 0;
}
// Advanced functions: these are used to reduce the space requirements
// for internal data structures like index blocks.
@ -91,10 +98,6 @@ class Comparator : public Customizable {
// i.e., an implementation of this method that does nothing is correct.
virtual void FindShortSuccessor(std::string* key) const = 0;
// if it is a wrapped comparator, may return the root one.
// return itself it is not wrapped.
virtual const Comparator* GetRootComparator() const { return this; }
// given two keys, determine if t is the successor of s
// BUG: only return true if no other keys starting with `t` are ordered
// before `t`. Otherwise, the auto_prefix_mode can omit entries within
@ -111,6 +114,10 @@ class Comparator : public Customizable {
// with the customized comparator.
virtual bool CanKeysWithDifferentByteContentsBeEqual() const { return true; }
// if it is a wrapped comparator, may return the root one.
// return itself it is not wrapped.
virtual const Comparator* GetRootComparator() const { return this; }
inline size_t timestamp_size() const { return timestamp_size_; }
int CompareWithoutTimestamp(const Slice& a, const Slice& b) const {

@ -10,11 +10,12 @@
#include "table/block_based/index_builder.h"
#include <assert.h>
#include <cinttypes>
#include <cinttypes>
#include <list>
#include <string>
#include "db/dbformat.h"
#include "rocksdb/comparator.h"
#include "rocksdb/flush_block_policy.h"
#include "table/block_based/partitioned_filter_block.h"
@ -68,6 +69,40 @@ IndexBuilder* IndexBuilder::CreateIndexBuilder(
return result;
}
void ShortenedIndexBuilder::FindShortestInternalKeySeparator(
const Comparator& comparator, std::string* start, const Slice& limit) {
// Attempt to shorten the user portion of the key
Slice user_start = ExtractUserKey(*start);
Slice user_limit = ExtractUserKey(limit);
std::string tmp(user_start.data(), user_start.size());
comparator.FindShortestSeparator(&tmp, user_limit);
if (tmp.size() <= user_start.size() &&
comparator.Compare(user_start, tmp) < 0) {
// User key has become shorter physically, but larger logically.
// Tack on the earliest possible number to the shortened user key.
PutFixed64(&tmp,
PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek));
assert(InternalKeyComparator(&comparator).Compare(*start, tmp) < 0);
assert(InternalKeyComparator(&comparator).Compare(tmp, limit) < 0);
start->swap(tmp);
}
}
void ShortenedIndexBuilder::FindShortInternalKeySuccessor(
const Comparator& comparator, std::string* key) {
Slice user_key = ExtractUserKey(*key);
std::string tmp(user_key.data(), user_key.size());
comparator.FindShortSuccessor(&tmp);
if (tmp.size() <= user_key.size() && comparator.Compare(user_key, tmp) < 0) {
// User key has become shorter physically, but larger logically.
// Tack on the earliest possible number to the shortened user key.
PutFixed64(&tmp,
PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek));
assert(InternalKeyComparator(&comparator).Compare(*key, tmp) < 0);
key->swap(tmp);
}
}
PartitionedIndexBuilder* PartitionedIndexBuilder::CreateIndexBuilder(
const InternalKeyComparator* comparator,
const bool use_value_delta_encoding,

@ -152,8 +152,9 @@ class ShortenedIndexBuilder : public IndexBuilder {
if (first_key_in_next_block != nullptr) {
if (shortening_mode_ !=
BlockBasedTableOptions::IndexShorteningMode::kNoShortening) {
comparator_->FindShortestSeparator(last_key_in_current_block,
*first_key_in_next_block);
FindShortestInternalKeySeparator(*comparator_->user_comparator(),
last_key_in_current_block,
*first_key_in_next_block);
}
if (!seperator_is_key_plus_seq_ &&
comparator_->user_comparator()->Compare(
@ -164,7 +165,8 @@ class ShortenedIndexBuilder : public IndexBuilder {
} else {
if (shortening_mode_ == BlockBasedTableOptions::IndexShorteningMode::
kShortenSeparatorsAndSuccessor) {
comparator_->FindShortSuccessor(last_key_in_current_block);
FindShortInternalKeySuccessor(*comparator_->user_comparator(),
last_key_in_current_block);
}
}
auto sep = Slice(*last_key_in_current_block);
@ -212,6 +214,15 @@ class ShortenedIndexBuilder : public IndexBuilder {
return seperator_is_key_plus_seq_;
}
// Changes *key to a short string >= *key.
//
static void FindShortestInternalKeySeparator(const Comparator& comparator,
std::string* start,
const Slice& limit);
static void FindShortInternalKeySuccessor(const Comparator& comparator,
std::string* key);
friend class PartitionedIndexBuilder;
private:

@ -187,7 +187,7 @@ class InternalIteratorBase : public Cleanable {
virtual void SetReadaheadState(ReadaheadFileInfo* /*readahead_file_info*/) {}
protected:
void SeekForPrevImpl(const Slice& target, const Comparator* cmp) {
void SeekForPrevImpl(const Slice& target, const CompareInterface* cmp) {
Seek(target);
if (!Valid()) {
SeekToLast();

@ -100,7 +100,7 @@ struct SstFileWriter::Rep {
}
Status Add(const Slice& user_key, const Slice& value, ValueType value_type) {
if (internal_comparator.timestamp_size() != 0) {
if (internal_comparator.user_comparator()->timestamp_size() != 0) {
return Status::InvalidArgument("Timestamp size mismatch");
}
@ -111,7 +111,8 @@ struct SstFileWriter::Rep {
ValueType value_type) {
const size_t timestamp_size = timestamp.size();
if (internal_comparator.timestamp_size() != timestamp_size) {
if (internal_comparator.user_comparator()->timestamp_size() !=
timestamp_size) {
return Status::InvalidArgument("Timestamp size mismatch");
}
@ -131,7 +132,7 @@ struct SstFileWriter::Rep {
}
Status DeleteRange(const Slice& begin_key, const Slice& end_key) {
if (internal_comparator.timestamp_size() != 0) {
if (internal_comparator.user_comparator()->timestamp_size() != 0) {
return Status::InvalidArgument("Timestamp size mismatch");
}

@ -6,6 +6,7 @@
#include <vector>
#include "db/dbformat.h"
#include "rocksdb/comparator.h"
#include "rocksdb/iterator.h"
#include "rocksdb/slice.h"
#include "table/internal_iterator.h"
@ -16,7 +17,7 @@ namespace ROCKSDB_NAMESPACE {
class VectorIterator : public InternalIterator {
public:
VectorIterator(std::vector<std::string> keys, std::vector<std::string> values,
const Comparator* icmp = nullptr)
const CompareInterface* icmp = nullptr)
: keys_(std::move(keys)),
values_(std::move(values)),
current_(keys_.size()),
@ -90,7 +91,7 @@ class VectorIterator : public InternalIterator {
private:
struct IndexedKeyComparator {
IndexedKeyComparator(const Comparator* c,
IndexedKeyComparator(const CompareInterface* c,
const std::vector<std::string>* ks)
: cmp(c), keys(ks) {}
@ -106,7 +107,7 @@ class VectorIterator : public InternalIterator {
return cmp->Compare(a, (*keys)[b]) < 0;
}
const Comparator* cmp;
const CompareInterface* cmp;
const std::vector<std::string>* keys;
};

Loading…
Cancel
Save