Make InternalKeyComparator not configurable (#10342)

Summary:
InternalKeyComparator is an internal class which is a simple wrapper of Comparator. https://github.com/facebook/rocksdb/pull/8336 made Comparator customizeable. As a side effect, internal key comparator was made configurable too. This introduces overhead to this simple wrapper. For example, every InternalKeyComparator will have an std::vector attached to it, which consumes memory and possible allocation overhead too.
We remove InternalKeyComparator from being customizable by making InternalKeyComparator not a subclass of Comparator.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10342

Test Plan: Run existing CI tests and make sure it doesn't fail

Reviewed By: riversand963

Differential Revision: D37771351

fbshipit-source-id: 917256ee04b2796ed82974549c734fb6c4d8ccee
main
sdong 2 years ago committed by Facebook GitHub Bot
parent 6ce0b2ca34
commit c8b20d469d
  1. 1
      HISTORY.md
  2. 5
      db/compaction/clipping_iterator.h
  3. 38
      db/dbformat.cc
  4. 23
      db/dbformat.h
  5. 7
      db/dbformat_test.cc
  6. 7
      db/forward_iterator.h
  7. 53
      include/rocksdb/comparator.h
  8. 37
      table/block_based/index_builder.cc
  9. 17
      table/block_based/index_builder.h
  10. 2
      table/internal_iterator.h
  11. 7
      table/sst_file_writer.cc
  12. 7
      util/vector_iterator.h

@ -24,6 +24,7 @@
* Fix a bug in which backup/checkpoint can include a WAL deleted by RocksDB. * Fix a bug in which backup/checkpoint can include a WAL deleted by RocksDB.
* Fix a bug where concurrent compactions might cause unnecessary further write stalling. In some cases, this might cause write rate to drop to minimum. * Fix a bug where concurrent compactions might cause unnecessary further write stalling. In some cases, this might cause write rate to drop to minimum.
* Fix a bug in Logger where if dbname and db_log_dir are on different filesystems, dbname creation would fail wrt to db_log_dir path returning an error and fails to open the DB. * Fix a bug in Logger where if dbname and db_log_dir are on different filesystems, dbname creation would fail wrt to db_log_dir path returning an error and fails to open the DB.
* Fix a CPU and memory efficiency issue introduce by https://github.com/facebook/rocksdb/pull/8336 which made InternalKeyComparator configurable as an unintended side effect.
## Behavior Change ## Behavior Change
* In leveled compaction with dynamic levelling, level multiplier is not anymore adjusted due to oversized L0. Instead, compaction score is adjusted by increasing size level target by adding incoming bytes from upper levels. This would deprioritize compactions from upper levels if more data from L0 is coming. This is to fix some unnecessary full stalling due to drastic change of level targets, while not wasting write bandwidth for compaction while writes are overloaded. * In leveled compaction with dynamic levelling, level multiplier is not anymore adjusted due to oversized L0. Instead, compaction score is adjusted by increasing size level target by adding incoming bytes from upper levels. This would deprioritize compactions from upper levels if more data from L0 is coming. This is to fix some unnecessary full stalling due to drastic change of level targets, while not wasting write bandwidth for compaction while writes are overloaded.

@ -7,6 +7,7 @@
#include <cassert> #include <cassert>
#include "rocksdb/comparator.h"
#include "table/internal_iterator.h" #include "table/internal_iterator.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
@ -19,7 +20,7 @@ namespace ROCKSDB_NAMESPACE {
class ClippingIterator : public InternalIterator { class ClippingIterator : public InternalIterator {
public: public:
ClippingIterator(InternalIterator* iter, const Slice* start, const Slice* end, ClippingIterator(InternalIterator* iter, const Slice* start, const Slice* end,
const Comparator* cmp) const CompareInterface* cmp)
: iter_(iter), start_(start), end_(end), cmp_(cmp), valid_(false) { : iter_(iter), start_(start), end_(end), cmp_(cmp), valid_(false) {
assert(iter_); assert(iter_);
assert(cmp_); assert(cmp_);
@ -268,7 +269,7 @@ class ClippingIterator : public InternalIterator {
InternalIterator* iter_; InternalIterator* iter_;
const Slice* start_; const Slice* start_;
const Slice* end_; const Slice* end_;
const Comparator* cmp_; const CompareInterface* cmp_;
bool valid_; bool valid_;
}; };

@ -116,10 +116,6 @@ std::string InternalKey::DebugString(bool hex) const {
return result; return result;
} }
const char* InternalKeyComparator::Name() const {
return "rocksdb.anonymous.InternalKeyComparator";
}
int InternalKeyComparator::Compare(const ParsedInternalKey& a, int InternalKeyComparator::Compare(const ParsedInternalKey& a,
const ParsedInternalKey& b) const { const ParsedInternalKey& b) const {
// Order by: // Order by:
@ -141,40 +137,6 @@ int InternalKeyComparator::Compare(const ParsedInternalKey& a,
return r; return r;
} }
void InternalKeyComparator::FindShortestSeparator(std::string* start,
const Slice& limit) const {
// Attempt to shorten the user portion of the key
Slice user_start = ExtractUserKey(*start);
Slice user_limit = ExtractUserKey(limit);
std::string tmp(user_start.data(), user_start.size());
user_comparator_.FindShortestSeparator(&tmp, user_limit);
if (tmp.size() <= user_start.size() &&
user_comparator_.Compare(user_start, tmp) < 0) {
// User key has become shorter physically, but larger logically.
// Tack on the earliest possible number to the shortened user key.
PutFixed64(&tmp,
PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek));
assert(this->Compare(*start, tmp) < 0);
assert(this->Compare(tmp, limit) < 0);
start->swap(tmp);
}
}
void InternalKeyComparator::FindShortSuccessor(std::string* key) const {
Slice user_key = ExtractUserKey(*key);
std::string tmp(user_key.data(), user_key.size());
user_comparator_.FindShortSuccessor(&tmp);
if (tmp.size() <= user_key.size() &&
user_comparator_.Compare(user_key, tmp) < 0) {
// User key has become shorter physically, but larger logically.
// Tack on the earliest possible number to the shortened user key.
PutFixed64(&tmp,
PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek));
assert(this->Compare(*key, tmp) < 0);
key->swap(tmp);
}
}
LookupKey::LookupKey(const Slice& _user_key, SequenceNumber s, LookupKey::LookupKey(const Slice& _user_key, SequenceNumber s,
const Slice* ts) { const Slice* ts) {
size_t usize = _user_key.size(); size_t usize = _user_key.size();

@ -235,7 +235,7 @@ class InternalKeyComparator
#ifdef NDEBUG #ifdef NDEBUG
final final
#endif #endif
: public Comparator { : public CompareInterface {
private: private:
UserComparatorWrapper user_comparator_; UserComparatorWrapper user_comparator_;
@ -249,17 +249,19 @@ class InternalKeyComparator
// this constructor to precompute the result of `Name()`. To avoid this // this constructor to precompute the result of `Name()`. To avoid this
// overhead, set `named` to false. In that case, `Name()` will return a // overhead, set `named` to false. In that case, `Name()` will return a
// generic name that is non-specific to the underlying comparator. // generic name that is non-specific to the underlying comparator.
explicit InternalKeyComparator(const Comparator* c) explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c) {}
: Comparator(c->timestamp_size()), user_comparator_(c) {}
virtual ~InternalKeyComparator() {} virtual ~InternalKeyComparator() {}
virtual const char* Name() const override; int Compare(const Slice& a, const Slice& b) const override;
virtual int Compare(const Slice& a, const Slice& b) const override;
bool Equal(const Slice& a, const Slice& b) const {
// TODO Use user_comparator_.Equal(). Perhaps compare seqno before
// comparing the user key too.
return Compare(a, b) == 0;
}
// Same as Compare except that it excludes the value type from comparison // Same as Compare except that it excludes the value type from comparison
virtual int CompareKeySeq(const Slice& a, const Slice& b) const; int CompareKeySeq(const Slice& a, const Slice& b) const;
virtual void FindShortestSeparator(std::string* start,
const Slice& limit) const override;
virtual void FindShortSuccessor(std::string* key) const override;
const Comparator* user_comparator() const { const Comparator* user_comparator() const {
return user_comparator_.user_comparator(); return user_comparator_.user_comparator();
@ -273,9 +275,6 @@ class InternalKeyComparator
// value `kDisableGlobalSequenceNumber`. // value `kDisableGlobalSequenceNumber`.
int Compare(const Slice& a, SequenceNumber a_global_seqno, const Slice& b, int Compare(const Slice& a, SequenceNumber a_global_seqno, const Slice& b,
SequenceNumber b_global_seqno) const; SequenceNumber b_global_seqno) const;
virtual const Comparator* GetRootComparator() const override {
return user_comparator_.GetRootComparator();
}
}; };
// The class represent the internal key in encoded form. // The class represent the internal key in encoded form.

@ -9,6 +9,7 @@
#include "db/dbformat.h" #include "db/dbformat.h"
#include "table/block_based/index_builder.h"
#include "test_util/testharness.h" #include "test_util/testharness.h"
#include "test_util/testutil.h" #include "test_util/testutil.h"
@ -24,13 +25,15 @@ static std::string IKey(const std::string& user_key,
static std::string Shorten(const std::string& s, const std::string& l) { static std::string Shorten(const std::string& s, const std::string& l) {
std::string result = s; std::string result = s;
InternalKeyComparator(BytewiseComparator()).FindShortestSeparator(&result, l); ShortenedIndexBuilder::FindShortestInternalKeySeparator(*BytewiseComparator(),
&result, l);
return result; return result;
} }
static std::string ShortSuccessor(const std::string& s) { static std::string ShortSuccessor(const std::string& s) {
std::string result = s; std::string result = s;
InternalKeyComparator(BytewiseComparator()).FindShortSuccessor(&result); ShortenedIndexBuilder::FindShortInternalKeySuccessor(*BytewiseComparator(),
&result);
return result; return result;
} }

@ -4,6 +4,7 @@
// (found in the LICENSE.Apache file in the root directory). // (found in the LICENSE.Apache file in the root directory).
#pragma once #pragma once
#include "rocksdb/comparator.h"
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
#include <string> #include <string>
@ -28,14 +29,14 @@ struct FileMetaData;
class MinIterComparator { class MinIterComparator {
public: public:
explicit MinIterComparator(const Comparator* comparator) : explicit MinIterComparator(const CompareInterface* comparator)
comparator_(comparator) {} : comparator_(comparator) {}
bool operator()(InternalIterator* a, InternalIterator* b) { bool operator()(InternalIterator* a, InternalIterator* b) {
return comparator_->Compare(a->key(), b->key()) > 0; return comparator_->Compare(a->key(), b->key()) > 0;
} }
private: private:
const Comparator* comparator_; const CompareInterface* comparator_;
}; };
using MinIterHeap = using MinIterHeap =

@ -17,6 +17,22 @@ namespace ROCKSDB_NAMESPACE {
class Slice; class Slice;
// The general interface for comparing two Slices are defined for both of
// Comparator and some internal data structures.
class CompareInterface {
public:
virtual ~CompareInterface() {}
// Three-way comparison. Returns value:
// < 0 iff "a" < "b",
// == 0 iff "a" == "b",
// > 0 iff "a" > "b"
// Note that Compare(a, b) also compares timestamp if timestamp size is
// non-zero. For the same user key with different timestamps, larger (newer)
// timestamp comes first.
virtual int Compare(const Slice& a, const Slice& b) const = 0;
};
// A Comparator object provides a total order across slices that are // A Comparator object provides a total order across slices that are
// used as keys in an sstable or a database. A Comparator implementation // used as keys in an sstable or a database. A Comparator implementation
// must be thread-safe since rocksdb may invoke its methods concurrently // must be thread-safe since rocksdb may invoke its methods concurrently
@ -25,7 +41,7 @@ class Slice;
// Exceptions MUST NOT propagate out of overridden functions into RocksDB, // Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior // because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more. // including data loss, unreported corruption, deadlocks, and more.
class Comparator : public Customizable { class Comparator : public Customizable, public CompareInterface {
public: public:
Comparator() : timestamp_size_(0) {} Comparator() : timestamp_size_(0) {}
@ -47,24 +63,6 @@ class Comparator : public Customizable {
const Comparator** comp); const Comparator** comp);
static const char* Type() { return "Comparator"; } static const char* Type() { return "Comparator"; }
// Three-way comparison. Returns value:
// < 0 iff "a" < "b",
// == 0 iff "a" == "b",
// > 0 iff "a" > "b"
// Note that Compare(a, b) also compares timestamp if timestamp size is
// non-zero. For the same user key with different timestamps, larger (newer)
// timestamp comes first.
virtual int Compare(const Slice& a, const Slice& b) const = 0;
// Compares two slices for equality. The following invariant should always
// hold (and is the default implementation):
// Equal(a, b) iff Compare(a, b) == 0
// Overwrite only if equality comparisons can be done more efficiently than
// three-way comparisons.
virtual bool Equal(const Slice& a, const Slice& b) const {
return Compare(a, b) == 0;
}
// The name of the comparator. Used to check for comparator // The name of the comparator. Used to check for comparator
// mismatches (i.e., a DB created with one comparator is // mismatches (i.e., a DB created with one comparator is
// accessed using a different comparator. // accessed using a different comparator.
@ -77,6 +75,15 @@ class Comparator : public Customizable {
// by any clients of this package. // by any clients of this package.
const char* Name() const override = 0; const char* Name() const override = 0;
// Compares two slices for equality. The following invariant should always
// hold (and is the default implementation):
// Equal(a, b) iff Compare(a, b) == 0
// Overwrite only if equality comparisons can be done more efficiently than
// three-way comparisons.
virtual bool Equal(const Slice& a, const Slice& b) const {
return Compare(a, b) == 0;
}
// Advanced functions: these are used to reduce the space requirements // Advanced functions: these are used to reduce the space requirements
// for internal data structures like index blocks. // for internal data structures like index blocks.
@ -91,10 +98,6 @@ class Comparator : public Customizable {
// i.e., an implementation of this method that does nothing is correct. // i.e., an implementation of this method that does nothing is correct.
virtual void FindShortSuccessor(std::string* key) const = 0; virtual void FindShortSuccessor(std::string* key) const = 0;
// if it is a wrapped comparator, may return the root one.
// return itself it is not wrapped.
virtual const Comparator* GetRootComparator() const { return this; }
// given two keys, determine if t is the successor of s // given two keys, determine if t is the successor of s
// BUG: only return true if no other keys starting with `t` are ordered // BUG: only return true if no other keys starting with `t` are ordered
// before `t`. Otherwise, the auto_prefix_mode can omit entries within // before `t`. Otherwise, the auto_prefix_mode can omit entries within
@ -111,6 +114,10 @@ class Comparator : public Customizable {
// with the customized comparator. // with the customized comparator.
virtual bool CanKeysWithDifferentByteContentsBeEqual() const { return true; } virtual bool CanKeysWithDifferentByteContentsBeEqual() const { return true; }
// if it is a wrapped comparator, may return the root one.
// return itself it is not wrapped.
virtual const Comparator* GetRootComparator() const { return this; }
inline size_t timestamp_size() const { return timestamp_size_; } inline size_t timestamp_size() const { return timestamp_size_; }
int CompareWithoutTimestamp(const Slice& a, const Slice& b) const { int CompareWithoutTimestamp(const Slice& a, const Slice& b) const {

@ -10,11 +10,12 @@
#include "table/block_based/index_builder.h" #include "table/block_based/index_builder.h"
#include <assert.h> #include <assert.h>
#include <cinttypes>
#include <cinttypes>
#include <list> #include <list>
#include <string> #include <string>
#include "db/dbformat.h"
#include "rocksdb/comparator.h" #include "rocksdb/comparator.h"
#include "rocksdb/flush_block_policy.h" #include "rocksdb/flush_block_policy.h"
#include "table/block_based/partitioned_filter_block.h" #include "table/block_based/partitioned_filter_block.h"
@ -68,6 +69,40 @@ IndexBuilder* IndexBuilder::CreateIndexBuilder(
return result; return result;
} }
void ShortenedIndexBuilder::FindShortestInternalKeySeparator(
const Comparator& comparator, std::string* start, const Slice& limit) {
// Attempt to shorten the user portion of the key
Slice user_start = ExtractUserKey(*start);
Slice user_limit = ExtractUserKey(limit);
std::string tmp(user_start.data(), user_start.size());
comparator.FindShortestSeparator(&tmp, user_limit);
if (tmp.size() <= user_start.size() &&
comparator.Compare(user_start, tmp) < 0) {
// User key has become shorter physically, but larger logically.
// Tack on the earliest possible number to the shortened user key.
PutFixed64(&tmp,
PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek));
assert(InternalKeyComparator(&comparator).Compare(*start, tmp) < 0);
assert(InternalKeyComparator(&comparator).Compare(tmp, limit) < 0);
start->swap(tmp);
}
}
void ShortenedIndexBuilder::FindShortInternalKeySuccessor(
const Comparator& comparator, std::string* key) {
Slice user_key = ExtractUserKey(*key);
std::string tmp(user_key.data(), user_key.size());
comparator.FindShortSuccessor(&tmp);
if (tmp.size() <= user_key.size() && comparator.Compare(user_key, tmp) < 0) {
// User key has become shorter physically, but larger logically.
// Tack on the earliest possible number to the shortened user key.
PutFixed64(&tmp,
PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek));
assert(InternalKeyComparator(&comparator).Compare(*key, tmp) < 0);
key->swap(tmp);
}
}
PartitionedIndexBuilder* PartitionedIndexBuilder::CreateIndexBuilder( PartitionedIndexBuilder* PartitionedIndexBuilder::CreateIndexBuilder(
const InternalKeyComparator* comparator, const InternalKeyComparator* comparator,
const bool use_value_delta_encoding, const bool use_value_delta_encoding,

@ -152,8 +152,9 @@ class ShortenedIndexBuilder : public IndexBuilder {
if (first_key_in_next_block != nullptr) { if (first_key_in_next_block != nullptr) {
if (shortening_mode_ != if (shortening_mode_ !=
BlockBasedTableOptions::IndexShorteningMode::kNoShortening) { BlockBasedTableOptions::IndexShorteningMode::kNoShortening) {
comparator_->FindShortestSeparator(last_key_in_current_block, FindShortestInternalKeySeparator(*comparator_->user_comparator(),
*first_key_in_next_block); last_key_in_current_block,
*first_key_in_next_block);
} }
if (!seperator_is_key_plus_seq_ && if (!seperator_is_key_plus_seq_ &&
comparator_->user_comparator()->Compare( comparator_->user_comparator()->Compare(
@ -164,7 +165,8 @@ class ShortenedIndexBuilder : public IndexBuilder {
} else { } else {
if (shortening_mode_ == BlockBasedTableOptions::IndexShorteningMode:: if (shortening_mode_ == BlockBasedTableOptions::IndexShorteningMode::
kShortenSeparatorsAndSuccessor) { kShortenSeparatorsAndSuccessor) {
comparator_->FindShortSuccessor(last_key_in_current_block); FindShortInternalKeySuccessor(*comparator_->user_comparator(),
last_key_in_current_block);
} }
} }
auto sep = Slice(*last_key_in_current_block); auto sep = Slice(*last_key_in_current_block);
@ -212,6 +214,15 @@ class ShortenedIndexBuilder : public IndexBuilder {
return seperator_is_key_plus_seq_; return seperator_is_key_plus_seq_;
} }
// Changes *key to a short string >= *key.
//
static void FindShortestInternalKeySeparator(const Comparator& comparator,
std::string* start,
const Slice& limit);
static void FindShortInternalKeySuccessor(const Comparator& comparator,
std::string* key);
friend class PartitionedIndexBuilder; friend class PartitionedIndexBuilder;
private: private:

@ -187,7 +187,7 @@ class InternalIteratorBase : public Cleanable {
virtual void SetReadaheadState(ReadaheadFileInfo* /*readahead_file_info*/) {} virtual void SetReadaheadState(ReadaheadFileInfo* /*readahead_file_info*/) {}
protected: protected:
void SeekForPrevImpl(const Slice& target, const Comparator* cmp) { void SeekForPrevImpl(const Slice& target, const CompareInterface* cmp) {
Seek(target); Seek(target);
if (!Valid()) { if (!Valid()) {
SeekToLast(); SeekToLast();

@ -100,7 +100,7 @@ struct SstFileWriter::Rep {
} }
Status Add(const Slice& user_key, const Slice& value, ValueType value_type) { Status Add(const Slice& user_key, const Slice& value, ValueType value_type) {
if (internal_comparator.timestamp_size() != 0) { if (internal_comparator.user_comparator()->timestamp_size() != 0) {
return Status::InvalidArgument("Timestamp size mismatch"); return Status::InvalidArgument("Timestamp size mismatch");
} }
@ -111,7 +111,8 @@ struct SstFileWriter::Rep {
ValueType value_type) { ValueType value_type) {
const size_t timestamp_size = timestamp.size(); const size_t timestamp_size = timestamp.size();
if (internal_comparator.timestamp_size() != timestamp_size) { if (internal_comparator.user_comparator()->timestamp_size() !=
timestamp_size) {
return Status::InvalidArgument("Timestamp size mismatch"); return Status::InvalidArgument("Timestamp size mismatch");
} }
@ -131,7 +132,7 @@ struct SstFileWriter::Rep {
} }
Status DeleteRange(const Slice& begin_key, const Slice& end_key) { Status DeleteRange(const Slice& begin_key, const Slice& end_key) {
if (internal_comparator.timestamp_size() != 0) { if (internal_comparator.user_comparator()->timestamp_size() != 0) {
return Status::InvalidArgument("Timestamp size mismatch"); return Status::InvalidArgument("Timestamp size mismatch");
} }

@ -6,6 +6,7 @@
#include <vector> #include <vector>
#include "db/dbformat.h" #include "db/dbformat.h"
#include "rocksdb/comparator.h"
#include "rocksdb/iterator.h" #include "rocksdb/iterator.h"
#include "rocksdb/slice.h" #include "rocksdb/slice.h"
#include "table/internal_iterator.h" #include "table/internal_iterator.h"
@ -16,7 +17,7 @@ namespace ROCKSDB_NAMESPACE {
class VectorIterator : public InternalIterator { class VectorIterator : public InternalIterator {
public: public:
VectorIterator(std::vector<std::string> keys, std::vector<std::string> values, VectorIterator(std::vector<std::string> keys, std::vector<std::string> values,
const Comparator* icmp = nullptr) const CompareInterface* icmp = nullptr)
: keys_(std::move(keys)), : keys_(std::move(keys)),
values_(std::move(values)), values_(std::move(values)),
current_(keys_.size()), current_(keys_.size()),
@ -90,7 +91,7 @@ class VectorIterator : public InternalIterator {
private: private:
struct IndexedKeyComparator { struct IndexedKeyComparator {
IndexedKeyComparator(const Comparator* c, IndexedKeyComparator(const CompareInterface* c,
const std::vector<std::string>* ks) const std::vector<std::string>* ks)
: cmp(c), keys(ks) {} : cmp(c), keys(ks) {}
@ -106,7 +107,7 @@ class VectorIterator : public InternalIterator {
return cmp->Compare(a, (*keys)[b]) < 0; return cmp->Compare(a, (*keys)[b]) < 0;
} }
const Comparator* cmp; const CompareInterface* cmp;
const std::vector<std::string>* keys; const std::vector<std::string>* keys;
}; };

Loading…
Cancel
Save