Enhance partial merge to support multiple arguments

Summary:
* PartialMerge api now takes a list of operands instead of two operands.
* Add min_pertial_merge_operands to Options, indicating the minimum
  number of operands to trigger partial merge.
* This diff is based on Schalk's previous diff (D14601), but it also
  includes necessary changes such as updating the pure C api for
  partial merge.

Test Plan:
* make check all
* develop tests for cases where partial merge takes more than two
  operands.

TODOs (from Schalk):
* Add test with min_partial_merge_operands > 2.
* Perform benchmarks to measure the performance improvements (can probably
  use results of task #2837810.)
* Add description of problem to doc/index.html.
* Change wiki pages to reflect the interface changes.

Reviewers: haobo, igor, vamsi

Reviewed By: haobo

CC: leveldb, dhruba

Differential Revision: https://reviews.facebook.net/D16815
main
Yueh-Hsuan Chiang 10 years ago
parent e6d4b006b6
commit cda4006e87
  1. 1
      db/builder.cc
  2. 58
      db/c.cc
  3. 4
      db/c_test.c
  4. 1
      db/db_impl.cc
  5. 15
      db/db_iter.cc
  6. 11
      db/memtable.cc
  7. 8
      db/merge_context.h
  8. 39
      db/merge_helper.cc
  9. 6
      db/merge_helper.h
  10. 23
      db/merge_operator.cc
  11. 91
      db/merge_test.cc
  12. 11
      db/version_set.cc
  13. 4
      include/rocksdb/c.h
  14. 51
      include/rocksdb/merge_operator.h
  15. 9
      include/rocksdb/options.h
  16. 10
      util/options.cc
  17. 14
      utilities/merge_operators/put.cc
  18. 41
      utilities/merge_operators/string_append/stringappend2.cc
  19. 24
      utilities/merge_operators/string_append/stringappend2.h
  20. 26
      utilities/ttl/db_ttl.h

@ -73,6 +73,7 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options,
MergeHelper merge(internal_comparator.user_comparator(), MergeHelper merge(internal_comparator.user_comparator(),
options.merge_operator.get(), options.info_log.get(), options.merge_operator.get(), options.info_log.get(),
options.min_partial_merge_operands,
true /* internal key corruption is not ok */); true /* internal key corruption is not ok */);
if (purge) { if (purge) {

@ -159,11 +159,9 @@ struct rocksdb_mergeoperator_t : public MergeOperator {
const char* const* operands_list, const size_t* operands_list_length, const char* const* operands_list, const size_t* operands_list_length,
int num_operands, int num_operands,
unsigned char* success, size_t* new_value_length); unsigned char* success, size_t* new_value_length);
char* (*partial_merge_)( char* (*partial_merge_)(void*, const char* key, size_t key_length,
void*, const char* const* operands_list,
const char* key, size_t key_length, const size_t* operands_list_length, int num_operands,
const char* left_operand, size_t left_operand_length,
const char* right_operand, size_t right_operand_length,
unsigned char* success, size_t* new_value_length); unsigned char* success, size_t* new_value_length);
void (*delete_value_)( void (*delete_value_)(
void*, void*,
@ -219,21 +217,23 @@ struct rocksdb_mergeoperator_t : public MergeOperator {
return success; return success;
} }
virtual bool PartialMerge( virtual bool PartialMergeMulti(const Slice& key,
const Slice& key, const std::deque<Slice>& operand_list,
const Slice& left_operand, std::string* new_value, Logger* logger) const {
const Slice& right_operand, size_t operand_count = operand_list.size();
std::string* new_value, std::vector<const char*> operand_pointers(operand_count);
Logger* logger) const { std::vector<size_t> operand_sizes(operand_count);
for (size_t i = 0; i < operand_count; ++i) {
Slice operand(operand_list[i]);
operand_pointers[i] = operand.data();
operand_sizes[i] = operand.size();
}
unsigned char success; unsigned char success;
size_t new_value_len; size_t new_value_len;
char* tmp_new_value = (*partial_merge_)( char* tmp_new_value = (*partial_merge_)(
state_, state_, key.data(), key.size(), &operand_pointers[0], &operand_sizes[0],
key.data(), key.size(), operand_count, &success, &new_value_len);
left_operand.data(), left_operand.size(),
right_operand.data(), right_operand.size(),
&success, &new_value_len);
new_value->assign(tmp_new_value, new_value_len); new_value->assign(tmp_new_value, new_value_len);
if (delete_value_ != nullptr) { if (delete_value_ != nullptr) {
@ -1094,24 +1094,18 @@ rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom(int bits_per_key) {
} }
rocksdb_mergeoperator_t* rocksdb_mergeoperator_create( rocksdb_mergeoperator_t* rocksdb_mergeoperator_create(
void* state, void* state, void (*destructor)(void*),
void (*destructor)(void*), char* (*full_merge)(void*, const char* key, size_t key_length,
char* (*full_merge)( const char* existing_value,
void*, size_t existing_value_length,
const char* key, size_t key_length, const char* const* operands_list,
const char* existing_value, size_t existing_value_length, const size_t* operands_list_length, int num_operands,
const char* const* operands_list, const size_t* operands_list_length,
int num_operands,
unsigned char* success, size_t* new_value_length), unsigned char* success, size_t* new_value_length),
char* (*partial_merge)( char* (*partial_merge)(void*, const char* key, size_t key_length,
void*, const char* const* operands_list,
const char* key, size_t key_length, const size_t* operands_list_length, int num_operands,
const char* left_operand, size_t left_operand_length,
const char* right_operand, size_t right_operand_length,
unsigned char* success, size_t* new_value_length), unsigned char* success, size_t* new_value_length),
void (*delete_value)( void (*delete_value)(void*, const char* value, size_t value_length),
void*,
const char* value, size_t value_length),
const char* (*name)(void*)) { const char* (*name)(void*)) {
rocksdb_mergeoperator_t* result = new rocksdb_mergeoperator_t; rocksdb_mergeoperator_t* result = new rocksdb_mergeoperator_t;
result->state_ = state; result->state_ = state;

@ -175,8 +175,8 @@ static char* MergeOperatorFullMerge(
static char* MergeOperatorPartialMerge( static char* MergeOperatorPartialMerge(
void* arg, void* arg,
const char* key, size_t key_length, const char* key, size_t key_length,
const char* left_operand, size_t left_operand_length, const char* const* operands_list, const size_t* operands_list_length,
const char* right_operand, size_t right_operand_length, int num_operands,
unsigned char* success, size_t* new_value_length) { unsigned char* success, size_t* new_value_length) {
*new_value_length = 4; *new_value_length = 4;
*success = 1; *success = 1;

@ -2463,6 +2463,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact,
std::vector<char> delete_key; // for compaction filter std::vector<char> delete_key; // for compaction filter
MergeHelper merge(user_comparator(), options_.merge_operator.get(), MergeHelper merge(user_comparator(), options_.merge_operator.get(),
options_.info_log.get(), options_.info_log.get(),
options_.min_partial_merge_operands,
false /* internal key corruption is expected */); false /* internal key corruption is expected */);
auto compaction_filter = options_.compaction_filter; auto compaction_filter = options_.compaction_filter;
std::unique_ptr<CompactionFilter> compaction_filter_from_factory = nullptr; std::unique_ptr<CompactionFilter> compaction_filter_from_factory = nullptr;

@ -313,21 +313,6 @@ void DBIter::MergeValuesNewToOld() {
// when complete, add result to operands and continue. // when complete, add result to operands and continue.
const Slice& value = iter_->value(); const Slice& value = iter_->value();
operands.push_front(value.ToString()); operands.push_front(value.ToString());
while(operands.size() >= 2) {
// Call user associative-merge until it returns false
if (user_merge_operator_->PartialMerge(ikey.user_key,
Slice(operands[0]),
Slice(operands[1]),
&merge_result,
logger_)) {
operands.pop_front();
swap(operands.front(), merge_result);
} else {
// Associative merge returns false ==> stack the operands
break;
}
}
} }
} }

@ -353,17 +353,6 @@ static bool SaveValue(void* arg, const char* entry) {
Slice v = GetLengthPrefixedSlice(key_ptr + key_length); Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
*(s->merge_in_progress) = true; *(s->merge_in_progress) = true;
merge_context->PushOperand(v); merge_context->PushOperand(v);
while (merge_context->GetNumOperands() >= 2) {
// Attempt to associative merge. (Returns true if successful)
if (merge_operator->PartialMerge(
s->key->user_key(), merge_context->GetOperand(0),
merge_context->GetOperand(1), &merge_result, s->logger)) {
merge_context->PushPartialMergeResult(merge_result);
} else {
// Stack them because user can't associative merge
break;
}
}
return true; return true;
} }
default: default:

@ -25,12 +25,12 @@ public:
operand_list->clear(); operand_list->clear();
} }
} }
// Replace the first two operands of merge_result, which are expected be the // Replace all operands with merge_result, which are expected to be the
// merge results of them. // merge result of them.
void PushPartialMergeResult(std::string& merge_result) { void PushPartialMergeResult(std::string& merge_result) {
assert (operand_list); assert (operand_list);
operand_list->pop_front(); operand_list->clear();
swap(operand_list->front(), merge_result); operand_list->push_front(std::move(merge_result));
} }
// Push a merge operand // Push a merge operand
void PushOperand(const Slice& operand_slice) { void PushOperand(const Slice& operand_slice) {

@ -129,28 +129,10 @@ void MergeHelper::MergeUntil(Iterator* iter, SequenceNumber stop_before,
// => then continue because we haven't yet seen a Put/Delete. // => then continue because we haven't yet seen a Put/Delete.
assert(!operands_.empty()); // Should have at least one element in it assert(!operands_.empty()); // Should have at least one element in it
// keep queuing keys and operands until we either meet a put / delete
// request or later did a partial merge.
keys_.push_front(iter->key().ToString()); keys_.push_front(iter->key().ToString());
operands_.push_front(iter->value().ToString()); operands_.push_front(iter->value().ToString());
while (operands_.size() >= 2) {
// Returns false when the merge_operator can no longer process it
if (user_merge_operator_->PartialMerge(ikey.user_key,
Slice(operands_[0]),
Slice(operands_[1]),
&merge_result,
logger_)) {
// Merging of operands (associative merge) was successful.
// Replace these frontmost two operands with the merge result
keys_.pop_front();
operands_.pop_front();
swap(operands_.front(), merge_result);
} else {
// Merging of operands (associative merge) returned false.
// The user merge_operator does not know how to merge these operands.
// So we just stack them up until we find a Put/Delete or end of key.
break;
}
}
continue;
} }
} }
@ -192,6 +174,23 @@ void MergeHelper::MergeUntil(Iterator* iter, SequenceNumber stop_before,
RecordTick(stats, NUMBER_MERGE_FAILURES); RecordTick(stats, NUMBER_MERGE_FAILURES);
// Do nothing if not success_. Leave keys() and operands() as they are. // Do nothing if not success_. Leave keys() and operands() as they are.
} }
} else {
// We haven't seen the beginning of the key nor a Put/Delete.
// Attempt to use the user's associative merge function to
// merge the stacked merge operands into a single operand.
if (operands_.size() >= 2 &&
operands_.size() >= min_partial_merge_operands_ &&
user_merge_operator_->PartialMergeMulti(
ikey.user_key,
std::deque<Slice>(operands_.begin(), operands_.end()),
&merge_result, logger_)) {
// Merging of operands (associative merge) was successful.
// Replace operands with the merge result
operands_.clear();
operands_.push_front(std::move(merge_result));
keys_.erase(keys_.begin(), keys_.end() - 1);
}
} }
} }

@ -22,12 +22,13 @@ class Statistics;
class MergeHelper { class MergeHelper {
public: public:
MergeHelper(const Comparator* user_comparator, MergeHelper(const Comparator* user_comparator,
const MergeOperator* user_merge_operator, const MergeOperator* user_merge_operator, Logger* logger,
Logger* logger, unsigned min_partial_merge_operands,
bool assert_valid_internal_key) bool assert_valid_internal_key)
: user_comparator_(user_comparator), : user_comparator_(user_comparator),
user_merge_operator_(user_merge_operator), user_merge_operator_(user_merge_operator),
logger_(logger), logger_(logger),
min_partial_merge_operands_(min_partial_merge_operands),
assert_valid_internal_key_(assert_valid_internal_key), assert_valid_internal_key_(assert_valid_internal_key),
keys_(), keys_(),
operands_(), operands_(),
@ -88,6 +89,7 @@ class MergeHelper {
const Comparator* user_comparator_; const Comparator* user_comparator_;
const MergeOperator* user_merge_operator_; const MergeOperator* user_merge_operator_;
Logger* logger_; Logger* logger_;
unsigned min_partial_merge_operands_;
bool assert_valid_internal_key_; // enforce no internal key corruption? bool assert_valid_internal_key_; // enforce no internal key corruption?
// the scratch area that holds the result of MergeUntil // the scratch area that holds the result of MergeUntil

@ -11,6 +11,28 @@
namespace rocksdb { namespace rocksdb {
// The default implementation of PartialMergeMulti, which invokes
// PartialMerge multiple times internally and merges two operands at
// a time.
bool MergeOperator::PartialMergeMulti(const Slice& key,
const std::deque<Slice>& operand_list,
std::string* new_value,
Logger* logger) const {
// Simply loop through the operands
std::string temp_value;
Slice temp_slice;
for (const auto& operand : operand_list) {
if (!PartialMerge(key, temp_slice, operand, &temp_value, logger)) {
return false;
}
swap(temp_value, *new_value);
temp_slice = Slice(*new_value);
}
// The result will be in *new_value. All merges succeeded.
return true;
}
// Given a "real" merge from the library, call the user's // Given a "real" merge from the library, call the user's
// associative merge function one-by-one on each of the operands. // associative merge function one-by-one on each of the operands.
// NOTE: It is assumed that the client's merge-operator will handle any errors. // NOTE: It is assumed that the client's merge-operator will handle any errors.
@ -46,7 +68,6 @@ bool AssociativeMergeOperator::PartialMerge(
const Slice& right_operand, const Slice& right_operand,
std::string* new_value, std::string* new_value,
Logger* logger) const { Logger* logger) const {
return Merge(key, &left_operand, right_operand, new_value, logger); return Merge(key, &left_operand, right_operand, new_value, logger);
} }

@ -24,10 +24,14 @@ using namespace rocksdb;
namespace { namespace {
int numMergeOperatorCalls; int numMergeOperatorCalls;
void resetNumMergeOperatorCalls() { void resetNumMergeOperatorCalls() {
numMergeOperatorCalls = 0; numMergeOperatorCalls = 0;
} }
int num_partial_merge_calls;
void resetNumPartialMergeCalls() {
num_partial_merge_calls = 0;
}
} }
class CountMergeOperator : public AssociativeMergeOperator { class CountMergeOperator : public AssociativeMergeOperator {
@ -42,6 +46,11 @@ class CountMergeOperator : public AssociativeMergeOperator {
std::string* new_value, std::string* new_value,
Logger* logger) const override { Logger* logger) const override {
++numMergeOperatorCalls; ++numMergeOperatorCalls;
if (existing_value == nullptr) {
new_value->assign(value.data(), value.size());
return true;
}
return mergeOperator_->PartialMerge( return mergeOperator_->PartialMerge(
key, key,
*existing_value, *existing_value,
@ -50,6 +59,14 @@ class CountMergeOperator : public AssociativeMergeOperator {
logger); logger);
} }
virtual bool PartialMergeMulti(const Slice& key,
const std::deque<Slice>& operand_list,
std::string* new_value, Logger* logger) const {
++num_partial_merge_calls;
return mergeOperator_->PartialMergeMulti(key, operand_list, new_value,
logger);
}
virtual const char* Name() const override { virtual const char* Name() const override {
return "UInt64AddOperator"; return "UInt64AddOperator";
} }
@ -58,16 +75,16 @@ class CountMergeOperator : public AssociativeMergeOperator {
std::shared_ptr<MergeOperator> mergeOperator_; std::shared_ptr<MergeOperator> mergeOperator_;
}; };
std::shared_ptr<DB> OpenDb( std::shared_ptr<DB> OpenDb(const string& dbname, const bool ttl = false,
const string& dbname, const size_t max_successive_merges = 0,
const bool ttl = false, const uint32_t min_partial_merge_operands = 2) {
const unsigned max_successive_merges = 0) {
DB* db; DB* db;
StackableDB* sdb; StackableDB* sdb;
Options options; Options options;
options.create_if_missing = true; options.create_if_missing = true;
options.merge_operator = std::make_shared<CountMergeOperator>(); options.merge_operator = std::make_shared<CountMergeOperator>();
options.max_successive_merges = max_successive_merges; options.max_successive_merges = max_successive_merges;
options.min_partial_merge_operands = min_partial_merge_operands;
Status s; Status s;
DestroyDB(dbname, Options()); DestroyDB(dbname, Options());
if (ttl) { if (ttl) {
@ -306,6 +323,44 @@ void testSuccessiveMerge(
} }
} }
void testPartialMerge(Counters* counters, DB* db, int max_merge, int min_merge,
int count) {
FlushOptions o;
o.wait = true;
// Test case 1: partial merge should be called when the number of merge
// operands exceeds the threshold.
uint64_t tmp_sum = 0;
resetNumPartialMergeCalls();
for (int i = 1; i <= count; i++) {
counters->assert_add("b", i);
tmp_sum += i;
}
db->Flush(o);
db->CompactRange(nullptr, nullptr);
ASSERT_EQ(tmp_sum, counters->assert_get("b"));
if (count > max_merge) {
// in this case, FullMerge should be called instead.
ASSERT_EQ(num_partial_merge_calls, 0);
} else {
// if count >= min_merge, then partial merge should be called once.
ASSERT_EQ((count >= min_merge), (num_partial_merge_calls == 1));
}
// Test case 2: partial merge should not be called when a put is found.
resetNumPartialMergeCalls();
tmp_sum = 0;
db->Put(rocksdb::WriteOptions(), "c", "10");
for (int i = 1; i <= count; i++) {
counters->assert_add("c", i);
tmp_sum += i;
}
db->Flush(o);
db->CompactRange(nullptr, nullptr);
ASSERT_EQ(tmp_sum, counters->assert_get("c"));
ASSERT_EQ(num_partial_merge_calls, 0);
}
void testSingleBatchSuccessiveMerge( void testSingleBatchSuccessiveMerge(
DB* db, DB* db,
int max_num_merges, int max_num_merges,
@ -370,20 +425,40 @@ void runTest(int argc, const string& dbname, const bool use_ttl = false) {
{ {
cout << "Test merge in memtable... \n"; cout << "Test merge in memtable... \n";
unsigned maxMerge = 5; size_t max_merge = 5;
auto db = OpenDb(dbname, use_ttl, maxMerge); auto db = OpenDb(dbname, use_ttl, max_merge);
MergeBasedCounters counters(db, 0); MergeBasedCounters counters(db, 0);
testCounters(counters, db.get(), compact); testCounters(counters, db.get(), compact);
testSuccessiveMerge(counters, maxMerge, maxMerge * 2); testSuccessiveMerge(counters, max_merge, max_merge * 2);
testSingleBatchSuccessiveMerge(db.get(), 5, 7); testSingleBatchSuccessiveMerge(db.get(), 5, 7);
DestroyDB(dbname, Options()); DestroyDB(dbname, Options());
} }
{
cout << "Test Partial-Merge\n";
size_t max_merge = 100;
for (uint32_t min_merge = 5; min_merge < 25; min_merge += 5) {
for (uint32_t count = min_merge - 1; count <= min_merge + 1; count++) {
auto db = OpenDb(dbname, use_ttl, max_merge, min_merge);
MergeBasedCounters counters(db, 0);
testPartialMerge(&counters, db.get(), max_merge, min_merge, count);
DestroyDB(dbname, Options());
}
{
auto db = OpenDb(dbname, use_ttl, max_merge, min_merge);
MergeBasedCounters counters(db, 0);
testPartialMerge(&counters, db.get(), max_merge, min_merge,
min_merge * 10);
DestroyDB(dbname, Options());
}
}
}
} }
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
//TODO: Make this test like a general rocksdb unit-test //TODO: Make this test like a general rocksdb unit-test
runTest(argc, test::TmpDir() + "/merge_testdb"); runTest(argc, test::TmpDir() + "/merge_testdb");
runTest(argc, test::TmpDir() + "/merge_testdbttl", true); // Run test on TTL database runTest(argc, test::TmpDir() + "/merge_testdbttl", true); // Run test on TTL database
printf("Passed all tests!\n");
return 0; return 0;
} }

@ -419,17 +419,6 @@ static bool SaveValue(void* arg, const ParsedInternalKey& parsed_key,
assert(s->state == kNotFound || s->state == kMerge); assert(s->state == kNotFound || s->state == kMerge);
s->state = kMerge; s->state = kMerge;
merge_contex->PushOperand(v); merge_contex->PushOperand(v);
while (merge_contex->GetNumOperands() >= 2) {
// Attempt to merge operands together via user associateive merge
if (s->merge_operator->PartialMerge(
s->user_key, merge_contex->GetOperand(0),
merge_contex->GetOperand(1), &merge_result, s->logger)) {
merge_contex->PushPartialMergeResult(merge_result);
} else {
// Associative merge returns false ==> stack the operands
break;
}
}
return true; return true;
default: default:

@ -425,8 +425,8 @@ extern rocksdb_mergeoperator_t* rocksdb_mergeoperator_create(
char* (*partial_merge)( char* (*partial_merge)(
void*, void*,
const char* key, size_t key_length, const char* key, size_t key_length,
const char* left_operand, size_t left_operand_length, const char* const* operands_list, const size_t* operands_list_length,
const char* right_operand, size_t right_operand_length, int num_operands,
unsigned char* success, size_t* new_value_length), unsigned char* success, size_t* new_value_length),
void (*delete_value)( void (*delete_value)(
void*, void*,

@ -32,9 +32,9 @@ class Logger;
// //
// b) MergeOperator - the generic class for all the more abstract / complex // b) MergeOperator - the generic class for all the more abstract / complex
// operations; one method (FullMerge) to merge a Put/Delete value with a // operations; one method (FullMerge) to merge a Put/Delete value with a
// merge operand; and another method (PartialMerge) that merges two // merge operand; and another method (PartialMerge) that merges multiple
// operands together. this is especially useful if your key values have a // operands together. this is especially useful if your key values have
// complex structure but you would still like to support client-specific // complex structures but you would still like to support client-specific
// incremental updates. // incremental updates.
// //
// AssociativeMergeOperator is simpler to implement. MergeOperator is simply // AssociativeMergeOperator is simpler to implement. MergeOperator is simply
@ -80,6 +80,13 @@ class MergeOperator {
// DB::Merge(key, *new_value) would yield the same result as a call // DB::Merge(key, *new_value) would yield the same result as a call
// to DB::Merge(key, left_op) followed by DB::Merge(key, right_op). // to DB::Merge(key, left_op) followed by DB::Merge(key, right_op).
// //
// The default implementation of PartialMergeMulti will use this function
// as a helper, for backward compatibility. Any successor class of
// MergeOperator should either implement PartialMerge or PartialMergeMulti,
// although implementing PartialMergeMulti is suggested as it is in general
// more effective to merge multiple operands at a time instead of two
// operands at a time.
//
// If it is impossible or infeasible to combine the two operations, // If it is impossible or infeasible to combine the two operations,
// leave new_value unchanged and return false. The library will // leave new_value unchanged and return false. The library will
// internally keep track of the operations, and apply them in the // internally keep track of the operations, and apply them in the
@ -89,12 +96,38 @@ class MergeOperator {
// and simply "return false". For now, the client should simply return // and simply "return false". For now, the client should simply return
// false in any case it cannot perform partial-merge, regardless of reason. // false in any case it cannot perform partial-merge, regardless of reason.
// If there is corruption in the data, handle it in the FullMerge() function, // If there is corruption in the data, handle it in the FullMerge() function,
// and return false there. // and return false there. The default implementation of PartialMerge will
virtual bool PartialMerge(const Slice& key, // always return false.
const Slice& left_operand, virtual bool PartialMerge(const Slice& key, const Slice& left_operand,
const Slice& right_operand, const Slice& right_operand, std::string* new_value,
std::string* new_value, Logger* logger) const {
Logger* logger) const = 0; return false;
}
// This function performs merge when all the operands are themselves merge
// operation types that you would have passed to a DB::Merge() call in the
// same order (front() first)
// (i.e. DB::Merge(key, operand_list[0]), followed by
// DB::Merge(key, operand_list[1]), ...)
//
// PartialMergeMulti should combine them into a single merge operation that is
// saved into *new_value, and then it should return true. *new_value should
// be constructed such that a call to DB::Merge(key, *new_value) would yield
// the same result as subquential individual calls to DB::Merge(key, operand)
// for each operand in operand_list from front() to back().
//
// The PartialMergeMulti function will be called only when the list of
// operands are long enough. The minimum amount of operands that will be
// passed to the function are specified by the "min_partial_merge_operands"
// option.
//
// In the default implementation, PartialMergeMulti will invoke PartialMerge
// multiple times, where each time it only merges two operands. Developers
// should either implement PartialMergeMulti, or implement PartialMerge which
// is served as the helper function of the default PartialMergeMulti.
virtual bool PartialMergeMulti(const Slice& key,
const std::deque<Slice>& operand_list,
std::string* new_value, Logger* logger) const;
// The name of the MergeOperator. Used to check for MergeOperator // The name of the MergeOperator. Used to check for MergeOperator
// mismatches (i.e., a DB created with one MergeOperator is // mismatches (i.e., a DB created with one MergeOperator is

@ -723,6 +723,15 @@ struct Options {
// Default: 0 (disabled) // Default: 0 (disabled)
size_t max_successive_merges; size_t max_successive_merges;
// The number of partial merge operands to accumulate before partial
// merge will be performed. Partial merge will not be called
// if the list of values to merge is less than min_partial_merge_operands.
//
// If min_partial_merge_operands < 2, then it will be treated as 2.
//
// Default: 2
uint32_t min_partial_merge_operands;
// Allow RocksDB to use thread local storage to optimize performance. // Allow RocksDB to use thread local storage to optimize performance.
// Default: true // Default: true
bool allow_thread_local; bool allow_thread_local;

@ -30,8 +30,7 @@ Options::Options()
: comparator(BytewiseComparator()), : comparator(BytewiseComparator()),
merge_operator(nullptr), merge_operator(nullptr),
compaction_filter(nullptr), compaction_filter(nullptr),
compaction_filter_factory( compaction_filter_factory(std::shared_ptr<CompactionFilterFactory>(
std::shared_ptr<CompactionFilterFactory>(
new DefaultCompactionFilterFactory())), new DefaultCompactionFilterFactory())),
create_if_missing(false), create_if_missing(false),
error_if_exists(false), error_if_exists(false),
@ -39,7 +38,7 @@ Options::Options()
env(Env::Default()), env(Env::Default()),
info_log(nullptr), info_log(nullptr),
info_log_level(INFO), info_log_level(INFO),
write_buffer_size(4<<20), write_buffer_size(4 << 20),
max_write_buffer_number(2), max_write_buffer_number(2),
min_write_buffer_number_to_merge(1), min_write_buffer_number_to_merge(1),
max_open_files(1000), max_open_files(1000),
@ -95,7 +94,7 @@ Options::Options()
is_fd_close_on_exec(true), is_fd_close_on_exec(true),
skip_log_error_on_recovery(false), skip_log_error_on_recovery(false),
stats_dump_period_sec(3600), stats_dump_period_sec(3600),
block_size_deviation (10), block_size_deviation(10),
advise_random_on_open(true), advise_random_on_open(true),
access_hint_on_compaction_start(NORMAL), access_hint_on_compaction_start(NORMAL),
use_adaptive_mutex(false), use_adaptive_mutex(false),
@ -113,6 +112,7 @@ Options::Options()
memtable_prefix_bloom_bits(0), memtable_prefix_bloom_bits(0),
memtable_prefix_bloom_probes(6), memtable_prefix_bloom_probes(6),
max_successive_merges(0), max_successive_merges(0),
min_partial_merge_operands(2),
allow_thread_local(true) { allow_thread_local(true) {
assert(memtable_factory.get() != nullptr); assert(memtable_factory.get() != nullptr);
} }
@ -306,6 +306,8 @@ Options::Dump(Logger* log) const
inplace_update_support); inplace_update_support);
Log(log, " Options.inplace_update_num_locks: %zd", Log(log, " Options.inplace_update_num_locks: %zd",
inplace_update_num_locks); inplace_update_num_locks);
Log(log, " Options.min_partial_merge_operands: %u",
min_partial_merge_operands);
// TODO: easier config for bloom (maybe based on avg key/value size) // TODO: easier config for bloom (maybe based on avg key/value size)
Log(log, " Options.memtable_prefix_bloom_bits: %d", Log(log, " Options.memtable_prefix_bloom_bits: %d",
memtable_prefix_bloom_bits); memtable_prefix_bloom_bits);

@ -1,3 +1,8 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#include <memory> #include <memory>
#include "rocksdb/slice.h" #include "rocksdb/slice.h"
#include "rocksdb/merge_operator.h" #include "rocksdb/merge_operator.h"
@ -38,6 +43,15 @@ class PutOperator : public MergeOperator {
return true; return true;
} }
using MergeOperator::PartialMergeMulti;
virtual bool PartialMergeMulti(const Slice& key,
const std::deque<Slice>& operand_list,
std::string* new_value, Logger* logger) const
override {
new_value->assign(operand_list.back().data(), operand_list.back().size());
return true;
}
virtual const char* Name() const override { virtual const char* Name() const override {
return "PutOperator"; return "PutOperator";
} }

@ -6,6 +6,7 @@
#include "stringappend2.h" #include "stringappend2.h"
#include <memory> #include <memory>
#include <string>
#include <assert.h> #include <assert.h>
#include "rocksdb/slice.h" #include "rocksdb/slice.h"
@ -61,31 +62,39 @@ bool StringAppendTESTOperator::FullMerge(
return true; return true;
} }
bool StringAppendTESTOperator::PartialMerge(const Slice& key, bool StringAppendTESTOperator::PartialMergeMulti(
const Slice& left_operand, const Slice& key, const std::deque<Slice>& operand_list,
const Slice& right_operand, std::string* new_value, Logger* logger) const {
std::string* new_value,
Logger* logger) const {
return false; return false;
} }
// A version of PartialMerge that actually performs "partial merging". // A version of PartialMerge that actually performs "partial merging".
// Use this to simulate the exact behaviour of the StringAppendOperator. // Use this to simulate the exact behaviour of the StringAppendOperator.
bool StringAppendTESTOperator::_AssocPartialMerge(const Slice& key, bool StringAppendTESTOperator::_AssocPartialMergeMulti(
const Slice& left_operand, const Slice& key, const std::deque<Slice>& operand_list,
const Slice& right_operand, std::string* new_value, Logger* logger) const {
std::string* new_value, // Clear the *new_value for writing
Logger* logger) const {
// Clear the *new_value for writing.
assert(new_value); assert(new_value);
new_value->clear(); new_value->clear();
assert(operand_list.size() >= 2);
// Generic append // Generic append
// Reserve correct size for *new_value, and apply concatenation. // Determine and reserve correct size for *new_value.
new_value->reserve(left_operand.size() + 1 + right_operand.size()); size_t size = 0;
new_value->assign(left_operand.data(), left_operand.size()); for (const auto& operand : operand_list) {
new_value->append(1,delim_); size += operand.size();
new_value->append(right_operand.data(), right_operand.size()); }
size += operand_list.size() - 1; // Delimiters
new_value->reserve(size);
// Apply concatenation
new_value->assign(operand_list.front().data(), operand_list.front().size());
for (std::deque<Slice>::const_iterator it = operand_list.begin() + 1;
it != operand_list.end(); ++it) {
new_value->append(1, delim_);
new_value->append(it->data(), it->size());
}
return true; return true;
} }

@ -11,6 +11,9 @@
*/ */
#pragma once #pragma once
#include <deque>
#include <string>
#include "rocksdb/merge_operator.h" #include "rocksdb/merge_operator.h"
#include "rocksdb/slice.h" #include "rocksdb/slice.h"
@ -18,8 +21,8 @@ namespace rocksdb {
class StringAppendTESTOperator : public MergeOperator { class StringAppendTESTOperator : public MergeOperator {
public: public:
// Constructor with delimiter
StringAppendTESTOperator(char delim_char); /// Constructor with delimiter explicit StringAppendTESTOperator(char delim_char);
virtual bool FullMerge(const Slice& key, virtual bool FullMerge(const Slice& key,
const Slice* existing_value, const Slice* existing_value,
@ -27,22 +30,19 @@ class StringAppendTESTOperator : public MergeOperator {
std::string* new_value, std::string* new_value,
Logger* logger) const override; Logger* logger) const override;
virtual bool PartialMerge(const Slice& key, virtual bool PartialMergeMulti(const Slice& key,
const Slice& left_operand, const std::deque<Slice>& operand_list,
const Slice& right_operand, std::string* new_value, Logger* logger) const
std::string* new_value, override;
Logger* logger) const override;
virtual const char* Name() const override; virtual const char* Name() const override;
private: private:
// A version of PartialMerge that actually performs "partial merging". // A version of PartialMerge that actually performs "partial merging".
// Use this to simulate the exact behaviour of the StringAppendOperator. // Use this to simulate the exact behaviour of the StringAppendOperator.
bool _AssocPartialMerge(const Slice& key, bool _AssocPartialMergeMulti(const Slice& key,
const Slice& left_operand, const std::deque<Slice>& operand_list,
const Slice& right_operand, std::string* new_value, Logger* logger) const;
std::string* new_value,
Logger* logger) const;
char delim_; // The delimiter is inserted between elements char delim_; // The delimiter is inserted between elements

@ -3,6 +3,9 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors. // found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once #pragma once
#include <deque>
#include <string>
#include "rocksdb/db.h" #include "rocksdb/db.h"
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/compaction_filter.h" #include "rocksdb/compaction_filter.h"
@ -268,24 +271,27 @@ class TtlMergeOperator : public MergeOperator {
} }
} }
virtual bool PartialMerge(const Slice& key, virtual bool PartialMergeMulti(const Slice& key,
const Slice& left_operand, const std::deque<Slice>& operand_list,
const Slice& right_operand, std::string* new_value, Logger* logger) const
std::string* new_value, override {
Logger* logger) const override {
const uint32_t ts_len = DBWithTTL::kTSLength; const uint32_t ts_len = DBWithTTL::kTSLength;
std::deque<Slice> operands_without_ts;
if (left_operand.size() < ts_len || right_operand.size() < ts_len) { for (const auto& operand : operand_list) {
if (operand.size() < ts_len) {
Log(logger, "Error: Could not remove timestamp from value."); Log(logger, "Error: Could not remove timestamp from value.");
return false; return false;
} }
operands_without_ts.push_back(
Slice(operand.data(), operand.size() - ts_len));
}
// Apply the user partial-merge operator (store result in *new_value) // Apply the user partial-merge operator (store result in *new_value)
assert(new_value); assert(new_value);
Slice left_without_ts(left_operand.data(), left_operand.size() - ts_len); if (!user_merge_op_->PartialMergeMulti(key, operands_without_ts, new_value,
Slice right_without_ts(right_operand.data(), right_operand.size() - ts_len); logger)) {
if (!user_merge_op_->PartialMerge(key, left_without_ts, right_without_ts,
new_value, logger)) {
return false; return false;
} }

Loading…
Cancel
Save