Allow to use a string as a delimiter in StringAppendOperator (#8536)

Summary:
An arbitrary string can be used as a delimiter in StringAppend merge operator
flavor. In particular, it allows using an empty string, combining binary values for
the same key byte-to-byte one next to another.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8536

Reviewed By: mrambacher

Differential Revision: D29962120

Pulled By: zhichao-cao

fbshipit-source-id: 4ef5d846a47835cf428a11200409e30e2dbffc4f
main
Mikhail Golubev 3 years ago committed by Facebook GitHub Bot
parent ab7f7c9e49
commit 8f52972cf9
  1. 3
      HISTORY.md
  2. 16
      java/rocksjni/merge_operator.cc
  3. 5
      java/src/main/java/org/rocksdb/StringAppendOperator.java
  4. 26
      java/src/test/java/org/rocksdb/MergeTest.java
  5. 2
      utilities/merge_operators.h
  6. 17
      utilities/merge_operators/string_append/stringappend.cc
  7. 4
      utilities/merge_operators/string_append/stringappend.h
  8. 19
      utilities/merge_operators/string_append/stringappend2.cc
  9. 4
      utilities/merge_operators/string_append/stringappend2.h
  10. 89
      utilities/merge_operators/string_append/stringappend_test.cc

@ -9,6 +9,9 @@
### Performance Improvements ### Performance Improvements
* Try to avoid updating DBOptions if `SetDBOptions()` does not change any option value. * Try to avoid updating DBOptions if `SetDBOptions()` does not change any option value.
### Behavior Changes
* `StringAppendOperator` additionally accepts a string as the delimiter.
## 6.23.0 (2021-07-16) ## 6.23.0 (2021-07-16)
### Behavior Changes ### Behavior Changes
* Obsolete keys in the bottommost level that were preserved for a snapshot will now be cleaned upon snapshot release in all cases. This form of compaction (snapshot release triggered compaction) previously had an artificial limitation that multiple tombstones needed to be present. * Obsolete keys in the bottommost level that were preserved for a snapshot will now be cleaned upon snapshot release in all cases. This form of compaction (snapshot release triggered compaction) previously had an artificial limitation that multiple tombstones needed to be present.

@ -30,7 +30,7 @@
* Method: newSharedStringAppendOperator * Method: newSharedStringAppendOperator
* Signature: (C)J * Signature: (C)J
*/ */
jlong Java_org_rocksdb_StringAppendOperator_newSharedStringAppendOperator( jlong Java_org_rocksdb_StringAppendOperator_newSharedStringAppendOperator__C(
JNIEnv* /*env*/, jclass /*jclazz*/, jchar jdelim) { JNIEnv* /*env*/, jclass /*jclazz*/, jchar jdelim) {
auto* sptr_string_append_op = auto* sptr_string_append_op =
new std::shared_ptr<ROCKSDB_NAMESPACE::MergeOperator>( new std::shared_ptr<ROCKSDB_NAMESPACE::MergeOperator>(
@ -39,6 +39,20 @@ jlong Java_org_rocksdb_StringAppendOperator_newSharedStringAppendOperator(
return reinterpret_cast<jlong>(sptr_string_append_op); return reinterpret_cast<jlong>(sptr_string_append_op);
} }
jlong Java_org_rocksdb_StringAppendOperator_newSharedStringAppendOperator__Ljava_lang_String_2(
JNIEnv* env, jclass /*jclass*/, jstring jdelim) {
jboolean has_exception = JNI_FALSE;
auto delim =
ROCKSDB_NAMESPACE::JniUtil::copyStdString(env, jdelim, &has_exception);
if (has_exception == JNI_TRUE) {
return 0;
}
auto* sptr_string_append_op =
new std::shared_ptr<ROCKSDB_NAMESPACE::MergeOperator>(
ROCKSDB_NAMESPACE::MergeOperators::CreateStringAppendOperator(delim));
return reinterpret_cast<jlong>(sptr_string_append_op);
}
/* /*
* Class: org_rocksdb_StringAppendOperator * Class: org_rocksdb_StringAppendOperator
* Method: disposeInternal * Method: disposeInternal

@ -19,6 +19,11 @@ public class StringAppendOperator extends MergeOperator {
super(newSharedStringAppendOperator(delim)); super(newSharedStringAppendOperator(delim));
} }
public StringAppendOperator(String delim) {
super(newSharedStringAppendOperator(delim));
}
private native static long newSharedStringAppendOperator(final char delim); private native static long newSharedStringAppendOperator(final char delim);
private native static long newSharedStringAppendOperator(final String delim);
@Override protected final native void disposeInternal(final long handle); @Override protected final native void disposeInternal(final long handle);
} }

@ -412,6 +412,32 @@ public class MergeTest {
} }
} }
@Test
public void emptyStringAsStringAppendDelimiter() throws RocksDBException {
try (final StringAppendOperator stringAppendOperator = new StringAppendOperator("");
final Options opt =
new Options().setCreateIfMissing(true).setMergeOperator(stringAppendOperator);
final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) {
db.put("key".getBytes(), "aa".getBytes());
db.merge("key".getBytes(), "bb".getBytes());
final byte[] value = db.get("key".getBytes());
assertThat(new String(value)).isEqualTo("aabb");
}
}
@Test
public void multiCharStringAsStringAppendDelimiter() throws RocksDBException {
try (final StringAppendOperator stringAppendOperator = new StringAppendOperator("<>");
final Options opt =
new Options().setCreateIfMissing(true).setMergeOperator(stringAppendOperator);
final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) {
db.put("key".getBytes(), "aa".getBytes());
db.merge("key".getBytes(), "bb".getBytes());
final byte[] value = db.get("key".getBytes());
assertThat(new String(value)).isEqualTo("aa<>bb");
}
}
@Test @Test
public void emptyStringInSetMergeOperatorByName() { public void emptyStringInSetMergeOperatorByName() {
try (final Options opt = new Options() try (final Options opt = new Options()

@ -20,6 +20,8 @@ class MergeOperators {
static std::shared_ptr<MergeOperator> CreateUInt64AddOperator(); static std::shared_ptr<MergeOperator> CreateUInt64AddOperator();
static std::shared_ptr<MergeOperator> CreateStringAppendOperator(); static std::shared_ptr<MergeOperator> CreateStringAppendOperator();
static std::shared_ptr<MergeOperator> CreateStringAppendOperator(char delim_char); static std::shared_ptr<MergeOperator> CreateStringAppendOperator(char delim_char);
static std::shared_ptr<MergeOperator> CreateStringAppendOperator(
const std::string& delim);
static std::shared_ptr<MergeOperator> CreateStringAppendTESTOperator(); static std::shared_ptr<MergeOperator> CreateStringAppendTESTOperator();
static std::shared_ptr<MergeOperator> CreateMaxOperator(); static std::shared_ptr<MergeOperator> CreateMaxOperator();
static std::shared_ptr<MergeOperator> CreateBytesXOROperator(); static std::shared_ptr<MergeOperator> CreateBytesXOROperator();

@ -17,8 +17,10 @@ namespace ROCKSDB_NAMESPACE {
// Constructor: also specify the delimiter character. // Constructor: also specify the delimiter character.
StringAppendOperator::StringAppendOperator(char delim_char) StringAppendOperator::StringAppendOperator(char delim_char)
: delim_(delim_char) { : delim_(1, delim_char) {}
}
StringAppendOperator::StringAppendOperator(const std::string& delim)
: delim_(delim) {}
// Implementation for the merge operation (concatenates two strings) // Implementation for the merge operation (concatenates two strings)
bool StringAppendOperator::Merge(const Slice& /*key*/, bool StringAppendOperator::Merge(const Slice& /*key*/,
@ -35,9 +37,9 @@ bool StringAppendOperator::Merge(const Slice& /*key*/,
} else { } else {
// Generic append (existing_value != null). // Generic append (existing_value != null).
// Reserve *new_value to correct size, and apply concatenation. // Reserve *new_value to correct size, and apply concatenation.
new_value->reserve(existing_value->size() + 1 + value.size()); new_value->reserve(existing_value->size() + delim_.size() + value.size());
new_value->assign(existing_value->data(),existing_value->size()); new_value->assign(existing_value->data(), existing_value->size());
new_value->append(1,delim_); new_value->append(delim_);
new_value->append(value.data(), value.size()); new_value->append(value.data(), value.size());
} }
@ -56,4 +58,9 @@ std::shared_ptr<MergeOperator> MergeOperators::CreateStringAppendOperator(char d
return std::make_shared<StringAppendOperator>(delim_char); return std::make_shared<StringAppendOperator>(delim_char);
} }
std::shared_ptr<MergeOperator> MergeOperators::CreateStringAppendOperator(
const std::string& delim) {
return std::make_shared<StringAppendOperator>(delim);
}
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE

@ -14,6 +14,7 @@ class StringAppendOperator : public AssociativeMergeOperator {
public: public:
// Constructor: specify delimiter // Constructor: specify delimiter
explicit StringAppendOperator(char delim_char); explicit StringAppendOperator(char delim_char);
explicit StringAppendOperator(const std::string& delim);
virtual bool Merge(const Slice& key, virtual bool Merge(const Slice& key,
const Slice* existing_value, const Slice* existing_value,
@ -24,8 +25,7 @@ class StringAppendOperator : public AssociativeMergeOperator {
virtual const char* Name() const override; virtual const char* Name() const override;
private: private:
char delim_; // The delimiter is inserted between elements std::string delim_; // The delimiter is inserted between elements
}; };
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE

@ -17,8 +17,10 @@ namespace ROCKSDB_NAMESPACE {
// Constructor: also specify the delimiter character. // Constructor: also specify the delimiter character.
StringAppendTESTOperator::StringAppendTESTOperator(char delim_char) StringAppendTESTOperator::StringAppendTESTOperator(char delim_char)
: delim_(delim_char) { : delim_(1, delim_char) {}
}
StringAppendTESTOperator::StringAppendTESTOperator(const std::string& delim)
: delim_(delim) {}
// Implementation for the merge operation (concatenates two strings) // Implementation for the merge operation (concatenates two strings)
bool StringAppendTESTOperator::FullMergeV2( bool StringAppendTESTOperator::FullMergeV2(
@ -37,7 +39,7 @@ bool StringAppendTESTOperator::FullMergeV2(
size_t numBytes = 0; size_t numBytes = 0;
for (auto it = merge_in.operand_list.begin(); for (auto it = merge_in.operand_list.begin();
it != merge_in.operand_list.end(); ++it) { it != merge_in.operand_list.end(); ++it) {
numBytes += it->size() + 1; // Plus 1 for the delimiter numBytes += it->size() + delim_.size();
} }
// Only print the delimiter after the first entry has been printed // Only print the delimiter after the first entry has been printed
@ -50,15 +52,16 @@ bool StringAppendTESTOperator::FullMergeV2(
merge_in.existing_value->size()); merge_in.existing_value->size());
printDelim = true; printDelim = true;
} else if (numBytes) { } else if (numBytes) {
merge_out->new_value.reserve( // Without the existing (initial) value, the delimiter before the first of
numBytes - 1); // Minus 1 since we have one less delimiter // subsequent operands becomes redundant.
merge_out->new_value.reserve(numBytes - delim_.size());
} }
// Concatenate the sequence of strings (and add a delimiter between each) // Concatenate the sequence of strings (and add a delimiter between each)
for (auto it = merge_in.operand_list.begin(); for (auto it = merge_in.operand_list.begin();
it != merge_in.operand_list.end(); ++it) { it != merge_in.operand_list.end(); ++it) {
if (printDelim) { if (printDelim) {
merge_out->new_value.append(1, delim_); merge_out->new_value.append(delim_);
} }
merge_out->new_value.append(it->data(), it->size()); merge_out->new_value.append(it->data(), it->size());
printDelim = true; printDelim = true;
@ -89,7 +92,7 @@ bool StringAppendTESTOperator::_AssocPartialMergeMulti(
for (const auto& operand : operand_list) { for (const auto& operand : operand_list) {
size += operand.size(); size += operand.size();
} }
size += operand_list.size() - 1; // Delimiters size += (operand_list.size() - 1) * delim_.length(); // Delimiters
new_value->reserve(size); new_value->reserve(size);
// Apply concatenation // Apply concatenation
@ -97,7 +100,7 @@ bool StringAppendTESTOperator::_AssocPartialMergeMulti(
for (std::deque<Slice>::const_iterator it = operand_list.begin() + 1; for (std::deque<Slice>::const_iterator it = operand_list.begin() + 1;
it != operand_list.end(); ++it) { it != operand_list.end(); ++it) {
new_value->append(1, delim_); new_value->append(delim_);
new_value->append(it->data(), it->size()); new_value->append(it->data(), it->size());
} }

@ -24,6 +24,7 @@ class StringAppendTESTOperator : public MergeOperator {
public: public:
// Constructor with delimiter // Constructor with delimiter
explicit StringAppendTESTOperator(char delim_char); explicit StringAppendTESTOperator(char delim_char);
explicit StringAppendTESTOperator(const std::string& delim);
virtual bool FullMergeV2(const MergeOperationInput& merge_in, virtual bool FullMergeV2(const MergeOperationInput& merge_in,
MergeOperationOutput* merge_out) const override; MergeOperationOutput* merge_out) const override;
@ -42,8 +43,7 @@ class StringAppendTESTOperator : public MergeOperator {
const std::deque<Slice>& operand_list, const std::deque<Slice>& operand_list,
std::string* new_value, Logger* logger) const; std::string* new_value, Logger* logger) const;
char delim_; // The delimiter is inserted between elements std::string delim_; // The delimiter is inserted between elements
}; };
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE

@ -35,22 +35,34 @@ const std::string kDbName = test::PerThreadDBPath("stringappend_test");
namespace { namespace {
// OpenDb opens a (possibly new) rocksdb database with a StringAppendOperator // OpenDb opens a (possibly new) rocksdb database with a StringAppendOperator
std::shared_ptr<DB> OpenNormalDb(char delim_char) { std::shared_ptr<DB> OpenNormalDb(const std::string& delim) {
DB* db; DB* db;
Options options; Options options;
options.create_if_missing = true; options.create_if_missing = true;
options.merge_operator.reset(new StringAppendOperator(delim_char)); MergeOperator* mergeOperator;
if (delim.size() == 1) {
mergeOperator = new StringAppendOperator(delim[0]);
} else {
mergeOperator = new StringAppendOperator(delim);
}
options.merge_operator.reset(mergeOperator);
EXPECT_OK(DB::Open(options, kDbName, &db)); EXPECT_OK(DB::Open(options, kDbName, &db));
return std::shared_ptr<DB>(db); return std::shared_ptr<DB>(db);
} }
#ifndef ROCKSDB_LITE // TtlDb is not supported in Lite #ifndef ROCKSDB_LITE // TtlDb is not supported in Lite
// Open a TtlDB with a non-associative StringAppendTESTOperator // Open a TtlDB with a non-associative StringAppendTESTOperator
std::shared_ptr<DB> OpenTtlDb(char delim_char) { std::shared_ptr<DB> OpenTtlDb(const std::string& delim) {
DBWithTTL* db; DBWithTTL* db;
Options options; Options options;
options.create_if_missing = true; options.create_if_missing = true;
options.merge_operator.reset(new StringAppendTESTOperator(delim_char)); MergeOperator* mergeOperator;
if (delim.size() == 1) {
mergeOperator = new StringAppendTESTOperator(delim[0]);
} else {
mergeOperator = new StringAppendTESTOperator(delim);
}
options.merge_operator.reset(mergeOperator);
EXPECT_OK(DBWithTTL::Open(options, kDbName, &db, 123456)); EXPECT_OK(DBWithTTL::Open(options, kDbName, &db, 123456));
return std::shared_ptr<DB>(db); return std::shared_ptr<DB>(db);
} }
@ -137,7 +149,7 @@ class StringAppendOperatorTest : public testing::Test,
StringAppendOperatorTest::SetOpenDbFunction(&OpenNormalDb); StringAppendOperatorTest::SetOpenDbFunction(&OpenNormalDb);
} }
typedef std::shared_ptr<DB> (* OpenFuncPtr)(char); typedef std::shared_ptr<DB> (*OpenFuncPtr)(const std::string&);
// Allows user to open databases with different configurations. // Allows user to open databases with different configurations.
// e.g.: Can open a DB or a TtlDB, etc. // e.g.: Can open a DB or a TtlDB, etc.
@ -153,7 +165,7 @@ StringAppendOperatorTest::OpenFuncPtr StringAppendOperatorTest::OpenDb = nullptr
// THE TEST CASES BEGIN HERE // THE TEST CASES BEGIN HERE
TEST_P(StringAppendOperatorTest, IteratorTest) { TEST_P(StringAppendOperatorTest, IteratorTest) {
auto db_ = OpenDb(','); auto db_ = OpenDb(",");
StringLists slists(db_); StringLists slists(db_);
slists.Append("k1", "v1"); slists.Append("k1", "v1");
@ -246,7 +258,7 @@ TEST_P(StringAppendOperatorTest, IteratorTest) {
} }
TEST_P(StringAppendOperatorTest, SimpleTest) { TEST_P(StringAppendOperatorTest, SimpleTest) {
auto db = OpenDb(','); auto db = OpenDb(",");
StringLists slists(db); StringLists slists(db);
slists.Append("k1", "v1"); slists.Append("k1", "v1");
@ -259,7 +271,7 @@ TEST_P(StringAppendOperatorTest, SimpleTest) {
} }
TEST_P(StringAppendOperatorTest, SimpleDelimiterTest) { TEST_P(StringAppendOperatorTest, SimpleDelimiterTest) {
auto db = OpenDb('|'); auto db = OpenDb("|");
StringLists slists(db); StringLists slists(db);
slists.Append("k1", "v1"); slists.Append("k1", "v1");
@ -271,8 +283,49 @@ TEST_P(StringAppendOperatorTest, SimpleDelimiterTest) {
ASSERT_EQ(res, "v1|v2|v3"); ASSERT_EQ(res, "v1|v2|v3");
} }
TEST_P(StringAppendOperatorTest, EmptyDelimiterTest) {
auto db = OpenDb("");
StringLists slists(db);
slists.Append("k1", "v1");
slists.Append("k1", "v2");
slists.Append("k1", "v3");
std::string res;
ASSERT_TRUE(slists.Get("k1", &res));
ASSERT_EQ(res, "v1v2v3");
}
TEST_P(StringAppendOperatorTest, MultiCharDelimiterTest) {
auto db = OpenDb("<>");
StringLists slists(db);
slists.Append("k1", "v1");
slists.Append("k1", "v2");
slists.Append("k1", "v3");
std::string res;
ASSERT_TRUE(slists.Get("k1", &res));
ASSERT_EQ(res, "v1<>v2<>v3");
}
TEST_P(StringAppendOperatorTest, DelimiterIsDefensivelyCopiedTest) {
std::string delimiter = "<>";
auto db = OpenDb(delimiter);
StringLists slists(db);
slists.Append("k1", "v1");
slists.Append("k1", "v2");
delimiter.clear();
slists.Append("k1", "v3");
std::string res;
ASSERT_TRUE(slists.Get("k1", &res));
ASSERT_EQ(res, "v1<>v2<>v3");
}
TEST_P(StringAppendOperatorTest, OneValueNoDelimiterTest) { TEST_P(StringAppendOperatorTest, OneValueNoDelimiterTest) {
auto db = OpenDb('!'); auto db = OpenDb("!");
StringLists slists(db); StringLists slists(db);
slists.Append("random_key", "single_val"); slists.Append("random_key", "single_val");
@ -283,7 +336,7 @@ TEST_P(StringAppendOperatorTest, OneValueNoDelimiterTest) {
} }
TEST_P(StringAppendOperatorTest, VariousKeys) { TEST_P(StringAppendOperatorTest, VariousKeys) {
auto db = OpenDb('\n'); auto db = OpenDb("\n");
StringLists slists(db); StringLists slists(db);
slists.Append("c", "asdasd"); slists.Append("c", "asdasd");
@ -309,7 +362,7 @@ TEST_P(StringAppendOperatorTest, VariousKeys) {
// Generate semi random keys/words from a small distribution. // Generate semi random keys/words from a small distribution.
TEST_P(StringAppendOperatorTest, RandomMixGetAppend) { TEST_P(StringAppendOperatorTest, RandomMixGetAppend) {
auto db = OpenDb(' '); auto db = OpenDb(" ");
StringLists slists(db); StringLists slists(db);
// Generate a list of random keys and values // Generate a list of random keys and values
@ -359,7 +412,7 @@ TEST_P(StringAppendOperatorTest, RandomMixGetAppend) {
} }
TEST_P(StringAppendOperatorTest, BIGRandomMixGetAppend) { TEST_P(StringAppendOperatorTest, BIGRandomMixGetAppend) {
auto db = OpenDb(' '); auto db = OpenDb(" ");
StringLists slists(db); StringLists slists(db);
// Generate a list of random keys and values // Generate a list of random keys and values
@ -411,7 +464,7 @@ TEST_P(StringAppendOperatorTest, BIGRandomMixGetAppend) {
TEST_P(StringAppendOperatorTest, PersistentVariousKeys) { TEST_P(StringAppendOperatorTest, PersistentVariousKeys) {
// Perform the following operations in limited scope // Perform the following operations in limited scope
{ {
auto db = OpenDb('\n'); auto db = OpenDb("\n");
StringLists slists(db); StringLists slists(db);
slists.Append("c", "asdasd"); slists.Append("c", "asdasd");
@ -434,7 +487,7 @@ TEST_P(StringAppendOperatorTest, PersistentVariousKeys) {
// Reopen the database (the previous changes should persist / be remembered) // Reopen the database (the previous changes should persist / be remembered)
{ {
auto db = OpenDb('\n'); auto db = OpenDb("\n");
StringLists slists(db); StringLists slists(db);
slists.Append("c", "bbnagnagsx"); slists.Append("c", "bbnagnagsx");
@ -460,7 +513,7 @@ TEST_P(StringAppendOperatorTest, PersistentVariousKeys) {
// Reopen the database (the previous changes should persist / be remembered) // Reopen the database (the previous changes should persist / be remembered)
{ {
auto db = OpenDb('\n'); auto db = OpenDb("\n");
StringLists slists(db); StringLists slists(db);
// All changes should be on disk. This will test VersionSet Get() // All changes should be on disk. This will test VersionSet Get()
@ -478,7 +531,7 @@ TEST_P(StringAppendOperatorTest, PersistentVariousKeys) {
TEST_P(StringAppendOperatorTest, PersistentFlushAndCompaction) { TEST_P(StringAppendOperatorTest, PersistentFlushAndCompaction) {
// Perform the following operations in limited scope // Perform the following operations in limited scope
{ {
auto db = OpenDb('\n'); auto db = OpenDb("\n");
StringLists slists(db); StringLists slists(db);
std::string a, b, c; std::string a, b, c;
@ -517,7 +570,7 @@ TEST_P(StringAppendOperatorTest, PersistentFlushAndCompaction) {
// Reopen the database (the previous changes should persist / be remembered) // Reopen the database (the previous changes should persist / be remembered)
{ {
auto db = OpenDb('\n'); auto db = OpenDb("\n");
StringLists slists(db); StringLists slists(db);
std::string a, b, c; std::string a, b, c;
@ -565,7 +618,7 @@ TEST_P(StringAppendOperatorTest, PersistentFlushAndCompaction) {
} }
TEST_P(StringAppendOperatorTest, SimpleTestNullDelimiter) { TEST_P(StringAppendOperatorTest, SimpleTestNullDelimiter) {
auto db = OpenDb('\0'); auto db = OpenDb(std::string(1, '\0'));
StringLists slists(db); StringLists slists(db);
slists.Append("k1", "v1"); slists.Append("k1", "v1");

Loading…
Cancel
Save