Return `Status` from `MemTable` mutation functions (#7656)

Summary:
This PR updates `MemTable::Add()`, `MemTable::Update()`, and
`MemTable::UpdateCallback()` to return `Status` objects, and adapts the
client code in `MemTableInserter`. The goal is to prepare these
functions for key-value checksum, where we want to verify key-value
integrity while adding to memtable. After this PR, the memtable mutation
functions can report a failed integrity check by returning `Status::Corruption`.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7656

Reviewed By: riversand963

Differential Revision: D24900497

Pulled By: ajkr

fbshipit-source-id: 1a7e80581e3774676f2bbba2f0a0b04890f40009
main
Andrew Kryczka 4 years ago committed by Facebook GitHub Bot
parent 0baa5055f1
commit dd6b7fc520
  1. 57
      db/db_memtable_test.cc
  2. 17
      db/flush_job_test.cc
  3. 51
      db/memtable.cc
  4. 58
      db/memtable.h
  5. 42
      db/memtable_list_test.cc
  6. 273
      db/write_batch.cc
  7. 5
      table/table_test.cc

@ -146,22 +146,15 @@ TEST_F(DBMemTableTest, DuplicateSeq) {
kMaxSequenceNumber, 0 /* column_family_id */); kMaxSequenceNumber, 0 /* column_family_id */);
// Write some keys and make sure it returns false on duplicates // Write some keys and make sure it returns false on duplicates
bool res; ASSERT_OK(mem->Add(seq, kTypeValue, "key", "value2"));
res = mem->Add(seq, kTypeValue, "key", "value2"); ASSERT_TRUE(mem->Add(seq, kTypeValue, "key", "value2").IsTryAgain());
ASSERT_TRUE(res);
res = mem->Add(seq, kTypeValue, "key", "value2");
ASSERT_FALSE(res);
// Changing the type should still cause the duplicatae key // Changing the type should still cause the duplicatae key
res = mem->Add(seq, kTypeMerge, "key", "value2"); ASSERT_TRUE(mem->Add(seq, kTypeMerge, "key", "value2").IsTryAgain());
ASSERT_FALSE(res);
// Changing the seq number will make the key fresh // Changing the seq number will make the key fresh
res = mem->Add(seq + 1, kTypeMerge, "key", "value2"); ASSERT_OK(mem->Add(seq + 1, kTypeMerge, "key", "value2"));
ASSERT_TRUE(res);
// Test with different types for duplicate keys // Test with different types for duplicate keys
res = mem->Add(seq, kTypeDeletion, "key", ""); ASSERT_TRUE(mem->Add(seq, kTypeDeletion, "key", "").IsTryAgain());
ASSERT_FALSE(res); ASSERT_TRUE(mem->Add(seq, kTypeSingleDeletion, "key", "").IsTryAgain());
res = mem->Add(seq, kTypeSingleDeletion, "key", "");
ASSERT_FALSE(res);
// Test the duplicate keys under stress // Test the duplicate keys under stress
for (int i = 0; i < 10000; i++) { for (int i = 0; i < 10000; i++) {
@ -169,11 +162,11 @@ TEST_F(DBMemTableTest, DuplicateSeq) {
if (!insert_dup) { if (!insert_dup) {
seq++; seq++;
} }
res = mem->Add(seq, kTypeValue, "foo", "value" + ToString(seq)); s = mem->Add(seq, kTypeValue, "foo", "value" + ToString(seq));
if (insert_dup) { if (insert_dup) {
ASSERT_FALSE(res); ASSERT_TRUE(s.IsTryAgain());
} else { } else {
ASSERT_TRUE(res); ASSERT_OK(s);
} }
} }
delete mem; delete mem;
@ -185,10 +178,8 @@ TEST_F(DBMemTableTest, DuplicateSeq) {
mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb, mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb,
kMaxSequenceNumber, 0 /* column_family_id */); kMaxSequenceNumber, 0 /* column_family_id */);
// Insert a duplicate key with _ in it // Insert a duplicate key with _ in it
res = mem->Add(seq, kTypeValue, "key_1", "value"); ASSERT_OK(mem->Add(seq, kTypeValue, "key_1", "value"));
ASSERT_TRUE(res); ASSERT_TRUE(mem->Add(seq, kTypeValue, "key_1", "value").IsTryAgain());
res = mem->Add(seq, kTypeValue, "key_1", "value");
ASSERT_FALSE(res);
delete mem; delete mem;
// Test when InsertConcurrently will be invoked // Test when InsertConcurrently will be invoked
@ -197,10 +188,11 @@ TEST_F(DBMemTableTest, DuplicateSeq) {
mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb, mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb,
kMaxSequenceNumber, 0 /* column_family_id */); kMaxSequenceNumber, 0 /* column_family_id */);
MemTablePostProcessInfo post_process_info; MemTablePostProcessInfo post_process_info;
res = mem->Add(seq, kTypeValue, "key", "value", true, &post_process_info); ASSERT_OK(
ASSERT_TRUE(res); mem->Add(seq, kTypeValue, "key", "value", true, &post_process_info));
res = mem->Add(seq, kTypeValue, "key", "value", true, &post_process_info); ASSERT_TRUE(
ASSERT_FALSE(res); mem->Add(seq, kTypeValue, "key", "value", true, &post_process_info)
.IsTryAgain());
delete mem; delete mem;
} }
@ -227,8 +219,7 @@ TEST_F(DBMemTableTest, ConcurrentMergeWrite) {
// Put 0 as the base // Put 0 as the base
PutFixed64(&value, static_cast<uint64_t>(0)); PutFixed64(&value, static_cast<uint64_t>(0));
bool res = mem->Add(0, kTypeValue, "key", value); ASSERT_OK(mem->Add(0, kTypeValue, "key", value));
ASSERT_TRUE(res);
value.clear(); value.clear();
// Write Merge concurrently // Write Merge concurrently
@ -237,9 +228,8 @@ TEST_F(DBMemTableTest, ConcurrentMergeWrite) {
std::string v1; std::string v1;
for (int seq = 1; seq < num_ops / 2; seq++) { for (int seq = 1; seq < num_ops / 2; seq++) {
PutFixed64(&v1, seq); PutFixed64(&v1, seq);
bool res1 = ASSERT_OK(
mem->Add(seq, kTypeMerge, "key", v1, true, &post_process_info1); mem->Add(seq, kTypeMerge, "key", v1, true, &post_process_info1));
ASSERT_TRUE(res1);
v1.clear(); v1.clear();
} }
}); });
@ -248,9 +238,8 @@ TEST_F(DBMemTableTest, ConcurrentMergeWrite) {
std::string v2; std::string v2;
for (int seq = num_ops / 2; seq < num_ops; seq++) { for (int seq = num_ops / 2; seq < num_ops; seq++) {
PutFixed64(&v2, seq); PutFixed64(&v2, seq);
bool res2 = ASSERT_OK(
mem->Add(seq, kTypeMerge, "key", v2, true, &post_process_info2); mem->Add(seq, kTypeMerge, "key", v2, true, &post_process_info2));
ASSERT_TRUE(res2);
v2.clear(); v2.clear();
} }
}); });
@ -261,8 +250,8 @@ TEST_F(DBMemTableTest, ConcurrentMergeWrite) {
ReadOptions roptions; ReadOptions roptions;
SequenceNumber max_covering_tombstone_seq = 0; SequenceNumber max_covering_tombstone_seq = 0;
LookupKey lkey("key", kMaxSequenceNumber); LookupKey lkey("key", kMaxSequenceNumber);
res = mem->Get(lkey, &value, /*timestamp=*/nullptr, &status, &merge_context, bool res = mem->Get(lkey, &value, /*timestamp=*/nullptr, &status,
&max_covering_tombstone_seq, roptions); &merge_context, &max_covering_tombstone_seq, roptions);
ASSERT_TRUE(res); ASSERT_TRUE(res);
uint64_t ivalue = DecodeFixed64(Slice(value).data()); uint64_t ivalue = DecodeFixed64(Slice(value).data());
uint64_t sum = 0; uint64_t sum = 0;

@ -187,7 +187,7 @@ TEST_F(FlushJobTest, NonEmpty) {
for (int i = 1; i < 10000; ++i) { for (int i = 1; i < 10000; ++i) {
std::string key(ToString((i + 1000) % 10000)); std::string key(ToString((i + 1000) % 10000));
std::string value("value" + key); std::string value("value" + key);
new_mem->Add(SequenceNumber(i), kTypeValue, key, value); ASSERT_OK(new_mem->Add(SequenceNumber(i), kTypeValue, key, value));
if ((i + 1000) % 10000 < 9995) { if ((i + 1000) % 10000 < 9995) {
InternalKey internal_key(key, SequenceNumber(i), kTypeValue); InternalKey internal_key(key, SequenceNumber(i), kTypeValue);
inserted_keys.push_back({internal_key.Encode().ToString(), value}); inserted_keys.push_back({internal_key.Encode().ToString(), value});
@ -195,7 +195,8 @@ TEST_F(FlushJobTest, NonEmpty) {
} }
{ {
new_mem->Add(SequenceNumber(10000), kTypeRangeDeletion, "9995", "9999a"); ASSERT_OK(new_mem->Add(SequenceNumber(10000), kTypeRangeDeletion, "9995",
"9999a"));
InternalKey internal_key("9995", SequenceNumber(10000), kTypeRangeDeletion); InternalKey internal_key("9995", SequenceNumber(10000), kTypeRangeDeletion);
inserted_keys.push_back({internal_key.Encode().ToString(), "9999a"}); inserted_keys.push_back({internal_key.Encode().ToString(), "9999a"});
} }
@ -222,7 +223,7 @@ TEST_F(FlushJobTest, NonEmpty) {
} }
const SequenceNumber seq(i + 10001); const SequenceNumber seq(i + 10001);
new_mem->Add(seq, kTypeBlobIndex, key, blob_index); ASSERT_OK(new_mem->Add(seq, kTypeBlobIndex, key, blob_index));
InternalKey internal_key(key, seq, kTypeBlobIndex); InternalKey internal_key(key, seq, kTypeBlobIndex);
inserted_keys.push_back({internal_key.Encode().ToString(), blob_index}); inserted_keys.push_back({internal_key.Encode().ToString(), blob_index});
@ -283,8 +284,8 @@ TEST_F(FlushJobTest, FlushMemTablesSingleColumnFamily) {
for (size_t j = 0; j < num_keys_per_table; ++j) { for (size_t j = 0; j < num_keys_per_table; ++j) {
std::string key(ToString(j + i * num_keys_per_table)); std::string key(ToString(j + i * num_keys_per_table));
std::string value("value" + key); std::string value("value" + key);
mem->Add(SequenceNumber(j + i * num_keys_per_table), kTypeValue, key, ASSERT_OK(mem->Add(SequenceNumber(j + i * num_keys_per_table), kTypeValue,
value); key, value));
} }
} }
@ -356,7 +357,7 @@ TEST_F(FlushJobTest, FlushMemtablesMultipleColumnFamilies) {
for (size_t j = 0; j != num_keys_per_memtable; ++j) { for (size_t j = 0; j != num_keys_per_memtable; ++j) {
std::string key(ToString(j + i * num_keys_per_memtable)); std::string key(ToString(j + i * num_keys_per_memtable));
std::string value("value" + key); std::string value("value" + key);
mem->Add(curr_seqno++, kTypeValue, key, value); ASSERT_OK(mem->Add(curr_seqno++, kTypeValue, key, value));
} }
cfd->imm()->Add(mem, &to_delete); cfd->imm()->Add(mem, &to_delete);
@ -466,7 +467,7 @@ TEST_F(FlushJobTest, Snapshots) {
for (int j = 0; j < insertions; ++j) { for (int j = 0; j < insertions; ++j) {
std::string value(rnd.HumanReadableString(10)); std::string value(rnd.HumanReadableString(10));
auto seqno = ++current_seqno; auto seqno = ++current_seqno;
new_mem->Add(SequenceNumber(seqno), kTypeValue, key, value); ASSERT_OK(new_mem->Add(SequenceNumber(seqno), kTypeValue, key, value));
// a key is visible only if: // a key is visible only if:
// 1. it's the last one written (j == insertions - 1) // 1. it's the last one written (j == insertions - 1)
// 2. there's a snapshot pointing at it // 2. there's a snapshot pointing at it
@ -518,7 +519,7 @@ class FlushJobTimestampTest : public FlushJobTestBase {
Slice value) { Slice value) {
std::string key_str(std::move(key)); std::string key_str(std::move(key));
PutFixed64(&key_str, ts); PutFixed64(&key_str, ts);
memtable->Add(seq, value_type, key_str, value); ASSERT_OK(memtable->Add(seq, value_type, key_str, value));
} }
protected: protected:

@ -480,10 +480,10 @@ MemTable::MemTableStats MemTable::ApproximateStats(const Slice& start_ikey,
return {entry_count * (data_size / n), entry_count}; return {entry_count * (data_size / n), entry_count};
} }
bool MemTable::Add(SequenceNumber s, ValueType type, Status MemTable::Add(SequenceNumber s, ValueType type,
const Slice& key, /* user key */ const Slice& key, /* user key */
const Slice& value, bool allow_concurrent, const Slice& value, bool allow_concurrent,
MemTablePostProcessInfo* post_process_info, void** hint) { MemTablePostProcessInfo* post_process_info, void** hint) {
// Format of an entry is concatenation of: // Format of an entry is concatenation of:
// key_size : varint32 of internal_key.size() // key_size : varint32 of internal_key.size()
// key bytes : char[internal_key.size()] // key bytes : char[internal_key.size()]
@ -519,12 +519,12 @@ bool MemTable::Add(SequenceNumber s, ValueType type,
Slice prefix = insert_with_hint_prefix_extractor_->Transform(key_slice); Slice prefix = insert_with_hint_prefix_extractor_->Transform(key_slice);
bool res = table->InsertKeyWithHint(handle, &insert_hints_[prefix]); bool res = table->InsertKeyWithHint(handle, &insert_hints_[prefix]);
if (UNLIKELY(!res)) { if (UNLIKELY(!res)) {
return res; return Status::TryAgain("key+seq exists");
} }
} else { } else {
bool res = table->InsertKey(handle); bool res = table->InsertKey(handle);
if (UNLIKELY(!res)) { if (UNLIKELY(!res)) {
return res; return Status::TryAgain("key+seq exists");
} }
} }
@ -566,7 +566,7 @@ bool MemTable::Add(SequenceNumber s, ValueType type,
? table->InsertKeyConcurrently(handle) ? table->InsertKeyConcurrently(handle)
: table->InsertKeyWithHintConcurrently(handle, hint); : table->InsertKeyWithHintConcurrently(handle, hint);
if (UNLIKELY(!res)) { if (UNLIKELY(!res)) {
return res; return Status::TryAgain("key+seq exists");
} }
assert(post_process_info != nullptr); assert(post_process_info != nullptr);
@ -600,7 +600,7 @@ bool MemTable::Add(SequenceNumber s, ValueType type,
is_range_del_table_empty_.store(false, std::memory_order_relaxed); is_range_del_table_empty_.store(false, std::memory_order_relaxed);
} }
UpdateOldestKeyTime(); UpdateOldestKeyTime();
return true; return Status::OK();
} }
// Callback from MemTable::Get() // Callback from MemTable::Get()
@ -973,9 +973,8 @@ void MemTable::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
PERF_COUNTER_ADD(get_from_memtable_count, 1); PERF_COUNTER_ADD(get_from_memtable_count, 1);
} }
void MemTable::Update(SequenceNumber seq, Status MemTable::Update(SequenceNumber seq, const Slice& key,
const Slice& key, const Slice& value) {
const Slice& value) {
LookupKey lkey(key, seq); LookupKey lkey(key, seq);
Slice mem_key = lkey.memtable_key(); Slice mem_key = lkey.memtable_key();
@ -1019,22 +1018,18 @@ void MemTable::Update(SequenceNumber seq,
(unsigned)(VarintLength(key_length) + key_length + (unsigned)(VarintLength(key_length) + key_length +
VarintLength(value.size()) + value.size())); VarintLength(value.size()) + value.size()));
RecordTick(moptions_.statistics, NUMBER_KEYS_UPDATED); RecordTick(moptions_.statistics, NUMBER_KEYS_UPDATED);
return; return Status::OK();
} }
} }
} }
} }
// key doesn't exist // The latest value is not `kTypeValue` or key doesn't exist
bool add_res __attribute__((__unused__)); return Add(seq, kTypeValue, key, value);
add_res = Add(seq, kTypeValue, key, value);
// We already checked unused != seq above. In that case, Add should not fail.
assert(add_res);
} }
bool MemTable::UpdateCallback(SequenceNumber seq, Status MemTable::UpdateCallback(SequenceNumber seq, const Slice& key,
const Slice& key, const Slice& delta) {
const Slice& delta) {
LookupKey lkey(key, seq); LookupKey lkey(key, seq);
Slice memkey = lkey.memtable_key(); Slice memkey = lkey.memtable_key();
@ -1088,16 +1083,17 @@ bool MemTable::UpdateCallback(SequenceNumber seq,
} }
RecordTick(moptions_.statistics, NUMBER_KEYS_UPDATED); RecordTick(moptions_.statistics, NUMBER_KEYS_UPDATED);
UpdateFlushState(); UpdateFlushState();
return true; return Status::OK();
} else if (status == UpdateStatus::UPDATED) { } else if (status == UpdateStatus::UPDATED) {
Add(seq, kTypeValue, key, Slice(str_value)); Status s = Add(seq, kTypeValue, key, Slice(str_value));
RecordTick(moptions_.statistics, NUMBER_KEYS_WRITTEN); RecordTick(moptions_.statistics, NUMBER_KEYS_WRITTEN);
UpdateFlushState(); UpdateFlushState();
return true; return s;
} else if (status == UpdateStatus::UPDATE_FAILED) { } else if (status == UpdateStatus::UPDATE_FAILED) {
// No action required. Return. // `UPDATE_FAILED` is named incorrectly. It indicates no update
// happened. It does not indicate a failure happened.
UpdateFlushState(); UpdateFlushState();
return true; return Status::OK();
} }
} }
default: default:
@ -1105,9 +1101,8 @@ bool MemTable::UpdateCallback(SequenceNumber seq,
} }
} }
} }
// If the latest value is not kTypeValue // The latest value is not `kTypeValue` or key doesn't exist
// or key doesn't exist return Status::NotFound();
return false;
} }
size_t MemTable::CountSuccessiveMergeEntries(const LookupKey& key) { size_t MemTable::CountSuccessiveMergeEntries(const LookupKey& key) {

@ -182,12 +182,13 @@ class MemTable {
// REQUIRES: if allow_concurrent = false, external synchronization to prevent // REQUIRES: if allow_concurrent = false, external synchronization to prevent
// simultaneous operations on the same MemTable. // simultaneous operations on the same MemTable.
// //
// Returns false if MemTableRepFactory::CanHandleDuplicatedKey() is true and // Returns `Status::TryAgain` if the `seq`, `key` combination already exists
// the <key, seq> already exists. // in the memtable and `MemTableRepFactory::CanHandleDuplicatedKey()` is true.
bool Add(SequenceNumber seq, ValueType type, const Slice& key, // The next attempt should try a larger value for `seq`.
const Slice& value, bool allow_concurrent = false, Status Add(SequenceNumber seq, ValueType type, const Slice& key,
MemTablePostProcessInfo* post_process_info = nullptr, const Slice& value, bool allow_concurrent = false,
void** hint = nullptr); MemTablePostProcessInfo* post_process_info = nullptr,
void** hint = nullptr);
// Used to Get value associated with key or Get Merge Operands associated // Used to Get value associated with key or Get Merge Operands associated
// with key. // with key.
@ -239,35 +240,34 @@ class MemTable {
void MultiGet(const ReadOptions& read_options, MultiGetRange* range, void MultiGet(const ReadOptions& read_options, MultiGetRange* range,
ReadCallback* callback, bool* is_blob); ReadCallback* callback, bool* is_blob);
// Attempts to update the new_value inplace, else does normal Add // If `key` exists in current memtable with type `kTypeValue` and the existing
// Pseudocode // value is at least as large as the new value, updates it in-place. Otherwise
// if key exists in current memtable && prev_value is of type kTypeValue // adds the new value to the memtable out-of-place.
// if new sizeof(new_value) <= sizeof(prev_value) //
// update inplace // Returns `Status::TryAgain` if the `seq`, `key` combination already exists
// else add(key, new_value) // in the memtable and `MemTableRepFactory::CanHandleDuplicatedKey()` is true.
// else add(key, new_value) // The next attempt should try a larger value for `seq`.
// //
// REQUIRES: external synchronization to prevent simultaneous // REQUIRES: external synchronization to prevent simultaneous
// operations on the same MemTable. // operations on the same MemTable.
void Update(SequenceNumber seq, Status Update(SequenceNumber seq, const Slice& key, const Slice& value);
const Slice& key,
const Slice& value); // If `key` exists in current memtable with type `kTypeValue` and the existing
// value is at least as large as the new value, updates it in-place. Otherwise
// If prev_value for key exists, attempts to update it inplace. // if `key` exists in current memtable with type `kTypeValue`, adds the new
// else returns false // value to the memtable out-of-place.
// Pseudocode //
// if key exists in current memtable && prev_value is of type kTypeValue // Returns `Status::NotFound` if `key` does not exist in current memtable or
// new_value = delta(prev_value) // the latest version of `key` does not have `kTypeValue`.
// if sizeof(new_value) <= sizeof(prev_value) //
// update inplace // Returns `Status::TryAgain` if the `seq`, `key` combination already exists
// else add(key, new_value) // in the memtable and `MemTableRepFactory::CanHandleDuplicatedKey()` is true.
// else return false // The next attempt should try a larger value for `seq`.
// //
// REQUIRES: external synchronization to prevent simultaneous // REQUIRES: external synchronization to prevent simultaneous
// operations on the same MemTable. // operations on the same MemTable.
bool UpdateCallback(SequenceNumber seq, Status UpdateCallback(SequenceNumber seq, const Slice& key,
const Slice& key, const Slice& delta);
const Slice& delta);
// Returns the number of successive merge entries starting from the newest // Returns the number of successive merge entries starting from the newest
// entry for the key up to the last non-merge entry or last entry for the // entry for the key up to the last non-merge entry or last entry for the

@ -242,10 +242,10 @@ TEST_F(MemTableListTest, GetTest) {
mem->Ref(); mem->Ref();
// Write some keys to this memtable. // Write some keys to this memtable.
mem->Add(++seq, kTypeDeletion, "key1", ""); ASSERT_OK(mem->Add(++seq, kTypeDeletion, "key1", ""));
mem->Add(++seq, kTypeValue, "key2", "value2"); ASSERT_OK(mem->Add(++seq, kTypeValue, "key2", "value2"));
mem->Add(++seq, kTypeValue, "key1", "value1"); ASSERT_OK(mem->Add(++seq, kTypeValue, "key1", "value1"));
mem->Add(++seq, kTypeValue, "key2", "value2.2"); ASSERT_OK(mem->Add(++seq, kTypeValue, "key2", "value2.2"));
// Fetch the newly written keys // Fetch the newly written keys
merge_context.Clear(); merge_context.Clear();
@ -283,8 +283,8 @@ TEST_F(MemTableListTest, GetTest) {
kMaxSequenceNumber, 0 /* column_family_id */); kMaxSequenceNumber, 0 /* column_family_id */);
mem2->Ref(); mem2->Ref();
mem2->Add(++seq, kTypeDeletion, "key1", ""); ASSERT_OK(mem2->Add(++seq, kTypeDeletion, "key1", ""));
mem2->Add(++seq, kTypeValue, "key2", "value2.3"); ASSERT_OK(mem2->Add(++seq, kTypeValue, "key2", "value2.3"));
// Add second memtable to list // Add second memtable to list
list.Add(mem2, &to_delete); list.Add(mem2, &to_delete);
@ -359,9 +359,9 @@ TEST_F(MemTableListTest, GetFromHistoryTest) {
mem->Ref(); mem->Ref();
// Write some keys to this memtable. // Write some keys to this memtable.
mem->Add(++seq, kTypeDeletion, "key1", ""); ASSERT_OK(mem->Add(++seq, kTypeDeletion, "key1", ""));
mem->Add(++seq, kTypeValue, "key2", "value2"); ASSERT_OK(mem->Add(++seq, kTypeValue, "key2", "value2"));
mem->Add(++seq, kTypeValue, "key2", "value2.2"); ASSERT_OK(mem->Add(++seq, kTypeValue, "key2", "value2.2"));
// Fetch the newly written keys // Fetch the newly written keys
merge_context.Clear(); merge_context.Clear();
@ -443,8 +443,8 @@ TEST_F(MemTableListTest, GetFromHistoryTest) {
kMaxSequenceNumber, 0 /* column_family_id */); kMaxSequenceNumber, 0 /* column_family_id */);
mem2->Ref(); mem2->Ref();
mem2->Add(++seq, kTypeDeletion, "key1", ""); ASSERT_OK(mem2->Add(++seq, kTypeDeletion, "key1", ""));
mem2->Add(++seq, kTypeValue, "key3", "value3"); ASSERT_OK(mem2->Add(++seq, kTypeValue, "key3", "value3"));
// Add second memtable to list // Add second memtable to list
list.Add(mem2, &to_delete); list.Add(mem2, &to_delete);
@ -554,11 +554,11 @@ TEST_F(MemTableListTest, FlushPendingTest) {
std::string value; std::string value;
MergeContext merge_context; MergeContext merge_context;
mem->Add(++seq, kTypeValue, "key1", ToString(i)); ASSERT_OK(mem->Add(++seq, kTypeValue, "key1", ToString(i)));
mem->Add(++seq, kTypeValue, "keyN" + ToString(i), "valueN"); ASSERT_OK(mem->Add(++seq, kTypeValue, "keyN" + ToString(i), "valueN"));
mem->Add(++seq, kTypeValue, "keyX" + ToString(i), "value"); ASSERT_OK(mem->Add(++seq, kTypeValue, "keyX" + ToString(i), "value"));
mem->Add(++seq, kTypeValue, "keyM" + ToString(i), "valueM"); ASSERT_OK(mem->Add(++seq, kTypeValue, "keyM" + ToString(i), "valueM"));
mem->Add(++seq, kTypeDeletion, "keyX" + ToString(i), ""); ASSERT_OK(mem->Add(++seq, kTypeDeletion, "keyX" + ToString(i), ""));
tables.push_back(mem); tables.push_back(mem);
} }
@ -823,11 +823,11 @@ TEST_F(MemTableListTest, AtomicFlusTest) {
std::string value; std::string value;
mem->Add(++seq, kTypeValue, "key1", ToString(i)); ASSERT_OK(mem->Add(++seq, kTypeValue, "key1", ToString(i)));
mem->Add(++seq, kTypeValue, "keyN" + ToString(i), "valueN"); ASSERT_OK(mem->Add(++seq, kTypeValue, "keyN" + ToString(i), "valueN"));
mem->Add(++seq, kTypeValue, "keyX" + ToString(i), "value"); ASSERT_OK(mem->Add(++seq, kTypeValue, "keyX" + ToString(i), "value"));
mem->Add(++seq, kTypeValue, "keyM" + ToString(i), "valueM"); ASSERT_OK(mem->Add(++seq, kTypeValue, "keyM" + ToString(i), "valueM"));
mem->Add(++seq, kTypeDeletion, "keyX" + ToString(i), ""); ASSERT_OK(mem->Add(++seq, kTypeDeletion, "keyX" + ToString(i), ""));
elem.push_back(mem); elem.push_back(mem);
} }

@ -1425,19 +1425,21 @@ class MemTableInserter : public WriteBatch::Handler {
Status ret_status; Status ret_status;
if (UNLIKELY(!SeekToColumnFamily(column_family_id, &ret_status))) { if (UNLIKELY(!SeekToColumnFamily(column_family_id, &ret_status))) {
bool batch_boundry = false; if (ret_status.ok() && rebuilding_trx_ != nullptr) {
if (rebuilding_trx_ != nullptr) {
assert(!write_after_commit_); assert(!write_after_commit_);
// The CF is probably flushed and hence no need for insert but we still // The CF is probably flushed and hence no need for insert but we still
// need to keep track of the keys for upcoming rollback/commit. // need to keep track of the keys for upcoming rollback/commit.
ret_status = WriteBatchInternal::Put(rebuilding_trx_, column_family_id, ret_status = WriteBatchInternal::Put(rebuilding_trx_, column_family_id,
key, value); key, value);
assert(ret_status.ok()); if (ret_status.ok()) {
batch_boundry = IsDuplicateKeySeq(column_family_id, key); MaybeAdvanceSeq(IsDuplicateKeySeq(column_family_id, key));
}
} else if (ret_status.ok()) {
MaybeAdvanceSeq(false /* batch_boundary */);
} }
MaybeAdvanceSeq(batch_boundry);
return ret_status; return ret_status;
} }
assert(ret_status.ok());
MemTable* mem = cf_mems_->GetMemTable(); MemTable* mem = cf_mems_->GetMemTable();
auto* moptions = mem->GetImmutableMemTableOptions(); auto* moptions = mem->GetImmutableMemTableOptions();
@ -1445,23 +1447,17 @@ class MemTableInserter : public WriteBatch::Handler {
// any kind of transactions including the ones that use seq_per_batch // any kind of transactions including the ones that use seq_per_batch
assert(!seq_per_batch_ || !moptions->inplace_update_support); assert(!seq_per_batch_ || !moptions->inplace_update_support);
if (!moptions->inplace_update_support) { if (!moptions->inplace_update_support) {
bool mem_res = ret_status =
mem->Add(sequence_, value_type, key, value, mem->Add(sequence_, value_type, key, value,
concurrent_memtable_writes_, get_post_process_info(mem), concurrent_memtable_writes_, get_post_process_info(mem),
hint_per_batch_ ? &GetHintMap()[mem] : nullptr); hint_per_batch_ ? &GetHintMap()[mem] : nullptr);
if (UNLIKELY(!mem_res)) {
assert(seq_per_batch_);
ret_status = Status::TryAgain("key+seq exists");
const bool BATCH_BOUNDRY = true;
MaybeAdvanceSeq(BATCH_BOUNDRY);
}
} else if (moptions->inplace_callback == nullptr) { } else if (moptions->inplace_callback == nullptr) {
assert(!concurrent_memtable_writes_); assert(!concurrent_memtable_writes_);
mem->Update(sequence_, key, value); ret_status = mem->Update(sequence_, key, value);
} else { } else {
assert(!concurrent_memtable_writes_); assert(!concurrent_memtable_writes_);
if (mem->UpdateCallback(sequence_, key, value)) { ret_status = mem->UpdateCallback(sequence_, key, value);
} else { if (ret_status.IsNotFound()) {
// key not found in memtable. Do sst get, update, add // key not found in memtable. Do sst get, update, add
SnapshotImpl read_from_snapshot; SnapshotImpl read_from_snapshot;
read_from_snapshot.number_ = sequence_; read_from_snapshot.number_ = sequence_;
@ -1475,50 +1471,69 @@ class MemTableInserter : public WriteBatch::Handler {
std::string merged_value; std::string merged_value;
auto cf_handle = cf_mems_->GetColumnFamilyHandle(); auto cf_handle = cf_mems_->GetColumnFamilyHandle();
Status s = Status::NotSupported(); Status get_status = Status::NotSupported();
if (db_ != nullptr && recovering_log_number_ == 0) { if (db_ != nullptr && recovering_log_number_ == 0) {
if (cf_handle == nullptr) { if (cf_handle == nullptr) {
cf_handle = db_->DefaultColumnFamily(); cf_handle = db_->DefaultColumnFamily();
} }
s = db_->Get(ropts, cf_handle, key, &prev_value); get_status = db_->Get(ropts, cf_handle, key, &prev_value);
}
// Intentionally overwrites the `NotFound` in `ret_status`.
if (!get_status.ok() && !get_status.IsNotFound()) {
ret_status = get_status;
} else {
ret_status = Status::OK();
} }
char* prev_buffer = const_cast<char*>(prev_value.c_str()); if (ret_status.ok()) {
uint32_t prev_size = static_cast<uint32_t>(prev_value.size()); UpdateStatus update_status;
auto status = moptions->inplace_callback(s.ok() ? prev_buffer : nullptr, char* prev_buffer = const_cast<char*>(prev_value.c_str());
s.ok() ? &prev_size : nullptr, uint32_t prev_size = static_cast<uint32_t>(prev_value.size());
value, &merged_value); if (get_status.ok()) {
if (status == UpdateStatus::UPDATED_INPLACE) { update_status = moptions->inplace_callback(prev_buffer, &prev_size,
// prev_value is updated in-place with final value. value, &merged_value);
bool mem_res __attribute__((__unused__)); } else {
mem_res = mem->Add( update_status = moptions->inplace_callback(
sequence_, value_type, key, Slice(prev_buffer, prev_size)); nullptr /* existing_value */, nullptr /* existing_value_size */,
assert(mem_res); value, &merged_value);
RecordTick(moptions->statistics, NUMBER_KEYS_WRITTEN); }
} else if (status == UpdateStatus::UPDATED) { if (update_status == UpdateStatus::UPDATED_INPLACE) {
// merged_value contains the final value. assert(get_status.ok());
bool mem_res __attribute__((__unused__)); // prev_value is updated in-place with final value.
mem_res = ret_status = mem->Add(sequence_, value_type, key,
mem->Add(sequence_, value_type, key, Slice(merged_value)); Slice(prev_buffer, prev_size));
assert(mem_res); if (ret_status.ok()) {
RecordTick(moptions->statistics, NUMBER_KEYS_WRITTEN); RecordTick(moptions->statistics, NUMBER_KEYS_WRITTEN);
}
} else if (update_status == UpdateStatus::UPDATED) {
// merged_value contains the final value.
ret_status =
mem->Add(sequence_, value_type, key, Slice(merged_value));
if (ret_status.ok()) {
RecordTick(moptions->statistics, NUMBER_KEYS_WRITTEN);
}
}
} }
} }
} }
if (UNLIKELY(ret_status.IsTryAgain())) {
assert(seq_per_batch_);
const bool kBatchBoundary = true;
MaybeAdvanceSeq(kBatchBoundary);
} else if (ret_status.ok()) {
MaybeAdvanceSeq();
CheckMemtableFull();
}
// optimize for non-recovery mode // optimize for non-recovery mode
if (UNLIKELY(!ret_status.IsTryAgain() && rebuilding_trx_ != nullptr)) { // If `ret_status` is `TryAgain` then the next (successful) try will add
// the key to the rebuilding transaction object. If `ret_status` is
// another non-OK `Status`, then the `rebuilding_trx_` will be thrown
// away. So we only need to add to it when `ret_status.ok()`.
if (UNLIKELY(ret_status.ok() && rebuilding_trx_ != nullptr)) {
assert(!write_after_commit_); assert(!write_after_commit_);
// If the ret_status is TryAgain then let the next try to add the ky to
// the rebuilding transaction object.
ret_status = WriteBatchInternal::Put(rebuilding_trx_, column_family_id, ret_status = WriteBatchInternal::Put(rebuilding_trx_, column_family_id,
key, value); key, value);
assert(ret_status.ok());
} }
// Since all Puts are logged in transaction logs (if enabled), always bump
// sequence number. Even if the update eventually fails and does not result
// in memtable add/update.
MaybeAdvanceSeq();
CheckMemtableFull();
return ret_status; return ret_status;
} }
@ -1531,18 +1546,18 @@ class MemTableInserter : public WriteBatch::Handler {
const Slice& value, ValueType delete_type) { const Slice& value, ValueType delete_type) {
Status ret_status; Status ret_status;
MemTable* mem = cf_mems_->GetMemTable(); MemTable* mem = cf_mems_->GetMemTable();
bool mem_res = ret_status =
mem->Add(sequence_, delete_type, key, value, mem->Add(sequence_, delete_type, key, value,
concurrent_memtable_writes_, get_post_process_info(mem), concurrent_memtable_writes_, get_post_process_info(mem),
hint_per_batch_ ? &GetHintMap()[mem] : nullptr); hint_per_batch_ ? &GetHintMap()[mem] : nullptr);
if (UNLIKELY(!mem_res)) { if (UNLIKELY(ret_status.IsTryAgain())) {
assert(seq_per_batch_); assert(seq_per_batch_);
ret_status = Status::TryAgain("key+seq exists"); const bool kBatchBoundary = true;
const bool BATCH_BOUNDRY = true; MaybeAdvanceSeq(kBatchBoundary);
MaybeAdvanceSeq(BATCH_BOUNDRY); } else if (ret_status.ok()) {
MaybeAdvanceSeq();
CheckMemtableFull();
} }
MaybeAdvanceSeq();
CheckMemtableFull();
return ret_status; return ret_status;
} }
@ -1555,17 +1570,18 @@ class MemTableInserter : public WriteBatch::Handler {
Status ret_status; Status ret_status;
if (UNLIKELY(!SeekToColumnFamily(column_family_id, &ret_status))) { if (UNLIKELY(!SeekToColumnFamily(column_family_id, &ret_status))) {
bool batch_boundry = false; if (ret_status.ok() && rebuilding_trx_ != nullptr) {
if (rebuilding_trx_ != nullptr) {
assert(!write_after_commit_); assert(!write_after_commit_);
// The CF is probably flushed and hence no need for insert but we still // The CF is probably flushed and hence no need for insert but we still
// need to keep track of the keys for upcoming rollback/commit. // need to keep track of the keys for upcoming rollback/commit.
ret_status = ret_status =
WriteBatchInternal::Delete(rebuilding_trx_, column_family_id, key); WriteBatchInternal::Delete(rebuilding_trx_, column_family_id, key);
assert(ret_status.ok()); if (ret_status.ok()) {
batch_boundry = IsDuplicateKeySeq(column_family_id, key); MaybeAdvanceSeq(IsDuplicateKeySeq(column_family_id, key));
}
} else if (ret_status.ok()) {
MaybeAdvanceSeq(false /* batch_boundary */);
} }
MaybeAdvanceSeq(batch_boundry);
return ret_status; return ret_status;
} }
@ -1578,10 +1594,12 @@ class MemTableInserter : public WriteBatch::Handler {
(0 == ts_sz) ? kTypeDeletion : kTypeDeletionWithTimestamp; (0 == ts_sz) ? kTypeDeletion : kTypeDeletionWithTimestamp;
ret_status = DeleteImpl(column_family_id, key, Slice(), delete_type); ret_status = DeleteImpl(column_family_id, key, Slice(), delete_type);
// optimize for non-recovery mode // optimize for non-recovery mode
if (UNLIKELY(!ret_status.IsTryAgain() && rebuilding_trx_ != nullptr)) { // If `ret_status` is `TryAgain` then the next (successful) try will add
// the key to the rebuilding transaction object. If `ret_status` is
// another non-OK `Status`, then the `rebuilding_trx_` will be thrown
// away. So we only need to add to it when `ret_status.ok()`.
if (UNLIKELY(ret_status.ok() && rebuilding_trx_ != nullptr)) {
assert(!write_after_commit_); assert(!write_after_commit_);
// If the ret_status is TryAgain then let the next try to add the ky to
// the rebuilding transaction object.
ret_status = ret_status =
WriteBatchInternal::Delete(rebuilding_trx_, column_family_id, key); WriteBatchInternal::Delete(rebuilding_trx_, column_family_id, key);
} }
@ -1598,27 +1616,31 @@ class MemTableInserter : public WriteBatch::Handler {
Status ret_status; Status ret_status;
if (UNLIKELY(!SeekToColumnFamily(column_family_id, &ret_status))) { if (UNLIKELY(!SeekToColumnFamily(column_family_id, &ret_status))) {
bool batch_boundry = false; if (ret_status.ok() && rebuilding_trx_ != nullptr) {
if (rebuilding_trx_ != nullptr) {
assert(!write_after_commit_); assert(!write_after_commit_);
// The CF is probably flushed and hence no need for insert but we still // The CF is probably flushed and hence no need for insert but we still
// need to keep track of the keys for upcoming rollback/commit. // need to keep track of the keys for upcoming rollback/commit.
ret_status = WriteBatchInternal::SingleDelete(rebuilding_trx_, ret_status = WriteBatchInternal::SingleDelete(rebuilding_trx_,
column_family_id, key); column_family_id, key);
assert(ret_status.ok()); if (ret_status.ok()) {
batch_boundry = IsDuplicateKeySeq(column_family_id, key); MaybeAdvanceSeq(IsDuplicateKeySeq(column_family_id, key));
}
} else if (ret_status.ok()) {
MaybeAdvanceSeq(false /* batch_boundary */);
} }
MaybeAdvanceSeq(batch_boundry);
return ret_status; return ret_status;
} }
assert(ret_status.ok());
ret_status = ret_status =
DeleteImpl(column_family_id, key, Slice(), kTypeSingleDeletion); DeleteImpl(column_family_id, key, Slice(), kTypeSingleDeletion);
// optimize for non-recovery mode // optimize for non-recovery mode
if (UNLIKELY(!ret_status.IsTryAgain() && rebuilding_trx_ != nullptr)) { // If `ret_status` is `TryAgain` then the next (successful) try will add
// the key to the rebuilding transaction object. If `ret_status` is
// another non-OK `Status`, then the `rebuilding_trx_` will be thrown
// away. So we only need to add to it when `ret_status.ok()`.
if (UNLIKELY(ret_status.ok() && rebuilding_trx_ != nullptr)) {
assert(!write_after_commit_); assert(!write_after_commit_);
// If the ret_status is TryAgain then let the next try to add the ky to
// the rebuilding transaction object.
ret_status = WriteBatchInternal::SingleDelete(rebuilding_trx_, ret_status = WriteBatchInternal::SingleDelete(rebuilding_trx_,
column_family_id, key); column_family_id, key);
} }
@ -1636,21 +1658,22 @@ class MemTableInserter : public WriteBatch::Handler {
Status ret_status; Status ret_status;
if (UNLIKELY(!SeekToColumnFamily(column_family_id, &ret_status))) { if (UNLIKELY(!SeekToColumnFamily(column_family_id, &ret_status))) {
bool batch_boundry = false; if (ret_status.ok() && rebuilding_trx_ != nullptr) {
if (rebuilding_trx_ != nullptr) {
assert(!write_after_commit_); assert(!write_after_commit_);
// The CF is probably flushed and hence no need for insert but we still // The CF is probably flushed and hence no need for insert but we still
// need to keep track of the keys for upcoming rollback/commit. // need to keep track of the keys for upcoming rollback/commit.
ret_status = WriteBatchInternal::DeleteRange( ret_status = WriteBatchInternal::DeleteRange(
rebuilding_trx_, column_family_id, begin_key, end_key); rebuilding_trx_, column_family_id, begin_key, end_key);
assert(ret_status.ok()); if (ret_status.ok()) {
// TODO(myabandeh): when transactional DeleteRange support is added, MaybeAdvanceSeq(IsDuplicateKeySeq(column_family_id, begin_key));
// check if end_key must also be added. }
batch_boundry = IsDuplicateKeySeq(column_family_id, begin_key); } else if (ret_status.ok()) {
MaybeAdvanceSeq(false /* batch_boundary */);
} }
MaybeAdvanceSeq(batch_boundry);
return ret_status; return ret_status;
} }
assert(ret_status.ok());
if (db_ != nullptr) { if (db_ != nullptr) {
auto cf_handle = cf_mems_->GetColumnFamilyHandle(); auto cf_handle = cf_mems_->GetColumnFamilyHandle();
if (cf_handle == nullptr) { if (cf_handle == nullptr) {
@ -1659,6 +1682,8 @@ class MemTableInserter : public WriteBatch::Handler {
auto* cfd = auto* cfd =
static_cast_with_check<ColumnFamilyHandleImpl>(cf_handle)->cfd(); static_cast_with_check<ColumnFamilyHandleImpl>(cf_handle)->cfd();
if (!cfd->is_delete_range_supported()) { if (!cfd->is_delete_range_supported()) {
// TODO(ajkr): refactor `SeekToColumnFamily()` so it returns a `Status`.
ret_status.PermitUncheckedError();
return Status::NotSupported( return Status::NotSupported(
std::string("DeleteRange not supported for table type ") + std::string("DeleteRange not supported for table type ") +
cfd->ioptions()->table_factory->Name() + " in CF " + cfd->ioptions()->table_factory->Name() + " in CF " +
@ -1666,10 +1691,14 @@ class MemTableInserter : public WriteBatch::Handler {
} }
int cmp = cfd->user_comparator()->Compare(begin_key, end_key); int cmp = cfd->user_comparator()->Compare(begin_key, end_key);
if (cmp > 0) { if (cmp > 0) {
// TODO(ajkr): refactor `SeekToColumnFamily()` so it returns a `Status`.
ret_status.PermitUncheckedError();
// It's an empty range where endpoints appear mistaken. Don't bother // It's an empty range where endpoints appear mistaken. Don't bother
// applying it to the DB, and return an error to the user. // applying it to the DB, and return an error to the user.
return Status::InvalidArgument("end key comes before start key"); return Status::InvalidArgument("end key comes before start key");
} else if (cmp == 0) { } else if (cmp == 0) {
// TODO(ajkr): refactor `SeekToColumnFamily()` so it returns a `Status`.
ret_status.PermitUncheckedError();
// It's an empty range. Don't bother applying it to the DB. // It's an empty range. Don't bother applying it to the DB.
return Status::OK(); return Status::OK();
} }
@ -1678,10 +1707,12 @@ class MemTableInserter : public WriteBatch::Handler {
ret_status = ret_status =
DeleteImpl(column_family_id, begin_key, end_key, kTypeRangeDeletion); DeleteImpl(column_family_id, begin_key, end_key, kTypeRangeDeletion);
// optimize for non-recovery mode // optimize for non-recovery mode
// If `ret_status` is `TryAgain` then the next (successful) try will add
// the key to the rebuilding transaction object. If `ret_status` is
// another non-OK `Status`, then the `rebuilding_trx_` will be thrown
// away. So we only need to add to it when `ret_status.ok()`.
if (UNLIKELY(!ret_status.IsTryAgain() && rebuilding_trx_ != nullptr)) { if (UNLIKELY(!ret_status.IsTryAgain() && rebuilding_trx_ != nullptr)) {
assert(!write_after_commit_); assert(!write_after_commit_);
// If the ret_status is TryAgain then let the next try to add the ky to
// the rebuilding transaction object.
ret_status = WriteBatchInternal::DeleteRange( ret_status = WriteBatchInternal::DeleteRange(
rebuilding_trx_, column_family_id, begin_key, end_key); rebuilding_trx_, column_family_id, begin_key, end_key);
} }
@ -1699,19 +1730,21 @@ class MemTableInserter : public WriteBatch::Handler {
Status ret_status; Status ret_status;
if (UNLIKELY(!SeekToColumnFamily(column_family_id, &ret_status))) { if (UNLIKELY(!SeekToColumnFamily(column_family_id, &ret_status))) {
bool batch_boundry = false; if (ret_status.ok() && rebuilding_trx_ != nullptr) {
if (rebuilding_trx_ != nullptr) {
assert(!write_after_commit_); assert(!write_after_commit_);
// The CF is probably flushed and hence no need for insert but we still // The CF is probably flushed and hence no need for insert but we still
// need to keep track of the keys for upcoming rollback/commit. // need to keep track of the keys for upcoming rollback/commit.
ret_status = WriteBatchInternal::Merge(rebuilding_trx_, ret_status = WriteBatchInternal::Merge(rebuilding_trx_,
column_family_id, key, value); column_family_id, key, value);
assert(ret_status.ok()); if (ret_status.ok()) {
batch_boundry = IsDuplicateKeySeq(column_family_id, key); MaybeAdvanceSeq(IsDuplicateKeySeq(column_family_id, key));
}
} else if (ret_status.ok()) {
MaybeAdvanceSeq(false /* batch_boundary */);
} }
MaybeAdvanceSeq(batch_boundry);
return ret_status; return ret_status;
} }
assert(ret_status.ok());
MemTable* mem = cf_mems_->GetMemTable(); MemTable* mem = cf_mems_->GetMemTable();
auto* moptions = mem->GetImmutableMemTableOptions(); auto* moptions = mem->GetImmutableMemTableOptions();
@ -1756,60 +1789,60 @@ class MemTableInserter : public WriteBatch::Handler {
if (cf_handle == nullptr) { if (cf_handle == nullptr) {
cf_handle = db_->DefaultColumnFamily(); cf_handle = db_->DefaultColumnFamily();
} }
db_->Get(read_options, cf_handle, key, &get_value); Status get_status = db_->Get(read_options, cf_handle, key, &get_value);
Slice get_value_slice = Slice(get_value); if (!get_status.ok()) {
// Failed to read a key we know exists. Store the delta in memtable.
// 2) Apply this merge
auto merge_operator = moptions->merge_operator;
assert(merge_operator);
std::string new_value;
Status merge_status = MergeHelper::TimedFullMerge(
merge_operator, key, &get_value_slice, {value}, &new_value,
moptions->info_log, moptions->statistics, Env::Default());
if (!merge_status.ok()) {
// Failed to merge!
// Store the delta in memtable
perform_merge = false; perform_merge = false;
} else { } else {
// 3) Add value to memtable Slice get_value_slice = Slice(get_value);
assert(!concurrent_memtable_writes_);
bool mem_res = mem->Add(sequence_, kTypeValue, key, new_value); // 2) Apply this merge
if (UNLIKELY(!mem_res)) { auto merge_operator = moptions->merge_operator;
assert(seq_per_batch_); assert(merge_operator);
ret_status = Status::TryAgain("key+seq exists");
const bool BATCH_BOUNDRY = true; std::string new_value;
MaybeAdvanceSeq(BATCH_BOUNDRY);
Status merge_status = MergeHelper::TimedFullMerge(
merge_operator, key, &get_value_slice, {value}, &new_value,
moptions->info_log, moptions->statistics, Env::Default());
if (!merge_status.ok()) {
// Failed to merge!
// Store the delta in memtable
perform_merge = false;
} else {
// 3) Add value to memtable
assert(!concurrent_memtable_writes_);
ret_status = mem->Add(sequence_, kTypeValue, key, new_value);
} }
} }
} }
if (!perform_merge) { if (!perform_merge) {
// Add merge operator to memtable // Add merge operand to memtable
bool mem_res = ret_status =
mem->Add(sequence_, kTypeMerge, key, value, mem->Add(sequence_, kTypeMerge, key, value,
concurrent_memtable_writes_, get_post_process_info(mem)); concurrent_memtable_writes_, get_post_process_info(mem));
if (UNLIKELY(!mem_res)) {
assert(seq_per_batch_);
ret_status = Status::TryAgain("key+seq exists");
const bool BATCH_BOUNDRY = true;
MaybeAdvanceSeq(BATCH_BOUNDRY);
}
} }
if (UNLIKELY(ret_status.IsTryAgain())) {
assert(seq_per_batch_);
const bool kBatchBoundary = true;
MaybeAdvanceSeq(kBatchBoundary);
} else if (ret_status.ok()) {
MaybeAdvanceSeq();
CheckMemtableFull();
}
// optimize for non-recovery mode // optimize for non-recovery mode
if (UNLIKELY(!ret_status.IsTryAgain() && rebuilding_trx_ != nullptr)) { // If `ret_status` is `TryAgain` then the next (successful) try will add
// the key to the rebuilding transaction object. If `ret_status` is
// another non-OK `Status`, then the `rebuilding_trx_` will be thrown
// away. So we only need to add to it when `ret_status.ok()`.
if (UNLIKELY(ret_status.ok() && rebuilding_trx_ != nullptr)) {
assert(!write_after_commit_); assert(!write_after_commit_);
// If the ret_status is TryAgain then let the next try to add the ky to
// the rebuilding transaction object.
ret_status = WriteBatchInternal::Merge(rebuilding_trx_, column_family_id, ret_status = WriteBatchInternal::Merge(rebuilding_trx_, column_family_id,
key, value); key, value);
assert(ret_status.ok());
} }
MaybeAdvanceSeq();
CheckMemtableFull();
return ret_status; return ret_status;
} }

@ -504,7 +504,10 @@ class MemTableConstructor: public Constructor {
memtable_->Ref(); memtable_->Ref();
int seq = 1; int seq = 1;
for (const auto& kv : kv_map) { for (const auto& kv : kv_map) {
memtable_->Add(seq, kTypeValue, kv.first, kv.second); Status s = memtable_->Add(seq, kTypeValue, kv.first, kv.second);
if (!s.ok()) {
return s;
}
seq++; seq++;
} }
return Status::OK(); return Status::OK();

Loading…
Cancel
Save