Enforce the contract of SingleDelete (#9888)

Summary:
Enforce the contract of SingleDelete so that they are not mixed with
Delete for the same key. Otherwise, it will lead to undefined behavior.
See https://github.com/facebook/rocksdb/wiki/Single-Delete#notes.

Also fix unit tests and write-unprepared.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9888

Test Plan: make check

Reviewed By: ajkr

Differential Revision: D35837817

Pulled By: riversand963

fbshipit-source-id: acd06e4dcba8cb18df92b44ed18c57e10e5a7635
main
Yanqin Jin 3 years ago committed by Facebook GitHub Bot
parent aafb377bb5
commit 2b5c29f9f3
  1. 3
      HISTORY.md
  2. 26
      db/compaction/compaction_iterator.cc
  3. 24
      db/compaction/compaction_job_test.cc
  4. 6
      test_util/transaction_test_util.cc
  5. 17
      test_util/transaction_test_util.h
  6. 4
      utilities/transactions/transaction_test.cc
  7. 2
      utilities/transactions/write_prepared_transaction_test.cc
  8. 4
      utilities/transactions/write_unprepared_txn.cc

@ -17,6 +17,9 @@
* RocksDB calls FileSystem::Poll API during FilePrefetchBuffer destruction which impacts performance as it waits for read requets completion which is not needed anymore. Calling FileSystem::AbortIO to abort those requests instead fixes that performance issue. * RocksDB calls FileSystem::Poll API during FilePrefetchBuffer destruction which impacts performance as it waits for read requets completion which is not needed anymore. Calling FileSystem::AbortIO to abort those requests instead fixes that performance issue.
* Fixed unnecessary block cache contention when queries within a MultiGet batch and across parallel batches access the same data block, which previously could cause severely degraded performance in this unusual case. (In more typical MultiGet cases, this fix is expected to yield a small or negligible performance improvement.) * Fixed unnecessary block cache contention when queries within a MultiGet batch and across parallel batches access the same data block, which previously could cause severely degraded performance in this unusual case. (In more typical MultiGet cases, this fix is expected to yield a small or negligible performance improvement.)
### Behavior changes
* Enforce the existing contract of SingleDelete so that SingleDelete cannot be mixed with Delete because it leads to undefined behavior. Fix a number of unit tests that violate the contract but happen to pass.
## 7.2.0 (04/15/2022) ## 7.2.0 (04/15/2022)
### Bug Fixes ### Bug Fixes
* Fixed bug which caused rocksdb failure in the situation when rocksdb was accessible using UNC path * Fixed bug which caused rocksdb failure in the situation when rocksdb was accessible using UNC path

@ -625,24 +625,34 @@ void CompactionIterator::NextFromInput() {
TEST_SYNC_POINT_CALLBACK( TEST_SYNC_POINT_CALLBACK(
"CompactionIterator::NextFromInput:SingleDelete:2", nullptr); "CompactionIterator::NextFromInput:SingleDelete:2", nullptr);
if (next_ikey.type == kTypeSingleDeletion || if (next_ikey.type == kTypeSingleDeletion) {
next_ikey.type == kTypeDeletion) {
// We encountered two SingleDeletes for same key in a row. This // We encountered two SingleDeletes for same key in a row. This
// could be due to unexpected user input. If write-(un)prepared // could be due to unexpected user input. If write-(un)prepared
// transaction is used, this could also be due to releasing an old // transaction is used, this could also be due to releasing an old
// snapshot between a Put and its matching SingleDelete. // snapshot between a Put and its matching SingleDelete.
// Furthermore, if write-(un)prepared transaction is rolled back
// after prepare, we will write a Delete to cancel a prior Put. If
// old snapshot is released between a later Put and its matching
// SingleDelete, we will end up with a Delete followed by
// SingleDelete.
// Skip the first SingleDelete and let the next iteration decide // Skip the first SingleDelete and let the next iteration decide
// how to handle the second SingleDelete or Delete. // how to handle the second SingleDelete.
// First SingleDelete has been skipped since we already called // First SingleDelete has been skipped since we already called
// input_.Next(). // input_.Next().
++iter_stats_.num_record_drop_obsolete; ++iter_stats_.num_record_drop_obsolete;
++iter_stats_.num_single_del_mismatch; ++iter_stats_.num_single_del_mismatch;
} else if (next_ikey.type == kTypeDeletion) {
std::ostringstream oss;
oss << "Found SD and type: " << static_cast<int>(next_ikey.type)
<< " on the same key, violating the contract "
"of SingleDelete. Check your application to make sure the "
"application does not mix SingleDelete and Delete for "
"the same key. If you are using "
"write-prepared/write-unprepared transactions, and use "
"SingleDelete to delete certain keys, then make sure "
"TransactionDBOptions::rollback_deletion_type_callback is "
"configured properly. Mixing SD and DEL can lead to "
"undefined behaviors";
ROCKS_LOG_ERROR(info_log_, "%s", oss.str().c_str());
valid_ = false;
status_ = Status::Corruption(oss.str());
return;
} else if (!is_timestamp_eligible_for_gc) { } else if (!is_timestamp_eligible_for_gc) {
// We cannot drop the SingleDelete as timestamp is enabled, and // We cannot drop the SingleDelete as timestamp is enabled, and
// timestamp of this key is greater than or equal to // timestamp of this key is greater than or equal to

@ -890,10 +890,10 @@ TEST_F(CompactionJobTest, MultiSingleDelete) {
// -> Snapshot Put // -> Snapshot Put
// K: SDel SDel Put SDel Put Put Snapshot SDel Put SDel SDel Put SDel // K: SDel SDel Put SDel Put Put Snapshot SDel Put SDel SDel Put SDel
// -> Snapshot Put Snapshot SDel // -> Snapshot Put Snapshot SDel
// L: SDel Put Del Put SDel Snapshot Del Put Del SDel Put SDel // L: SDel Put SDel Put SDel Snapshot SDel Put SDel SDel Put SDel
// -> Snapshot SDel // -> Snapshot SDel Put SDel
// M: (Put) SDel Put Del Put SDel Snapshot Put Del SDel Put SDel Del // M: (Put) SDel Put SDel Put SDel Snapshot Put SDel SDel Put SDel SDel
// -> SDel Snapshot Del // -> SDel Snapshot Put SDel
NewDB(); NewDB();
auto file1 = mock::MakeMockFile({ auto file1 = mock::MakeMockFile({
@ -924,14 +924,14 @@ TEST_F(CompactionJobTest, MultiSingleDelete) {
{KeyStr("L", 16U, kTypeSingleDeletion), ""}, {KeyStr("L", 16U, kTypeSingleDeletion), ""},
{KeyStr("L", 15U, kTypeValue), "val"}, {KeyStr("L", 15U, kTypeValue), "val"},
{KeyStr("L", 14U, kTypeSingleDeletion), ""}, {KeyStr("L", 14U, kTypeSingleDeletion), ""},
{KeyStr("L", 13U, kTypeDeletion), ""}, {KeyStr("L", 13U, kTypeSingleDeletion), ""},
{KeyStr("L", 12U, kTypeValue), "val"}, {KeyStr("L", 12U, kTypeValue), "val"},
{KeyStr("L", 11U, kTypeDeletion), ""}, {KeyStr("L", 11U, kTypeSingleDeletion), ""},
{KeyStr("M", 16U, kTypeDeletion), ""}, {KeyStr("M", 16U, kTypeSingleDeletion), ""},
{KeyStr("M", 15U, kTypeSingleDeletion), ""}, {KeyStr("M", 15U, kTypeSingleDeletion), ""},
{KeyStr("M", 14U, kTypeValue), "val"}, {KeyStr("M", 14U, kTypeValue), "val"},
{KeyStr("M", 13U, kTypeSingleDeletion), ""}, {KeyStr("M", 13U, kTypeSingleDeletion), ""},
{KeyStr("M", 12U, kTypeDeletion), ""}, {KeyStr("M", 12U, kTypeSingleDeletion), ""},
{KeyStr("M", 11U, kTypeValue), "val"}, {KeyStr("M", 11U, kTypeValue), "val"},
}); });
AddMockFile(file1); AddMockFile(file1);
@ -972,12 +972,12 @@ TEST_F(CompactionJobTest, MultiSingleDelete) {
{KeyStr("K", 1U, kTypeSingleDeletion), ""}, {KeyStr("K", 1U, kTypeSingleDeletion), ""},
{KeyStr("L", 5U, kTypeSingleDeletion), ""}, {KeyStr("L", 5U, kTypeSingleDeletion), ""},
{KeyStr("L", 4U, kTypeValue), "val"}, {KeyStr("L", 4U, kTypeValue), "val"},
{KeyStr("L", 3U, kTypeDeletion), ""}, {KeyStr("L", 3U, kTypeSingleDeletion), ""},
{KeyStr("L", 2U, kTypeValue), "val"}, {KeyStr("L", 2U, kTypeValue), "val"},
{KeyStr("L", 1U, kTypeSingleDeletion), ""}, {KeyStr("L", 1U, kTypeSingleDeletion), ""},
{KeyStr("M", 10U, kTypeSingleDeletion), ""}, {KeyStr("M", 10U, kTypeSingleDeletion), ""},
{KeyStr("M", 7U, kTypeValue), "val"}, {KeyStr("M", 7U, kTypeValue), "val"},
{KeyStr("M", 5U, kTypeDeletion), ""}, {KeyStr("M", 5U, kTypeSingleDeletion), ""},
{KeyStr("M", 4U, kTypeValue), "val"}, {KeyStr("M", 4U, kTypeValue), "val"},
{KeyStr("M", 3U, kTypeSingleDeletion), ""}, {KeyStr("M", 3U, kTypeSingleDeletion), ""},
}); });
@ -1019,7 +1019,9 @@ TEST_F(CompactionJobTest, MultiSingleDelete) {
{KeyStr("K", 8U, kTypeValue), "val3"}, {KeyStr("K", 8U, kTypeValue), "val3"},
{KeyStr("L", 16U, kTypeSingleDeletion), ""}, {KeyStr("L", 16U, kTypeSingleDeletion), ""},
{KeyStr("L", 15U, kTypeValue), ""}, {KeyStr("L", 15U, kTypeValue), ""},
{KeyStr("M", 16U, kTypeDeletion), ""}, {KeyStr("L", 11U, kTypeSingleDeletion), ""},
{KeyStr("M", 15U, kTypeSingleDeletion), ""},
{KeyStr("M", 14U, kTypeValue), ""},
{KeyStr("M", 3U, kTypeSingleDeletion), ""}}); {KeyStr("M", 3U, kTypeSingleDeletion), ""}});
SetLastSequence(22U); SetLastSequence(22U);

@ -91,7 +91,7 @@ Status RandomTransactionInserter::DBGet(
Status s; Status s;
// Five digits (since the largest uint16_t is 65535) plus the NUL // Five digits (since the largest uint16_t is 65535) plus the NUL
// end char. // end char.
char prefix_buf[6]; char prefix_buf[6] = {0};
// Pad prefix appropriately so we can iterate over each set // Pad prefix appropriately so we can iterate over each set
assert(set_i + 1 <= 9999); assert(set_i + 1 <= 9999);
snprintf(prefix_buf, sizeof(prefix_buf), "%.4u", set_i + 1); snprintf(prefix_buf, sizeof(prefix_buf), "%.4u", set_i + 1);
@ -165,7 +165,11 @@ bool RandomTransactionInserter::DoInsert(DB* db, Transaction* txn,
// Increment key // Increment key
std::string sum = ToString(int_value + incr); std::string sum = ToString(int_value + incr);
if (txn != nullptr) { if (txn != nullptr) {
if ((set_i % 4) != 0) {
s = txn->SingleDelete(key); s = txn->SingleDelete(key);
} else {
s = txn->Delete(key);
}
if (!get_for_update && (s.IsBusy() || s.IsTimedOut())) { if (!get_for_update && (s.IsBusy() || s.IsTimedOut())) {
// If the initial get was not for update, then the key is not locked // If the initial get was not for update, then the key is not locked
// before put and put could fail due to concurrent writes. // before put and put could fail due to concurrent writes.

@ -33,6 +33,23 @@ class Random64;
// RandomTransactionInserter with similar arguments using the same DB. // RandomTransactionInserter with similar arguments using the same DB.
class RandomTransactionInserter { class RandomTransactionInserter {
public: public:
static bool RollbackDeletionTypeCallback(const Slice& key) {
// These are hard-coded atm. See how RandomTransactionInserter::DoInsert()
// determines whether to use SingleDelete or Delete for a key.
assert(key.size() >= 4);
const char* ptr = key.data();
assert(ptr);
while (ptr && ptr < key.data() + 4 && *ptr == '0') {
++ptr;
}
std::string prefix(ptr, 4 - (ptr - key.data()));
unsigned long set_i = std::stoul(prefix);
assert(set_i > 0);
assert(set_i <= 9999);
--set_i;
return ((set_i % 4) != 0);
}
// num_keys is the number of keys in each set. // num_keys is the number of keys in each set.
// num_sets is the number of sets of keys. // num_sets is the number of sets of keys.
// cmt_delay_ms is the delay between prepare (if there is any) and commit // cmt_delay_ms is the delay between prepare (if there is any) and commit

@ -5469,6 +5469,10 @@ Status TransactionStressTestInserter(
TEST_P(MySQLStyleTransactionTest, TransactionStressTest) { TEST_P(MySQLStyleTransactionTest, TransactionStressTest) {
// Small write buffer to trigger more compactions // Small write buffer to trigger more compactions
options.write_buffer_size = 1024; options.write_buffer_size = 1024;
txn_db_options.rollback_deletion_type_callback =
[](TransactionDB*, ColumnFamilyHandle*, const Slice& key) {
return RandomTransactionInserter::RollbackDeletionTypeCallback(key);
};
ASSERT_OK(ReOpenNoDelete()); ASSERT_OK(ReOpenNoDelete());
constexpr size_t num_workers = 4; // worker threads count constexpr size_t num_workers = 4; // worker threads count
constexpr size_t num_checkers = 2; // checker threads count constexpr size_t num_checkers = 2; // checker threads count

@ -3202,6 +3202,8 @@ TEST_P(WritePreparedTransactionTest, ReleaseEarliestSnapshotAfterSeqZeroing2) {
TEST_P(WritePreparedTransactionTest, SingleDeleteAfterRollback) { TEST_P(WritePreparedTransactionTest, SingleDeleteAfterRollback) {
constexpr size_t kSnapshotCacheBits = 7; // same as default constexpr size_t kSnapshotCacheBits = 7; // same as default
constexpr size_t kCommitCacheBits = 0; // minimum commit cache constexpr size_t kCommitCacheBits = 0; // minimum commit cache
txn_db_options.rollback_deletion_type_callback =
[](TransactionDB*, ColumnFamilyHandle*, const Slice&) { return true; };
UpdateTransactionDBOptions(kSnapshotCacheBits, kCommitCacheBits); UpdateTransactionDBOptions(kSnapshotCacheBits, kCommitCacheBits);
options.disable_auto_compactions = true; options.disable_auto_compactions = true;
ASSERT_OK(ReOpen()); ASSERT_OK(ReOpen());

@ -675,7 +675,11 @@ Status WriteUnpreparedTxn::WriteRollbackKeys(
s = rollback_batch->Put(cf_handle, key, pinnable_val); s = rollback_batch->Put(cf_handle, key, pinnable_val);
assert(s.ok()); assert(s.ok());
} else if (s.IsNotFound()) { } else if (s.IsNotFound()) {
if (wupt_db_->ShouldRollbackWithSingleDelete(cf_handle, key)) {
s = rollback_batch->SingleDelete(cf_handle, key);
} else {
s = rollback_batch->Delete(cf_handle, key); s = rollback_batch->Delete(cf_handle, key);
}
assert(s.ok()); assert(s.ok());
} else { } else {
return s; return s;

Loading…
Cancel
Save