From 0a774a102f0be845007e3927dcda228716a771d2 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Thu, 20 Apr 2023 13:02:16 -0700 Subject: [PATCH] Clarify `SstFileWriter::DeleteRange()` ordering requirements (#11390) Summary: As titled Pull Request resolved: https://github.com/facebook/rocksdb/pull/11390 Reviewed By: cbi42 Differential Revision: D45148830 Pulled By: ajkr fbshipit-source-id: 9a8dfd040514bae3d8ed9e97a79cae7683f2749a --- db/external_sst_file_basic_test.cc | 58 +++++++++++++++++++ include/rocksdb/sst_file_writer.h | 43 +++++++++----- table/block_based/block_based_table_builder.h | 4 +- table/block_based/block_builder.h | 6 +- 4 files changed, 92 insertions(+), 19 deletions(-) diff --git a/db/external_sst_file_basic_test.cc b/db/external_sst_file_basic_test.cc index 950595fa1..39334994a 100644 --- a/db/external_sst_file_basic_test.cc +++ b/db/external_sst_file_basic_test.cc @@ -1419,6 +1419,7 @@ TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) { ASSERT_EQ(4, NumTableFilesAtLevel(0)); ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2)); ASSERT_EQ(2, NumTableFilesAtLevel(options.num_levels - 1)); + VerifyDBFromMap(true_data); } TEST_F(ExternalSSTFileBasicTest, AdjacentRangeDeletionTombstones) { @@ -1463,6 +1464,63 @@ TEST_F(ExternalSSTFileBasicTest, AdjacentRangeDeletionTombstones) { DestroyAndRecreateExternalSSTFilesDir(); } +TEST_F(ExternalSSTFileBasicTest, UnorderedRangeDeletions) { + int kNumLevels = 7; + Options options = CurrentOptions(); + options.disable_auto_compactions = true; + options.num_levels = kNumLevels; + Reopen(options); + + std::map true_data; + int file_id = 1; + + // prevent range deletions from being dropped due to becoming obsolete. + const Snapshot* snapshot = db_->GetSnapshot(); + + // Range del [0, 50) in memtable + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), + Key(50))); + + // Out of order range del overlaps memtable, so flush is required before file + // is ingested into L0 + ASSERT_OK(GenerateAndAddExternalFile( + options, {60, 90}, {ValueType::kTypeValue, ValueType::kTypeValue}, + {{65, 70}, {45, 50}}, file_id++, true /* write_global_seqno */, + true /* verify_checksums_before_ingest */, &true_data)); + ASSERT_EQ(2, true_data.size()); + ASSERT_EQ(2, NumTableFilesAtLevel(0)); + ASSERT_EQ(0, NumTableFilesAtLevel(kNumLevels - 1)); + VerifyDBFromMap(true_data); + + // Compact to L6 + MoveFilesToLevel(kNumLevels - 1); + ASSERT_EQ(0, NumTableFilesAtLevel(0)); + ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 1)); + VerifyDBFromMap(true_data); + + // Ingest a file containing out of order range dels that cover nothing + ASSERT_OK(GenerateAndAddExternalFile( + options, {151, 175}, {ValueType::kTypeValue, ValueType::kTypeValue}, + {{160, 200}, {120, 180}}, file_id++, true /* write_global_seqno */, + true /* verify_checksums_before_ingest */, &true_data)); + ASSERT_EQ(4, true_data.size()); + ASSERT_EQ(0, NumTableFilesAtLevel(0)); + ASSERT_EQ(2, NumTableFilesAtLevel(kNumLevels - 1)); + VerifyDBFromMap(true_data); + + // Ingest a file containing out of order range dels that cover keys in L6 + ASSERT_OK(GenerateAndAddExternalFile( + options, {}, {}, {{190, 200}, {170, 180}, {55, 65}}, file_id++, + true /* write_global_seqno */, true /* verify_checksums_before_ingest */, + &true_data)); + ASSERT_EQ(2, true_data.size()); + ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2)); + ASSERT_EQ(2, NumTableFilesAtLevel(kNumLevels - 1)); + VerifyDBFromMap(true_data); + + db_->ReleaseSnapshot(snapshot); +} + TEST_F(ExternalSSTFileBasicTest, RangeDeletionEndComesBeforeStart) { Options options = CurrentOptions(); SstFileWriter sst_file_writer(EnvOptions(), options); diff --git a/include/rocksdb/sst_file_writer.h b/include/rocksdb/sst_file_writer.h index b44633a7f..fb2806865 100644 --- a/include/rocksdb/sst_file_writer.h +++ b/include/rocksdb/sst_file_writer.h @@ -110,53 +110,64 @@ class SstFileWriter { Status Open(const std::string& file_path); // Add a Put key with value to currently opened file (deprecated) - // REQUIRES: key is after any previously added key according to comparator. + // REQUIRES: user_key is after any previously added point (Put/Merge/Delete) + // key according to the comparator. // REQUIRES: comparator is *not* timestamp-aware. ROCKSDB_DEPRECATED_FUNC Status Add(const Slice& user_key, const Slice& value); // Add a Put key with value to currently opened file - // REQUIRES: key is after any previously added key according to comparator. + // REQUIRES: user_key is after any previously added point (Put/Merge/Delete) + // key according to the comparator. // REQUIRES: comparator is *not* timestamp-aware. Status Put(const Slice& user_key, const Slice& value); // Add a Put (key with timestamp, value) to the currently opened file - // REQUIRES: key is after any previously added key according to the - // comparator. - // REQUIRES: the timestamp's size is equal to what is expected by - // the comparator. + // REQUIRES: user_key is after any previously added point (Put/Merge/Delete) + // key according to the comparator. + // REQUIRES: timestamp's size is equal to what is expected by the comparator. Status Put(const Slice& user_key, const Slice& timestamp, const Slice& value); // Add a Merge key with value to currently opened file - // REQUIRES: key is after any previously added key according to comparator. + // REQUIRES: user_key is after any previously added point (Put/Merge/Delete) + // key according to the comparator. // REQUIRES: comparator is *not* timestamp-aware. Status Merge(const Slice& user_key, const Slice& value); // Add a deletion key to currently opened file - // REQUIRES: key is after any previously added key according to comparator. + // REQUIRES: user_key is after any previously added point (Put/Merge/Delete) + // key according to the comparator. // REQUIRES: comparator is *not* timestamp-aware. Status Delete(const Slice& user_key); // Add a deletion key with timestamp to the currently opened file - // REQUIRES: key is after any previously added key according to the - // comparator. - // REQUIRES: the timestamp's size is equal to what is expected by - // the comparator. + // REQUIRES: user_key is after any previously added point (Put/Merge/Delete) + // key according to the comparator. + // REQUIRES: timestamp's size is equal to what is expected by the comparator. Status Delete(const Slice& user_key, const Slice& timestamp); // Add a range deletion tombstone to currently opened file. Such a range - // deletion tombstone does NOT delete other (point) keys in the same file. + // deletion tombstone does NOT delete point (Put/Merge/Delete) keys in the + // same file. + // + // Range deletion tombstones may be added in any order, both with respect to + // each other and with respect to the point (Put/Merge/Delete) keys in the + // same file. // // REQUIRES: The comparator orders `begin_key` at or before `end_key` // REQUIRES: comparator is *not* timestamp-aware. Status DeleteRange(const Slice& begin_key, const Slice& end_key); // Add a range deletion tombstone to currently opened file. Such a range - // deletion tombstone does NOT delete other (point) keys in the same file. + // deletion tombstone does NOT delete point (Put/Merge/Delete) keys in the + // same file. + // + // Range deletion tombstones may be added in any order, both with respect to + // each other and with respect to the point (Put/Merge/Delete) keys in the + // same file. // // REQUIRES: begin_key and end_key are user keys without timestamp. // REQUIRES: The comparator orders `begin_key` at or before `end_key` - // REQUIRES: the timestamp's size is equal to what is expected by - // the comparator. + // REQUIRES: timestamp's size is equal to what is expected by the comparator. Status DeleteRange(const Slice& begin_key, const Slice& end_key, const Slice& timestamp); diff --git a/table/block_based/block_based_table_builder.h b/table/block_based/block_based_table_builder.h index 7cf33953a..32ca30d1b 100644 --- a/table/block_based/block_based_table_builder.h +++ b/table/block_based/block_based_table_builder.h @@ -53,7 +53,9 @@ class BlockBasedTableBuilder : public TableBuilder { ~BlockBasedTableBuilder(); // Add key,value to the table being constructed. - // REQUIRES: key is after any previously added key according to comparator. + // REQUIRES: Unless key has type kTypeRangeDeletion, key is after any + // previously added non-kTypeRangeDeletion key according to + // comparator. // REQUIRES: Finish(), Abandon() have not been called void Add(const Slice& key, const Slice& value) override; diff --git a/table/block_based/block_builder.h b/table/block_based/block_builder.h index 5f68b449b..7cb94b311 100644 --- a/table/block_based/block_builder.h +++ b/table/block_based/block_builder.h @@ -37,7 +37,8 @@ class BlockBuilder { void SwapAndReset(std::string& buffer); // REQUIRES: Finish() has not been called since the last call to Reset(). - // REQUIRES: key is larger than any previously added key + // REQUIRES: Unless a range tombstone block, key is larger than any previously + // added key // DO NOT mix with AddWithLastKey() between Resets. For efficiency, use // AddWithLastKey() in contexts where previous added key is already known // and delta encoding might be used. @@ -47,7 +48,8 @@ class BlockBuilder { // A faster version of Add() if the previous key is already known for all // Add()s. // REQUIRES: Finish() has not been called since the last call to Reset(). - // REQUIRES: key is larger than any previously added key + // REQUIRES: Unless a range tombstone block, key is larger than any previously + // added key // REQUIRES: if AddWithLastKey has been called since last Reset(), last_key // is the key from most recent AddWithLastKey. (For convenience, last_key // is ignored on first call after creation or Reset().)