From e9ba4ba348ccfd18faffdd9fa439e82053a0d73c Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Thu, 7 May 2020 11:53:32 -0700 Subject: [PATCH] validate range tombstone covers positive range (#6788) Summary: We found some files containing nothing but negative range tombstones, and unsurprisingly their metadata specified a negative range, which made things crash. Time to add a bit of user input validation. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6788 Reviewed By: zhichao-cao Differential Revision: D21343719 Pulled By: ajkr fbshipit-source-id: f1c16e4c3e9fa150958c8c866176632a3206fb74 --- HISTORY.md | 1 + db/db_range_del_test.cc | 15 +++++++++++++++ db/write_batch.cc | 9 +++++++++ include/rocksdb/db.h | 7 +++++-- 4 files changed, 30 insertions(+), 2 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index e05e339f0..6eb8563ca 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -22,6 +22,7 @@ * Add a ConfigOptions argument to the APIs dealing with converting options to and from strings and files. The ConfigOptions is meant to replace some of the options (such as input_strings_escaped and ignore_unknown_options) and allow for more parameters to be passed in the future without changing the function signature. * Add NewFileChecksumGenCrc32cFactory to the file checksum public API, such that the builtin Crc32c based file checksum generator factory can be used by applications. * Add IsDirectory to Env and FS to indicate if a path is a directory. +* DeleteRange now returns `Status::InvalidArgument` if the range's end key comes before its start key according to the user comparator. Previously the behavior was undefined. ### New Features * Added support for pipelined & parallel compression optimization for `BlockBasedTableBuilder`. This optimization makes block building, block compression and block appending a pipeline, and uses multiple threads to accelerate block compression. Users can set `CompressionOptions::parallel_threads` greater than 1 to enable compression parallelism. This feature is experimental for now. diff --git a/db/db_range_del_test.cc b/db/db_range_del_test.cc index 15225875d..b8421cf3f 100644 --- a/db/db_range_del_test.cc +++ b/db/db_range_del_test.cc @@ -47,6 +47,21 @@ TEST_F(DBRangeDelTest, WriteBatchWithIndexNotSupported) { ASSERT_TRUE(indexedBatch.DeleteRange("dr1", "dr1").IsNotSupported()); } +TEST_F(DBRangeDelTest, EndSameAsStartCoversNothing) { + ASSERT_OK(db_->Put(WriteOptions(), "b", "val")); + ASSERT_OK( + db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "b", "b")); + ASSERT_EQ("val", Get("b")); +} + +TEST_F(DBRangeDelTest, EndComesBeforeStartInvalidArgument) { + db_->Put(WriteOptions(), "b", "val"); + ASSERT_TRUE( + db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "b", "a") + .IsInvalidArgument()); + ASSERT_EQ("val", Get("b")); +} + TEST_F(DBRangeDelTest, FlushOutputHasOnlyRangeTombstones) { do { DestroyAndReopen(CurrentOptions()); diff --git a/db/write_batch.cc b/db/write_batch.cc index 725602467..77715a208 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -1630,6 +1630,15 @@ class MemTableInserter : public WriteBatch::Handler { cfd->ioptions()->table_factory->Name() + " in CF " + cfd->GetName()); } + int cmp = cfd->user_comparator()->Compare(begin_key, end_key); + if (cmp > 0) { + // It's an empty range where endpoints appear mistaken. Don't bother + // applying it to the DB, and return an error to the user. + return Status::InvalidArgument("end key comes before start key"); + } else if (cmp == 0) { + // It's an empty range. Don't bother applying it to the DB. + return Status::OK(); + } } auto ret_status = diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index de35c1290..b199a476b 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -356,8 +356,11 @@ class DB { // Removes the database entries in the range ["begin_key", "end_key"), i.e., // including "begin_key" and excluding "end_key". Returns OK on success, and - // a non-OK status on error. It is not an error if no keys exist in the range - // ["begin_key", "end_key"). + // a non-OK status on error. It is not an error if the database does not + // contain any existing data in the range ["begin_key", "end_key"). + // + // If "end_key" comes before "start_key" according to the user's comparator, + // a `Status::InvalidArgument` is returned. // // This feature is now usable in production, with the following caveats: // 1) Accumulating many range tombstones in the memtable will degrade read