Merge operator failed subcode (#11231)

Summary:
From HISTORY.md: Added a subcode of `Status::Corruption`, `Status::SubCode::kMergeOperatorFailed`, for users to identify corruption failures originating in the merge operator, as opposed to RocksDB's internally identified data corruptions.

This is a follow-up to https://github.com/facebook/rocksdb/issues/11092, where we gave users the ability to keep running a DB despite the merge operator failing. Now that the DB keeps running despite such failures, they want to be able to distinguish such failures from real corruptions.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11231

Test Plan: updated unit test

Reviewed By: akankshamahajan15

Differential Revision: D43396607

Pulled By: ajkr

fbshipit-source-id: 17fbcc779ad724dafada8abd73efd38e1c5208b9
oxigraph-8.1.1
Andrew Kryczka 2 years ago committed by Facebook GitHub Bot
parent 6aef1a05d6
commit 25e1365227
  1. 1
      HISTORY.md
  2. 6
      db/db_merge_operator_test.cc
  3. 2
      db/merge_helper.cc
  4. 3
      db/version_set.cc
  5. 4
      db/version_set_sync_and_async.h
  6. 1
      include/rocksdb/status.h
  7. 18
      table/get_context.cc
  8. 1
      table/get_context.h
  9. 7
      util/status.cc

@ -27,6 +27,7 @@
* Completely removed the following deprecated/obsolete statistics: the tickers `BLOCK_CACHE_INDEX_BYTES_EVICT`, `BLOCK_CACHE_FILTER_BYTES_EVICT`, `BLOOM_FILTER_MICROS`, `NO_FILE_CLOSES`, `STALL_L0_SLOWDOWN_MICROS`, `STALL_MEMTABLE_COMPACTION_MICROS`, `STALL_L0_NUM_FILES_MICROS`, `RATE_LIMIT_DELAY_MILLIS`, `NO_ITERATORS`, `NUMBER_FILTERED_DELETES`, `WRITE_TIMEDOUT`, `BLOB_DB_GC_NUM_KEYS_OVERWRITTEN`, `BLOB_DB_GC_NUM_KEYS_EXPIRED`, `BLOB_DB_GC_BYTES_OVERWRITTEN`, `BLOB_DB_GC_BYTES_EXPIRED`, `BLOCK_CACHE_COMPRESSION_DICT_BYTES_EVICT` as well as the histograms `STALL_L0_SLOWDOWN_COUNT`, `STALL_MEMTABLE_COMPACTION_COUNT`, `STALL_L0_NUM_FILES_COUNT`, `HARD_RATE_LIMIT_DELAY_COUNT`, `SOFT_RATE_LIMIT_DELAY_COUNT`, `BLOB_DB_GC_MICROS`, and `NUM_DATA_BLOCKS_READ_PER_LEVEL`. Note that as a result, the C++ enum values of the still supported statistics have changed. Developers are advised to not rely on the actual numeric values. * Completely removed the following deprecated/obsolete statistics: the tickers `BLOCK_CACHE_INDEX_BYTES_EVICT`, `BLOCK_CACHE_FILTER_BYTES_EVICT`, `BLOOM_FILTER_MICROS`, `NO_FILE_CLOSES`, `STALL_L0_SLOWDOWN_MICROS`, `STALL_MEMTABLE_COMPACTION_MICROS`, `STALL_L0_NUM_FILES_MICROS`, `RATE_LIMIT_DELAY_MILLIS`, `NO_ITERATORS`, `NUMBER_FILTERED_DELETES`, `WRITE_TIMEDOUT`, `BLOB_DB_GC_NUM_KEYS_OVERWRITTEN`, `BLOB_DB_GC_NUM_KEYS_EXPIRED`, `BLOB_DB_GC_BYTES_OVERWRITTEN`, `BLOB_DB_GC_BYTES_EXPIRED`, `BLOCK_CACHE_COMPRESSION_DICT_BYTES_EVICT` as well as the histograms `STALL_L0_SLOWDOWN_COUNT`, `STALL_MEMTABLE_COMPACTION_COUNT`, `STALL_L0_NUM_FILES_COUNT`, `HARD_RATE_LIMIT_DELAY_COUNT`, `SOFT_RATE_LIMIT_DELAY_COUNT`, `BLOB_DB_GC_MICROS`, and `NUM_DATA_BLOCKS_READ_PER_LEVEL`. Note that as a result, the C++ enum values of the still supported statistics have changed. Developers are advised to not rely on the actual numeric values.
* Deprecated IngestExternalFileOptions::write_global_seqno and change default to false. This option only needs to be set to true to generate a DB compatible with RocksDB versions before 5.16.0. * Deprecated IngestExternalFileOptions::write_global_seqno and change default to false. This option only needs to be set to true to generate a DB compatible with RocksDB versions before 5.16.0.
* Remove deprecated APIs `GetColumnFamilyOptionsFrom{Map|String}(const ColumnFamilyOptions&, ..)`, `GetDBOptionsFrom{Map|String}(const DBOptions&, ..)`, `GetBlockBasedTableOptionsFrom{Map|String}(const BlockBasedTableOptions& table_options, ..)` and ` GetPlainTableOptionsFrom{Map|String}(const PlainTableOptions& table_options,..)`. * Remove deprecated APIs `GetColumnFamilyOptionsFrom{Map|String}(const ColumnFamilyOptions&, ..)`, `GetDBOptionsFrom{Map|String}(const DBOptions&, ..)`, `GetBlockBasedTableOptionsFrom{Map|String}(const BlockBasedTableOptions& table_options, ..)` and ` GetPlainTableOptionsFrom{Map|String}(const PlainTableOptions& table_options,..)`.
* Added a subcode of `Status::Corruption`, `Status::SubCode::kMergeOperatorFailed`, for users to identify corruption failures originating in the merge operator, as opposed to RocksDB's internally identified data corruptions.
### Build Changes ### Build Changes
* The `make` build now builds a shared library by default instead of a static library. Use `LIB_MODE=static` to override. * The `make` build now builds a shared library by default instead of a static library. Use `LIB_MODE=static` to override.

@ -231,7 +231,9 @@ TEST_F(DBMergeOperatorTest, MergeOperatorFailsWithMustMerge) {
{ {
std::string value; std::string value;
ASSERT_OK(db_->Get(ReadOptions(), "k0", &value)); ASSERT_OK(db_->Get(ReadOptions(), "k0", &value));
ASSERT_TRUE(db_->Get(ReadOptions(), "k1", &value).IsCorruption()); Status s = db_->Get(ReadOptions(), "k1", &value);
ASSERT_TRUE(s.IsCorruption());
ASSERT_EQ(Status::SubCode::kMergeOperatorFailed, s.subcode());
ASSERT_OK(db_->Get(ReadOptions(), "k2", &value)); ASSERT_OK(db_->Get(ReadOptions(), "k2", &value));
} }
@ -243,6 +245,8 @@ TEST_F(DBMergeOperatorTest, MergeOperatorFailsWithMustMerge) {
ASSERT_EQ("k0", iter->key()); ASSERT_EQ("k0", iter->key());
iter->Next(); iter->Next();
ASSERT_TRUE(iter->status().IsCorruption()); ASSERT_TRUE(iter->status().IsCorruption());
ASSERT_EQ(Status::SubCode::kMergeOperatorFailed,
iter->status().subcode());
iter->SeekToLast(); iter->SeekToLast();
ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(iter->Valid());

@ -113,7 +113,7 @@ Status MergeHelper::TimedFullMerge(
if (!success) { if (!success) {
RecordTick(statistics, NUMBER_MERGE_FAILURES); RecordTick(statistics, NUMBER_MERGE_FAILURES);
return Status::Corruption("Error: Could not perform merge."); return Status::Corruption(Status::SubCode::kMergeOperatorFailed);
} }
return Status::OK(); return Status::OK();

@ -2407,6 +2407,9 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
"Encounter unexpected blob index. Please open DB with " "Encounter unexpected blob index. Please open DB with "
"ROCKSDB_NAMESPACE::blob_db::BlobDB instead."); "ROCKSDB_NAMESPACE::blob_db::BlobDB instead.");
return; return;
case GetContext::kMergeOperatorFailed:
*status = Status::Corruption(Status::SubCode::kMergeOperatorFailed);
return;
} }
f = fp.GetNextFile(); f = fp.GetNextFile();
} }

@ -157,6 +157,10 @@ DEFINE_SYNC_AND_ASYNC(Status, Version::MultiGetFromSST)
"ROCKSDB_NAMESPACE::blob_db::BlobDB instead."); "ROCKSDB_NAMESPACE::blob_db::BlobDB instead.");
file_range.MarkKeyDone(iter); file_range.MarkKeyDone(iter);
continue; continue;
case GetContext::kMergeOperatorFailed:
*status = Status::Corruption(Status::SubCode::kMergeOperatorFailed);
file_range.MarkKeyDone(iter);
continue;
} }
} }

@ -113,6 +113,7 @@ class Status {
kOverwritten = 12, kOverwritten = 12,
kTxnNotPrepared = 13, kTxnNotPrepared = 13,
kIOFenced = 14, kIOFenced = 14,
kMergeOperatorFailed = 15,
kMaxSubCode kMaxSubCode
}; };

@ -474,7 +474,11 @@ void GetContext::Merge(const Slice* value) {
/* update_num_ops_stats */ true, /* update_num_ops_stats */ true,
/* op_failure_scope */ nullptr); /* op_failure_scope */ nullptr);
if (!s.ok()) { if (!s.ok()) {
state_ = kCorrupt; if (s.subcode() == Status::SubCode::kMergeOperatorFailed) {
state_ = kMergeOperatorFailed;
} else {
state_ = kCorrupt;
}
return; return;
} }
@ -514,7 +518,11 @@ void GetContext::MergeWithEntity(Slice entity) {
/* update_num_ops_stats */ true, /* update_num_ops_stats */ true,
/* op_failure_scope */ nullptr); /* op_failure_scope */ nullptr);
if (!s.ok()) { if (!s.ok()) {
state_ = kCorrupt; if (s.subcode() == Status::SubCode::kMergeOperatorFailed) {
state_ = kMergeOperatorFailed;
} else {
state_ = kCorrupt;
}
return; return;
} }
} }
@ -533,7 +541,11 @@ void GetContext::MergeWithEntity(Slice entity) {
&result, logger_, statistics_, clock_, /* update_num_ops_stats */ true, &result, logger_, statistics_, clock_, /* update_num_ops_stats */ true,
/* op_failure_scope */ nullptr); /* op_failure_scope */ nullptr);
if (!s.ok()) { if (!s.ok()) {
state_ = kCorrupt; if (s.subcode() == Status::SubCode::kMergeOperatorFailed) {
state_ = kMergeOperatorFailed;
} else {
state_ = kCorrupt;
}
return; return;
} }
} }

@ -75,6 +75,7 @@ class GetContext {
kCorrupt, kCorrupt,
kMerge, // saver contains the current merge result (the operands) kMerge, // saver contains the current merge result (the operands)
kUnexpectedBlobIndex, kUnexpectedBlobIndex,
kMergeOperatorFailed,
}; };
GetContextStats get_context_stats_; GetContextStats get_context_stats_;

@ -41,9 +41,10 @@ static const char* msgs[static_cast<int>(Status::kMaxSubCode)] = {
"Insufficient capacity for merge operands", "Insufficient capacity for merge operands",
// kManualCompactionPaused // kManualCompactionPaused
"Manual compaction paused", "Manual compaction paused",
" (overwritten)", // kOverwritten, subcode of OK " (overwritten)", // kOverwritten, subcode of OK
"Txn not prepared", // kTxnNotPrepared "Txn not prepared", // kTxnNotPrepared
"IO fenced off", // kIOFenced "IO fenced off", // kIOFenced
"Merge operator failed", // kMergeOperatorFailed
}; };
Status::Status(Code _code, SubCode _subcode, const Slice& msg, Status::Status(Code _code, SubCode _subcode, const Slice& msg,

Loading…
Cancel
Save