Fix DeleteRange file boundary correctness issue with max_compaction_bytes

Summary:
Cockroachdb exposed this bug in #1778. The bug happens when a compaction's output files are ended due to exceeding max_compaction_bytes. In that case we weren't taking into account the next file's start key when deciding how far to extend the current file's max_key. This caused the non-overlapping key-range invariant to be violated.

Note this was correctly handled for the usual case of cutting compaction output, which is file size exceeding max_output_file_size. I am not sure why these are two separate code paths, but we can consider refactoring it to prevent such errors in the future.
Closes https://github.com/facebook/rocksdb/pull/1784

Differential Revision: D4430235

Pulled By: ajkr

fbshipit-source-id: 80af748
main
Andrew Kryczka 8 years ago committed by Facebook Github Bot
parent 3ce091fd73
commit f9d18e22d2
  1. 6
      db/compaction_job.cc
  2. 53
      db/db_range_del_test.cc

@ -771,9 +771,9 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
key, sub_compact->current_output_file_size) &&
sub_compact->builder != nullptr) {
CompactionIterationStats range_del_out_stats;
status =
FinishCompactionOutputFile(input->status(), sub_compact,
range_del_agg.get(), &range_del_out_stats);
status = FinishCompactionOutputFile(input->status(), sub_compact,
range_del_agg.get(),
&range_del_out_stats, &key);
RecordDroppedKeys(range_del_out_stats,
&sub_compact->compaction_job_stats);
if (!status.ok()) {

@ -112,6 +112,59 @@ TEST_F(DBRangeDelTest, CompactionOutputFilesExactlyFilled) {
db_->ReleaseSnapshot(snapshot);
}
TEST_F(DBRangeDelTest, MaxCompactionBytesCutsOutputFiles) {
// Ensures range deletion spanning multiple compaction output files that are
// cut by max_compaction_bytes will have non-overlapping key-ranges.
// https://github.com/facebook/rocksdb/issues/1778
const int kNumFiles = 2, kNumPerFile = 1 << 8, kBytesPerVal = 1 << 12;
Options opts = CurrentOptions();
opts.comparator = test::Uint64Comparator();
opts.disable_auto_compactions = true;
opts.level0_file_num_compaction_trigger = kNumFiles;
opts.max_compaction_bytes = kNumPerFile * kBytesPerVal;
opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
// Want max_compaction_bytes to trigger the end of compaction output file, not
// target_file_size_base, so make the latter much bigger
opts.target_file_size_base = 100 * opts.max_compaction_bytes;
Reopen(opts);
// snapshot protects range tombstone from dropping due to becoming obsolete.
const Snapshot* snapshot = db_->GetSnapshot();
// It spans the whole key-range, thus will be included in all output files
ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
GetNumericStr(0),
GetNumericStr(kNumFiles * kNumPerFile - 1)));
Random rnd(301);
for (int i = 0; i < kNumFiles; ++i) {
std::vector<std::string> values;
// Write 1MB (256 values, each 4K)
for (int j = 0; j < kNumPerFile; j++) {
values.push_back(RandomString(&rnd, kBytesPerVal));
ASSERT_OK(Put(GetNumericStr(kNumPerFile * i + j), values[j]));
}
// extra entry to trigger SpecialSkipListFactory's flush
ASSERT_OK(Put(GetNumericStr(kNumPerFile), ""));
dbfull()->TEST_WaitForFlushMemTable();
ASSERT_EQ(i + 1, NumTableFilesAtLevel(0));
}
dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
true /* disallow_trivial_move */);
ASSERT_EQ(0, NumTableFilesAtLevel(0));
ASSERT_GE(NumTableFilesAtLevel(1), 2);
std::vector<std::vector<FileMetaData>> files;
dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files);
for (size_t i = 0; i < files[1].size() - 1; ++i) {
ASSERT_TRUE(InternalKeyComparator(opts.comparator)
.Compare(files[1][i].largest, files[1][i + 1].smallest) <
0);
}
db_->ReleaseSnapshot(snapshot);
}
TEST_F(DBRangeDelTest, FlushRangeDelsSameStartKey) {
db_->Put(WriteOptions(), "b1", "val");
ASSERT_OK(

Loading…
Cancel
Save