Fix IngestExternalFile overlapping check (#5649)

Summary:
Previously, the end key of a range deletion tombstone was considered exclusive for the purposes of deletion, but considered inclusive when checking if two SSTables overlap. For example, an SSTable with a range deletion tombstone [a, b) would be considered overlapping with an SSTable with a range deletion tombstone [b, c). This commit fixes this check.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5649

Differential Revision: D16808765

Pulled By: anand1976

fbshipit-source-id: 5c7ad1c027e4f778d35070e5dae1b8e6037e0d68
main
Jeffrey Xiao 5 years ago committed by Facebook Github Bot
parent d92a59b6f2
commit d61d4507c0
  1. 42
      db/external_sst_file_basic_test.cc
  2. 44
      db/external_sst_file_ingestion_job.cc
  3. 17
      db/external_sst_file_ingestion_job.h
  4. 12
      db/external_sst_file_test.cc
  5. 20
      db/import_column_family_job.cc

@ -835,6 +835,48 @@ TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) {
ASSERT_EQ(2, NumTableFilesAtLevel(options.num_levels - 1)); ASSERT_EQ(2, NumTableFilesAtLevel(options.num_levels - 1));
} }
TEST_F(ExternalSSTFileBasicTest, AdjacentRangeDeletionTombstones) {
Options options = CurrentOptions();
SstFileWriter sst_file_writer(EnvOptions(), options);
// file8.sst (delete 300 => 400)
std::string file8 = sst_files_dir_ + "file8.sst";
ASSERT_OK(sst_file_writer.Open(file8));
ASSERT_OK(sst_file_writer.DeleteRange(Key(300), Key(400)));
ExternalSstFileInfo file8_info;
Status s = sst_file_writer.Finish(&file8_info);
ASSERT_TRUE(s.ok()) << s.ToString();
ASSERT_EQ(file8_info.file_path, file8);
ASSERT_EQ(file8_info.num_entries, 0);
ASSERT_EQ(file8_info.smallest_key, "");
ASSERT_EQ(file8_info.largest_key, "");
ASSERT_EQ(file8_info.num_range_del_entries, 1);
ASSERT_EQ(file8_info.smallest_range_del_key, Key(300));
ASSERT_EQ(file8_info.largest_range_del_key, Key(400));
// file9.sst (delete 400 => 500)
std::string file9 = sst_files_dir_ + "file9.sst";
ASSERT_OK(sst_file_writer.Open(file9));
ASSERT_OK(sst_file_writer.DeleteRange(Key(400), Key(500)));
ExternalSstFileInfo file9_info;
s = sst_file_writer.Finish(&file9_info);
ASSERT_TRUE(s.ok()) << s.ToString();
ASSERT_EQ(file9_info.file_path, file9);
ASSERT_EQ(file9_info.num_entries, 0);
ASSERT_EQ(file9_info.smallest_key, "");
ASSERT_EQ(file9_info.largest_key, "");
ASSERT_EQ(file9_info.num_range_del_entries, 1);
ASSERT_EQ(file9_info.smallest_range_del_key, Key(400));
ASSERT_EQ(file9_info.largest_range_del_key, Key(500));
// Range deletion tombstones are exclusive on their end key, so these SSTs
// should not be considered as overlapping.
s = DeprecatedAddFile({file8, file9});
ASSERT_TRUE(s.ok()) << s.ToString();
ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
DestroyAndRecreateExternalSSTFilesDir();
}
TEST_P(ExternalSSTFileBasicTest, IngestFileWithBadBlockChecksum) { TEST_P(ExternalSSTFileBasicTest, IngestFileWithBadBlockChecksum) {
bool change_checksum_called = false; bool change_checksum_called = false;
const auto& change_checksum = [&](void* arg) { const auto& change_checksum = [&](void* arg) {

@ -64,13 +64,13 @@ Status ExternalSstFileIngestionJob::Prepare(
std::sort( std::sort(
sorted_files.begin(), sorted_files.end(), sorted_files.begin(), sorted_files.end(),
[&ucmp](const IngestedFileInfo* info1, const IngestedFileInfo* info2) { [&ucmp](const IngestedFileInfo* info1, const IngestedFileInfo* info2) {
return ucmp->Compare(info1->smallest_user_key, return sstableKeyCompare(ucmp, info1->smallest_internal_key,
info2->smallest_user_key) < 0; info2->smallest_internal_key) < 0;
}); });
for (size_t i = 0; i < num_files - 1; i++) { for (size_t i = 0; i < num_files - 1; i++) {
if (ucmp->Compare(sorted_files[i]->largest_user_key, if (sstableKeyCompare(ucmp, sorted_files[i]->largest_internal_key,
sorted_files[i + 1]->smallest_user_key) >= 0) { sorted_files[i + 1]->smallest_internal_key) >= 0) {
return Status::NotSupported("Files have overlapping ranges"); return Status::NotSupported("Files have overlapping ranges");
} }
} }
@ -81,8 +81,8 @@ Status ExternalSstFileIngestionJob::Prepare(
return Status::InvalidArgument("File contain no entries"); return Status::InvalidArgument("File contain no entries");
} }
if (!f.smallest_internal_key().Valid() || if (!f.smallest_internal_key.Valid() ||
!f.largest_internal_key().Valid()) { !f.largest_internal_key.Valid()) {
return Status::Corruption("Generated table have corrupted keys"); return Status::Corruption("Generated table have corrupted keys");
} }
} }
@ -178,8 +178,8 @@ Status ExternalSstFileIngestionJob::NeedsFlush(bool* flush_needed,
SuperVersion* super_version) { SuperVersion* super_version) {
autovector<Range> ranges; autovector<Range> ranges;
for (const IngestedFileInfo& file_to_ingest : files_to_ingest_) { for (const IngestedFileInfo& file_to_ingest : files_to_ingest_) {
ranges.emplace_back(file_to_ingest.smallest_user_key, ranges.emplace_back(file_to_ingest.smallest_internal_key.user_key(),
file_to_ingest.largest_user_key); file_to_ingest.largest_internal_key.user_key());
} }
Status status = Status status =
cfd_->RangesOverlapWithMemtables(ranges, super_version, flush_needed); cfd_->RangesOverlapWithMemtables(ranges, super_version, flush_needed);
@ -238,8 +238,8 @@ Status ExternalSstFileIngestionJob::Run() {
return status; return status;
} }
edit_.AddFile(f.picked_level, f.fd.GetNumber(), f.fd.GetPathId(), edit_.AddFile(f.picked_level, f.fd.GetNumber(), f.fd.GetPathId(),
f.fd.GetFileSize(), f.smallest_internal_key(), f.fd.GetFileSize(), f.smallest_internal_key,
f.largest_internal_key(), f.assigned_seqno, f.assigned_seqno, f.largest_internal_key, f.assigned_seqno, f.assigned_seqno,
false); false);
} }
return status; return status;
@ -414,6 +414,8 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
table_reader->NewRangeTombstoneIterator(ro)); table_reader->NewRangeTombstoneIterator(ro));
// Get first (smallest) and last (largest) key from file. // Get first (smallest) and last (largest) key from file.
file_to_ingest->smallest_internal_key = InternalKey("", 0, ValueType::kTypeValue);
file_to_ingest->largest_internal_key = InternalKey("", 0, ValueType::kTypeValue);
bool bounds_set = false; bool bounds_set = false;
iter->SeekToFirst(); iter->SeekToFirst();
if (iter->Valid()) { if (iter->Valid()) {
@ -423,7 +425,7 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
if (key.sequence != 0) { if (key.sequence != 0) {
return Status::Corruption("external file have non zero sequence number"); return Status::Corruption("external file have non zero sequence number");
} }
file_to_ingest->smallest_user_key = key.user_key.ToString(); file_to_ingest->smallest_internal_key.SetFrom(key);
iter->SeekToLast(); iter->SeekToLast();
if (!ParseInternalKey(iter->key(), &key)) { if (!ParseInternalKey(iter->key(), &key)) {
@ -432,7 +434,7 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
if (key.sequence != 0) { if (key.sequence != 0) {
return Status::Corruption("external file have non zero sequence number"); return Status::Corruption("external file have non zero sequence number");
} }
file_to_ingest->largest_user_key = key.user_key.ToString(); file_to_ingest->largest_internal_key.SetFrom(key);
bounds_set = true; bounds_set = true;
} }
@ -448,13 +450,13 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
} }
RangeTombstone tombstone(key, range_del_iter->value()); RangeTombstone tombstone(key, range_del_iter->value());
if (!bounds_set || ucmp->Compare(tombstone.start_key_, InternalKey start_key = tombstone.SerializeKey();
file_to_ingest->smallest_user_key) < 0) { if (!bounds_set || sstableKeyCompare(ucmp, start_key, file_to_ingest->smallest_internal_key) < 0) {
file_to_ingest->smallest_user_key = tombstone.start_key_.ToString(); file_to_ingest->smallest_internal_key = start_key;
} }
if (!bounds_set || ucmp->Compare(tombstone.end_key_, InternalKey end_key = tombstone.SerializeEndKey();
file_to_ingest->largest_user_key) > 0) { if (!bounds_set || sstableKeyCompare(ucmp, end_key, file_to_ingest->largest_internal_key) > 0) {
file_to_ingest->largest_user_key = tombstone.end_key_.ToString(); file_to_ingest->largest_internal_key = end_key;
} }
bounds_set = true; bounds_set = true;
} }
@ -496,7 +498,7 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile(
if (vstorage->NumLevelFiles(lvl) > 0) { if (vstorage->NumLevelFiles(lvl) > 0) {
bool overlap_with_level = false; bool overlap_with_level = false;
status = sv->current->OverlapWithLevelIterator(ro, env_options_, status = sv->current->OverlapWithLevelIterator(ro, env_options_,
file_to_ingest->smallest_user_key, file_to_ingest->largest_user_key, file_to_ingest->smallest_internal_key.user_key(), file_to_ingest->largest_internal_key.user_key(),
lvl, &overlap_with_level); lvl, &overlap_with_level);
if (!status.ok()) { if (!status.ok()) {
return status; return status;
@ -630,8 +632,8 @@ bool ExternalSstFileIngestionJob::IngestedFileFitInLevel(
} }
auto* vstorage = cfd_->current()->storage_info(); auto* vstorage = cfd_->current()->storage_info();
Slice file_smallest_user_key(file_to_ingest->smallest_user_key); Slice file_smallest_user_key(file_to_ingest->smallest_internal_key.user_key());
Slice file_largest_user_key(file_to_ingest->largest_user_key); Slice file_largest_user_key(file_to_ingest->largest_internal_key.user_key());
if (vstorage->OverlapInLevel(level, &file_smallest_user_key, if (vstorage->OverlapInLevel(level, &file_smallest_user_key,
&file_largest_user_key)) { &file_largest_user_key)) {

@ -25,10 +25,10 @@ class Directories;
struct IngestedFileInfo { struct IngestedFileInfo {
// External file path // External file path
std::string external_file_path; std::string external_file_path;
// Smallest user key in external file // Smallest internal key in external file
std::string smallest_user_key; InternalKey smallest_internal_key;
// Largest user key in external file // Largest internal key in external file
std::string largest_user_key; InternalKey largest_internal_key;
// Sequence number for keys in external file // Sequence number for keys in external file
SequenceNumber original_seqno; SequenceNumber original_seqno;
// Offset of the global sequence number field in the file, will // Offset of the global sequence number field in the file, will
@ -62,15 +62,6 @@ struct IngestedFileInfo {
// ingestion_options.move_files is false by default, thus copy_file is true // ingestion_options.move_files is false by default, thus copy_file is true
// by default. // by default.
bool copy_file = true; bool copy_file = true;
InternalKey smallest_internal_key() const {
return InternalKey(smallest_user_key, assigned_seqno,
ValueType::kTypeValue);
}
InternalKey largest_internal_key() const {
return InternalKey(largest_user_key, assigned_seqno, ValueType::kTypeValue);
}
}; };
class ExternalSstFileIngestionJob { class ExternalSstFileIngestionJob {

@ -696,10 +696,10 @@ TEST_F(ExternalSSTFileTest, AddList) {
ASSERT_EQ(file6_info.smallest_range_del_key, Key(0)); ASSERT_EQ(file6_info.smallest_range_del_key, Key(0));
ASSERT_EQ(file6_info.largest_range_del_key, Key(100)); ASSERT_EQ(file6_info.largest_range_del_key, Key(100));
// file7.sst (delete 100 => 200) // file7.sst (delete 99 => 201)
std::string file7 = sst_files_dir_ + "file7.sst"; std::string file7 = sst_files_dir_ + "file7.sst";
ASSERT_OK(sst_file_writer.Open(file7)); ASSERT_OK(sst_file_writer.Open(file7));
ASSERT_OK(sst_file_writer.DeleteRange(Key(100), Key(200))); ASSERT_OK(sst_file_writer.DeleteRange(Key(99), Key(201)));
ExternalSstFileInfo file7_info; ExternalSstFileInfo file7_info;
s = sst_file_writer.Finish(&file7_info); s = sst_file_writer.Finish(&file7_info);
ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_TRUE(s.ok()) << s.ToString();
@ -708,8 +708,8 @@ TEST_F(ExternalSSTFileTest, AddList) {
ASSERT_EQ(file7_info.smallest_key, ""); ASSERT_EQ(file7_info.smallest_key, "");
ASSERT_EQ(file7_info.largest_key, ""); ASSERT_EQ(file7_info.largest_key, "");
ASSERT_EQ(file7_info.num_range_del_entries, 1); ASSERT_EQ(file7_info.num_range_del_entries, 1);
ASSERT_EQ(file7_info.smallest_range_del_key, Key(100)); ASSERT_EQ(file7_info.smallest_range_del_key, Key(99));
ASSERT_EQ(file7_info.largest_range_del_key, Key(200)); ASSERT_EQ(file7_info.largest_range_del_key, Key(201));
// list 1 has internal key range conflict // list 1 has internal key range conflict
std::vector<std::string> file_list0({file1, file2}); std::vector<std::string> file_list0({file1, file2});
@ -724,9 +724,7 @@ TEST_F(ExternalSSTFileTest, AddList) {
// These lists of files have key ranges that overlap with each other // These lists of files have key ranges that overlap with each other
s = DeprecatedAddFile(file_list1); s = DeprecatedAddFile(file_list1);
ASSERT_FALSE(s.ok()) << s.ToString(); ASSERT_FALSE(s.ok()) << s.ToString();
// Both of the following overlap on the end key of a range deletion // Both of the following overlap on the range deletion tombstone.
// tombstone. This is a limitation because these tombstones have exclusive
// end keys that should not count as overlapping with other keys.
s = DeprecatedAddFile(file_list4); s = DeprecatedAddFile(file_list4);
ASSERT_FALSE(s.ok()) << s.ToString(); ASSERT_FALSE(s.ok()) << s.ToString();
s = DeprecatedAddFile(file_list5); s = DeprecatedAddFile(file_list5);

@ -58,13 +58,13 @@ Status ImportColumnFamilyJob::Prepare(uint64_t next_file_number,
std::sort(sorted_files.begin(), sorted_files.end(), std::sort(sorted_files.begin(), sorted_files.end(),
[&ucmp](const IngestedFileInfo* info1, [&ucmp](const IngestedFileInfo* info1,
const IngestedFileInfo* info2) { const IngestedFileInfo* info2) {
return ucmp->Compare(info1->smallest_user_key, return sstableKeyCompare(ucmp, info1->smallest_internal_key,
info2->smallest_user_key) < 0; info2->smallest_internal_key) < 0;
}); });
for (size_t i = 0; i < sorted_files.size() - 1; i++) { for (size_t i = 0; i < sorted_files.size() - 1; i++) {
if (ucmp->Compare(sorted_files[i]->largest_user_key, if (sstableKeyCompare(ucmp, sorted_files[i]->largest_internal_key,
sorted_files[i + 1]->smallest_user_key) >= 0) { sorted_files[i + 1]->smallest_internal_key) >= 0) {
return Status::InvalidArgument("Files have overlapping ranges"); return Status::InvalidArgument("Files have overlapping ranges");
} }
} }
@ -76,8 +76,8 @@ Status ImportColumnFamilyJob::Prepare(uint64_t next_file_number,
return Status::InvalidArgument("File contain no entries"); return Status::InvalidArgument("File contain no entries");
} }
if (!f.smallest_internal_key().Valid() || if (!f.smallest_internal_key.Valid() ||
!f.largest_internal_key().Valid()) { !f.largest_internal_key.Valid()) {
return Status::Corruption("File has corrupted keys"); return Status::Corruption("File has corrupted keys");
} }
} }
@ -137,8 +137,8 @@ Status ImportColumnFamilyJob::Run() {
const auto& f = files_to_import_[i]; const auto& f = files_to_import_[i];
const auto& file_metadata = metadata_[i]; const auto& file_metadata = metadata_[i];
edit_.AddFile(file_metadata.level, f.fd.GetNumber(), f.fd.GetPathId(), edit_.AddFile(file_metadata.level, f.fd.GetNumber(), f.fd.GetPathId(),
f.fd.GetFileSize(), f.smallest_internal_key(), f.fd.GetFileSize(), f.smallest_internal_key,
f.largest_internal_key(), file_metadata.smallest_seqno, f.largest_internal_key, file_metadata.smallest_seqno,
file_metadata.largest_seqno, false); file_metadata.largest_seqno, false);
// If incoming sequence number is higher, update local sequence number. // If incoming sequence number is higher, update local sequence number.
@ -236,14 +236,14 @@ Status ImportColumnFamilyJob::GetIngestedFileInfo(
if (!ParseInternalKey(iter->key(), &key)) { if (!ParseInternalKey(iter->key(), &key)) {
return Status::Corruption("external file have corrupted keys"); return Status::Corruption("external file have corrupted keys");
} }
file_to_import->smallest_user_key = key.user_key.ToString(); file_to_import->smallest_internal_key.SetFrom(key);
// Get last (largest) key from file // Get last (largest) key from file
iter->SeekToLast(); iter->SeekToLast();
if (!ParseInternalKey(iter->key(), &key)) { if (!ParseInternalKey(iter->key(), &key)) {
return Status::Corruption("external file have corrupted keys"); return Status::Corruption("external file have corrupted keys");
} }
file_to_import->largest_user_key = key.user_key.ToString(); file_to_import->largest_internal_key.SetFrom(key);
file_to_import->cf_id = static_cast<uint32_t>(props->column_family_id); file_to_import->cf_id = static_cast<uint32_t>(props->column_family_id);

Loading…
Cancel
Save