update DB::AddFile to ingest list of sst files

Summary:
DB::AddFile(std::string file_path) API that allow them to ingest an SST file created using SstFileWriter
We want to update this interface to be able to accept a list of files that will be ingested, DB::AddFile(std::vector<std::string> file_path_list).

Test Plan:
Add test case `AddExternalSstFileList` in `DBSSTTest`. To make sure:
1. files key ranges are not overlapping with each other
2. each file key range dont overlap with the DB key range
3. make sure no snapshots are held

Reviewers: andrewkr, sdong, IslamAbdelRahman

Reviewed By: IslamAbdelRahman

Subscribers: andrewkr, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D58587
main
Aaron Gao 9 years ago
parent 296545a2c7
commit 8e6b38d895
  1. 1
      db/db_impl.cc
  2. 11
      db/db_impl.h
  3. 268
      db/db_impl_add_file.cc
  4. 261
      db/db_sst_test.cc
  5. 4
      db/db_test.cc
  6. 2
      db/db_test_util.cc
  7. 81
      include/rocksdb/db.h
  8. 8
      include/rocksdb/utilities/stackable_db.h

@ -6,7 +6,6 @@
// Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors. // found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "db/db_impl.h" #include "db/db_impl.h"
#ifndef __STDC_FORMAT_MACROS #ifndef __STDC_FORMAT_MACROS

@ -258,10 +258,11 @@ class DBImpl : public DB {
using DB::AddFile; using DB::AddFile;
virtual Status AddFile(ColumnFamilyHandle* column_family, virtual Status AddFile(ColumnFamilyHandle* column_family,
const ExternalSstFileInfo* file_info, const std::vector<ExternalSstFileInfo>& file_info_list,
bool move_file) override; bool move_file) override;
virtual Status AddFile(ColumnFamilyHandle* column_family, virtual Status AddFile(ColumnFamilyHandle* column_family,
const std::string& file_path, bool move_file) override; const std::vector<std::string>& file_path_list,
bool move_file) override;
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
@ -637,13 +638,17 @@ class DBImpl : public DB {
// Finds the lowest level in the DB that the ingested file can be added to // Finds the lowest level in the DB that the ingested file can be added to
// REQUIRES: mutex_ held // REQUIRES: mutex_ held
int PickLevelForIngestedFile(ColumnFamilyData* cfd, int PickLevelForIngestedFile(ColumnFamilyData* cfd,
const ExternalSstFileInfo* file_info); const ExternalSstFileInfo& file_info);
Status CompactFilesImpl( Status CompactFilesImpl(
const CompactionOptions& compact_options, ColumnFamilyData* cfd, const CompactionOptions& compact_options, ColumnFamilyData* cfd,
Version* version, const std::vector<std::string>& input_file_names, Version* version, const std::vector<std::string>& input_file_names,
const int output_level, int output_path_id, JobContext* job_context, const int output_level, int output_path_id, JobContext* job_context,
LogBuffer* log_buffer); LogBuffer* log_buffer);
Status ReadExternalSstFileInfo(ColumnFamilyHandle* column_family,
const std::string& file_path,
ExternalSstFileInfo* file_info);
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
ColumnFamilyData* GetColumnFamilyDataByName(const std::string& cf_name); ColumnFamilyData* GetColumnFamilyDataByName(const std::string& cf_name);

@ -21,16 +21,33 @@
#include "util/sync_point.h" #include "util/sync_point.h"
namespace rocksdb { namespace rocksdb {
namespace {
// RAII Timer
class StatsTimer {
public:
explicit StatsTimer(Env* env, uint64_t* counter)
: env_(env), counter_(counter), start_micros_(env_->NowMicros()) {}
~StatsTimer() { *counter_ += env_->NowMicros() - start_micros_; }
private:
Env* env_;
uint64_t* counter_;
uint64_t start_micros_;
};
} // anonymous namespace
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
Status DBImpl::AddFile(ColumnFamilyHandle* column_family,
const std::string& file_path, bool move_file) { Status DBImpl::ReadExternalSstFileInfo(ColumnFamilyHandle* column_family,
const std::string& file_path,
ExternalSstFileInfo* file_info) {
Status status; Status status;
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family); auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
ColumnFamilyData* cfd = cfh->cfd(); auto cfd = cfh->cfd();
ExternalSstFileInfo file_info; file_info->file_path = file_path;
file_info.file_path = file_path; status = env_->GetFileSize(file_path, &file_info->file_size);
status = env_->GetFileSize(file_path, &file_info.file_size);
if (!status.ok()) { if (!status.ok()) {
return status; return status;
} }
@ -49,7 +66,7 @@ Status DBImpl::AddFile(ColumnFamilyHandle* column_family,
status = cfd->ioptions()->table_factory->NewTableReader( status = cfd->ioptions()->table_factory->NewTableReader(
TableReaderOptions(*cfd->ioptions(), env_options_, TableReaderOptions(*cfd->ioptions(), env_options_,
cfd->internal_comparator()), cfd->internal_comparator()),
std::move(sst_file_reader), file_info.file_size, &table_reader); std::move(sst_file_reader), file_info->file_size, &table_reader);
if (!status.ok()) { if (!status.ok()) {
return status; return status;
} }
@ -63,17 +80,16 @@ Status DBImpl::AddFile(ColumnFamilyHandle* column_family,
return Status::InvalidArgument("Generated table version not found"); return Status::InvalidArgument("Generated table version not found");
} }
file_info.version = file_info->version =
DecodeFixed32(external_sst_file_version_iter->second.c_str()); DecodeFixed32(external_sst_file_version_iter->second.c_str());
if (file_info.version == 1) { if (file_info->version == 1) {
// version 1 imply that all sequence numbers in table equal 0 // version 1 imply that all sequence numbers in table equal 0
file_info.sequence_number = 0; file_info->sequence_number = 0;
} else { } else {
return Status::InvalidArgument("Generated table version is not supported"); return Status::InvalidArgument("Generated table version is not supported");
} }
// Get number of entries in table // Get number of entries in table
file_info.num_entries = table_reader->GetTableProperties()->num_entries; file_info->num_entries = table_reader->GetTableProperties()->num_entries;
ParsedInternalKey key; ParsedInternalKey key;
std::unique_ptr<InternalIterator> iter( std::unique_ptr<InternalIterator> iter(
@ -87,7 +103,7 @@ Status DBImpl::AddFile(ColumnFamilyHandle* column_family,
if (key.sequence != 0) { if (key.sequence != 0) {
return Status::Corruption("Generated table have non zero sequence number"); return Status::Corruption("Generated table have non zero sequence number");
} }
file_info.smallest_key = key.user_key.ToString(); file_info->smallest_key = key.user_key.ToString();
// Get last (largest) key from file // Get last (largest) key from file
iter->SeekToLast(); iter->SeekToLast();
@ -97,70 +113,145 @@ Status DBImpl::AddFile(ColumnFamilyHandle* column_family,
if (key.sequence != 0) { if (key.sequence != 0) {
return Status::Corruption("Generated table have non zero sequence number"); return Status::Corruption("Generated table have non zero sequence number");
} }
file_info.largest_key = key.user_key.ToString(); file_info->largest_key = key.user_key.ToString();
return AddFile(column_family, &file_info, move_file); return Status::OK();
} }
Status DBImpl::AddFile(ColumnFamilyHandle* column_family, Status DBImpl::AddFile(ColumnFamilyHandle* column_family,
const ExternalSstFileInfo* file_info, bool move_file) { const std::vector<std::string>& file_path_list,
bool move_file) {
Status status;
auto num_files = file_path_list.size();
if (num_files == 0) {
return Status::InvalidArgument("The list of files is empty");
}
std::vector<ExternalSstFileInfo> file_infos(num_files);
std::vector<ExternalSstFileInfo> file_info_list(num_files);
for (size_t i = 0; i < num_files; i++) {
status = ReadExternalSstFileInfo(column_family, file_path_list[i],
&file_info_list[i]);
if (!status.ok()) {
return status;
}
}
return AddFile(column_family, file_info_list, move_file);
}
Status DBImpl::AddFile(ColumnFamilyHandle* column_family,
const std::vector<ExternalSstFileInfo>& file_info_list,
bool move_file) {
Status status; Status status;
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family); auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
ColumnFamilyData* cfd = cfh->cfd(); ColumnFamilyData* cfd = cfh->cfd();
const uint64_t start_micros = env_->NowMicros();
if (file_info->num_entries == 0) { auto num_files = file_info_list.size();
return Status::InvalidArgument("File contain no entries"); if (num_files == 0) {
return Status::InvalidArgument("The list of files is empty");
} }
if (file_info->version != 1) {
return Status::InvalidArgument("Generated table version is not supported"); // Verify that passed files dont have overlapping ranges
if (num_files > 1) {
std::vector<const ExternalSstFileInfo*> sorted_file_info_list(num_files);
for (size_t i = 0; i < num_files; i++) {
sorted_file_info_list[i] = &file_info_list[i];
} }
// version 1 imply that file have only Put Operations with Sequence Number = 0
FileMetaData meta; auto* vstorage = cfd->current()->storage_info();
meta.smallest = std::sort(
InternalKey(file_info->smallest_key, file_info->sequence_number, sorted_file_info_list.begin(), sorted_file_info_list.end(),
ValueType::kTypeValue); [&vstorage, &file_info_list](const ExternalSstFileInfo* info1,
meta.largest = InternalKey(file_info->largest_key, file_info->sequence_number, const ExternalSstFileInfo* info2) {
ValueType::kTypeValue); return vstorage->InternalComparator()->user_comparator()->Compare(
if (!meta.smallest.Valid() || !meta.largest.Valid()) { info1->smallest_key, info2->smallest_key) < 0;
});
for (size_t i = 0; i < num_files - 1; i++) {
if (sorted_file_info_list[i]->largest_key >=
sorted_file_info_list[i + 1]->smallest_key) {
return Status::NotSupported("Cannot add overlapping range among files");
}
}
}
std::vector<uint64_t> micro_list(num_files, 0);
std::vector<FileMetaData> meta_list(num_files);
for (size_t i = 0; i < num_files; i++) {
StatsTimer t(env_, &micro_list[i]);
if (file_info_list[i].num_entries == 0) {
return Status::InvalidArgument("File contain no entries");
}
if (file_info_list[i].version != 1) {
return Status::InvalidArgument(
"Generated table version is not supported");
}
// version 1 imply that file have only Put Operations with Sequence Number =
// 0
meta_list[i].smallest =
InternalKey(file_info_list[i].smallest_key,
file_info_list[i].sequence_number, ValueType::kTypeValue);
meta_list[i].largest =
InternalKey(file_info_list[i].largest_key,
file_info_list[i].sequence_number, ValueType::kTypeValue);
if (!meta_list[i].smallest.Valid() || !meta_list[i].largest.Valid()) {
return Status::Corruption("Generated table have corrupted keys"); return Status::Corruption("Generated table have corrupted keys");
} }
meta.smallest_seqno = file_info->sequence_number; meta_list[i].smallest_seqno = file_info_list[i].sequence_number;
meta.largest_seqno = file_info->sequence_number; meta_list[i].largest_seqno = file_info_list[i].sequence_number;
if (meta.smallest_seqno != 0 || meta.largest_seqno != 0) { if (meta_list[i].smallest_seqno != 0 || meta_list[i].largest_seqno != 0) {
return Status::InvalidArgument( return Status::InvalidArgument(
"Non zero sequence numbers are not supported"); "Non zero sequence numbers are not supported");
} }
}
// Generate a location for the new table std::vector<std::list<uint64_t>::iterator> pending_outputs_inserted_elem_list(
std::list<uint64_t>::iterator pending_outputs_inserted_elem; num_files);
// Generate locations for the new tables
{ {
InstrumentedMutexLock l(&mutex_); InstrumentedMutexLock l(&mutex_);
pending_outputs_inserted_elem = CaptureCurrentFileNumberInPendingOutputs(); for (size_t i = 0; i < num_files; i++) {
meta.fd = StatsTimer t(env_, &micro_list[i]);
FileDescriptor(versions_->NewFileNumber(), 0, file_info->file_size); pending_outputs_inserted_elem_list[i] =
} CaptureCurrentFileNumberInPendingOutputs();
meta_list[i].fd = FileDescriptor(versions_->NewFileNumber(), 0,
std::string db_fname = TableFileName( file_info_list[i].file_size);
db_options_.db_paths, meta.fd.GetNumber(), meta.fd.GetPathId()); }
}
// Copy/Move external files into DB
std::vector<std::string> db_fname_list(num_files);
size_t j = 0;
for (; j < num_files; j++) {
StatsTimer t(env_, &micro_list[j]);
db_fname_list[j] =
TableFileName(db_options_.db_paths, meta_list[j].fd.GetNumber(),
meta_list[j].fd.GetPathId());
if (move_file) { if (move_file) {
status = env_->LinkFile(file_info->file_path, db_fname); status = env_->LinkFile(file_info_list[j].file_path, db_fname_list[j]);
if (status.IsNotSupported()) { if (status.IsNotSupported()) {
// Original file is on a different FS, use copy instead of hard linking // Original file is on a different FS, use copy instead of hard linking
status = CopyFile(env_, file_info->file_path, db_fname, 0); status =
CopyFile(env_, file_info_list[j].file_path, db_fname_list[j], 0);
} }
} else { } else {
status = CopyFile(env_, file_info->file_path, db_fname, 0); status = CopyFile(env_, file_info_list[j].file_path, db_fname_list[j], 0);
} }
TEST_SYNC_POINT("DBImpl::AddFile:FileCopied"); TEST_SYNC_POINT("DBImpl::AddFile:FileCopied");
if (!status.ok()) { if (!status.ok()) {
for (size_t i = 0; i < j; i++) {
Status s = env_->DeleteFile(db_fname_list[i]);
if (!s.ok()) {
Log(InfoLogLevel::WARN_LEVEL, db_options_.info_log,
"AddFile() clean up for file %s failed : %s",
db_fname_list[i].c_str(), s.ToString().c_str());
}
}
return status; return status;
} }
}
// The level the file will be ingested into
int target_level = 0;
{ {
InstrumentedMutexLock l(&mutex_); InstrumentedMutexLock l(&mutex_);
const MutableCFOptions mutable_cf_options = const MutableCFOptions mutable_cf_options =
@ -183,8 +274,10 @@ Status DBImpl::AddFile(ColumnFamilyHandle* column_family,
ro.total_order_seek = true; ro.total_order_seek = true;
ScopedArenaIterator iter(NewInternalIterator(ro, cfd, sv, &arena)); ScopedArenaIterator iter(NewInternalIterator(ro, cfd, sv, &arena));
InternalKey range_start(file_info->smallest_key, kMaxSequenceNumber, for (size_t i = 0; i < num_files; i++) {
kTypeValue); StatsTimer t(env_, &micro_list[i]);
InternalKey range_start(file_info_list[i].smallest_key,
kMaxSequenceNumber, kTypeValue);
iter->Seek(range_start.Encode()); iter->Seek(range_start.Encode());
status = iter->status(); status = iter->status();
@ -193,25 +286,34 @@ Status DBImpl::AddFile(ColumnFamilyHandle* column_family,
if (ParseInternalKey(iter->key(), &seek_result)) { if (ParseInternalKey(iter->key(), &seek_result)) {
auto* vstorage = cfd->current()->storage_info(); auto* vstorage = cfd->current()->storage_info();
if (vstorage->InternalComparator()->user_comparator()->Compare( if (vstorage->InternalComparator()->user_comparator()->Compare(
seek_result.user_key, file_info->largest_key) <= 0) { seek_result.user_key, file_info_list[i].largest_key) <= 0) {
status = Status::NotSupported("Cannot add overlapping range"); status = Status::NotSupported("Cannot add overlapping range");
break;
} }
} else { } else {
status = Status::Corruption("DB have corrupted keys"); status = Status::Corruption("DB have corrupted keys");
break;
}
} }
} }
} }
// The level the file will be ingested into
std::vector<int> target_level_list(num_files, 0);
if (status.ok()) { if (status.ok()) {
// Add file to the lowest possible level // Add files to L0
target_level = PickLevelForIngestedFile(cfd, file_info);
VersionEdit edit; VersionEdit edit;
edit.SetColumnFamily(cfd->GetID()); edit.SetColumnFamily(cfd->GetID());
edit.AddFile(target_level, meta.fd.GetNumber(), meta.fd.GetPathId(), for (size_t i = 0; i < num_files; i++) {
meta.fd.GetFileSize(), meta.smallest, meta.largest, StatsTimer t(env_, &micro_list[i]);
meta.smallest_seqno, meta.largest_seqno, // Add file to the lowest possible level
meta.marked_for_compaction); target_level_list[i] = PickLevelForIngestedFile(cfd, file_info_list[i]);
edit.AddFile(target_level_list[i], meta_list[i].fd.GetNumber(),
meta_list[i].fd.GetPathId(), meta_list[i].fd.GetFileSize(),
meta_list[i].smallest, meta_list[i].largest,
meta_list[i].smallest_seqno, meta_list[i].largest_seqno,
meta_list[i].marked_for_compaction);
}
status = versions_->LogAndApply(cfd, mutable_cf_options, &edit, &mutex_, status = versions_->LogAndApply(cfd, mutable_cf_options, &edit, &mutex_,
directories_.GetDbDir()); directories_.GetDbDir());
} }
@ -220,45 +322,41 @@ Status DBImpl::AddFile(ColumnFamilyHandle* column_family,
if (status.ok()) { if (status.ok()) {
delete InstallSuperVersionAndScheduleWork(cfd, nullptr, delete InstallSuperVersionAndScheduleWork(cfd, nullptr,
mutable_cf_options); mutable_cf_options);
}
for (size_t i = 0; i < num_files; i++) {
// Update internal stats // Update internal stats
InternalStats::CompactionStats stats(1); InternalStats::CompactionStats stats(1);
stats.micros = env_->NowMicros() - start_micros; stats.micros = micro_list[i];
stats.bytes_written = meta.fd.GetFileSize(); stats.bytes_written = meta_list[i].fd.GetFileSize();
stats.num_output_files = 1; stats.num_output_files = 1;
cfd->internal_stats()->AddCompactionStats(target_level, stats); cfd->internal_stats()->AddCompactionStats(target_level_list[i], stats);
cfd->internal_stats()->AddCFStats(InternalStats::BYTES_INGESTED_ADD_FILE, cfd->internal_stats()->AddCFStats(InternalStats::BYTES_INGESTED_ADD_FILE,
meta.fd.GetFileSize()); meta_list[i].fd.GetFileSize());
ReleaseFileNumberFromPendingOutputs(
pending_outputs_inserted_elem_list[i]);
} }
ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem);
} }
if (!status.ok()) { if (!status.ok()) {
// We failed to add the file to the database // We failed to add the files to the database
Status s = env_->DeleteFile(db_fname); for (size_t i = 0; i < num_files; i++) {
Status s = env_->DeleteFile(db_fname_list[i]);
if (!s.ok()) { if (!s.ok()) {
Log(InfoLogLevel::WARN_LEVEL, db_options_.info_log, Log(InfoLogLevel::WARN_LEVEL, db_options_.info_log,
"AddFile() clean up for file %s failed : %s", db_fname.c_str(), "AddFile() clean up for file %s failed : %s",
s.ToString().c_str()); db_fname_list[i].c_str(), s.ToString().c_str());
} }
} else { }
// File was ingested successfully } else if (status.ok() && move_file) {
Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log, // The files were moved and added successfully, remove original file links
"New file %" PRIu64 for (size_t i = 0; i < num_files; i++) {
" was added to L%d (Size: %.2f MB, " Status s = env_->DeleteFile(file_info_list[i].file_path);
"entries: %" PRIu64 ")",
meta.fd.GetNumber(), target_level,
static_cast<double>(meta.fd.GetFileSize()) / 1048576.0,
file_info->num_entries);
if (move_file) {
// The file was moved and added successfully, remove original file link
Status s = env_->DeleteFile(file_info->file_path);
if (!s.ok()) { if (!s.ok()) {
Log(InfoLogLevel::WARN_LEVEL, db_options_.info_log, Log(InfoLogLevel::WARN_LEVEL, db_options_.info_log,
"%s was added to DB successfully but failed to remove original " "%s was added to DB successfully but failed to remove original "
"file link : %s", "file "
file_info->file_path.c_str(), s.ToString().c_str()); "link : %s",
file_info_list[i].file_path.c_str(), s.ToString().c_str());
} }
} }
} }
@ -267,15 +365,15 @@ Status DBImpl::AddFile(ColumnFamilyHandle* column_family,
// Finds the lowest level in the DB that the ingested file can be added to // Finds the lowest level in the DB that the ingested file can be added to
int DBImpl::PickLevelForIngestedFile(ColumnFamilyData* cfd, int DBImpl::PickLevelForIngestedFile(ColumnFamilyData* cfd,
const ExternalSstFileInfo* file_info) { const ExternalSstFileInfo& file_info) {
mutex_.AssertHeld(); mutex_.AssertHeld();
int target_level = 0; int target_level = 0;
auto* vstorage = cfd->current()->storage_info(); auto* vstorage = cfd->current()->storage_info();
auto* ucmp = vstorage->InternalComparator()->user_comparator(); auto* ucmp = vstorage->InternalComparator()->user_comparator();
Slice file_smallest_user_key(file_info->smallest_key); Slice file_smallest_user_key(file_info.smallest_key);
Slice file_largest_user_key(file_info->largest_key); Slice file_largest_user_key(file_info.largest_key);
for (int lvl = cfd->NumberLevels() - 1; lvl >= vstorage->base_level(); for (int lvl = cfd->NumberLevels() - 1; lvl >= vstorage->base_level();
lvl--) { lvl--) {

@ -843,7 +843,7 @@ TEST_F(DBSSTTest, AddExternalSstFile) {
DestroyAndReopen(options); DestroyAndReopen(options);
// Add file using file path // Add file using file path
s = db_->AddFile(file1); s = db_->AddFile(std::vector<std::string>(1, file1));
ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_TRUE(s.ok()) << s.ToString();
ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
for (int k = 0; k < 100; k++) { for (int k = 0; k < 100; k++) {
@ -853,23 +853,23 @@ TEST_F(DBSSTTest, AddExternalSstFile) {
// Add file while holding a snapshot will fail // Add file while holding a snapshot will fail
const Snapshot* s1 = db_->GetSnapshot(); const Snapshot* s1 = db_->GetSnapshot();
if (s1 != nullptr) { if (s1 != nullptr) {
ASSERT_NOK(db_->AddFile(&file2_info)); ASSERT_NOK(db_->AddFile(std::vector<ExternalSstFileInfo>(1, file2_info)));
db_->ReleaseSnapshot(s1); db_->ReleaseSnapshot(s1);
} }
// We can add the file after releaseing the snapshot // We can add the file after releaseing the snapshot
ASSERT_OK(db_->AddFile(&file2_info)); ASSERT_OK(db_->AddFile(std::vector<ExternalSstFileInfo>(1, file2_info)));
ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
for (int k = 0; k < 200; k++) { for (int k = 0; k < 200; k++) {
ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
} }
// This file have overlapping values with the exisitng data // This file has overlapping values with the exisitng data
s = db_->AddFile(file3); s = db_->AddFile(std::vector<std::string>(1, file3));
ASSERT_FALSE(s.ok()) << s.ToString(); ASSERT_FALSE(s.ok()) << s.ToString();
// This file have overlapping values with the exisitng data // This file has overlapping values with the exisitng data
s = db_->AddFile(&file4_info); s = db_->AddFile(std::vector<ExternalSstFileInfo>(1, file4_info));
ASSERT_FALSE(s.ok()) << s.ToString(); ASSERT_FALSE(s.ok()) << s.ToString();
// Overwrite values of keys divisible by 5 // Overwrite values of keys divisible by 5
@ -879,7 +879,7 @@ TEST_F(DBSSTTest, AddExternalSstFile) {
ASSERT_NE(db_->GetLatestSequenceNumber(), 0U); ASSERT_NE(db_->GetLatestSequenceNumber(), 0U);
// Key range of file5 (400 => 499) dont overlap with any keys in DB // Key range of file5 (400 => 499) dont overlap with any keys in DB
ASSERT_OK(db_->AddFile(file5)); ASSERT_OK(db_->AddFile(std::vector<std::string>(1, file5)));
// Make sure values are correct before and after flush/compaction // Make sure values are correct before and after flush/compaction
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++) {
@ -908,13 +908,13 @@ TEST_F(DBSSTTest, AddExternalSstFile) {
} }
// We deleted range (400 => 499) but cannot add file5 because // We deleted range (400 => 499) but cannot add file5 because
// of the range tombstones // of the range tombstones
ASSERT_NOK(db_->AddFile(file5)); ASSERT_NOK(db_->AddFile(std::vector<std::string>(1, file5)));
// Compacting the DB will remove the tombstones // Compacting the DB will remove the tombstones
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
// Now we can add the file // Now we can add the file
ASSERT_OK(db_->AddFile(file5)); ASSERT_OK(db_->AddFile(std::vector<std::string>(1, file5)));
// Verify values of file5 in DB // Verify values of file5 in DB
for (int k = 400; k < 500; k++) { for (int k = 400; k < 500; k++) {
@ -925,6 +925,230 @@ TEST_F(DBSSTTest, AddExternalSstFile) {
kSkipFIFOCompaction)); kSkipFIFOCompaction));
} }
TEST_F(DBSSTTest, AddExternalSstFileList) {
do {
std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
env_->CreateDir(sst_files_folder);
Options options = CurrentOptions();
options.env = env_;
SstFileWriter sst_file_writer(EnvOptions(), options, options.comparator);
// file1.sst (0 => 99)
std::string file1 = sst_files_folder + "file1.sst";
ASSERT_OK(sst_file_writer.Open(file1));
for (int k = 0; k < 100; k++) {
ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
}
ExternalSstFileInfo file1_info;
Status s = sst_file_writer.Finish(&file1_info);
ASSERT_TRUE(s.ok()) << s.ToString();
ASSERT_EQ(file1_info.file_path, file1);
ASSERT_EQ(file1_info.num_entries, 100);
ASSERT_EQ(file1_info.smallest_key, Key(0));
ASSERT_EQ(file1_info.largest_key, Key(99));
// sst_file_writer already finished, cannot add this value
s = sst_file_writer.Add(Key(100), "bad_val");
ASSERT_FALSE(s.ok()) << s.ToString();
// file2.sst (100 => 199)
std::string file2 = sst_files_folder + "file2.sst";
ASSERT_OK(sst_file_writer.Open(file2));
for (int k = 100; k < 200; k++) {
ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
}
// Cannot add this key because it's not after last added key
s = sst_file_writer.Add(Key(99), "bad_val");
ASSERT_FALSE(s.ok()) << s.ToString();
ExternalSstFileInfo file2_info;
s = sst_file_writer.Finish(&file2_info);
ASSERT_TRUE(s.ok()) << s.ToString();
ASSERT_EQ(file2_info.file_path, file2);
ASSERT_EQ(file2_info.num_entries, 100);
ASSERT_EQ(file2_info.smallest_key, Key(100));
ASSERT_EQ(file2_info.largest_key, Key(199));
// file3.sst (195 => 199)
// This file values overlap with file2 values
std::string file3 = sst_files_folder + "file3.sst";
ASSERT_OK(sst_file_writer.Open(file3));
for (int k = 195; k < 200; k++) {
ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val_overlap"));
}
ExternalSstFileInfo file3_info;
s = sst_file_writer.Finish(&file3_info);
ASSERT_TRUE(s.ok()) << s.ToString();
ASSERT_EQ(file3_info.file_path, file3);
ASSERT_EQ(file3_info.num_entries, 5);
ASSERT_EQ(file3_info.smallest_key, Key(195));
ASSERT_EQ(file3_info.largest_key, Key(199));
// file4.sst (30 => 39)
// This file values overlap with file1 values
std::string file4 = sst_files_folder + "file4.sst";
ASSERT_OK(sst_file_writer.Open(file4));
for (int k = 30; k < 40; k++) {
ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val_overlap"));
}
ExternalSstFileInfo file4_info;
s = sst_file_writer.Finish(&file4_info);
ASSERT_TRUE(s.ok()) << s.ToString();
ASSERT_EQ(file4_info.file_path, file4);
ASSERT_EQ(file4_info.num_entries, 10);
ASSERT_EQ(file4_info.smallest_key, Key(30));
ASSERT_EQ(file4_info.largest_key, Key(39));
// file5.sst (200 => 299)
std::string file5 = sst_files_folder + "file5.sst";
ASSERT_OK(sst_file_writer.Open(file5));
for (int k = 200; k < 300; k++) {
ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
}
ExternalSstFileInfo file5_info;
s = sst_file_writer.Finish(&file5_info);
ASSERT_TRUE(s.ok()) << s.ToString();
ASSERT_EQ(file5_info.file_path, file5);
ASSERT_EQ(file5_info.num_entries, 100);
ASSERT_EQ(file5_info.smallest_key, Key(200));
ASSERT_EQ(file5_info.largest_key, Key(299));
// list 1 has internal key range conflict
std::vector<std::string> file_list0({file1, file2});
std::vector<std::string> file_list1({file3, file2, file1});
std::vector<std::string> file_list2({file5});
std::vector<std::string> file_list3({file3, file4});
std::vector<ExternalSstFileInfo> info_list0({file1_info, file2_info});
std::vector<ExternalSstFileInfo> info_list1(
{file3_info, file2_info, file1_info});
std::vector<ExternalSstFileInfo> info_list2({file5_info});
std::vector<ExternalSstFileInfo> info_list3({file3_info, file4_info});
DestroyAndReopen(options);
// This list of files have key ranges are overlapping with each other
s = db_->AddFile(file_list1);
ASSERT_FALSE(s.ok()) << s.ToString();
s = db_->AddFile(info_list1);
ASSERT_FALSE(s.ok()) << s.ToString();
// Add files using file path list
s = db_->AddFile(file_list0);
ASSERT_TRUE(s.ok()) << s.ToString();
ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
for (int k = 0; k < 200; k++) {
ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
}
// Add file while holding a snapshot will fail
const Snapshot* s1 = db_->GetSnapshot();
if (s1 != nullptr) {
ASSERT_NOK(db_->AddFile(info_list2));
db_->ReleaseSnapshot(s1);
}
// We can add the file after releaseing the snapshot
ASSERT_OK(db_->AddFile(info_list2));
ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
for (int k = 0; k < 300; k++) {
ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
}
// This file list has overlapping values with the exisitng data
s = db_->AddFile(file_list3);
ASSERT_FALSE(s.ok()) << s.ToString();
s = db_->AddFile(info_list3);
ASSERT_FALSE(s.ok()) << s.ToString();
// Overwrite values of keys divisible by 5
for (int k = 0; k < 200; k += 5) {
ASSERT_OK(Put(Key(k), Key(k) + "_val_new"));
}
ASSERT_NE(db_->GetLatestSequenceNumber(), 0U);
// Make sure values are correct before and after flush/compaction
for (int i = 0; i < 2; i++) {
for (int k = 0; k < 200; k++) {
std::string value = Key(k) + "_val";
if (k % 5 == 0) {
value += "_new";
}
ASSERT_EQ(Get(Key(k)), value);
}
for (int k = 200; k < 300; k++) {
std::string value = Key(k) + "_val";
ASSERT_EQ(Get(Key(k)), value);
}
ASSERT_OK(Flush());
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
}
// Delete keys in range (200 => 299)
for (int k = 200; k < 300; k++) {
ASSERT_OK(Delete(Key(k)));
}
// We deleted range (200 => 299) but cannot add file5 because
// of the range tombstones
ASSERT_NOK(db_->AddFile(file_list2));
// Compacting the DB will remove the tombstones
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
// Now we can add the file
ASSERT_OK(db_->AddFile(file_list2));
// Verify values of file5 in DB
for (int k = 200; k < 300; k++) {
std::string value = Key(k) + "_val";
ASSERT_EQ(Get(Key(k)), value);
}
} while (ChangeOptions(kSkipPlainTable | kSkipUniversalCompaction |
kSkipFIFOCompaction));
}
TEST_F(DBSSTTest, AddExternalSstFileListAtomicity) {
do {
std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
env_->CreateDir(sst_files_folder);
Options options = CurrentOptions();
options.env = env_;
SstFileWriter sst_file_writer(EnvOptions(), options, options.comparator);
// files[0].sst (0 => 99)
// files[1].sst (100 => 199)
// ...
// file[8].sst (800 => 899)
size_t n = 9;
std::vector<std::string> files(n);
std::vector<ExternalSstFileInfo> files_info(n);
for (size_t i = 0; i < n; i++) {
files[i] = sst_files_folder + "file" + std::to_string(i) + ".sst";
ASSERT_OK(sst_file_writer.Open(files[i]));
for (size_t k = i * 100; k < (i + 1) * 100; k++) {
ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
}
Status s = sst_file_writer.Finish(&files_info[i]);
ASSERT_TRUE(s.ok()) << s.ToString();
ASSERT_EQ(files_info[i].file_path, files[i]);
ASSERT_EQ(files_info[i].num_entries, 100);
ASSERT_EQ(files_info[i].smallest_key, Key(i * 100));
ASSERT_EQ(files_info[i].largest_key, Key((i + 1) * 100 - 1));
}
files.push_back(sst_files_folder + "file" + std::to_string(n) + ".sst");
auto s = db_->AddFile(files);
ASSERT_NOK(s) << s.ToString();
for (size_t k = 0; k < n * 100; k++) {
ASSERT_EQ("NOT_FOUND", Get(Key(k)));
}
s = db_->AddFile(files_info);
ASSERT_OK(s);
for (size_t k = 0; k < n * 100; k++) {
std::string value = Key(k) + "_val";
ASSERT_EQ(Get(Key(k)), value);
}
} while (ChangeOptions(kSkipPlainTable | kSkipUniversalCompaction |
kSkipFIFOCompaction));
}
// This test reporduce a bug that can happen in some cases if the DB started // This test reporduce a bug that can happen in some cases if the DB started
// purging obsolete files when we are adding an external sst file. // purging obsolete files when we are adding an external sst file.
// This situation may result in deleting the file while it's being added. // This situation may result in deleting the file while it's being added.
@ -963,7 +1187,7 @@ TEST_F(DBSSTTest, AddExternalSstFilePurgeObsoleteFilesBug) {
}); });
rocksdb::SyncPoint::GetInstance()->EnableProcessing(); rocksdb::SyncPoint::GetInstance()->EnableProcessing();
s = db_->AddFile(sst_file_path); s = db_->AddFile(std::vector<std::string>(1, sst_file_path));
ASSERT_OK(s); ASSERT_OK(s);
for (int i = 0; i < 500; i++) { for (int i = 0; i < 500; i++) {
@ -1025,17 +1249,19 @@ TEST_F(DBSSTTest, AddExternalSstFileNoCopy) {
ASSERT_EQ(file3_info.num_entries, 15); ASSERT_EQ(file3_info.num_entries, 15);
ASSERT_EQ(file3_info.smallest_key, Key(110)); ASSERT_EQ(file3_info.smallest_key, Key(110));
ASSERT_EQ(file3_info.largest_key, Key(124)); ASSERT_EQ(file3_info.largest_key, Key(124));
s = db_->AddFile(std::vector<ExternalSstFileInfo>(1, file1_info),
s = db_->AddFile(&file1_info, true /* move file */); true /* move file */);
ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_TRUE(s.ok()) << s.ToString();
ASSERT_EQ(Status::NotFound(), env_->FileExists(file1)); ASSERT_EQ(Status::NotFound(), env_->FileExists(file1));
s = db_->AddFile(&file2_info, false /* copy file */); s = db_->AddFile(std::vector<ExternalSstFileInfo>(1, file2_info),
false /* copy file */);
ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_TRUE(s.ok()) << s.ToString();
ASSERT_OK(env_->FileExists(file2)); ASSERT_OK(env_->FileExists(file2));
// This file have overlapping values with the exisitng data // This file have overlapping values with the exisitng data
s = db_->AddFile(&file3_info, true /* move file */); s = db_->AddFile(std::vector<ExternalSstFileInfo>(1, file2_info),
true /* move file */);
ASSERT_FALSE(s.ok()) << s.ToString(); ASSERT_FALSE(s.ok()) << s.ToString();
ASSERT_OK(env_->FileExists(file3)); ASSERT_OK(env_->FileExists(file3));
@ -1101,7 +1327,8 @@ TEST_F(DBSSTTest, AddExternalSstFileMultiThreaded) {
// sometimes we use copy, sometimes link .. the result should be the same // sometimes we use copy, sometimes link .. the result should be the same
bool move_file = (thread_id % 3 == 0); bool move_file = (thread_id % 3 == 0);
Status s = db_->AddFile(file_names[file_idx], move_file); Status s = db_->AddFile(std::vector<std::string>(1, file_names[file_idx]),
move_file);
if (s.ok()) { if (s.ok()) {
files_added++; files_added++;
} }
@ -1198,7 +1425,7 @@ TEST_F(DBSSTTest, AddExternalSstFileOverlappingRanges) {
ASSERT_OK(s); ASSERT_OK(s);
// Insert the generated file // Insert the generated file
s = db_->AddFile(&file_info); s = db_->AddFile(std::vector<ExternalSstFileInfo>(1, file_info));
auto it = true_data.lower_bound(Key(range_start)); auto it = true_data.lower_bound(Key(range_start));
if (it != true_data.end() && it->first <= Key(range_end)) { if (it != true_data.end() && it->first <= Key(range_end)) {

@ -2642,12 +2642,12 @@ class ModelDB : public DB {
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
using DB::AddFile; using DB::AddFile;
virtual Status AddFile(ColumnFamilyHandle* column_family, virtual Status AddFile(ColumnFamilyHandle* column_family,
const ExternalSstFileInfo* file_path, const std::vector<ExternalSstFileInfo>& file_info_list,
bool move_file) override { bool move_file) override {
return Status::NotSupported("Not implemented."); return Status::NotSupported("Not implemented.");
} }
virtual Status AddFile(ColumnFamilyHandle* column_family, virtual Status AddFile(ColumnFamilyHandle* column_family,
const std::string& file_path, const std::vector<std::string>& file_path_list,
bool move_file) override { bool move_file) override {
return Status::NotSupported("Not implemented."); return Status::NotSupported("Not implemented.");
} }

@ -1098,7 +1098,7 @@ Status DBTestBase::GenerateAndAddExternalFile(const Options options,
s = sst_file_writer.Finish(); s = sst_file_writer.Finish();
if (s.ok()) { if (s.ok()) {
s = db_->AddFile(file_path); s = db_->AddFile(std::vector<std::string>(1, file_path));
} }
return s; return s;

@ -12,15 +12,16 @@
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include <memory> #include <memory>
#include <vector>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include <vector>
#include "rocksdb/immutable_options.h" #include "rocksdb/immutable_options.h"
#include "rocksdb/iterator.h" #include "rocksdb/iterator.h"
#include "rocksdb/listener.h" #include "rocksdb/listener.h"
#include "rocksdb/metadata.h" #include "rocksdb/metadata.h"
#include "rocksdb/options.h" #include "rocksdb/options.h"
#include "rocksdb/snapshot.h" #include "rocksdb/snapshot.h"
#include "rocksdb/sst_file_writer.h"
#include "rocksdb/thread_status.h" #include "rocksdb/thread_status.h"
#include "rocksdb/transaction_log.h" #include "rocksdb/transaction_log.h"
#include "rocksdb/types.h" #include "rocksdb/types.h"
@ -580,7 +581,7 @@ class DB {
} }
#if defined(__GNUC__) || defined(__clang__) #if defined(__GNUC__) || defined(__clang__)
__attribute__((deprecated)) __attribute__((__deprecated__))
#elif _WIN32 #elif _WIN32
__declspec(deprecated) __declspec(deprecated)
#endif #endif
@ -595,14 +596,13 @@ class DB {
return CompactRange(options, column_family, begin, end); return CompactRange(options, column_family, begin, end);
} }
#if defined(__GNUC__) || defined(__clang__) #if defined(__GNUC__) || defined(__clang__)
__attribute__((deprecated)) __attribute__((__deprecated__))
#elif _WIN32 #elif _WIN32
__declspec(deprecated) __declspec(deprecated)
#endif #endif
virtual Status virtual Status
CompactRange(const Slice* begin, const Slice* end, CompactRange(const Slice* begin, const Slice* end, bool change_level = false,
bool change_level = false, int target_level = -1, int target_level = -1, uint32_t target_path_id = 0) {
uint32_t target_path_id = 0) {
CompactRangeOptions options; CompactRangeOptions options;
options.change_level = change_level; options.change_level = change_level;
options.target_level = target_level; options.target_level = target_level;
@ -792,32 +792,77 @@ class DB {
GetColumnFamilyMetaData(DefaultColumnFamily(), metadata); GetColumnFamilyMetaData(DefaultColumnFamily(), metadata);
} }
// Load table file located at "file_path" into "column_family", a pointer to // Batch load table files whose paths stored in "file_path_list" into
// ExternalSstFileInfo can be used instead of "file_path" to do a blind add // "column_family", a vector of ExternalSstFileInfo can be used
// that wont need to read the file, move_file can be set to true to // instead of "file_path_list" to do a blind batch add that wont
// move the file instead of copying it. // need to read the file, move_file can be set to true to
// move the files instead of copying them.
// //
// Current Requirements: // Current Requirements:
// (1) Key range in loaded table file don't overlap with // (1) The key ranges of the files don't overlap with each other
// (1) The key range of any file in list doesn't overlap with
// existing keys or tombstones in DB. // existing keys or tombstones in DB.
// (2) No snapshots are held. // (2) No snapshots are held.
// //
// Notes: We will try to ingest the file to the lowest possible level // Notes: We will try to ingest the files to the lowest possible level
// even if the file compression dont match the level compression // even if the file compression dont match the level compression
virtual Status AddFile(ColumnFamilyHandle* column_family, virtual Status AddFile(ColumnFamilyHandle* column_family,
const std::string& file_path, const std::vector<std::string>& file_path_list,
bool move_file = false) = 0; bool move_file = false) = 0;
virtual Status AddFile(const std::string& file_path, bool move_file = false) { virtual Status AddFile(const std::vector<std::string>& file_path_list,
return AddFile(DefaultColumnFamily(), file_path, move_file); bool move_file = false) {
return AddFile(DefaultColumnFamily(), file_path_list, move_file);
}
#if defined(__GNUC__) || defined(__clang__)
__attribute__((__deprecated__))
#elif _WIN32
__declspec(deprecated)
#endif
virtual Status
AddFile(ColumnFamilyHandle* column_family, const std::string& file_path,
bool move_file = false) {
return AddFile(column_family, std::vector<std::string>(1, file_path),
move_file);
}
#if defined(__GNUC__) || defined(__clang__)
__attribute__((__deprecated__))
#elif _WIN32
__declspec(deprecated)
#endif
virtual Status
AddFile(const std::string& file_path, bool move_file = false) {
return AddFile(DefaultColumnFamily(),
std::vector<std::string>(1, file_path), move_file);
} }
// Load table file with information "file_info" into "column_family" // Load table file with information "file_info" into "column_family"
virtual Status AddFile(ColumnFamilyHandle* column_family, virtual Status AddFile(ColumnFamilyHandle* column_family,
const ExternalSstFileInfo* file_info, const std::vector<ExternalSstFileInfo>& file_info_list,
bool move_file = false) = 0; bool move_file = false) = 0;
virtual Status AddFile(const ExternalSstFileInfo* file_info, virtual Status AddFile(const std::vector<ExternalSstFileInfo>& file_info_list,
bool move_file = false) { bool move_file = false) {
return AddFile(DefaultColumnFamily(), file_info, move_file); return AddFile(DefaultColumnFamily(), file_info_list, move_file);
}
#if defined(__GNUC__) || defined(__clang__)
__attribute__((__deprecated__))
#elif _WIN32
__declspec(deprecated)
#endif
virtual Status
AddFile(ColumnFamilyHandle* column_family,
const ExternalSstFileInfo* file_info, bool move_file = false) {
return AddFile(column_family,
std::vector<ExternalSstFileInfo>(1, *file_info), move_file);
}
#if defined(__GNUC__) || defined(__clang__)
__attribute__((__deprecated__))
#elif _WIN32
__declspec(deprecated)
#endif
virtual Status
AddFile(const ExternalSstFileInfo* file_info, bool move_file = false) {
return AddFile(DefaultColumnFamily(),
std::vector<ExternalSstFileInfo>(1, *file_info), move_file);
} }
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE

@ -65,14 +65,14 @@ class StackableDB : public DB {
using DB::AddFile; using DB::AddFile;
virtual Status AddFile(ColumnFamilyHandle* column_family, virtual Status AddFile(ColumnFamilyHandle* column_family,
const ExternalSstFileInfo* file_info, const std::vector<ExternalSstFileInfo>& file_info_list,
bool move_file) override { bool move_file) override {
return db_->AddFile(column_family, file_info, move_file); return db_->AddFile(column_family, file_info_list, move_file);
} }
virtual Status AddFile(ColumnFamilyHandle* column_family, virtual Status AddFile(ColumnFamilyHandle* column_family,
const std::string& file_path, const std::vector<std::string>& file_path_list,
bool move_file) override { bool move_file) override {
return db_->AddFile(column_family, file_path, move_file); return db_->AddFile(column_family, file_path_list, move_file);
} }
using DB::KeyMayExist; using DB::KeyMayExist;

Loading…
Cancel
Save