Ingest external SST files with Temperature hints (#8949)

Summary:
Add the file temperature to `IngestExternalFileArg` such that when SST files are ingested, user is able to assign the temperature to each SST file. If the temperature vector is empty or its size does not match the file name vector size, all ingested SST files will be assigned with `Temperature::unKnown`.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8949

Test Plan: add the new test and make check

Reviewed By: siying

Differential Revision: D31127852

Pulled By: zhichao-cao

fbshipit-source-id: 141a81f0f7b473d88f4ab0cb2a21a114cbc6f83c
main
Zhichao Cao 3 years ago committed by Facebook GitHub Bot
parent 2f1296ef48
commit bcd049cd2d
  1. 1
      HISTORY.md
  2. 6
      db/db_impl/db_impl.cc
  3. 5
      db/db_test2.cc
  4. 130
      db/external_sst_file_basic_test.cc
  5. 21
      db/external_sst_file_ingestion_job.cc
  6. 5
      db/external_sst_file_ingestion_job.h
  7. 3
      include/rocksdb/db.h

@ -15,6 +15,7 @@
* Made SystemClock extend the Customizable class and added a CreateFromString method. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method.
* Made SliceTransform extend the Customizable class and added a CreateFromString method. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method. The Capped and Prefixed transform classes return a short name (no length); use GetId for the fully qualified name.
* Made FileChecksumGenFactory, SstPartitionerFactory, TablePropertiesCollectorFactory, and WalFilter extend the Customizable class and added a CreateFromString method.
* Add `file_temperature` to `IngestExternalFileArg` such that when ingesting SST files, we are able to indicate the temperature of the this batch of files.
## 6.25.0 (2021-09-20)
### Bug Fixes

@ -4558,7 +4558,8 @@ Status DBImpl::IngestExternalFiles(
SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
Status es = ingestion_jobs[i].Prepare(
args[i].external_files, args[i].files_checksums,
args[i].files_checksum_func_names, start_file_number, super_version);
args[i].files_checksum_func_names, args[i].file_temperature,
start_file_number, super_version);
// capture first error only
if (!es.ok() && status.ok()) {
status = es;
@ -4573,7 +4574,8 @@ Status DBImpl::IngestExternalFiles(
SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
Status es = ingestion_jobs[0].Prepare(
args[0].external_files, args[0].files_checksums,
args[0].files_checksum_func_names, next_file_number, super_version);
args[0].files_checksum_func_names, args[0].file_temperature,
next_file_number, super_version);
if (!es.ok()) {
status = es;
}

@ -37,11 +37,10 @@ class DBTest2 : public DBTestBase {
#ifndef ROCKSDB_LITE
uint64_t GetSstSizeHelper(Temperature temperature) {
std::string prop;
bool s =
EXPECT_TRUE(
dbfull()->GetProperty(DB::Properties::kLiveSstFilesSizeAtTemperature +
ToString(static_cast<uint8_t>(temperature)),
&prop);
assert(s);
&prop));
return static_cast<uint64_t>(std::atoi(prop.c_str()));
}
#endif // ROCKSDB_LITE

@ -187,6 +187,16 @@ class ExternalSSTFileBasicTest
std::string sst_files_dir_;
std::unique_ptr<FaultInjectionTestEnv> fault_injection_test_env_;
bool random_rwfile_supported_;
#ifndef ROCKSDB_LITE
uint64_t GetSstSizeHelper(Temperature temperature) {
std::string prop;
EXPECT_TRUE(
dbfull()->GetProperty(DB::Properties::kLiveSstFilesSizeAtTemperature +
ToString(static_cast<uint8_t>(temperature)),
&prop));
return static_cast<uint64_t>(std::atoi(prop.c_str()));
}
#endif // ROCKSDB_LITE
};
TEST_F(ExternalSSTFileBasicTest, Basic) {
@ -478,6 +488,20 @@ TEST_F(ExternalSSTFileBasicTest, IngestFileWithFileChecksum) {
ASSERT_EQ(f.file_checksum_func_name, kUnknownFileChecksumFuncName);
}
// check the temperature of the file being ingested
ColumnFamilyMetaData metadata;
db_->GetColumnFamilyMetaData(&metadata);
ASSERT_EQ(1, metadata.file_count);
ASSERT_EQ(Temperature::kUnknown, metadata.levels[6].files[0].temperature);
auto size = GetSstSizeHelper(Temperature::kUnknown);
ASSERT_GT(size, 0);
size = GetSstSizeHelper(Temperature::kWarm);
ASSERT_EQ(size, 0);
size = GetSstSizeHelper(Temperature::kHot);
ASSERT_EQ(size, 0);
size = GetSstSizeHelper(Temperature::kCold);
ASSERT_EQ(size, 0);
// Reopen Db with checksum enabled
Reopen(options);
// Enable verify_file_checksum option
@ -598,6 +622,15 @@ TEST_F(ExternalSSTFileBasicTest, IngestFileWithFileChecksum) {
}
ASSERT_OK(s) << s.ToString();
ASSERT_OK(env_->FileExists(file6));
db_->GetColumnFamilyMetaData(&metadata);
size = GetSstSizeHelper(Temperature::kUnknown);
ASSERT_GT(size, 0);
size = GetSstSizeHelper(Temperature::kWarm);
ASSERT_EQ(size, 0);
size = GetSstSizeHelper(Temperature::kHot);
ASSERT_EQ(size, 0);
size = GetSstSizeHelper(Temperature::kCold);
ASSERT_EQ(size, 0);
}
TEST_F(ExternalSSTFileBasicTest, NoCopy) {
@ -1666,6 +1699,103 @@ TEST_F(ExternalSSTFileBasicTest, IngestFileAfterDBPut) {
ASSERT_EQ(Get("k"), "b");
}
TEST_F(ExternalSSTFileBasicTest, IngestWithTemperature) {
Options options = CurrentOptions();
const ImmutableCFOptions ioptions(options);
options.bottommost_temperature = Temperature::kWarm;
SstFileWriter sst_file_writer(EnvOptions(), options);
options.level0_file_num_compaction_trigger = 2;
Reopen(options);
auto size = GetSstSizeHelper(Temperature::kUnknown);
ASSERT_EQ(size, 0);
size = GetSstSizeHelper(Temperature::kWarm);
ASSERT_EQ(size, 0);
size = GetSstSizeHelper(Temperature::kHot);
ASSERT_EQ(size, 0);
// create file01.sst (1000 => 1099) and ingest it
std::string file1 = sst_files_dir_ + "file01.sst";
ASSERT_OK(sst_file_writer.Open(file1));
for (int k = 1000; k < 1100; k++) {
ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
}
ExternalSstFileInfo file1_info;
Status s = sst_file_writer.Finish(&file1_info);
ASSERT_OK(s);
ASSERT_EQ(file1_info.file_path, file1);
ASSERT_EQ(file1_info.num_entries, 100);
ASSERT_EQ(file1_info.smallest_key, Key(1000));
ASSERT_EQ(file1_info.largest_key, Key(1099));
std::vector<std::string> files;
std::vector<std::string> files_checksums;
std::vector<std::string> files_checksum_func_names;
Temperature file_temperature = Temperature::kWarm;
files.push_back(file1);
IngestExternalFileOptions in_opts;
in_opts.move_files = false;
in_opts.snapshot_consistency = true;
in_opts.allow_global_seqno = false;
in_opts.allow_blocking_flush = false;
in_opts.write_global_seqno = true;
in_opts.verify_file_checksum = false;
IngestExternalFileArg arg;
arg.column_family = db_->DefaultColumnFamily();
arg.external_files = files;
arg.options = in_opts;
arg.files_checksums = files_checksums;
arg.files_checksum_func_names = files_checksum_func_names;
arg.file_temperature = file_temperature;
s = db_->IngestExternalFiles({arg});
ASSERT_OK(s);
// check the temperature of the file being ingested
ColumnFamilyMetaData metadata;
db_->GetColumnFamilyMetaData(&metadata);
ASSERT_EQ(1, metadata.file_count);
ASSERT_EQ(Temperature::kWarm, metadata.levels[6].files[0].temperature);
size = GetSstSizeHelper(Temperature::kUnknown);
ASSERT_EQ(size, 0);
size = GetSstSizeHelper(Temperature::kWarm);
ASSERT_GT(size, 1);
// non-bottommost file still has unknown temperature
ASSERT_OK(Put("foo", "bar"));
ASSERT_OK(Put("bar", "bar"));
ASSERT_OK(Flush());
db_->GetColumnFamilyMetaData(&metadata);
ASSERT_EQ(2, metadata.file_count);
ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature);
size = GetSstSizeHelper(Temperature::kUnknown);
ASSERT_GT(size, 0);
size = GetSstSizeHelper(Temperature::kWarm);
ASSERT_GT(size, 0);
// reopen and check the information is persisted
Reopen(options);
db_->GetColumnFamilyMetaData(&metadata);
ASSERT_EQ(2, metadata.file_count);
ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature);
ASSERT_EQ(Temperature::kWarm, metadata.levels[6].files[0].temperature);
size = GetSstSizeHelper(Temperature::kUnknown);
ASSERT_GT(size, 0);
size = GetSstSizeHelper(Temperature::kWarm);
ASSERT_GT(size, 0);
// check other non-exist temperatures
size = GetSstSizeHelper(Temperature::kHot);
ASSERT_EQ(size, 0);
size = GetSstSizeHelper(Temperature::kCold);
ASSERT_EQ(size, 0);
std::string prop;
ASSERT_TRUE(dbfull()->GetProperty(
DB::Properties::kLiveSstFilesSizeAtTemperature + std::to_string(22),
&prop));
ASSERT_EQ(std::atoi(prop.c_str()), 0);
}
INSTANTIATE_TEST_CASE_P(ExternalSSTFileBasicTest, ExternalSSTFileBasicTest,
testing::Values(std::make_tuple(true, true),
std::make_tuple(true, false),

@ -31,7 +31,8 @@ Status ExternalSstFileIngestionJob::Prepare(
const std::vector<std::string>& external_files_paths,
const std::vector<std::string>& files_checksums,
const std::vector<std::string>& files_checksum_func_names,
uint64_t next_file_number, SuperVersion* sv) {
const Temperature& file_temperature, uint64_t next_file_number,
SuperVersion* sv) {
Status status;
// Read the information of files we are ingesting
@ -89,6 +90,11 @@ Status ExternalSstFileIngestionJob::Prepare(
}
}
// Hanlde the file temperature
for (size_t i = 0; i < num_files; i++) {
files_to_ingest_[i].file_temperature = file_temperature;
}
if (ingestion_options_.ingest_behind && files_overlap_) {
return Status::NotSupported("Files have overlapping ranges");
}
@ -429,12 +435,13 @@ Status ExternalSstFileIngestionJob::Run() {
current_time = oldest_ancester_time =
static_cast<uint64_t>(temp_current_time);
}
edit_.AddFile(f.picked_level, f.fd.GetNumber(), f.fd.GetPathId(),
f.fd.GetFileSize(), f.smallest_internal_key,
f.largest_internal_key, f.assigned_seqno, f.assigned_seqno,
false, kInvalidBlobFileNumber, oldest_ancester_time,
current_time, f.file_checksum, f.file_checksum_func_name);
FileMetaData f_metadata(
f.fd.GetNumber(), f.fd.GetPathId(), f.fd.GetFileSize(),
f.smallest_internal_key, f.largest_internal_key, f.assigned_seqno,
f.assigned_seqno, false, kInvalidBlobFileNumber, oldest_ancester_time,
current_time, f.file_checksum, f.file_checksum_func_name);
f_metadata.temperature = f.file_temperature;
edit_.AddFile(f.picked_level, f_metadata);
}
return status;
}

@ -68,6 +68,8 @@ struct IngestedFileInfo {
std::string file_checksum;
// The name of checksum function that generate the checksum
std::string file_checksum_func_name;
// The temperature of the file to be ingested
Temperature file_temperature = Temperature::kUnknown;
};
class ExternalSstFileIngestionJob {
@ -99,7 +101,8 @@ class ExternalSstFileIngestionJob {
Status Prepare(const std::vector<std::string>& external_files_paths,
const std::vector<std::string>& files_checksums,
const std::vector<std::string>& files_checksum_func_names,
uint64_t next_file_number, SuperVersion* sv);
const Temperature& file_temperature, uint64_t next_file_number,
SuperVersion* sv);
// Check if we need to flush the memtable before running the ingestion job
// This will be true if the files we are ingesting are overlapping with any

@ -121,12 +121,15 @@ struct RangePtr {
// empty (no checksum information is provided for ingestion). Otherwise,
// their sizes should be the same as external_files. The file order should
// be the same in three vectors and guaranteed by the caller.
// Note that, we assume the temperatures of this batch of files to be
// ingested are the same.
struct IngestExternalFileArg {
ColumnFamilyHandle* column_family = nullptr;
std::vector<std::string> external_files;
IngestExternalFileOptions options;
std::vector<std::string> files_checksums;
std::vector<std::string> files_checksum_func_names;
Temperature file_temperature = Temperature::kUnknown;
};
struct GetMergeOperandsOptions {

Loading…
Cancel
Save