Allow manifest fix-up without requiring prior state (#10796)

Summary:
This change is motivated by ensuring that `ldb update_manifest` or `UpdateManifestForFilesState` can run without expecting files to open when the old temperature is provided (in case the FileSystem strictly interprets non-kUnknown), but ended up fixing a problem in `OfflineManifestWriter` (used by `ldb unsafe_remove_sst_file`) where it would open some SST files during recovery and expect them to match the prior manifest state, even if not required by the intended new state.

Also update BackupEngine to retry with Temperature kUnknown when reading file with potentially "wrong" temperature.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10796

Test Plan: tests added/updated, that fail before the change(s) and now pass

Reviewed By: jay-zhuang

Differential Revision: D40232645

Pulled By: jay-zhuang

fbshipit-source-id: b5aa2688aecfe0c320b80a7da689b315414c20be
main
Peter Dillinger 2 years ago committed by Facebook GitHub Bot
parent f6a0065d54
commit 2d0380adbe
  1. 1
      HISTORY.md
  2. 20
      db/db_test_util.h
  3. 4
      db/experimental.cc
  4. 9
      db/version_set.cc
  5. 3
      db/version_set.h
  6. 4
      db/version_util.h
  7. 36
      tools/ldb_cmd_test.cc
  8. 13
      utilities/backup/backup_engine.cc
  9. 12
      utilities/backup/backup_engine_test.cc

@ -13,6 +13,7 @@
* Fixed a bug in iterator refresh that was not freeing up SuperVersion, which could cause excessive resource pinniung (#10770).
* Fixed a bug where RocksDB could be doing compaction endlessly when allow_ingest_behind is true and the bottommost level is not filled (#10767).
* Fixed a memory safety bug in experimental HyperClockCache (#10768)
* Fixed some cases where `ldb update_manifest` and `ldb unsafe_remove_sst_file` are not usable because they were requiring the DB files to match the existing manifest state (before updating the manifest to match a desired state).
### Performance Improvements
* Try to align the compaction output file boundaries to the next level ones, which can reduce more than 10% compaction load for the default level compaction. The feature is enabled by default, to disable, set `AdvancedColumnFamilyOptions.level_compaction_dynamic_file_size` to false. As a side effect, it can create SSTs larger than the target_file_size (capped at 2x target_file_size) or smaller files.

@ -714,6 +714,16 @@ class FileTemperatureTestFS : public FileSystemWrapper {
MutexLock lock(&mu_);
requested_sst_file_temperatures_.emplace_back(number, opts.temperature);
if (s.ok()) {
if (opts.temperature != Temperature::kUnknown) {
// Be extra picky and don't open if a wrong non-unknown temperature is
// provided
auto e = current_sst_file_temperatures_.find(number);
if (e != current_sst_file_temperatures_.end() &&
e->second != opts.temperature) {
result->reset();
return IOStatus::PathNotFound("Temperature mismatch on " + fname);
}
}
*result = WrapWithTemperature<FSSequentialFileOwnerWrapper>(
number, std::move(*result));
}
@ -733,6 +743,16 @@ class FileTemperatureTestFS : public FileSystemWrapper {
MutexLock lock(&mu_);
requested_sst_file_temperatures_.emplace_back(number, opts.temperature);
if (s.ok()) {
if (opts.temperature != Temperature::kUnknown) {
// Be extra picky and don't open if a wrong non-unknown temperature is
// provided
auto e = current_sst_file_temperatures_.find(number);
if (e != current_sst_file_temperatures_.end() &&
e->second != opts.temperature) {
result->reset();
return IOStatus::PathNotFound("Temperature mismatch on " + fname);
}
}
*result = WrapWithTemperature<FSRandomAccessFileOwnerWrapper>(
number, std::move(*result));
}

@ -94,7 +94,9 @@ Status UpdateManifestForFilesState(
std::unique_ptr<FSSequentialFile> f;
FileOptions fopts;
fopts.temperature = lf->temperature;
// Use kUnknown to signal the FileSystem to search all tiers for the
// file.
fopts.temperature = Temperature::kUnknown;
IOStatus file_ios =
fs->NewSequentialFile(fname, fopts, &f, /*dbg*/ nullptr);

@ -5523,7 +5523,7 @@ Status VersionSet::GetCurrentManifestPath(const std::string& dbname,
Status VersionSet::Recover(
const std::vector<ColumnFamilyDescriptor>& column_families, bool read_only,
std::string* db_id) {
std::string* db_id, bool no_error_if_files_missing) {
// Read "CURRENT" file, which contains a pointer to the current manifest file
std::string manifest_path;
Status s = GetCurrentManifestPath(dbname_, fs_.get(), &manifest_path,
@ -5556,10 +5556,9 @@ Status VersionSet::Recover(
reporter.status = &log_read_status;
log::Reader reader(nullptr, std::move(manifest_file_reader), &reporter,
true /* checksum */, 0 /* log_number */);
VersionEditHandler handler(read_only, column_families,
const_cast<VersionSet*>(this),
/*track_missing_files=*/false,
/*no_error_if_files_missing=*/false, io_tracer_);
VersionEditHandler handler(
read_only, column_families, const_cast<VersionSet*>(this),
/*track_missing_files=*/false, no_error_if_files_missing, io_tracer_);
handler.Iterate(reader, &log_read_status);
s = handler.status();
if (s.ok()) {

@ -1175,7 +1175,8 @@ class VersionSet {
// If read_only == true, Recover() will not complain if some column families
// are not opened
Status Recover(const std::vector<ColumnFamilyDescriptor>& column_families,
bool read_only = false, std::string* db_id = nullptr);
bool read_only = false, std::string* db_id = nullptr,
bool no_error_if_files_missing = false);
Status TryRecover(const std::vector<ColumnFamilyDescriptor>& column_families,
bool read_only,

@ -28,7 +28,9 @@ class OfflineManifestWriter {
/*db_id*/ "", /*db_session_id*/ "") {}
Status Recover(const std::vector<ColumnFamilyDescriptor>& column_families) {
return versions_.Recover(column_families);
return versions_.Recover(column_families, /*read_only*/ false,
/*db_id*/ nullptr,
/*no_error_if_files_missing*/ true);
}
Status LogAndApply(ColumnFamilyData* cfd, VersionEdit* edit,

@ -1024,6 +1024,42 @@ TEST_F(LdbCmdTest, UnsafeRemoveSstFile) {
ASSERT_OK(db->Get(ReadOptions(), handles[0], "0", &val));
ASSERT_EQ(val, "0");
// Determine which is the "first" one (most likely to be opened in recovery)
sst_files.clear();
db->GetLiveFilesMetaData(&sst_files);
numbers.clear();
for (auto& f : sst_files) {
numbers.push_back(f.file_number);
}
ASSERT_EQ(numbers.size(), 3);
std::sort(numbers.begin(), numbers.end());
to_remove = numbers.front();
// This time physically delete the file before unsafe_remove
{
std::string f = dbname + "/" + MakeTableFileName(to_remove);
ASSERT_OK(Env::Default()->DeleteFile(f));
}
// Close for unsafe_remove_sst_file
for (auto& h : handles) {
delete h;
}
delete db;
db = nullptr;
snprintf(arg4, sizeof(arg4), "%" PRIu64, to_remove);
ASSERT_EQ(0,
LDBCommandRunner::RunCommand(4, argv, opts, LDBOptions(), nullptr));
ASSERT_OK(DB::Open(opts, dbname, cfds, &handles, &db));
ASSERT_OK(db->Get(ReadOptions(), handles[1], "3", &val));
ASSERT_EQ(val, "3");
ASSERT_TRUE(db->Get(ReadOptions(), handles[0], "0", &val).IsNotFound());
for (auto& h : handles) {
delete h;
}

@ -2053,6 +2053,12 @@ IOStatus BackupEngineImpl::CopyOrCreateFile(
io_s = src_env->GetFileSystem()->NewSequentialFile(src, src_file_options,
&src_file, nullptr);
}
if (io_s.IsPathNotFound() && *src_temperature != Temperature::kUnknown) {
// Retry without temperature hint in case the FileSystem is strict with
// non-kUnknown temperature option
io_s = src_env->GetFileSystem()->NewSequentialFile(
src, FileOptions(src_env_options), &src_file, nullptr);
}
if (!io_s.ok()) {
return io_s;
}
@ -2387,6 +2393,13 @@ IOStatus BackupEngineImpl::ReadFileAndComputeChecksum(
RateLimiter* rate_limiter = options_.backup_rate_limiter.get();
IOStatus io_s = SequentialFileReader::Create(
src_fs, src, file_options, &src_reader, nullptr /* dbg */, rate_limiter);
if (io_s.IsPathNotFound() && src_temperature != Temperature::kUnknown) {
// Retry without temperature hint in case the FileSystem is strict with
// non-kUnknown temperature option
file_options.temperature = Temperature::kUnknown;
io_s = SequentialFileReader::Create(src_fs, src, file_options, &src_reader,
nullptr /* dbg */, rate_limiter);
}
if (!io_s.ok()) {
return io_s;
}

@ -4150,13 +4150,19 @@ TEST_F(BackupEngineTest, FileTemperatures) {
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
// Verify requested temperatures against manifest temperatures (before
// backup finds out current temperatures in FileSystem)
// retry with kUnknown if needed, and before backup finds out current
// temperatures in FileSystem)
std::vector<std::pair<uint64_t, Temperature>> requested_temps;
my_db_fs->PopRequestedSstFileTemperatures(&requested_temps);
std::set<uint64_t> distinct_requests;
for (const auto& requested_temp : requested_temps) {
// Matching manifest temperatures
ASSERT_EQ(manifest_temps.at(requested_temp.first), requested_temp.second);
// Matching manifest temperatures, except allow retry request with
// kUnknown
auto manifest_temp = manifest_temps.at(requested_temp.first);
if (manifest_temp == Temperature::kUnknown ||
requested_temp.second != Temperature::kUnknown) {
ASSERT_EQ(manifest_temp, requested_temp.second);
}
distinct_requests.insert(requested_temp.first);
}
// Two distinct requests

Loading…
Cancel
Save