From 2d0380adbeab6aa91319c1aac47f6396f876c724 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Mon, 10 Oct 2022 17:59:17 -0700 Subject: [PATCH] Allow manifest fix-up without requiring prior state (#10796) Summary: This change is motivated by ensuring that `ldb update_manifest` or `UpdateManifestForFilesState` can run without expecting files to open when the old temperature is provided (in case the FileSystem strictly interprets non-kUnknown), but ended up fixing a problem in `OfflineManifestWriter` (used by `ldb unsafe_remove_sst_file`) where it would open some SST files during recovery and expect them to match the prior manifest state, even if not required by the intended new state. Also update BackupEngine to retry with Temperature kUnknown when reading file with potentially "wrong" temperature. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10796 Test Plan: tests added/updated, that fail before the change(s) and now pass Reviewed By: jay-zhuang Differential Revision: D40232645 Pulled By: jay-zhuang fbshipit-source-id: b5aa2688aecfe0c320b80a7da689b315414c20be --- HISTORY.md | 1 + db/db_test_util.h | 20 ++++++++++++++ db/experimental.cc | 4 ++- db/version_set.cc | 9 +++---- db/version_set.h | 3 ++- db/version_util.h | 4 ++- tools/ldb_cmd_test.cc | 36 ++++++++++++++++++++++++++ utilities/backup/backup_engine.cc | 13 ++++++++++ utilities/backup/backup_engine_test.cc | 12 ++++++--- 9 files changed, 91 insertions(+), 11 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index d61580c2b..a36e03ed5 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -13,6 +13,7 @@ * Fixed a bug in iterator refresh that was not freeing up SuperVersion, which could cause excessive resource pinniung (#10770). * Fixed a bug where RocksDB could be doing compaction endlessly when allow_ingest_behind is true and the bottommost level is not filled (#10767). * Fixed a memory safety bug in experimental HyperClockCache (#10768) +* Fixed some cases where `ldb update_manifest` and `ldb unsafe_remove_sst_file` are not usable because they were requiring the DB files to match the existing manifest state (before updating the manifest to match a desired state). ### Performance Improvements * Try to align the compaction output file boundaries to the next level ones, which can reduce more than 10% compaction load for the default level compaction. The feature is enabled by default, to disable, set `AdvancedColumnFamilyOptions.level_compaction_dynamic_file_size` to false. As a side effect, it can create SSTs larger than the target_file_size (capped at 2x target_file_size) or smaller files. diff --git a/db/db_test_util.h b/db/db_test_util.h index 0a35d9ffc..73d982024 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -714,6 +714,16 @@ class FileTemperatureTestFS : public FileSystemWrapper { MutexLock lock(&mu_); requested_sst_file_temperatures_.emplace_back(number, opts.temperature); if (s.ok()) { + if (opts.temperature != Temperature::kUnknown) { + // Be extra picky and don't open if a wrong non-unknown temperature is + // provided + auto e = current_sst_file_temperatures_.find(number); + if (e != current_sst_file_temperatures_.end() && + e->second != opts.temperature) { + result->reset(); + return IOStatus::PathNotFound("Temperature mismatch on " + fname); + } + } *result = WrapWithTemperature( number, std::move(*result)); } @@ -733,6 +743,16 @@ class FileTemperatureTestFS : public FileSystemWrapper { MutexLock lock(&mu_); requested_sst_file_temperatures_.emplace_back(number, opts.temperature); if (s.ok()) { + if (opts.temperature != Temperature::kUnknown) { + // Be extra picky and don't open if a wrong non-unknown temperature is + // provided + auto e = current_sst_file_temperatures_.find(number); + if (e != current_sst_file_temperatures_.end() && + e->second != opts.temperature) { + result->reset(); + return IOStatus::PathNotFound("Temperature mismatch on " + fname); + } + } *result = WrapWithTemperature( number, std::move(*result)); } diff --git a/db/experimental.cc b/db/experimental.cc index 2d155fcdb..d838ebde5 100644 --- a/db/experimental.cc +++ b/db/experimental.cc @@ -94,7 +94,9 @@ Status UpdateManifestForFilesState( std::unique_ptr f; FileOptions fopts; - fopts.temperature = lf->temperature; + // Use kUnknown to signal the FileSystem to search all tiers for the + // file. + fopts.temperature = Temperature::kUnknown; IOStatus file_ios = fs->NewSequentialFile(fname, fopts, &f, /*dbg*/ nullptr); diff --git a/db/version_set.cc b/db/version_set.cc index 009c917a0..af872f8cb 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -5523,7 +5523,7 @@ Status VersionSet::GetCurrentManifestPath(const std::string& dbname, Status VersionSet::Recover( const std::vector& column_families, bool read_only, - std::string* db_id) { + std::string* db_id, bool no_error_if_files_missing) { // Read "CURRENT" file, which contains a pointer to the current manifest file std::string manifest_path; Status s = GetCurrentManifestPath(dbname_, fs_.get(), &manifest_path, @@ -5556,10 +5556,9 @@ Status VersionSet::Recover( reporter.status = &log_read_status; log::Reader reader(nullptr, std::move(manifest_file_reader), &reporter, true /* checksum */, 0 /* log_number */); - VersionEditHandler handler(read_only, column_families, - const_cast(this), - /*track_missing_files=*/false, - /*no_error_if_files_missing=*/false, io_tracer_); + VersionEditHandler handler( + read_only, column_families, const_cast(this), + /*track_missing_files=*/false, no_error_if_files_missing, io_tracer_); handler.Iterate(reader, &log_read_status); s = handler.status(); if (s.ok()) { diff --git a/db/version_set.h b/db/version_set.h index 99625c550..4d546e8d9 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -1175,7 +1175,8 @@ class VersionSet { // If read_only == true, Recover() will not complain if some column families // are not opened Status Recover(const std::vector& column_families, - bool read_only = false, std::string* db_id = nullptr); + bool read_only = false, std::string* db_id = nullptr, + bool no_error_if_files_missing = false); Status TryRecover(const std::vector& column_families, bool read_only, diff --git a/db/version_util.h b/db/version_util.h index 6a809834d..5ec6fda11 100644 --- a/db/version_util.h +++ b/db/version_util.h @@ -28,7 +28,9 @@ class OfflineManifestWriter { /*db_id*/ "", /*db_session_id*/ "") {} Status Recover(const std::vector& column_families) { - return versions_.Recover(column_families); + return versions_.Recover(column_families, /*read_only*/ false, + /*db_id*/ nullptr, + /*no_error_if_files_missing*/ true); } Status LogAndApply(ColumnFamilyData* cfd, VersionEdit* edit, diff --git a/tools/ldb_cmd_test.cc b/tools/ldb_cmd_test.cc index cf133b7e4..5f9e05bb6 100644 --- a/tools/ldb_cmd_test.cc +++ b/tools/ldb_cmd_test.cc @@ -1024,6 +1024,42 @@ TEST_F(LdbCmdTest, UnsafeRemoveSstFile) { ASSERT_OK(db->Get(ReadOptions(), handles[0], "0", &val)); ASSERT_EQ(val, "0"); + // Determine which is the "first" one (most likely to be opened in recovery) + sst_files.clear(); + db->GetLiveFilesMetaData(&sst_files); + + numbers.clear(); + for (auto& f : sst_files) { + numbers.push_back(f.file_number); + } + ASSERT_EQ(numbers.size(), 3); + std::sort(numbers.begin(), numbers.end()); + to_remove = numbers.front(); + + // This time physically delete the file before unsafe_remove + { + std::string f = dbname + "/" + MakeTableFileName(to_remove); + ASSERT_OK(Env::Default()->DeleteFile(f)); + } + + // Close for unsafe_remove_sst_file + for (auto& h : handles) { + delete h; + } + delete db; + db = nullptr; + + snprintf(arg4, sizeof(arg4), "%" PRIu64, to_remove); + ASSERT_EQ(0, + LDBCommandRunner::RunCommand(4, argv, opts, LDBOptions(), nullptr)); + + ASSERT_OK(DB::Open(opts, dbname, cfds, &handles, &db)); + + ASSERT_OK(db->Get(ReadOptions(), handles[1], "3", &val)); + ASSERT_EQ(val, "3"); + + ASSERT_TRUE(db->Get(ReadOptions(), handles[0], "0", &val).IsNotFound()); + for (auto& h : handles) { delete h; } diff --git a/utilities/backup/backup_engine.cc b/utilities/backup/backup_engine.cc index 754293e74..877a884eb 100644 --- a/utilities/backup/backup_engine.cc +++ b/utilities/backup/backup_engine.cc @@ -2053,6 +2053,12 @@ IOStatus BackupEngineImpl::CopyOrCreateFile( io_s = src_env->GetFileSystem()->NewSequentialFile(src, src_file_options, &src_file, nullptr); } + if (io_s.IsPathNotFound() && *src_temperature != Temperature::kUnknown) { + // Retry without temperature hint in case the FileSystem is strict with + // non-kUnknown temperature option + io_s = src_env->GetFileSystem()->NewSequentialFile( + src, FileOptions(src_env_options), &src_file, nullptr); + } if (!io_s.ok()) { return io_s; } @@ -2387,6 +2393,13 @@ IOStatus BackupEngineImpl::ReadFileAndComputeChecksum( RateLimiter* rate_limiter = options_.backup_rate_limiter.get(); IOStatus io_s = SequentialFileReader::Create( src_fs, src, file_options, &src_reader, nullptr /* dbg */, rate_limiter); + if (io_s.IsPathNotFound() && src_temperature != Temperature::kUnknown) { + // Retry without temperature hint in case the FileSystem is strict with + // non-kUnknown temperature option + file_options.temperature = Temperature::kUnknown; + io_s = SequentialFileReader::Create(src_fs, src, file_options, &src_reader, + nullptr /* dbg */, rate_limiter); + } if (!io_s.ok()) { return io_s; } diff --git a/utilities/backup/backup_engine_test.cc b/utilities/backup/backup_engine_test.cc index 7b9b44e47..b145c3a8c 100644 --- a/utilities/backup/backup_engine_test.cc +++ b/utilities/backup/backup_engine_test.cc @@ -4150,13 +4150,19 @@ TEST_F(BackupEngineTest, FileTemperatures) { ASSERT_OK(backup_engine_->CreateNewBackup(db_.get())); // Verify requested temperatures against manifest temperatures (before - // backup finds out current temperatures in FileSystem) + // retry with kUnknown if needed, and before backup finds out current + // temperatures in FileSystem) std::vector> requested_temps; my_db_fs->PopRequestedSstFileTemperatures(&requested_temps); std::set distinct_requests; for (const auto& requested_temp : requested_temps) { - // Matching manifest temperatures - ASSERT_EQ(manifest_temps.at(requested_temp.first), requested_temp.second); + // Matching manifest temperatures, except allow retry request with + // kUnknown + auto manifest_temp = manifest_temps.at(requested_temp.first); + if (manifest_temp == Temperature::kUnknown || + requested_temp.second != Temperature::kUnknown) { + ASSERT_EQ(manifest_temp, requested_temp.second); + } distinct_requests.insert(requested_temp.first); } // Two distinct requests