diff --git a/HISTORY.md b/HISTORY.md index 003327cfd..8cdb6d118 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,4 +1,8 @@ # Rocksdb Change Log +## Unreleased +### Bug Fixes +* If the primary's CURRENT file is missing or inaccessible, the secondary instance should not hang repeatedly trying to switch to a new MANIFEST. It should instead return the error code encountered while accessing the file. + ## 6.23.0 (2021-07-16) ### Behavior Changes * Obsolete keys in the bottommost level that were preserved for a snapshot will now be cleaned upon snapshot release in all cases. This form of compaction (snapshot release triggered compaction) previously had an artificial limitation that multiple tombstones needed to be present. diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index fcca59ae6..b9c2b74df 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -1674,6 +1674,7 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, impl->DeleteObsoleteFiles(); s = impl->directories_.GetDbDir()->Fsync(IOOptions(), nullptr); + TEST_SYNC_POINT("DBImpl::Open:AfterDeleteFilesAndSyncDir"); } if (s.ok()) { // In WritePrepared there could be gap in sequence numbers. This breaks diff --git a/db/db_impl/db_impl_secondary.cc b/db/db_impl/db_impl_secondary.cc index fcd162494..4742110ca 100644 --- a/db/db_impl/db_impl_secondary.cc +++ b/db/db_impl/db_impl_secondary.cc @@ -41,6 +41,9 @@ Status DBImplSecondary::Recover( ->Recover(column_families, &manifest_reader_, &manifest_reporter_, &manifest_reader_status_); if (!s.ok()) { + if (manifest_reader_status_) { + manifest_reader_status_->PermitUncheckedError(); + } return s; } if (immutable_db_options_.paranoid_checks && s.ok()) { diff --git a/db/db_secondary_test.cc b/db/db_secondary_test.cc index 2eaf3d2b4..e3a090d6e 100644 --- a/db/db_secondary_test.cc +++ b/db/db_secondary_test.cc @@ -115,6 +115,18 @@ void DBSecondaryTest::CheckFileTypeCounts(const std::string& dir, ASSERT_EQ(expected_manifest, manifest_cnt); } +TEST_F(DBSecondaryTest, NonExistingDb) { + Destroy(last_options_); + + Options options = GetDefaultOptions(); + options.env = env_; + options.max_open_files = -1; + const std::string dbname = "/doesnt/exist"; + Status s = + DB::OpenAsSecondary(options, dbname, secondary_path_, &db_secondary_); + ASSERT_TRUE(s.IsIOError()); +} + TEST_F(DBSecondaryTest, ReopenAsSecondary) { Options options; options.env = env_; @@ -599,17 +611,19 @@ TEST_F(DBSecondaryTest, SwitchToNewManifestDuringOpen) { SyncPoint::GetInstance()->LoadDependency( {{"ReactiveVersionSet::MaybeSwitchManifest:AfterGetCurrentManifestPath:0", "VersionSet::ProcessManifestWrites:BeforeNewManifest"}, - {"VersionSet::ProcessManifestWrites:AfterNewManifest", + {"DBImpl::Open:AfterDeleteFilesAndSyncDir", "ReactiveVersionSet::MaybeSwitchManifest:AfterGetCurrentManifestPath:" "1"}}); SyncPoint::GetInstance()->EnableProcessing(); - // Make sure db calls RecoverLogFiles so as to trigger a manifest write, - // which causes the db to switch to a new MANIFEST upon start. port::Thread ro_db_thread([&]() { Options options1; options1.env = env_; options1.max_open_files = -1; + Status s = TryOpenSecondary(options1); + ASSERT_TRUE(s.IsTryAgain()); + + // Try again OpenSecondary(options1); CloseSecondary(); }); diff --git a/db/version_set.cc b/db/version_set.cc index 8880c2903..df0fff956 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -4224,7 +4224,6 @@ Status VersionSet::ProcessManifestWrites( if (!io_s.ok()) { s = io_s; } - TEST_SYNC_POINT("VersionSet::ProcessManifestWrites:AfterNewManifest"); } if (s.ok()) { @@ -5716,6 +5715,9 @@ Status ReactiveVersionSet::Recover( static_cast_with_check(manifest_reporter->get())->status = manifest_reader_status->get(); Status s = MaybeSwitchManifest(manifest_reporter->get(), manifest_reader); + if (!s.ok()) { + return s; + } log::Reader* reader = manifest_reader->get(); assert(reader); @@ -5757,43 +5759,62 @@ Status ReactiveVersionSet::MaybeSwitchManifest( std::unique_ptr* manifest_reader) { assert(manifest_reader != nullptr); Status s; - do { - std::string manifest_path; - s = GetCurrentManifestPath(dbname_, fs_.get(), &manifest_path, - &manifest_file_number_); - std::unique_ptr manifest_file; - if (s.ok()) { - if (nullptr == manifest_reader->get() || - manifest_reader->get()->file()->file_name() != manifest_path) { - TEST_SYNC_POINT( - "ReactiveVersionSet::MaybeSwitchManifest:" - "AfterGetCurrentManifestPath:0"); - TEST_SYNC_POINT( - "ReactiveVersionSet::MaybeSwitchManifest:" - "AfterGetCurrentManifestPath:1"); - s = fs_->NewSequentialFile(manifest_path, - fs_->OptimizeForManifestRead(file_options_), - &manifest_file, nullptr); - } else { - // No need to switch manifest. - break; - } - } - std::unique_ptr manifest_file_reader; - if (s.ok()) { - manifest_file_reader.reset(new SequentialFileReader( - std::move(manifest_file), manifest_path, - db_options_->log_readahead_size, io_tracer_)); - manifest_reader->reset(new log::FragmentBufferedReader( - nullptr, std::move(manifest_file_reader), reporter, - true /* checksum */, 0 /* log_number */)); - ROCKS_LOG_INFO(db_options_->info_log, "Switched to new manifest: %s\n", - manifest_path.c_str()); - if (manifest_tailer_) { - manifest_tailer_->PrepareToReadNewManifest(); - } - } - } while (s.IsPathNotFound()); + std::string manifest_path; + s = GetCurrentManifestPath(dbname_, fs_.get(), &manifest_path, + &manifest_file_number_); + if (!s.ok()) { + return s; + } + std::unique_ptr manifest_file; + if (manifest_reader->get() != nullptr && + manifest_reader->get()->file()->file_name() == manifest_path) { + // CURRENT points to the same MANIFEST as before, no need to switch + // MANIFEST. + return s; + } + assert(nullptr == manifest_reader->get() || + manifest_reader->get()->file()->file_name() != manifest_path); + s = fs_->FileExists(manifest_path, IOOptions(), nullptr); + if (s.IsNotFound()) { + return Status::TryAgain( + "The primary may have switched to a new MANIFEST and deleted the old " + "one."); + } else if (!s.ok()) { + return s; + } + TEST_SYNC_POINT( + "ReactiveVersionSet::MaybeSwitchManifest:" + "AfterGetCurrentManifestPath:0"); + TEST_SYNC_POINT( + "ReactiveVersionSet::MaybeSwitchManifest:" + "AfterGetCurrentManifestPath:1"); + // The primary can also delete the MANIFEST while the secondary is reading + // it. This is OK on POSIX. For other file systems, maybe create a hard link + // to MANIFEST. The hard link should be cleaned up later by the secondary. + s = fs_->NewSequentialFile(manifest_path, + fs_->OptimizeForManifestRead(file_options_), + &manifest_file, nullptr); + std::unique_ptr manifest_file_reader; + if (s.ok()) { + manifest_file_reader.reset( + new SequentialFileReader(std::move(manifest_file), manifest_path, + db_options_->log_readahead_size, io_tracer_)); + manifest_reader->reset(new log::FragmentBufferedReader( + nullptr, std::move(manifest_file_reader), reporter, true /* checksum */, + 0 /* log_number */)); + ROCKS_LOG_INFO(db_options_->info_log, "Switched to new manifest: %s\n", + manifest_path.c_str()); + if (manifest_tailer_) { + manifest_tailer_->PrepareToReadNewManifest(); + } + } else if (s.IsPathNotFound()) { + // This can happen if the primary switches to a new MANIFEST after the + // secondary reads the CURRENT file but before the secondary actually tries + // to open the MANIFEST. + s = Status::TryAgain( + "The primary may have switched to a new MANIFEST and deleted the old " + "one."); + } return s; }