From d6b9c4ae26dfbb59eecbe27d85ada22b36dee44c Mon Sep 17 00:00:00 2001 From: Yanqin Jin Date: Tue, 5 Jul 2022 10:09:44 -0700 Subject: [PATCH] Update code comment and logging for secondary instance (#10260) Summary: Before this PR, it is required that application open RocksDB secondary instance with `max_open_files = -1`. This is a hacky workaround that prevents IOErrors on the seconary instance during point-lookup or range scan caused by primary instance deleting the table files. This is not necessary if the application can coordinate the primary and secondaries so that primary does not delete files that are still being used by the secondaries. Or users can provide a custom Env/FS implementation that deletes the files only after all primary and secondary instances indicate files are obsolete and deleted. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10260 Test Plan: make check Reviewed By: jay-zhuang Differential Revision: D37462633 Pulled By: riversand963 fbshipit-source-id: 9c2fc939f49663efa61e3d60c8f1e01d64b9d72c --- db/db_impl/db_impl_secondary.cc | 27 +++++++++++++++++++++------ db/db_impl/db_impl_secondary.h | 20 +++++++++++++++++++- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/db/db_impl/db_impl_secondary.cc b/db/db_impl/db_impl_secondary.cc index 55fd72acd..6dd34c49b 100644 --- a/db/db_impl/db_impl_secondary.cc +++ b/db/db_impl/db_impl_secondary.cc @@ -682,12 +682,6 @@ Status DB::OpenAsSecondary( const std::vector& column_families, std::vector* handles, DB** dbptr) { *dbptr = nullptr; - if (db_options.max_open_files != -1) { - // TODO (yanqin) maybe support max_open_files != -1 by creating hard links - // on SST files so that db secondary can still have access to old SSTs - // while primary instance may delete original. - return Status::InvalidArgument("require max_open_files to be -1"); - } DBOptions tmp_opts(db_options); Status s; @@ -699,6 +693,27 @@ Status DB::OpenAsSecondary( } } + assert(tmp_opts.info_log != nullptr); + if (db_options.max_open_files != -1) { + std::ostringstream oss; + oss << "The primary instance may delete all types of files after they " + "become obsolete. The application can coordinate the primary and " + "secondary so that primary does not delete/rename files that are " + "currently being used by the secondary. Alternatively, a custom " + "Env/FS can be provided such that files become inaccessible only " + "after all primary and secondaries indicate that they are obsolete " + "and deleted. If the above two are not possible, you can open the " + "secondary instance with `max_open_files==-1` so that secondary " + "will eagerly keep all table files open. Even if a file is deleted, " + "its content can still be accessed via a prior open file " + "descriptor. This is a hacky workaround for only table files. If " + "none of the above is done, then point lookup or " + "range scan via the secondary instance can result in IOError: file " + "not found. This can be resolved by retrying " + "TryCatchUpWithPrimary()."; + ROCKS_LOG_WARN(tmp_opts.info_log, "%s", oss.str().c_str()); + } + handles->clear(); DBImplSecondary* impl = new DBImplSecondary(tmp_opts, dbname, secondary_path); impl->versions_.reset(new ReactiveVersionSet( diff --git a/db/db_impl/db_impl_secondary.h b/db/db_impl/db_impl_secondary.h index d03b76f03..867786ed3 100644 --- a/db/db_impl/db_impl_secondary.h +++ b/db/db_impl/db_impl_secondary.h @@ -84,8 +84,17 @@ class DBImplSecondary : public DBImpl { bool error_if_data_exists_in_wals, uint64_t* = nullptr, RecoveryContext* recovery_ctx = nullptr) override; - // Implementations of the DB interface + // Implementations of the DB interface. using DB::Get; + // Can return IOError due to files being deleted by the primary. To avoid + // IOError in this case, application can coordinate between primary and + // secondaries so that primary will not delete files that are currently being + // used by the secondaries. The application can also provide a custom FS/Env + // implementation so that files will remain present until all primary and + // secondaries indicate that they can be deleted. As a partial hacky + // workaround, the secondaries can be opened with `max_open_files=-1` so that + // it eagerly keeps all talbe files open and is able to access the contents of + // deleted files via prior open fd. Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) override; @@ -98,6 +107,15 @@ class DBImplSecondary : public DBImpl { std::string* timestamp); using DBImpl::NewIterator; + // Operations on the created iterators can return IOError due to files being + // deleted by the primary. To avoid IOError in this case, application can + // coordinate between primary and secondaries so that primary will not delete + // files that are currently being used by the secondaries. The application can + // also provide a custom FS/Env implementation so that files will remain + // present until all primary and secondaries indicate that they can be + // deleted. As a partial hacky workaround, the secondaries can be opened with + // `max_open_files=-1` so that it eagerly keeps all talbe files open and is + // able to access the contents of deleted files via prior open fd. Iterator* NewIterator(const ReadOptions&, ColumnFamilyHandle* column_family) override;