diff --git a/db/db_impl/db_impl_secondary.cc b/db/db_impl/db_impl_secondary.cc index 55fd72acd..6dd34c49b 100644 --- a/db/db_impl/db_impl_secondary.cc +++ b/db/db_impl/db_impl_secondary.cc @@ -682,12 +682,6 @@ Status DB::OpenAsSecondary( const std::vector& column_families, std::vector* handles, DB** dbptr) { *dbptr = nullptr; - if (db_options.max_open_files != -1) { - // TODO (yanqin) maybe support max_open_files != -1 by creating hard links - // on SST files so that db secondary can still have access to old SSTs - // while primary instance may delete original. - return Status::InvalidArgument("require max_open_files to be -1"); - } DBOptions tmp_opts(db_options); Status s; @@ -699,6 +693,27 @@ Status DB::OpenAsSecondary( } } + assert(tmp_opts.info_log != nullptr); + if (db_options.max_open_files != -1) { + std::ostringstream oss; + oss << "The primary instance may delete all types of files after they " + "become obsolete. The application can coordinate the primary and " + "secondary so that primary does not delete/rename files that are " + "currently being used by the secondary. Alternatively, a custom " + "Env/FS can be provided such that files become inaccessible only " + "after all primary and secondaries indicate that they are obsolete " + "and deleted. If the above two are not possible, you can open the " + "secondary instance with `max_open_files==-1` so that secondary " + "will eagerly keep all table files open. Even if a file is deleted, " + "its content can still be accessed via a prior open file " + "descriptor. This is a hacky workaround for only table files. If " + "none of the above is done, then point lookup or " + "range scan via the secondary instance can result in IOError: file " + "not found. This can be resolved by retrying " + "TryCatchUpWithPrimary()."; + ROCKS_LOG_WARN(tmp_opts.info_log, "%s", oss.str().c_str()); + } + handles->clear(); DBImplSecondary* impl = new DBImplSecondary(tmp_opts, dbname, secondary_path); impl->versions_.reset(new ReactiveVersionSet( diff --git a/db/db_impl/db_impl_secondary.h b/db/db_impl/db_impl_secondary.h index d03b76f03..867786ed3 100644 --- a/db/db_impl/db_impl_secondary.h +++ b/db/db_impl/db_impl_secondary.h @@ -84,8 +84,17 @@ class DBImplSecondary : public DBImpl { bool error_if_data_exists_in_wals, uint64_t* = nullptr, RecoveryContext* recovery_ctx = nullptr) override; - // Implementations of the DB interface + // Implementations of the DB interface. using DB::Get; + // Can return IOError due to files being deleted by the primary. To avoid + // IOError in this case, application can coordinate between primary and + // secondaries so that primary will not delete files that are currently being + // used by the secondaries. The application can also provide a custom FS/Env + // implementation so that files will remain present until all primary and + // secondaries indicate that they can be deleted. As a partial hacky + // workaround, the secondaries can be opened with `max_open_files=-1` so that + // it eagerly keeps all talbe files open and is able to access the contents of + // deleted files via prior open fd. Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) override; @@ -98,6 +107,15 @@ class DBImplSecondary : public DBImpl { std::string* timestamp); using DBImpl::NewIterator; + // Operations on the created iterators can return IOError due to files being + // deleted by the primary. To avoid IOError in this case, application can + // coordinate between primary and secondaries so that primary will not delete + // files that are currently being used by the secondaries. The application can + // also provide a custom FS/Env implementation so that files will remain + // present until all primary and secondaries indicate that they can be + // deleted. As a partial hacky workaround, the secondaries can be opened with + // `max_open_files=-1` so that it eagerly keeps all talbe files open and is + // able to access the contents of deleted files via prior open fd. Iterator* NewIterator(const ReadOptions&, ColumnFamilyHandle* column_family) override;