diff --git a/HISTORY.md b/HISTORY.md index 6a62b4121..3b07e26f8 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,7 @@ # Rocksdb Change Log ## Unreleased ### Bug Fixes +* Fixed a hang when an operation such as `GetLiveFiles` or `CreateNewBackup` is asked to trigger and wait for memtable flush on a read-only DB. Such indirect requests for memtable flush are now ignored on a read-only DB. * Fixed bug where `FlushWAL(true /* sync */)` (used by `GetLiveFilesStorageInfo()`, which is used by checkpoint and backup) could cause parallel writes at the tail of a WAL file to never be synced. * Fix periodic_task unable to re-register the same task type, which may cause `SetOptions()` fail to update periodical_task time like: `stats_dump_period_sec`, `stats_persist_period_sec`. diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index 9f866a91a..7549087cf 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -253,6 +253,7 @@ TEST_F(DBBasicTest, CompactedDB) { ASSERT_OK(Put("bbb", DummyString(kFileSize / 2, 'b'))); ASSERT_OK(Put("eee", DummyString(kFileSize / 2, 'e'))); ASSERT_OK(Flush()); + ASSERT_OK(Put("something_not_flushed", "x")); Close(); ASSERT_OK(ReadOnlyReopen(options)); @@ -260,6 +261,20 @@ TEST_F(DBBasicTest, CompactedDB) { s = Put("new", "value"); ASSERT_EQ(s.ToString(), "Not implemented: Not supported operation in read only mode."); + + // TODO: validate that other write ops return NotImplemented + // (DBImplReadOnly is missing some overrides) + + // Ensure no deadlock on flush triggered by another API function + // (Old deadlock bug depends on something_not_flushed above.) + std::vector files; + uint64_t manifest_file_size; + ASSERT_OK(db_->GetLiveFiles(files, &manifest_file_size, /*flush*/ true)); + LiveFilesStorageInfoOptions lfsi_opts; + lfsi_opts.wal_size_for_flush = 0; // always + std::vector files2; + ASSERT_OK(db_->GetLiveFilesStorageInfo(lfsi_opts, &files2)); + Close(); // Full compaction @@ -290,6 +305,13 @@ TEST_F(DBBasicTest, CompactedDB) { ASSERT_EQ(DummyString(kFileSize / 2, 'j'), Get("jjj")); ASSERT_EQ("NOT_FOUND", Get("kkk")); + // TODO: validate that other write ops return NotImplemented + // (CompactedDB is missing some overrides) + + // Ensure no deadlock on flush triggered by another API function + ASSERT_OK(db_->GetLiveFiles(files, &manifest_file_size, /*flush*/ true)); + ASSERT_OK(db_->GetLiveFilesStorageInfo(lfsi_opts, &files2)); + // MultiGet std::vector values; std::vector status_list = dbfull()->MultiGet( diff --git a/db/db_impl/compacted_db_impl.h b/db/db_impl/compacted_db_impl.h index 5fa412caf..9495c447c 100644 --- a/db/db_impl/compacted_db_impl.h +++ b/db/db_impl/compacted_db_impl.h @@ -11,6 +11,7 @@ namespace ROCKSDB_NAMESPACE { +// TODO: Share common structure with DBImplSecondary and DBImplReadOnly class CompactedDBImpl : public DBImpl { public: CompactedDBImpl(const DBOptions& options, const std::string& dbname); @@ -127,6 +128,17 @@ class CompactedDBImpl : public DBImpl { return Status::NotSupported("Not supported in compacted db mode."); } + // FIXME: some missing overrides for more "write" functions + // Share with DBImplReadOnly? + + protected: +#ifndef ROCKSDB_LITE + Status FlushForGetLiveFiles() override { + // No-op for read-only DB + return Status::OK(); + } +#endif // !ROCKSDB_LITE + private: friend class DB; inline size_t FindFile(const Slice& key); diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 3053d64ac..7cc9bfbf2 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -1387,7 +1387,7 @@ class DBImpl : public DB { void NotifyOnExternalFileIngested( ColumnFamilyData* cfd, const ExternalSstFileIngestionJob& ingestion_job); - Status FlushForGetLiveFiles(); + virtual Status FlushForGetLiveFiles(); #endif // !ROCKSDB_LITE void NewThreadStatusCfInfo(ColumnFamilyData* cfd) const; diff --git a/db/db_impl/db_impl_readonly.h b/db/db_impl/db_impl_readonly.h index 21fa1021f..344fa4720 100644 --- a/db/db_impl/db_impl_readonly.h +++ b/db/db_impl/db_impl_readonly.h @@ -13,6 +13,7 @@ namespace ROCKSDB_NAMESPACE { +// TODO: Share common structure with CompactedDBImpl and DBImplSecondary class DBImplReadOnly : public DBImpl { public: DBImplReadOnly(const DBOptions& options, const std::string& dbname); @@ -141,6 +142,16 @@ class DBImplReadOnly : public DBImpl { return Status::NotSupported("Not supported operation in read only mode."); } + // FIXME: some missing overrides for more "write" functions + + protected: +#ifndef ROCKSDB_LITE + Status FlushForGetLiveFiles() override { + // No-op for read-only DB + return Status::OK(); + } +#endif // !ROCKSDB_LITE + private: // A "helper" function for DB::OpenForReadOnly without column families // to reduce unnecessary I/O diff --git a/db/db_impl/db_impl_secondary.h b/db/db_impl/db_impl_secondary.h index 867786ed3..a58ea4449 100644 --- a/db/db_impl/db_impl_secondary.h +++ b/db/db_impl/db_impl_secondary.h @@ -71,6 +71,7 @@ class LogReaderContainer { // The secondary instance can be opened using `DB::OpenAsSecondary`. After // that, it can call `DBImplSecondary::TryCatchUpWithPrimary` to make best // effort attempts to catch up with the primary. +// TODO: Share common structure with CompactedDBImpl and DBImplReadOnly class DBImplSecondary : public DBImpl { public: DBImplSecondary(const DBOptions& options, const std::string& dbname, @@ -268,6 +269,13 @@ class DBImplSecondary : public DBImpl { #endif // NDEBUG protected: +#ifndef ROCKSDB_LITE + Status FlushForGetLiveFiles() override { + // No-op for read-only DB + return Status::OK(); + } +#endif // !ROCKSDB_LITE + // ColumnFamilyCollector is a write batch handler which does nothing // except recording unique column family IDs class ColumnFamilyCollector : public WriteBatch::Handler { diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index b459aaf1c..b7fcdd786 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -1562,9 +1562,10 @@ class DB { // The valid size of the manifest file is returned in manifest_file_size. // The manifest file is an ever growing file, but only the portion specified // by manifest_file_size is valid for this snapshot. Setting flush_memtable - // to true does Flush before recording the live files. Setting flush_memtable - // to false is useful when we don't want to wait for flush which may have to - // wait for compaction to complete taking an indeterminate time. + // to true does Flush before recording the live files (unless DB is + // read-only). Setting flush_memtable to false is useful when we don't want + // to wait for flush which may have to wait for compaction to complete + // taking an indeterminate time. // // NOTE: Although GetLiveFiles() followed by GetSortedWalFiles() can generate // a lossless backup, GetLiveFilesStorageInfo() is strongly recommended diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 0338c7697..afbe9065a 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -2094,7 +2094,8 @@ struct LiveFilesStorageInfoOptions { // Whether to populate FileStorageInfo::file_checksum* or leave blank bool include_checksum_info = false; // Flushes memtables if total size in bytes of live WAL files is >= this - // number. Default: always force a flush without checking sizes. + // number (and DB is not read-only). + // Default: always force a flush without checking sizes. uint64_t wal_size_for_flush = 0; }; #endif // !ROCKSDB_LITE diff --git a/utilities/backup/backup_engine_test.cc b/utilities/backup/backup_engine_test.cc index b804c3a17..4394f3c25 100644 --- a/utilities/backup/backup_engine_test.cc +++ b/utilities/backup/backup_engine_test.cc @@ -2984,9 +2984,12 @@ TEST_F(BackupEngineTest, ReadOnlyBackupEngine) { DestroyDBWithoutCheck(dbname_, options_); OpenDBAndBackupEngine(true); FillDB(db_.get(), 0, 100); - ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true)); + // Also test read-only DB with CreateNewBackup and flush=true (no flush) + CloseAndReopenDB(/*read_only*/ true); + ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), /*flush*/ true)); + CloseAndReopenDB(/*read_only*/ false); FillDB(db_.get(), 100, 200); - ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true)); + ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), /*flush*/ true)); CloseDBAndBackupEngine(); DestroyDBWithoutCheck(dbname_, options_);