From 88db97b06dfea318d3693b22ace39675cbd5a5c7 Mon Sep 17 00:00:00 2001 From: Zitan Chen <11285749+gg814@users.noreply.github.com> Date: Mon, 15 Jun 2020 10:45:03 -0700 Subject: [PATCH] Add a DB Session ID (#6959) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Added DB::GetDbSessionId by using the same format and machinery as DB::GetDbIdentity. The DB Session ID is generated (and therefore, updated) each time a DB object is opened. It is written to the LOG file right after the line of “DB SUMMARY”. A test for the uniqueness, for different openings and during the same opening, is also added. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6959 Test Plan: Passed make check Reviewed By: zhichao-cao Differential Revision: D21951721 Pulled By: gg814 fbshipit-source-id: 958a48a612db49a39998ea703cded45987d3fa8b --- HISTORY.md | 1 + db/db_basic_test.cc | 50 ++++++++++++++++++++++++ db/db_impl/db_impl.cc | 18 ++++++++- db/db_impl/db_impl.h | 9 +++++ db/db_info_dumper.cc | 5 ++- db/db_info_dumper.h | 3 +- db/db_test.cc | 4 ++ include/rocksdb/db.h | 7 ++++ include/rocksdb/utilities/stackable_db.h | 4 ++ 9 files changed, 98 insertions(+), 3 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index fc811d00d..acab10db7 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -25,6 +25,7 @@ * `pin_l0_filter_and_index_blocks_in_cache` no longer applies to L0 files larger than `1.5 * write_buffer_size` to give more predictable memory usage. Such L0 files may exist due to intra-L0 compaction, external file ingestion, or user dynamically changing `write_buffer_size` (note, however, that files that are already pinned will continue being pinned, even after such a dynamic change). * In point-in-time wal recovery mode, fail database recovery in case of IOError while reading the WAL to avoid data loss. 
* A new method `Env::LowerThreadPoolCPUPriority(Priority, CpuPriority)` is added to `Env` to be able to lower to a specific priority such as `CpuPriority::kIdle`. +* `DB::GetDbSessionId(std::string& session_id)` is added. `session_id` stores a unique identifier that gets reset every time the DB is opened. This DB session ID should be unique among all open DB instances on all hosts, and should be unique among re-openings of the same or other DBs. This identifier is recorded in the `LOG` file on the line starting with `DB Session ID:`. ### New Features * sst_dump to add a new --readahead_size argument. Users can specify read size when scanning the data. Sst_dump also tries to prefetch tail part of the SST files so usually some number of I/Os are saved there too. diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index 3544400ba..b2b4ff92a 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -41,6 +41,56 @@ TEST_F(DBBasicTest, OpenWhenOpen) { delete db2; } +TEST_F(DBBasicTest, UniqueSession) { + Options options = CurrentOptions(); + std::string sid1, sid2, sid3, sid4; + + db_->GetDbSessionId(sid1); + Reopen(options); + db_->GetDbSessionId(sid2); + ASSERT_OK(Put("foo", "v1")); + db_->GetDbSessionId(sid4); + Reopen(options); + db_->GetDbSessionId(sid3); + + ASSERT_NE(sid1, sid2); + ASSERT_NE(sid1, sid3); + ASSERT_NE(sid2, sid3); + + ASSERT_EQ(sid2, sid4); + +#ifndef ROCKSDB_LITE + Close(); + ASSERT_OK(ReadOnlyReopen(options)); + db_->GetDbSessionId(sid1); + // Test uniqueness between readonly open (sid1) and regular open (sid3) + ASSERT_NE(sid1, sid3); + Close(); + ASSERT_OK(ReadOnlyReopen(options)); + db_->GetDbSessionId(sid2); + ASSERT_EQ("v1", Get("foo")); + db_->GetDbSessionId(sid3); + + ASSERT_NE(sid1, sid2); + + ASSERT_EQ(sid2, sid3); +#endif // ROCKSDB_LITE + + CreateAndReopenWithCF({"goku"}, options); + db_->GetDbSessionId(sid1); + ASSERT_OK(Put("bar", "e1")); + db_->GetDbSessionId(sid2); + ASSERT_EQ("e1", Get("bar")); + db_->GetDbSessionId(sid3); + 
ReopenWithColumnFamilies({"default", "goku"}, options); + db_->GetDbSessionId(sid4); + + ASSERT_EQ(sid1, sid2); + ASSERT_EQ(sid2, sid3); + + ASSERT_NE(sid1, sid4); +} + #ifndef ROCKSDB_LITE TEST_F(DBBasicTest, ReadOnlyDB) { ASSERT_OK(Put("foo", "v1")); diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index bf8185175..e3f9d1018 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -249,7 +249,8 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname, new ColumnFamilyMemTablesImpl(versions_->GetColumnFamilySet())); DumpRocksDBBuildVersion(immutable_db_options_.info_log.get()); - DumpDBFileSummary(immutable_db_options_, dbname_); + SetDbSessionId(); + DumpDBFileSummary(immutable_db_options_, dbname_, db_session_id_); immutable_db_options_.Dump(immutable_db_options_.info_log.get()); mutable_db_options_.Dump(immutable_db_options_.info_log.get()); DumpSupportInfo(immutable_db_options_.info_log.get()); @@ -3601,6 +3602,21 @@ Status DBImpl::GetDbIdentityFromIdentityFile(std::string* identity) const { return s; } +Status DBImpl::GetDbSessionId(std::string& session_id) const { + session_id.assign(db_session_id_); + return Status::OK(); +} + +void DBImpl::SetDbSessionId() { + // GenerateUniqueId() generates an identifier + // that has a negligible probability of being duplicated + db_session_id_ = env_->GenerateUniqueId(); + // Remove the extra '\n' at the end if there is one + if (!db_session_id_.empty() && db_session_id_.back() == '\n') { + db_session_id_.pop_back(); + } +} + // Default implementation -- returns not supported status Status DB::CreateColumnFamily(const ColumnFamilyOptions& /*cf_options*/, const std::string& /*column_family_name*/, diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index c3edb9be5..7e3852868 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -350,6 +350,8 @@ class DBImpl : public DB { virtual Status GetDbIdentityFromIdentityFile(std::string* identity) const; + virtual Status 
GetDbSessionId(std::string& session_id) const override; + ColumnFamilyHandle* DefaultColumnFamily() const override; ColumnFamilyHandle* PersistentStatsColumnFamily() const; @@ -980,6 +982,9 @@ class DBImpl : public DB { protected: const std::string dbname_; std::string db_id_; + // db_session_id_ is an identifier that gets reset + // every time the DB is opened + std::string db_session_id_; std::unique_ptr<VersionSet> versions_; // Flag to check whether we allocated and own the info log file bool own_info_log_; @@ -1161,6 +1166,10 @@ class DBImpl : public DB { // bump up the version set's next_file_number_ to be 1 + largest_file_number. Status FinishBestEffortsRecovery(); + // SetDbSessionId() should be called in the constructor DBImpl() + // to ensure that db_session_id_ gets updated every time the DB is opened + void SetDbSessionId(); + private: friend class DB; friend class ErrorHandler; diff --git a/db/db_info_dumper.cc b/db/db_info_dumper.cc index 7008ca6ff..f57198ff1 100644 --- a/db/db_info_dumper.cc +++ b/db/db_info_dumper.cc @@ -17,7 +17,8 @@ namespace ROCKSDB_NAMESPACE { void DumpDBFileSummary(const ImmutableDBOptions& options, - const std::string& dbname) { + const std::string& dbname, + const std::string& session_id) { if (options.info_log == nullptr) { return; } @@ -32,6 +33,8 @@ void DumpDBFileSummary(const ImmutableDBOptions& options, std::string file_info, wal_info; Header(options.info_log, "DB SUMMARY\n"); + Header(options.info_log, "DB Session ID: %s\n", session_id.c_str()); + // Get files in dbname dir if (!env->GetChildren(dbname, &files).ok()) { Error(options.info_log, diff --git a/db/db_info_dumper.h b/db/db_info_dumper.h index 91404cbd7..f518e840f 100644 --- a/db/db_info_dumper.h +++ b/db/db_info_dumper.h @@ -10,5 +10,6 @@ namespace ROCKSDB_NAMESPACE { void DumpDBFileSummary(const ImmutableDBOptions& options, - const std::string& dbname); + const std::string& dbname, + const std::string& session_id = ""); } // namespace ROCKSDB_NAMESPACE diff --git 
a/db/db_test.cc b/db/db_test.cc index 080a14eea..55df60afb 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -3031,6 +3031,10 @@ class ModelDB : public DB { return Status::OK(); } + Status GetDbSessionId(std::string& /*session_id*/) const override { + return Status::OK(); + } + SequenceNumber GetLatestSequenceNumber() const override { return 0; } bool SetPreserveDeletesSequenceNumber(SequenceNumber /*seqnum*/) override { diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 337827191..1fa3158a8 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -1544,6 +1544,13 @@ class DB { // Returns Status::OK if identity could be set properly virtual Status GetDbIdentity(std::string& identity) const = 0; + // Return a unique identifier for each DB object that is opened + // This DB session ID should be unique among all open DB instances on all + // hosts, and should be unique among re-openings of the same or other DBs. + // (Two open DBs have the same identity from other function GetDbIdentity when + // one is physically copied from the other.) + virtual Status GetDbSessionId(std::string& session_id) const = 0; + // Returns default column family handle virtual ColumnFamilyHandle* DefaultColumnFamily() const = 0; diff --git a/include/rocksdb/utilities/stackable_db.h b/include/rocksdb/utilities/stackable_db.h index 9888fa22d..35cc6f87b 100644 --- a/include/rocksdb/utilities/stackable_db.h +++ b/include/rocksdb/utilities/stackable_db.h @@ -400,6 +400,10 @@ class StackableDB : public DB { return db_->GetDbIdentity(identity); } + virtual Status GetDbSessionId(std::string& session_id) const override { + return db_->GetDbSessionId(session_id); + } + using DB::SetOptions; virtual Status SetOptions(ColumnFamilyHandle* column_family_handle, const std::unordered_map&