From 229e6fbe0ebc73fa8ceadd9d20638e2c52bcfb70 Mon Sep 17 00:00:00 2001 From: Affan Dar Date: Wed, 4 Sep 2019 12:08:56 -0700 Subject: [PATCH] Adding DB::GetCurrentWalFile() API as a replication/backup helper (#5765) Summary: Adding a lightweight API to get last live WAL file name and size. Meant to be used as a helper for backup/restore tooling in a larger ecosystem such as MySQL with a MyRocks storage engine. Specifically within MySQL's backup/restore mechanism, this call can be made with a write lock on the mysql db to get a transactionally consistent snapshot of the current WAL file position along with other non-rocksdb log/data files. Without this, the alternative would be to take the aforementioned lock, scan the WAL dir for all files, find the last file and note its exact size as the rocksdb 'checkpoint'. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5765 Differential Revision: D17172717 Pulled By: affandar fbshipit-source-id: f2fabafd4c0e6fc45f126670c8c88a9f84cb8a37 --- HISTORY.md | 1 + db/db_filesnapshot.cc | 10 +++++ db/db_impl/db_impl.h | 1 + db/db_test.cc | 4 ++ db/db_wal_test.cc | 50 ++++++++++++++++++++++++ db/wal_manager.cc | 28 +++++++++++++ db/wal_manager.h | 2 + include/rocksdb/db.h | 9 +++++ include/rocksdb/utilities/stackable_db.h | 4 ++ 9 files changed, 109 insertions(+) diff --git a/HISTORY.md b/HISTORY.md index da42ecf07..d7a3f0c41 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -10,6 +10,7 @@ * When user uses options.force_consistency_check in RocksDb, instead of crashing the process, we now pass the error back to the users without killing the process. ### Public API Change * Added max_write_buffer_size_to_maintain option to better control memory usage of immutable memtables. +* Added a lightweight API GetCurrentWalFile() to get last live WAL filename and size. Meant to be used as a helper for backup/restore tooling in a larger ecosystem such as MySQL with a MyRocks storage engine. 
## 6.4.0 (7/30/2019) ### Default Option Change diff --git a/db/db_filesnapshot.cc b/db/db_filesnapshot.cc index 67d994f55..a6dcdcccb 100644 --- a/db/db_filesnapshot.cc +++ b/db/db_filesnapshot.cc @@ -163,6 +163,16 @@ Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) { return wal_manager_.GetSortedWalFiles(files); } +Status DBImpl::GetCurrentWalFile(std::unique_ptr<LogFile>* current_log_file) { + uint64_t current_logfile_number; + { + InstrumentedMutexLock l(&mutex_); + current_logfile_number = logfile_number_; + } + + return wal_manager_.GetLiveWalFile(current_logfile_number, current_log_file); +} + } #endif // ROCKSDB_LITE diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 7fac09bfa..1942f0979 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -340,6 +340,7 @@ class DBImpl : public DB { uint64_t* manifest_file_size, bool flush_memtable = true) override; virtual Status GetSortedWalFiles(VectorLogPtr& files) override; + virtual Status GetCurrentWalFile(std::unique_ptr<LogFile>* current_log_file) override; virtual Status GetUpdatesSince( SequenceNumber seq_number, std::unique_ptr<TransactionLogIterator>* iter, diff --git a/db/db_test.cc b/db/db_test.cc index 906a67cda..a78ba37c1 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -2790,6 +2790,10 @@ class ModelDB : public DB { return Status::OK(); } + Status GetCurrentWalFile(std::unique_ptr<LogFile>* /*current_log_file*/) override { + return Status::OK(); + } + Status DeleteFile(std::string /*name*/) override { return Status::OK(); } Status GetUpdatesSince( diff --git a/db/db_wal_test.cc b/db/db_wal_test.cc index 2d5e7bc1d..4e0b08c9a 100644 --- a/db/db_wal_test.cc +++ b/db/db_wal_test.cc @@ -569,6 +569,56 @@ TEST_F(DBWALTest, GetSortedWalFiles) { } while (ChangeWalOptions()); } +TEST_F(DBWALTest, GetCurrentWalFile) { + do { + CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); + + std::unique_ptr<LogFile>* bad_log_file = nullptr; + ASSERT_NOK(dbfull()->GetCurrentWalFile(bad_log_file)); + + std::unique_ptr<LogFile> log_file; + 
ASSERT_OK(dbfull()->GetCurrentWalFile(&log_file)); + + // nothing has been written to the log yet + ASSERT_EQ(log_file->StartSequence(), 0); + ASSERT_EQ(log_file->SizeFileBytes(), 0); + ASSERT_EQ(log_file->Type(), kAliveLogFile); + ASSERT_GT(log_file->LogNumber(), 0); + + // add some data and verify that the file size actually moves forward + ASSERT_OK(Put(0, "foo", "v1")); + ASSERT_OK(Put(0, "foo2", "v2")); + ASSERT_OK(Put(0, "foo3", "v3")); + + ASSERT_OK(dbfull()->GetCurrentWalFile(&log_file)); + + ASSERT_EQ(log_file->StartSequence(), 0); + ASSERT_GT(log_file->SizeFileBytes(), 0); + ASSERT_EQ(log_file->Type(), kAliveLogFile); + ASSERT_GT(log_file->LogNumber(), 0); + + // force log files to cycle and add some more data, then check that + // a live, non-empty WAL file is still reported + + ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); + for (int i = 0; i < 10; i++) { + ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); + } + + ASSERT_OK(Put(0, "foo4", "v4")); + ASSERT_OK(Put(0, "foo5", "v5")); + ASSERT_OK(Put(0, "foo6", "v6")); + + ASSERT_OK(dbfull()->GetCurrentWalFile(&log_file)); + + ASSERT_EQ(log_file->StartSequence(), 0); + ASSERT_GT(log_file->SizeFileBytes(), 0); + ASSERT_EQ(log_file->Type(), kAliveLogFile); + ASSERT_GT(log_file->LogNumber(), 0); + + } while (ChangeWalOptions()); +} + TEST_F(DBWALTest, RecoveryWithLogDataForSomeCFs) { // Test for regression of WAL cleanup missing files that don't contain data // for every column family. 
diff --git a/db/wal_manager.cc b/db/wal_manager.cc index 4ef414132..1074279d5 100644 --- a/db/wal_manager.cc +++ b/db/wal_manager.cc @@ -414,6 +414,34 @@ Status WalManager::ReadFirstRecord(const WalFileType type, return s; } +Status WalManager::GetLiveWalFile(uint64_t number, std::unique_ptr<LogFile>* log_file) { + if (!log_file) { + return Status::InvalidArgument("log_file not preallocated."); + } + + if(!number) { + return Status::PathNotFound("log file not available"); + } + + Status s; + + uint64_t size_bytes; + s = env_->GetFileSize(LogFileName(db_options_.wal_dir, number), &size_bytes); + + if (!s.ok()) { + return s; + } + + log_file->reset(new LogFileImpl( + number, + kAliveLogFile, + 0, // SequenceNumber: not read from the file, always reported as 0 + size_bytes)); + + return Status::OK(); +} + + // the function returns status.ok() and sequence == 0 if the file exists, but is // empty Status WalManager::ReadFirstLine(const std::string& fname, diff --git a/db/wal_manager.h b/db/wal_manager.h index 8d185c350..97211f000 100644 --- a/db/wal_manager.h +++ b/db/wal_manager.h @@ -59,6 +59,8 @@ class WalManager { Status DeleteFile(const std::string& fname, uint64_t number); + Status GetLiveWalFile(uint64_t number, std::unique_ptr<LogFile>* log_file); + Status TEST_ReadFirstRecord(const WalFileType type, const uint64_t number, SequenceNumber* sequence) { return ReadFirstRecord(type, number, sequence); diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 023659524..6aa05baae 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -1123,6 +1123,15 @@ class DB { // Retrieve the sorted list of all wal files with earliest file first virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0; + // Retrieve information about the current WAL file. + // + // Note that the log might have rolled after this call, in which case + // the current_log_file would not point to the current log file. 
+ // + // Additionally, for the sake of optimization, current_log_file->StartSequence + // would always be set to 0. + virtual Status GetCurrentWalFile(std::unique_ptr<LogFile>* current_log_file) = 0; + // Note: this API is not yet consistent with WritePrepared transactions. // Sets iter to an iterator that is positioned at a write-batch containing // seq_number. If the sequence number is non existent, it returns an iterator diff --git a/include/rocksdb/utilities/stackable_db.h b/include/rocksdb/utilities/stackable_db.h index 3941c1821..04f1039c2 100644 --- a/include/rocksdb/utilities/stackable_db.h +++ b/include/rocksdb/utilities/stackable_db.h @@ -371,6 +371,10 @@ class StackableDB : public DB { return db_->GetSortedWalFiles(files); } + virtual Status GetCurrentWalFile(std::unique_ptr<LogFile>* current_log_file) override { + return db_->GetCurrentWalFile(current_log_file); + } + virtual Status DeleteFile(std::string name) override { return db_->DeleteFile(name); }