From f4c2b7cf97a6a15577be3d0ff93c7ba2dbe86481 Mon Sep 17 00:00:00 2001 From: Dhruba Borthakur Date: Tue, 18 Dec 2012 13:05:39 -0800 Subject: [PATCH] Enhance ReadOnly mode to process all committed transactions. Summary: Leveldb has an API OpenForReadOnly() that opens the database in readonly mode. This call had an option to not process the transaction log. This patch removes this option and always processes all transactions that had been committed. It has been done in such a way that it does not create/write to any new files in the process. The invariant of "no-writes" to the leveldb data directory is still true. This enhancement allows multiple threads to open the same database in readonly mode and access all transactions that were committed right up to the OpenForReadOnly call. I changed the public API to match the new semantics because there are no users who are currently using this API. Test Plan: make clean check Reviewers: sheki Reviewed By: sheki CC: leveldb Differential Revision: https://reviews.facebook.net/D7479 --- db/db_impl.cc | 25 ++++++++++++++----------- db/db_impl.h | 12 +++++++----- db/db_impl_readonly.cc | 8 +++----- include/leveldb/db.h | 2 +- 4 files changed, 25 insertions(+), 22 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 700be1e8b..b2d738aa0 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -505,7 +505,9 @@ void DBImpl::PurgeObsoleteWALFiles() { } } -Status DBImpl::Recover(VersionEdit* edit, bool no_log_recory, +// If externalTable is set, then apply recovered transactions +// to that table. This is used for readonly mode. +Status DBImpl::Recover(VersionEdit* edit, MemTable* external_table, bool error_if_log_file_exist) { mutex_.AssertHeld(); @@ -513,7 +515,7 @@ Status DBImpl::Recover(VersionEdit* edit, bool no_log_recory, // committed only when the descriptor is created, and this directory // may already exist from a previous failed creation attempt. 
assert(db_lock_ == NULL); - if (!no_log_recory) { + if (!external_table) { env_->CreateDir(dbname_); Status s = env_->LockFile(LockFileName(dbname_), &db_lock_); if (!s.ok()) { @@ -573,14 +575,10 @@ Status DBImpl::Recover(VersionEdit* edit, bool no_log_recory, "flag but a log file already exists"); } - if (no_log_recory) { - return s; - } - // Recover in the order in which the logs were generated std::sort(logs.begin(), logs.end()); for (size_t i = 0; i < logs.size(); i++) { - s = RecoverLogFile(logs[i], edit, &max_sequence); + s = RecoverLogFile(logs[i], edit, &max_sequence, external_table); // The previous incarnation may not have written any MANIFEST // records after allocating this log number. So we manually @@ -600,7 +598,8 @@ Status DBImpl::Recover(VersionEdit* edit, bool no_log_recory, Status DBImpl::RecoverLogFile(uint64_t log_number, VersionEdit* edit, - SequenceNumber* max_sequence) { + SequenceNumber* max_sequence, + MemTable* external_table) { struct LogReporter : public log::Reader::Reporter { Env* env; Logger* info_log; @@ -645,6 +644,9 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, Slice record; WriteBatch batch; MemTable* mem = NULL; + if (external_table) { + mem = external_table; + } while (reader.ReadRecord(&record, &scratch) && status.ok()) { if (record.size() < 12) { @@ -670,7 +672,8 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, *max_sequence = last_seq; } - if (mem->ApproximateMemoryUsage() > options_.write_buffer_size) { + if (!external_table && + mem->ApproximateMemoryUsage() > options_.write_buffer_size) { status = WriteLevel0TableForRecovery(mem, edit); if (!status.ok()) { // Reflect errors immediately so that conditions like full @@ -682,13 +685,13 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, } } - if (status.ok() && mem != NULL) { + if (status.ok() && mem != NULL && !external_table) { status = WriteLevel0TableForRecovery(mem, edit); // Reflect errors immediately so that conditions like full // file-systems cause 
the DB::Open() to fail. } - if (mem != NULL) mem->Unref(); + if (mem != NULL && !external_table) mem->Unref(); delete file; return status; } diff --git a/db/db_impl.h b/db/db_impl.h index efbf5ef64..124f5eff3 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -85,8 +85,7 @@ class DBImpl : public DB { // Simulate a db crash, no elegant closing of database. void TEST_Destroy_DBImpl(); -protected: - + protected: Env* const env_; const std::string dbname_; VersionSet* versions_; @@ -96,6 +95,9 @@ protected: const Comparator* user_comparator() const { return internal_comparator_.user_comparator(); } + MemTable* GetMemTable() { + return mem_; + } private: friend class DB; @@ -111,8 +113,7 @@ protected: // Recover the descriptor from persistent storage. May do a significant // amount of work to recover recently logged updates. Any changes to // be made to the descriptor are added to *edit. - Status Recover(VersionEdit* edit, - bool no_log_recory = false, + Status Recover(VersionEdit* edit, MemTable* external_table = NULL, bool error_if_log_file_exist = false); void MaybeIgnoreError(Status* s) const; @@ -128,7 +129,8 @@ protected: Status RecoverLogFile(uint64_t log_number, VersionEdit* edit, - SequenceNumber* max_sequence); + SequenceNumber* max_sequence, + MemTable* external_table); // The following two methods are used to flush a memtable to // storage. 
The first one is used atdatabase RecoveryTime (when the diff --git a/db/db_impl_readonly.cc b/db/db_impl_readonly.cc index 18f62b805..72889215d 100644 --- a/db/db_impl_readonly.cc +++ b/db/db_impl_readonly.cc @@ -70,16 +70,14 @@ Iterator* DBImplReadOnly::NewIterator(const ReadOptions& options) { Status DB::OpenForReadOnly(const Options& options, const std::string& dbname, - DB** dbptr, bool no_log_recory, bool error_if_log_file_exist) { + DB** dbptr, bool error_if_log_file_exist) { *dbptr = NULL; DBImplReadOnly* impl = new DBImplReadOnly(options, dbname); impl->mutex_.Lock(); VersionEdit edit(impl->NumberLevels()); - Status s = impl->Recover(&edit, no_log_recory, error_if_log_file_exist); - if (s.ok() && !no_log_recory) { - s = impl->versions_->LogAndApply(&edit, &impl->mutex_); - } + Status s = impl->Recover(&edit, impl->GetMemTable(), + error_if_log_file_exist); impl->mutex_.Unlock(); if (s.ok()) { *dbptr = impl; diff --git a/include/leveldb/db.h b/include/leveldb/db.h index 2cfa53718..18a396d25 100644 --- a/include/leveldb/db.h +++ b/include/leveldb/db.h @@ -62,7 +62,7 @@ class DB { // will happen. static Status OpenForReadOnly(const Options& options, const std::string& name, DB** dbptr, - bool no_log_recory = true, bool error_if_log_file_exist = false); + bool error_if_log_file_exist = false); DB() { } virtual ~DB();