|
|
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
//
|
|
|
|
// Copyright (c) 2012 Facebook. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file.
|
|
|
|
|
|
|
|
#include "db/db_impl_readonly.h"
|
|
|
|
#include "db/db_impl.h"
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
#include <set>
|
|
|
|
#include <string>
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <vector>
|
|
|
|
#include <algorithm>
|
|
|
|
#include "db/db_iter.h"
|
|
|
|
#include "db/dbformat.h"
|
|
|
|
#include "db/filename.h"
|
|
|
|
#include "db/log_reader.h"
|
|
|
|
#include "db/log_writer.h"
|
|
|
|
#include "db/memtable.h"
|
|
|
|
#include "db/merge_context.h"
|
|
|
|
#include "db/table_cache.h"
|
|
|
|
#include "db/version_set.h"
|
|
|
|
#include "db/write_batch_internal.h"
|
|
|
|
#include "rocksdb/db.h"
|
|
|
|
#include "rocksdb/env.h"
|
|
|
|
#include "rocksdb/status.h"
|
|
|
|
#include "rocksdb/table.h"
|
|
|
|
#include "rocksdb/merge_operator.h"
|
|
|
|
#include "port/port.h"
|
|
|
|
#include "table/block.h"
|
|
|
|
#include "table/merger.h"
|
|
|
|
#include "table/two_level_iterator.h"
|
|
|
|
#include "util/coding.h"
|
|
|
|
#include "util/logging.h"
|
|
|
|
#include "util/build_version.h"
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
DBImplReadOnly::DBImplReadOnly(const Options& options,
|
|
|
|
const std::string& dbname)
|
|
|
|
: DBImpl(options, dbname) {
|
|
|
|
Log(options_.info_log, "Opening the db in read only mode");
|
|
|
|
}
|
|
|
|
|
|
|
|
DBImplReadOnly::~DBImplReadOnly() {
|
|
|
|
}
|
|
|
|
|
|
|
|
// Implementations of the DB interface
|
|
|
|
Status DBImplReadOnly::Get(const ReadOptions& options,
|
|
|
|
const Slice& key,
|
|
|
|
std::string* value) {
|
|
|
|
Status s;
|
|
|
|
SequenceNumber snapshot = versions_->LastSequence();
|
|
|
|
SuperVersion* super_version = GetSuperVersion();
|
|
|
|
MergeContext merge_context;
|
|
|
|
LookupKey lkey(key, snapshot);
|
|
|
|
if (super_version->mem->Get(lkey, value, &s, merge_context, options_)) {
|
|
|
|
} else {
|
|
|
|
Version::GetStats stats;
|
|
|
|
super_version->current->Get(options, lkey, value, &s, &merge_context,
|
|
|
|
&stats, options_);
|
|
|
|
}
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
Iterator* DBImplReadOnly::NewIterator(const ReadOptions& options) {
|
|
|
|
SequenceNumber latest_snapshot;
|
|
|
|
Iterator* internal_iter = NewInternalIterator(options, &latest_snapshot);
|
|
|
|
return NewDBIterator(
|
|
|
|
&dbname_, env_, options_, user_comparator(),internal_iter,
|
|
|
|
(options.snapshot != nullptr
|
|
|
|
? reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_
|
|
|
|
: latest_snapshot));
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Status DB::OpenForReadOnly(const Options& options, const std::string& dbname,
|
|
|
|
DB** dbptr, bool error_if_log_file_exist) {
|
|
|
|
*dbptr = nullptr;
|
|
|
|
|
|
|
|
DBImplReadOnly* impl = new DBImplReadOnly(options, dbname);
|
|
|
|
impl->mutex_.Lock();
|
Refactor Recover() code
Summary:
This diff does two things:
* Rethinks how we call Recover() with read_only option. Before, we call it with pointer to memtable where we'd like to apply those changes to. This memtable is set in db_impl_readonly.cc and it's actually DBImpl::mem_. Why don't we just apply updates to mem_ right away? It seems more intuitive.
* Changes when we apply updates to manifest. Before, the process is to recover all the logs, flush it to sst files and then do one giant commit that atomically adds all recovered sst files and sets the next log number. This works good enough, but causes some small troubles for my column family approach, since I can't have one VersionEdit apply to more than single column family[1]. The change here is to commit the files recovered from logs right away. Here is the state of the world before the change:
1. Recover log 5, add new sst files to edit
2. Recover log 7, add new sst files to edit
3. Recover log 8, add new sst files to edit
4. Commit all added sst files to manifest and mark log files 5, 7 and 8 as recoverd (via SetLogNumber(9) function)
After the change, we'll do:
1. Recover log 5, commit the new sst files and set log 5 as recovered
2. Recover log 7, commit the new sst files and set log 7 as recovered
3. Recover log 8, commit the new sst files and set log 8 as recovered
The added (small) benefit is that if we fail after (2), the new recovery will only have to recover log 8. In previous case, we'll have to restart the recovery from the beginning. The bigger benefit will be to enable easier integration of multiple column families in Recovery code path.
[1] I'm happy to dicuss this decison, but I believe this is the cleanest way to go. It also makes backward compatibility much easier. We don't have a requirement of adding multiple column families atomically.
Test Plan: make check
Reviewers: dhruba, haobo, kailiu, sdong
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15237
11 years ago
|
|
|
Status s = impl->Recover(true /* read only */, error_if_log_file_exist);
|
|
|
|
if (s.ok()) {
|
|
|
|
delete impl->InstallSuperVersion(new DBImpl::SuperVersion());
|
|
|
|
}
|
|
|
|
impl->mutex_.Unlock();
|
|
|
|
if (s.ok()) {
|
|
|
|
*dbptr = impl;
|
|
|
|
} else {
|
|
|
|
delete impl;
|
|
|
|
}
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace rocksdb
|