Moved checkpoint to utilities

Summary:
Moved checkpoint to utilities.
Addressed comments by Igor, Siying, Dhruba

Test Plan: db_test/SnapshotLink

Reviewers: dhruba, igor, sdong

Reviewed By: igor

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D29079
main
Venkatesh Radhakrishnan 10 years ago
parent beabc6879c
commit 004f416b77
  1. 6
      HISTORY.md
  2. 107
      db/db_filesnapshot.cc
  3. 5
      db/db_impl.h
  4. 9
      db/db_test.cc
  5. 6
      include/rocksdb/db.h
  6. 34
      include/rocksdb/utilities/checkpoint.h
  7. 4
      include/rocksdb/utilities/stackable_db.h
  8. 168
      utilities/checkpoint/checkpoint.cc

@ -3,6 +3,12 @@
### Unreleased Features ### Unreleased Features
* Add rocksdb::GetThreadList(), which returns the current status of all rocksdb-related threads. * Add rocksdb::GetThreadList(), which returns the current status of all rocksdb-related threads.
### Public API changes
* New API to create a checkpoint added. Given a directory name, creates a new
database which is an image of the existing database.
* New API LinkFile added to Env. If you implement your own Env class, an
implementation of the API LinkFile will have to be provided.
## 3.8.0 (11/14/2014) ## 3.8.0 (11/14/2014)
### Public API changes ### Public API changes

@ -138,113 +138,6 @@ Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) {
return wal_manager_.GetSortedWalFiles(files); return wal_manager_.GetSortedWalFiles(files);
} }
// Builds an openable snapshot of RocksDB in snapshot_dir.
//
// The snapshot is assembled in "<snapshot_dir>.tmp" and renamed to
// snapshot_dir only after every live file has been linked or copied:
//  * SST files are hard-linked; once LinkFile() reports NotSupported, that
//    file and all remaining SST files are copied instead
//  * the manifest is copied, limited to manifest_file_size bytes
//  * every other live file (CURRENT) is copied with a size argument of 0
//    (presumably "whole file" -- confirm against CopyFile)
//
// File deletions are disabled while the files are being linked/copied.
//
// Returns InvalidArgument if snapshot_dir already exists, Corruption if a
// live file name cannot be parsed, and otherwise the status of the first
// failing step. On failure the temporary directory is removed on a
// best-effort basis.
Status DBImpl::CreateCheckpoint(const std::string& snapshot_dir) {
  Status s;
  std::vector<std::string> live_files;
  uint64_t manifest_file_size = 0;
  uint64_t sequence_number = GetLatestSequenceNumber();
  bool same_fs = true;

  if (env_->FileExists(snapshot_dir)) {
    return Status::InvalidArgument("Directory exists");
  }

  s = DisableFileDeletions();
  if (s.ok()) {
    // this will return live_files prefixed with "/"
    s = GetLiveFiles(live_files, &manifest_file_size, true);
  }
  if (!s.ok()) {
    EnableFileDeletions(false);
    return s;
  }

  Log(db_options_.info_log,
      "Started the snapshot process -- creating snapshot in directory %s",
      snapshot_dir.c_str());

  std::string full_private_path = snapshot_dir + ".tmp";

  // create snapshot directory
  s = env_->CreateDir(full_private_path);

  // copy/hard link live_files
  for (size_t i = 0; s.ok() && i < live_files.size(); ++i) {
    uint64_t number;
    FileType type;
    bool ok = ParseFileName(live_files[i], &number, &type);
    if (!ok) {
      s = Status::Corruption("Can't parse file name. This is very bad");
      break;
    }
    // we should only get sst, manifest and current files here
    assert(type == kTableFile || type == kDescriptorFile ||
           type == kCurrentFile);
    assert(live_files[i].size() > 0 && live_files[i][0] == '/');
    std::string src_fname = live_files[i];

    // rules:
    // * if it's kTableFile, then it's shared
    // * if it's kDescriptorFile, limit the size to manifest_file_size
    // * always copy if cross-device link
    if ((type == kTableFile) && same_fs) {
      Log(db_options_.info_log, "Hard Linking %s", src_fname.c_str());
      s = env_->LinkFile(GetName() + src_fname, full_private_path + src_fname);
      if (s.IsNotSupported()) {
        // Linking is unavailable: fall through to the copy branch below for
        // this file and for every remaining table file.
        same_fs = false;
        s = Status::OK();
      }
    }
    if ((type != kTableFile) || (!same_fs)) {
      Log(db_options_.info_log, "Copying %s", src_fname.c_str());
      s = CopyFile(env_, GetName() + src_fname, full_private_path + src_fname,
                   (type == kDescriptorFile) ? manifest_file_size : 0);
    }
  }

  // we copied all the files, enable file deletions
  EnableFileDeletions(false);

  if (s.ok()) {
    // move tmp private backup to real snapshot directory
    s = env_->RenameFile(full_private_path, snapshot_dir);
  }
  if (s.ok()) {
    unique_ptr<Directory> snapshot_directory;
    env_->NewDirectory(snapshot_dir, &snapshot_directory);
    if (snapshot_directory != nullptr) {
      s = snapshot_directory->Fsync();
    }
  }

  if (!s.ok()) {
    // clean all the files we might have created
    Log(db_options_.info_log, "Snapshot failed -- %s", s.ToString().c_str());
    // NOTE(review): if the failure came from Fsync() after a successful
    // rename, the tmp directory no longer exists and snapshot_dir is left
    // in place; only the tmp directory is cleaned up here.
    // we have to delete the dir and all its children
    std::vector<std::string> subchildren;
    env_->GetChildren(full_private_path, &subchildren);
    for (auto& subchild : subchildren) {
      // Unlike live_files, the names from GetChildren() are relative to the
      // directory and carry no leading "/", so the separator must be added
      // here; without it nothing inside the directory is ever deleted.
      const std::string subchild_path = full_private_path + "/" + subchild;
      Status s1 = env_->DeleteFile(subchild_path);
      if (s1.ok()) {
        Log(db_options_.info_log, "Deleted %s", subchild_path.c_str());
      }
    }
    // finally delete the private dir
    Status s1 = env_->DeleteDir(full_private_path);
    Log(db_options_.info_log, "Deleted dir %s -- %s", full_private_path.c_str(),
        s1.ToString().c_str());
    return s;
  }

  // here we know that we succeeded and installed the new snapshot
  Log(db_options_.info_log, "Snapshot DONE. All is good");
  Log(db_options_.info_log, "Snapshot sequence number: %" PRIu64,
      sequence_number);

  return s;
}
} }
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE

@ -170,11 +170,6 @@ class DBImpl : public DB {
ColumnFamilyHandle* column_family, ColumnFamilyHandle* column_family,
ColumnFamilyMetaData* metadata) override; ColumnFamilyMetaData* metadata) override;
// Builds an openable snapshot of RocksDB in snapshot_dir. The directory
// must not already exist (InvalidArgument is returned if it does). Under
// the directory this creates
// (1) hard links to the existing live SST files; if the Env reports that
//     linking is not supported, the SST files are copied instead
// (2) copies of the manifest file and the other live files
virtual Status CreateCheckpoint(const std::string& snapshot_dir);
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
// checks if all live files exist on file system and that their file sizes // checks if all live files exist on file system and that their file sizes

@ -35,6 +35,7 @@
#include "rocksdb/table_properties.h" #include "rocksdb/table_properties.h"
#include "rocksdb/thread_status.h" #include "rocksdb/thread_status.h"
#include "rocksdb/utilities/write_batch_with_index.h" #include "rocksdb/utilities/write_batch_with_index.h"
#include "rocksdb/utilities/checkpoint.h"
#include "table/block_based_table_factory.h" #include "table/block_based_table_factory.h"
#include "table/plain_table_factory.h" #include "table/plain_table_factory.h"
#include "util/hash.h" #include "util/hash.h"
@ -1616,6 +1617,7 @@ TEST(DBTest, GetSnapshotLink) {
DB* snapshotDB; DB* snapshotDB;
ReadOptions roptions; ReadOptions roptions;
std::string result; std::string result;
Checkpoint* checkpoint;
options = CurrentOptions(options); options = CurrentOptions(options);
delete db_; delete db_;
@ -1631,7 +1633,8 @@ TEST(DBTest, GetSnapshotLink) {
std::string key = std::string("foo"); std::string key = std::string("foo");
ASSERT_OK(Put(key, "v1")); ASSERT_OK(Put(key, "v1"));
// Take a snapshot // Take a snapshot
ASSERT_OK(db_->CreateCheckpoint(snapshot_name)); ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
ASSERT_OK(checkpoint->CreateCheckpoint(snapshot_name));
ASSERT_OK(Put(key, "v2")); ASSERT_OK(Put(key, "v2"));
ASSERT_EQ("v2", Get(key)); ASSERT_EQ("v2", Get(key));
ASSERT_OK(Flush()); ASSERT_OK(Flush());
@ -7525,10 +7528,6 @@ class ModelDB: public DB {
ColumnFamilyHandle* column_family, ColumnFamilyHandle* column_family,
ColumnFamilyMetaData* metadata) {} ColumnFamilyMetaData* metadata) {}
// ModelDB has no checkpoint support: snapshot_dir is ignored and
// NotSupported is always returned.
virtual Status CreateCheckpoint(const std::string& snapshot_dir) {
return Status::NotSupported("Not supported in Model DB");
}
private: private:
class ModelIter: public Iterator { class ModelIter: public Iterator {
public: public:

@ -522,12 +522,6 @@ class DB {
virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) { virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) {
return GetPropertiesOfAllTables(DefaultColumnFamily(), props); return GetPropertiesOfAllTables(DefaultColumnFamily(), props);
} }
// Builds an openable snapshot of RocksDB in snapshot_dir. Under the
// directory the snapshot consists of
// (1) hard links to the existing live SST files
// (2) copies of the manifest file and the other live files
// Pure virtual: every DB implementation has to provide it.
virtual Status CreateCheckpoint(const std::string& snapshot_dir) = 0;
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
private: private:

@ -0,0 +1,34 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// A checkpoint is an openable snapshot of a database at a point in time.
#pragma once
#include <string>
#include "rocksdb/status.h"
namespace rocksdb {
class DB;
// Interface for creating checkpoints (openable snapshots) of a DB.
// Instances are obtained through Checkpoint::Create().
class Checkpoint {
public:
// Creates a Checkpoint object to be used for creating openable snapshots
// of db. On return *checkpoint_ptr points to a heap-allocated object;
// nothing else retains it, so the caller is expected to delete it when done.
static Status Create(DB* db, Checkpoint** checkpoint_ptr);
// Builds an openable snapshot of RocksDB in checkpoint_dir. Under the
// directory this creates
// (1) hard links to the existing live SST files; the SST files are copied
//     instead if the output directory is on a different filesystem
// (2) copies of the manifest file and the other live files
// The directory should not already exist and will be created by this API.
// checkpoint_dir should be given as an absolute path.
// The default implementation in this base class returns NotSupported.
virtual Status CreateCheckpoint(const std::string& checkpoint_dir);
virtual ~Checkpoint() {}
};
} // namespace rocksdb

@ -247,10 +247,6 @@ class StackableDB : public DB {
return db_->DefaultColumnFamily(); return db_->DefaultColumnFamily();
} }
// Forwards the checkpoint request unchanged to the wrapped DB (db_) and
// returns its status.
virtual Status CreateCheckpoint(const std::string& snapshot_dir) override {
return db_->CreateCheckpoint(snapshot_dir);
}
protected: protected:
DB* db_; DB* db_;
}; };

@ -0,0 +1,168 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2012 Facebook.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef ROCKSDB_LITE
#include "rocksdb/utilities/checkpoint.h"
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <algorithm>
#include <string>
#include "db/filename.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "util/file_util.h"
namespace rocksdb {
// Checkpoint implementation that builds checkpoints of a single DB through
// that DB's public API and its Env.
class CheckpointImpl : public Checkpoint {
 public:
  // Creates a Checkpoint object to be used for creating openable snapshots
  // of db. Only the pointer is stored; db is neither copied nor deleted by
  // this class.
  explicit CheckpointImpl(DB* db) : db_(db) {}

  // Builds an openable snapshot of RocksDB in checkpoint_dir. Under the
  // directory this creates
  // (1) hard links to the existing live SST files; the SST files are copied
  //     instead if the output directory is on a different filesystem
  // (2) copies of the manifest file and the other live files
  // The directory should not already exist and will be created by this API.
  // checkpoint_dir should be given as an absolute path.
  using Checkpoint::CreateCheckpoint;
  virtual Status CreateCheckpoint(const std::string& checkpoint_dir) override;

 private:
  // DB whose live files are linked/copied into the checkpoint.
  DB* db_;
};
// Factory for Checkpoint objects: allocates a CheckpointImpl bound to db,
// hands it back through checkpoint_ptr, and always reports OK.
Status Checkpoint::Create(DB* db, Checkpoint** checkpoint_ptr) {
  Checkpoint* const created = new CheckpointImpl(db);
  *checkpoint_ptr = created;
  return Status::OK();
}
// Default implementation for the base class: checkpoint_dir is ignored and
// NotSupported (with an empty message) is returned. CheckpointImpl overrides
// this with the real implementation.
Status Checkpoint::CreateCheckpoint(const std::string& checkpoint_dir) {
return Status::NotSupported("");
}
// Builds an openable snapshot of RocksDB in checkpoint_dir.
//
// The checkpoint is assembled in "<checkpoint_dir>.tmp" and renamed to
// checkpoint_dir only after every live file has been linked or copied:
//  * SST files are hard-linked; once LinkFile() reports NotSupported, that
//    file and all remaining SST files are copied instead
//  * the manifest is copied, limited to manifest_file_size bytes
//  * every other live file (CURRENT) is copied with a size argument of 0
//    (presumably "whole file" -- confirm against CopyFile)
//
// File deletions on the DB are disabled while the files are being
// linked/copied.
//
// Returns InvalidArgument if checkpoint_dir already exists, Corruption if a
// live file name cannot be parsed, and otherwise the status of the first
// failing step. On failure the temporary directory is removed on a
// best-effort basis.
Status CheckpointImpl::CreateCheckpoint(const std::string& checkpoint_dir) {
  Status s;
  std::vector<std::string> live_files;
  uint64_t manifest_file_size = 0;
  uint64_t sequence_number = db_->GetLatestSequenceNumber();
  bool same_fs = true;

  if (db_->GetEnv()->FileExists(checkpoint_dir)) {
    return Status::InvalidArgument("Directory exists");
  }

  s = db_->DisableFileDeletions();
  if (s.ok()) {
    // this will return live_files prefixed with "/"
    s = db_->GetLiveFiles(live_files, &manifest_file_size, true);
  }
  if (!s.ok()) {
    db_->EnableFileDeletions(false);
    return s;
  }

  Log(db_->GetOptions().info_log,
      "Started the snapshot process -- creating snapshot in directory %s",
      checkpoint_dir.c_str());

  std::string full_private_path = checkpoint_dir + ".tmp";

  // create snapshot directory
  s = db_->GetEnv()->CreateDir(full_private_path);

  // copy/hard link live_files
  for (size_t i = 0; s.ok() && i < live_files.size(); ++i) {
    uint64_t number;
    FileType type;
    bool ok = ParseFileName(live_files[i], &number, &type);
    if (!ok) {
      s = Status::Corruption("Can't parse file name. This is very bad");
      break;
    }
    // we should only get sst, manifest and current files here
    assert(type == kTableFile || type == kDescriptorFile ||
           type == kCurrentFile);
    assert(live_files[i].size() > 0 && live_files[i][0] == '/');
    std::string src_fname = live_files[i];

    // rules:
    // * if it's kTableFile, then it's shared
    // * if it's kDescriptorFile, limit the size to manifest_file_size
    // * always copy if cross-device link
    if ((type == kTableFile) && same_fs) {
      Log(db_->GetOptions().info_log, "Hard Linking %s", src_fname.c_str());
      s = db_->GetEnv()->LinkFile(db_->GetName() + src_fname,
                                  full_private_path + src_fname);
      if (s.IsNotSupported()) {
        // Linking is unavailable: fall through to the copy branch below for
        // this file and for every remaining table file.
        same_fs = false;
        s = Status::OK();
      }
    }
    if ((type != kTableFile) || (!same_fs)) {
      Log(db_->GetOptions().info_log, "Copying %s", src_fname.c_str());
      s = CopyFile(db_->GetEnv(), db_->GetName() + src_fname,
                   full_private_path + src_fname,
                   (type == kDescriptorFile) ? manifest_file_size : 0);
    }
  }

  // we copied all the files, enable file deletions
  db_->EnableFileDeletions(false);

  if (s.ok()) {
    // move tmp private backup to real snapshot directory
    s = db_->GetEnv()->RenameFile(full_private_path, checkpoint_dir);
  }
  if (s.ok()) {
    unique_ptr<Directory> checkpoint_directory;
    db_->GetEnv()->NewDirectory(checkpoint_dir, &checkpoint_directory);
    if (checkpoint_directory != nullptr) {
      s = checkpoint_directory->Fsync();
    }
  }

  if (!s.ok()) {
    // clean all the files we might have created
    Log(db_->GetOptions().info_log, "Snapshot failed -- %s",
        s.ToString().c_str());
    // NOTE(review): if the failure came from Fsync() after a successful
    // rename, the tmp directory no longer exists and checkpoint_dir is left
    // in place; only the tmp directory is cleaned up here.
    // we have to delete the dir and all its children
    std::vector<std::string> subchildren;
    db_->GetEnv()->GetChildren(full_private_path, &subchildren);
    for (auto& subchild : subchildren) {
      // Unlike live_files, the names from GetChildren() are relative to the
      // directory and carry no leading "/", so the separator must be added
      // here; without it nothing inside the directory is ever deleted and
      // the DeleteDir() below fails on a non-empty directory.
      const std::string subchild_path = full_private_path + "/" + subchild;
      Status s1 = db_->GetEnv()->DeleteFile(subchild_path);
      if (s1.ok()) {
        Log(db_->GetOptions().info_log, "Deleted %s", subchild_path.c_str());
      }
    }
    // finally delete the private dir
    Status s1 = db_->GetEnv()->DeleteDir(full_private_path);
    Log(db_->GetOptions().info_log, "Deleted dir %s -- %s",
        full_private_path.c_str(), s1.ToString().c_str());
    return s;
  }

  // here we know that we succeeded and installed the new snapshot
  Log(db_->GetOptions().info_log, "Snapshot DONE. All is good");
  Log(db_->GetOptions().info_log, "Snapshot sequence number: %" PRIu64,
      sequence_number);

  return s;
}
} // namespace rocksdb
#endif // ROCKSDB_LITE
Loading…
Cancel
Save