[backupable db] Remove file size embedded in name workaround

Summary:
Now that we get sizes efficiently, we no longer need the workaround to
embed file size in filename.

Test Plan:
  $ ./backupable_db_test

Reviewers: sdong

Reviewed By: sdong

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D55035
main
Andrew Kryczka 9 years ago
parent ef204df7ef
commit 501927ffc4
  1. 9
      include/rocksdb/utilities/backupable_db.h
  2. 61
      utilities/backupable/backupable_db.cc
  3. 45
      utilities/backupable/backupable_db_test.cc
  4. 15
      utilities/backupable/backupable_db_testutil.h

@ -88,14 +88,6 @@ struct BackupableDBOptions {
// *turn it on only if you know what you're doing* // *turn it on only if you know what you're doing*
bool share_files_with_checksum; bool share_files_with_checksum;
// Try to use the file size in file name instead of getting size from HDFS,
// if the file is generated with options.share_files_with_checksum = true.
// This is a temporary solution to reduce the backupable Db open latency when
// There are too many sst files. Will remove the option after we have a
// permanent solution.
// Default: false
bool use_file_size_in_file_name;
// Up to this many background threads will copy files for CreateNewBackup() // Up to this many background threads will copy files for CreateNewBackup()
// and RestoreDBFromBackup() // and RestoreDBFromBackup()
// Default: 1 // Default: 1
@ -125,7 +117,6 @@ struct BackupableDBOptions {
backup_rate_limit(_backup_rate_limit), backup_rate_limit(_backup_rate_limit),
restore_rate_limit(_restore_rate_limit), restore_rate_limit(_restore_rate_limit),
share_files_with_checksum(false), share_files_with_checksum(false),
use_file_size_in_file_name(false),
max_background_operations(_max_background_operations), max_background_operations(_max_background_operations),
callback_trigger_interval_size(_callback_trigger_interval_size) { callback_trigger_interval_size(_callback_trigger_interval_size) {
assert(share_table_files || !share_files_with_checksum); assert(share_table_files || !share_files_with_checksum);

@ -187,7 +187,7 @@ class BackupEngineImpl : public BackupEngine {
// @param abs_path_to_size Pre-fetched file sizes (bytes). // @param abs_path_to_size Pre-fetched file sizes (bytes).
Status LoadFromFile( Status LoadFromFile(
const std::string& backup_dir, bool use_size_in_file_name, const std::string& backup_dir,
const std::unordered_map<std::string, uint64_t>& abs_path_to_size); const std::unordered_map<std::string, uint64_t>& abs_path_to_size);
Status StoreToFile(bool sync); Status StoreToFile(bool sync);
@ -591,9 +591,8 @@ Status BackupEngineImpl::Initialize() {
for (auto& backup : backups_) { for (auto& backup : backups_) {
InsertPathnameToSizeBytes( InsertPathnameToSizeBytes(
GetAbsolutePath(GetPrivateFileRel(backup.first)), &abs_path_to_size); GetAbsolutePath(GetPrivateFileRel(backup.first)), &abs_path_to_size);
Status s = backup.second->LoadFromFile( Status s =
options_.backup_dir, options_.use_file_size_in_file_name, backup.second->LoadFromFile(options_.backup_dir, abs_path_to_size);
abs_path_to_size);
if (!s.ok()) { if (!s.ok()) {
Log(options_.info_log, "Backup %u corrupted -- %s", backup.first, Log(options_.info_log, "Backup %u corrupted -- %s", backup.first,
s.ToString().c_str()); s.ToString().c_str());
@ -1563,55 +1562,6 @@ Status BackupEngineImpl::BackupMeta::Delete(bool delete_meta) {
return s; return s;
} }
namespace {
bool ParseStrToUint64(const std::string& str, uint64_t* out) {
try {
unsigned long ul = std::stoul(str);
*out = static_cast<uint64_t>(ul);
return true;
} catch (const std::invalid_argument&) {
return false;
} catch (const std::out_of_range&) {
return false;
}
}
// Parse file name in the format of
// "shared_checksum/<file_number>_<checksum>_<size>.sst, and fill `size` with
// the parsed <size> part.
// Will also accept only name part, or a file path in URL format.
// if file name doesn't have the extension of "sst", or doesn't have '_' as a
// part of the file name, or we can't parse a number from the sub string
// between the last '_' and '.', return false.
bool GetFileSizeFromBackupFileName(const std::string full_name,
uint64_t* size) {
auto dot_pos = full_name.find_last_of('.');
if (dot_pos == std::string::npos) {
return false;
}
if (full_name.substr(dot_pos + 1) != "sst") {
return false;
}
auto last_underscore_pos = full_name.find_last_of('_');
if (last_underscore_pos == std::string::npos) {
return false;
}
if (dot_pos <= last_underscore_pos + 2) {
return false;
}
return ParseStrToUint64(full_name.substr(last_underscore_pos + 1,
dot_pos - last_underscore_pos - 1),
size);
}
} // namespace
namespace test {
bool TEST_GetFileSizeFromBackupFileName(const std::string full_name,
uint64_t* size) {
return GetFileSizeFromBackupFileName(full_name, size);
}
} // namespace test
// each backup meta file is of the format: // each backup meta file is of the format:
// <timestamp> // <timestamp>
// <seq number> // <seq number>
@ -1620,7 +1570,7 @@ bool TEST_GetFileSizeFromBackupFileName(const std::string full_name,
// <file2> <crc32(literal string)> <crc32_value> // <file2> <crc32(literal string)> <crc32_value>
// ... // ...
Status BackupEngineImpl::BackupMeta::LoadFromFile( Status BackupEngineImpl::BackupMeta::LoadFromFile(
const std::string& backup_dir, bool use_size_in_file_name, const std::string& backup_dir,
const std::unordered_map<std::string, uint64_t>& abs_path_to_size) { const std::unordered_map<std::string, uint64_t>& abs_path_to_size) {
assert(Empty()); assert(Empty());
Status s; Status s;
@ -1663,8 +1613,6 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile(
if (file_info) { if (file_info) {
size = file_info->size; size = file_info->size;
} else { } else {
if (!use_size_in_file_name ||
!GetFileSizeFromBackupFileName(filename, &size)) {
std::string abs_path = backup_dir + "/" + filename; std::string abs_path = backup_dir + "/" + filename;
try { try {
size = abs_path_to_size.at(abs_path); size = abs_path_to_size.at(abs_path);
@ -1672,7 +1620,6 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile(
return Status::NotFound("Size missing for pathname: " + abs_path); return Status::NotFound("Size missing for pathname: " + abs_path);
} }
} }
}
if (line.empty()) { if (line.empty()) {
return Status::Corruption("File checksum is missing for " + filename + return Status::Corruption("File checksum is missing for " + filename +

@ -27,7 +27,6 @@
#include "util/sync_point.h" #include "util/sync_point.h"
#include "util/testutil.h" #include "util/testutil.h"
#include "util/mock_env.h" #include "util/mock_env.h"
#include "utilities/backupable/backupable_db_testutil.h"
namespace rocksdb { namespace rocksdb {
@ -596,8 +595,7 @@ class BackupableDBTestWithParam : public BackupableDBTest,
public testing::WithParamInterface<bool> { public testing::WithParamInterface<bool> {
public: public:
BackupableDBTestWithParam() { BackupableDBTestWithParam() {
backupable_options_->share_files_with_checksum = backupable_options_->share_files_with_checksum = GetParam();
backupable_options_->use_file_size_in_file_name = GetParam();
} }
}; };
@ -746,47 +744,6 @@ TEST_P(BackupableDBTestWithParam, OnlineIntegrationTest) {
INSTANTIATE_TEST_CASE_P(BackupableDBTestWithParam, BackupableDBTestWithParam, INSTANTIATE_TEST_CASE_P(BackupableDBTestWithParam, BackupableDBTestWithParam,
::testing::Bool()); ::testing::Bool());
TEST_F(BackupableDBTest, GetFileSizeFromBackupFileName) {
uint64_t size = 0;
ASSERT_TRUE(test::TEST_GetFileSizeFromBackupFileName(
"shared_checksum/6580354_1874793674_65806675.sst", &size));
ASSERT_EQ(65806675u, size);
ASSERT_TRUE(test::TEST_GetFileSizeFromBackupFileName(
"hdfs://a.b:80/a/b/shared_checksum/6580354_1874793674_85806675.sst",
&size));
ASSERT_EQ(85806675u, size);
ASSERT_TRUE(test::TEST_GetFileSizeFromBackupFileName(
"6580354_1874793674_65806665.sst", &size));
ASSERT_EQ(65806665u, size);
ASSERT_TRUE(test::TEST_GetFileSizeFromBackupFileName(
"private/66/6580354_1874793674_65806666.sst", &size));
ASSERT_EQ(65806666u, size);
ASSERT_TRUE(!test::TEST_GetFileSizeFromBackupFileName(
"shared_checksum/6580354.sst", &size));
ASSERT_TRUE(!test::TEST_GetFileSizeFromBackupFileName(
"private/368/6592388.log", &size));
ASSERT_TRUE(!test::TEST_GetFileSizeFromBackupFileName(
"private/68/MANIFEST-6586581", &size));
ASSERT_TRUE(
!test::TEST_GetFileSizeFromBackupFileName("private/68/CURRENT", &size));
ASSERT_TRUE(!test::TEST_GetFileSizeFromBackupFileName(
"shared_checksum/6580354_1874793674_65806675.log", &size));
ASSERT_TRUE(!test::TEST_GetFileSizeFromBackupFileName(
"shared_checksum/6580354_1874793674_65806675", &size));
ASSERT_TRUE(!test::TEST_GetFileSizeFromBackupFileName("meta/368", &size));
}
// this will make sure that backup does not copy the same file twice // this will make sure that backup does not copy the same file twice
TEST_F(BackupableDBTest, NoDoubleCopy) { TEST_F(BackupableDBTest, NoDoubleCopy) {
OpenDBAndBackupEngine(true, true); OpenDBAndBackupEngine(true, true);

@ -1,15 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#pragma once
#ifndef ROCKSDB_LITE
#include <string>
namespace rocksdb {
namespace test {
extern bool TEST_GetFileSizeFromBackupFileName(const std::string full_name,
uint64_t* size);
} // namespace test
} // namespace rocksdb
#endif // ROCKSDB_LITE
Loading…
Cancel
Save