Checksum for each SST file and stores in MANIFEST (#6216)
Summary: In the current code base, RocksDB generates a checksum for each block and verifies it when the block is used. This PR enables SST file checksums. After an SST file is generated by Flush or Compaction, RocksDB generates the SST file checksum and stores the checksum value and checksum method name in the vs_info and MANIFEST as part of the FileMetadata. Added enable_sst_file_checksum to Options to enable or disable the file checksum. Added sst_file_checksum to Options so that users can plug in their own SST file checksum calculation method by overriding the SstFileChecksum class. The checksum information includes a uint32_t checksum value and a checksum name (string). A new tool is added to LDB so that users can dump a list of file checksum information from the MANIFEST. If the user enables the file checksum but does not provide the sst_file_checksum instance, RocksDB will use the default crc32checksum implemented in table/sst_file_checksum_crc32c.h. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6216 Test Plan: Added test cases in table_test and ldb_cmd_test to verify that the checksum is correct at different levels. Pass make asan_check. Differential Revision: D19171461 Pulled By: zhichao-cao fbshipit-source-id: b2e53479eefc5bb0437189eaa1941670e5ba8b87
main
parent
594e815e32
commit
4369f2c7bb
@ -0,0 +1,86 @@ |
||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
// Copyright (c) 2013 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#pragma once |
||||
|
||||
#include <cassert> |
||||
#include <map> |
||||
#include <memory> |
||||
#include <string> |
||||
#include <vector> |
||||
|
||||
#include "rocksdb/status.h" |
||||
|
||||
namespace rocksdb { |
||||
|
||||
// FileChecksumFunc is the interface used to generate the checksum value of a
// file when that file is written to the file system. Checksum values are
// passed around as std::string objects; their encoding is chosen by the
// implementation.
class FileChecksumFunc {
 public:
  virtual ~FileChecksumFunc() = default;

  // Returns the checksum of concat(A, data[0, n-1]), where init_checksum is
  // the checksum previously returned for some string A. This lets the
  // checksum of a stream of data be maintained piece by piece.
  virtual std::string Extend(const std::string& init_checksum,
                             const char* data, size_t n) = 0;

  // Returns the checksum value of data[0, n-1].
  virtual std::string Value(const char* data, size_t n) = 0;

  // Returns a processed form of `checksum` that is meant to be stored
  // somewhere.
  virtual std::string ProcessChecksum(const std::string& checksum) = 0;

  // Returns a name that identifies this file checksum function.
  virtual const char* Name() const = 0;
};
||||
|
||||
// FileChecksumList stores the checksum information of a list of files (e.g.,
|
||||
// SST files). The FileChecksumLIst can be used to store the checksum
|
||||
// information of all SST file getting from the MANIFEST, which are
|
||||
// the checksum information of all valid SST file of a DB instance. It can
|
||||
// also be used to store the checksum information of a list of SST files to
|
||||
// be ingested.
|
||||
class FileChecksumList { |
||||
public: |
||||
virtual ~FileChecksumList() {} |
||||
|
||||
// Clean the previously stored file checksum information.
|
||||
virtual void reset() = 0; |
||||
|
||||
// Get the number of checksums in the checksum list
|
||||
virtual size_t size() const = 0; |
||||
|
||||
// Return all the file checksum information being stored in a unordered_map.
|
||||
// File_number is the key, the first part of the value is checksum value,
|
||||
// and the second part of the value is checksum function name.
|
||||
virtual Status GetAllFileChecksums( |
||||
std::vector<uint64_t>* file_numbers, std::vector<std::string>* checksums, |
||||
std::vector<std::string>* checksum_func_names) = 0; |
||||
|
||||
// Given the file_number, it searches if the file checksum information is
|
||||
// stored.
|
||||
virtual Status SearchOneFileChecksum(uint64_t file_number, |
||||
std::string* checksum, |
||||
std::string* checksum_func_name) = 0; |
||||
|
||||
// Insert the checksum information of one file to the FileChecksumList.
|
||||
virtual Status InsertOneFileChecksum( |
||||
uint64_t file_number, const std::string& checksum, |
||||
const std::string& checksum_func_name) = 0; |
||||
|
||||
// Remove the checksum information of one SST file.
|
||||
virtual Status RemoveOneFileChecksum(uint64_t file_number) = 0; |
||||
}; |
||||
|
||||
// Create a new file checksum list.
|
||||
extern FileChecksumList* NewFileChecksumList(); |
||||
|
||||
// Create a Crc32c based file checksum function
|
||||
extern FileChecksumFunc* CreateFileChecksumFuncCrc32c(); |
||||
|
||||
} // namespace rocksdb
|
@ -0,0 +1,85 @@ |
||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
//
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "util/file_checksum_helper.h" |
||||
|
||||
namespace rocksdb { |
||||
|
||||
// Drops every stored entry, leaving the list empty.
void FileChecksumListImpl::reset() {
  checksum_map_.clear();
}
||||
|
||||
// Reports how many files currently have checksum information stored.
size_t FileChecksumListImpl::size() const {
  return checksum_map_.size();
}
||||
|
||||
// Appends the information of every stored entry to the three output vectors,
// which stay index-aligned with each other: file number, checksum value and
// checksum function name. Existing contents of the vectors are kept, and the
// entries are visited in the (unspecified) iteration order of the underlying
// unordered_map.
//
// Returns InvalidArgument if any of the output pointers is null, OK
// otherwise.
Status FileChecksumListImpl::GetAllFileChecksums(
    std::vector<uint64_t>* file_numbers, std::vector<std::string>* checksums,
    std::vector<std::string>* checksum_func_names) {
  if (file_numbers == nullptr || checksums == nullptr ||
      checksum_func_names == nullptr) {
    return Status::InvalidArgument("Pointer has not been initiated");
  }

  // Bind each entry by const reference: iterating by value would copy the
  // pair (and both strings inside it) once per entry before the strings are
  // copied again into the output vectors.
  for (const auto& entry : checksum_map_) {
    file_numbers->push_back(entry.first);
    checksums->push_back(entry.second.first);
    checksum_func_names->push_back(entry.second.second);
  }
  return Status::OK();
}
||||
|
||||
// Looks up the entry stored for file_number and copies its checksum value
// and checksum function name into the two output strings.
//
// Returns InvalidArgument if either output pointer is null, NotFound if no
// entry exists for file_number (the outputs are left untouched), and OK
// otherwise.
Status FileChecksumListImpl::SearchOneFileChecksum(
    uint64_t file_number, std::string* checksum,
    std::string* checksum_func_name) {
  if (!checksum || !checksum_func_name) {
    return Status::InvalidArgument("Pointer has not been initiated");
  }

  const auto entry = checksum_map_.find(file_number);
  if (entry == checksum_map_.end()) {
    return Status::NotFound();
  }

  *checksum = entry->second.first;
  *checksum_func_name = entry->second.second;
  return Status::OK();
}
||||
|
||||
// Records the checksum value and checksum function name for file_number. If
// an entry for that file already exists, it is overwritten. Always returns
// OK.
Status FileChecksumListImpl::InsertOneFileChecksum(
    uint64_t file_number, const std::string& checksum,
    const std::string& checksum_func_name) {
  // operator[] yields the existing entry or creates an empty one, so both
  // the "new file" and the "replace" case end in the same two assignments.
  auto& slot = checksum_map_[file_number];
  slot.first = checksum;
  slot.second = checksum_func_name;
  return Status::OK();
}
||||
|
||||
// Deletes the entry stored for file_number. Returns NotFound if there is no
// such entry and OK once the entry has been removed.
Status FileChecksumListImpl::RemoveOneFileChecksum(uint64_t file_number) {
  // erase(key) reports how many entries it removed; zero means the file
  // number was not present.
  if (checksum_map_.erase(file_number) == 0) {
    return Status::NotFound();
  }
  return Status::OK();
}
||||
|
||||
// Allocates a default-constructed FileChecksumListImpl with `new` and hands
// it back through the FileChecksumList interface.
FileChecksumList* NewFileChecksumList() {
  return new FileChecksumListImpl();
}
||||
|
||||
// Allocates a FileChecksumFuncCrc32c with `new` and hands it back through
// the FileChecksumFunc interface.
FileChecksumFunc* CreateFileChecksumFuncCrc32c() {
  return new FileChecksumFuncCrc32c();
}
||||
|
||||
} // namespace rocksdb
|
@ -0,0 +1,117 @@ |
||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once |
||||
#include <cassert> |
||||
#include <unordered_map> |
||||
#include "port/port.h" |
||||
#include "rocksdb/file_checksum.h" |
||||
#include "rocksdb/status.h" |
||||
#include "util/crc32c.h" |
||||
#include "util/string_util.h" |
||||
|
||||
namespace rocksdb { |
||||
|
||||
// This is the class to generate the file checksum based on Crc32. It
|
||||
// will be used as the default checksum method for SST file checksum
|
||||
class FileChecksumFuncCrc32c : public FileChecksumFunc { |
||||
public: |
||||
std::string Extend(const std::string& init_checksum, const char* data, |
||||
size_t n) override { |
||||
assert(data != nullptr); |
||||
uint32_t checksum_value = StringToUint32(init_checksum); |
||||
return Uint32ToString(crc32c::Extend(checksum_value, data, n)); |
||||
} |
||||
|
||||
std::string Value(const char* data, size_t n) override { |
||||
assert(data != nullptr); |
||||
return Uint32ToString(crc32c::Value(data, n)); |
||||
} |
||||
|
||||
std::string ProcessChecksum(const std::string& checksum) override { |
||||
uint32_t checksum_value = StringToUint32(checksum); |
||||
return Uint32ToString(crc32c::Mask(checksum_value)); |
||||
} |
||||
|
||||
const char* Name() const override { return "FileChecksumCrc32c"; } |
||||
|
||||
// Convert a uint32_t type data into a 4 bytes string.
|
||||
static std::string Uint32ToString(uint32_t v) { |
||||
std::string s; |
||||
if (port::kLittleEndian) { |
||||
s.append(reinterpret_cast<char*>(&v), sizeof(v)); |
||||
} else { |
||||
char buf[sizeof(v)]; |
||||
buf[0] = v & 0xff; |
||||
buf[1] = (v >> 8) & 0xff; |
||||
buf[2] = (v >> 16) & 0xff; |
||||
buf[3] = (v >> 24) & 0xff; |
||||
s.append(buf, sizeof(v)); |
||||
} |
||||
size_t i = 0, j = s.size() - 1; |
||||
while (i < j) { |
||||
char tmp = s[i]; |
||||
s[i] = s[j]; |
||||
s[j] = tmp; |
||||
++i; |
||||
--j; |
||||
} |
||||
return s; |
||||
} |
||||
|
||||
// Convert a 4 bytes size string into a uint32_t type data.
|
||||
static uint32_t StringToUint32(std::string s) { |
||||
assert(s.size() == sizeof(uint32_t)); |
||||
size_t i = 0, j = s.size() - 1; |
||||
while (i < j) { |
||||
char tmp = s[i]; |
||||
s[i] = s[j]; |
||||
s[j] = tmp; |
||||
++i; |
||||
--j; |
||||
} |
||||
uint32_t v = 0; |
||||
if (port::kLittleEndian) { |
||||
memcpy(&v, s.c_str(), sizeof(uint32_t)); |
||||
} else { |
||||
const char* buf = s.c_str(); |
||||
v |= static_cast<uint32_t>(buf[0]); |
||||
v |= (static_cast<uint32_t>(buf[1]) << 8); |
||||
v |= (static_cast<uint32_t>(buf[2]) << 16); |
||||
v |= (static_cast<uint32_t>(buf[3]) << 24); |
||||
} |
||||
return v; |
||||
} |
||||
}; |
||||
|
||||
// The default implementaion of FileChecksumList
|
||||
class FileChecksumListImpl : public FileChecksumList { |
||||
public: |
||||
FileChecksumListImpl() {} |
||||
void reset() override; |
||||
|
||||
size_t size() const override; |
||||
|
||||
Status GetAllFileChecksums( |
||||
std::vector<uint64_t>* file_numbers, std::vector<std::string>* checksums, |
||||
std::vector<std::string>* checksum_func_names) override; |
||||
|
||||
Status SearchOneFileChecksum(uint64_t file_number, std::string* checksum, |
||||
std::string* checksum_func_name) override; |
||||
|
||||
Status InsertOneFileChecksum(uint64_t file_number, |
||||
const std::string& checksum, |
||||
const std::string& checksum_func_name) override; |
||||
|
||||
Status RemoveOneFileChecksum(uint64_t file_number) override; |
||||
|
||||
private: |
||||
// Key is the file number, the first portion of the value is checksum, the
|
||||
// second portion of the value is checksum function name.
|
||||
std::unordered_map<uint64_t, std::pair<std::string, std::string>> |
||||
checksum_map_; |
||||
}; |
||||
|
||||
} // namespace rocksdb
|
Loading…
Reference in new issue