Checksum for each SST file and stores in MANIFEST (#6216)
Summary: In the current code base, RocksDB generates a checksum for each block and verifies the checksum at usage. The current PR enables SST file checksums. After an SST file is generated by Flush or Compaction, RocksDB generates the SST file checksum and stores the checksum value and checksum method name in the vs_info and MANIFEST as part of the FileMetadata. Added enable_sst_file_checksum to Options to enable or disable file checksums. Added sst_file_checksum to Options so that users can plug in their own SST file checksum calculation method by overriding the SstFileChecksum class. The checksum information includes a uint32_t checksum value and a checksum name (string). A new tool is added to LDB so that users can dump out a list of file checksum information from the MANIFEST. If the user enables the file checksum but does not provide the sst_file_checksum instance, RocksDB will use the default crc32checksum implemented in table/sst_file_checksum_crc32c.h. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6216 Test Plan: Added test cases in table_test and ldb_cmd_test to verify the checksum is correct at different levels. Pass make asan_check. Differential Revision: D19171461 Pulled By: zhichao-cao fbshipit-source-id: b2e53479eefc5bb0437189eaa1941670e5ba8b87 main
parent
594e815e32
commit
4369f2c7bb
@ -0,0 +1,86 @@ |
|||||||
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
// Copyright (c) 2013 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#include <cassert> |
||||||
|
#include <map> |
||||||
|
#include <memory> |
||||||
|
#include <string> |
||||||
|
#include <vector> |
||||||
|
|
||||||
|
#include "rocksdb/status.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
// FileChecksumFunc is the abstract interface of a checksum generator that
// produces the checksum value of a file while the file is being written to the
// file system. Checksums are exchanged as std::string values so that each
// implementation is free to choose its own width and encoding.
class FileChecksumFunc {
 public:
  virtual ~FileChecksumFunc() = default;

  // Streaming form. init_checksum is the checksum previously returned for
  // some data A; the result is the checksum of A followed by data[0,n-1].
  // Use it to keep a running checksum over a stream of data.
  virtual std::string Extend(const std::string& init_checksum, const char* data,
                             size_t n) = 0;

  // One-shot form: the checksum of data[0,n-1].
  virtual std::string Value(const char* data, size_t n) = 0;

  // Converts a checksum into the processed form that is meant to be stored.
  virtual std::string ProcessChecksum(const std::string& checksum) = 0;

  // A name that identifies this file checksum function.
  virtual const char* Name() const = 0;
};
||||||
|
|
||||||
|
// FileChecksumList stores the checksum information of a list of files (e.g.,
|
||||||
|
// SST files). The FileChecksumLIst can be used to store the checksum
|
||||||
|
// information of all SST file getting from the MANIFEST, which are
|
||||||
|
// the checksum information of all valid SST file of a DB instance. It can
|
||||||
|
// also be used to store the checksum information of a list of SST files to
|
||||||
|
// be ingested.
|
||||||
|
class FileChecksumList { |
||||||
|
public: |
||||||
|
virtual ~FileChecksumList() {} |
||||||
|
|
||||||
|
// Clean the previously stored file checksum information.
|
||||||
|
virtual void reset() = 0; |
||||||
|
|
||||||
|
// Get the number of checksums in the checksum list
|
||||||
|
virtual size_t size() const = 0; |
||||||
|
|
||||||
|
// Return all the file checksum information being stored in a unordered_map.
|
||||||
|
// File_number is the key, the first part of the value is checksum value,
|
||||||
|
// and the second part of the value is checksum function name.
|
||||||
|
virtual Status GetAllFileChecksums( |
||||||
|
std::vector<uint64_t>* file_numbers, std::vector<std::string>* checksums, |
||||||
|
std::vector<std::string>* checksum_func_names) = 0; |
||||||
|
|
||||||
|
// Given the file_number, it searches if the file checksum information is
|
||||||
|
// stored.
|
||||||
|
virtual Status SearchOneFileChecksum(uint64_t file_number, |
||||||
|
std::string* checksum, |
||||||
|
std::string* checksum_func_name) = 0; |
||||||
|
|
||||||
|
// Insert the checksum information of one file to the FileChecksumList.
|
||||||
|
virtual Status InsertOneFileChecksum( |
||||||
|
uint64_t file_number, const std::string& checksum, |
||||||
|
const std::string& checksum_func_name) = 0; |
||||||
|
|
||||||
|
// Remove the checksum information of one SST file.
|
||||||
|
virtual Status RemoveOneFileChecksum(uint64_t file_number) = 0; |
||||||
|
}; |
||||||
|
|
||||||
|
// Create a new file checksum list.
|
||||||
|
extern FileChecksumList* NewFileChecksumList(); |
||||||
|
|
||||||
|
// Create a Crc32c based file checksum function
|
||||||
|
extern FileChecksumFunc* CreateFileChecksumFuncCrc32c(); |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,85 @@ |
|||||||
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
//
|
||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#include "util/file_checksum_helper.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
// Drops every stored entry, leaving the list empty.
void FileChecksumListImpl::reset() {
  checksum_map_.clear();
}
||||||
|
|
||||||
|
// Number of files whose checksum information is currently stored.
size_t FileChecksumListImpl::size() const {
  return checksum_map_.size();
}
||||||
|
|
||||||
|
// Appends the file number, checksum and checksum function name of every
// stored entry to the three output vectors; the same index in each vector
// describes the same file. Whatever the vectors already contain is kept.
// Entries are visited in the iteration order of checksum_map_, an
// unordered_map, so their order is unspecified.
//
// Returns InvalidArgument if any of the output pointers is null, OK otherwise.
Status FileChecksumListImpl::GetAllFileChecksums(
    std::vector<uint64_t>* file_numbers, std::vector<std::string>* checksums,
    std::vector<std::string>* checksum_func_names) {
  if (file_numbers == nullptr || checksums == nullptr ||
      checksum_func_names == nullptr) {
    return Status::InvalidArgument("Pointer has not been initiated");
  }

  // Grow each output once up front instead of reallocating while appending.
  const size_t num_entries = checksum_map_.size();
  file_numbers->reserve(file_numbers->size() + num_entries);
  checksums->reserve(checksums->size() + num_entries);
  checksum_func_names->reserve(checksum_func_names->size() + num_entries);

  // Bind by const reference: iterating by value would copy both strings of
  // every entry only to copy them a second time into the output vectors.
  for (const auto& entry : checksum_map_) {
    file_numbers->push_back(entry.first);
    checksums->push_back(entry.second.first);
    checksum_func_names->push_back(entry.second.second);
  }
  return Status::OK();
}
||||||
|
|
||||||
|
// Looks up the entry stored for file_number and copies its checksum and
// checksum function name into the two output strings.
//
// Returns InvalidArgument if either output pointer is null, NotFound if no
// entry is stored for file_number (the outputs are left untouched in both
// cases), and OK otherwise.
Status FileChecksumListImpl::SearchOneFileChecksum(
    uint64_t file_number, std::string* checksum,
    std::string* checksum_func_name) {
  if (checksum == nullptr || checksum_func_name == nullptr) {
    return Status::InvalidArgument("Pointer has not been initiated");
  }

  const auto found = checksum_map_.find(file_number);
  if (found == checksum_map_.end()) {
    return Status::NotFound();
  }

  const auto& info = found->second;
  *checksum = info.first;
  *checksum_func_name = info.second;
  return Status::OK();
}
||||||
|
|
||||||
|
// Stores the checksum and checksum function name for file_number. If an entry
// for file_number already exists, both of its values are overwritten.
// Always returns OK.
Status FileChecksumListImpl::InsertOneFileChecksum(
    uint64_t file_number, const std::string& checksum,
    const std::string& checksum_func_name) {
  // operator[] yields the existing entry or creates an empty one, so a single
  // assignment covers both the insert and the overwrite case.
  checksum_map_[file_number] = std::make_pair(checksum, checksum_func_name);
  return Status::OK();
}
||||||
|
|
||||||
|
// Erases the entry stored for file_number.
// Returns NotFound if there is no such entry, OK once it has been erased.
Status FileChecksumListImpl::RemoveOneFileChecksum(uint64_t file_number) {
  // Erasing by key reports how many entries were removed (0 or 1 here).
  const size_t num_erased = checksum_map_.erase(file_number);
  if (num_erased == 0) {
    return Status::NotFound();
  }
  return Status::OK();
}
||||||
|
|
||||||
|
// Allocates an empty FileChecksumListImpl with new and hands it back through
// the FileChecksumList interface pointer.
FileChecksumList* NewFileChecksumList() {
  return new FileChecksumListImpl();
}
||||||
|
|
||||||
|
// Allocates a FileChecksumFuncCrc32c with new and hands it back through the
// FileChecksumFunc interface pointer.
FileChecksumFunc* CreateFileChecksumFuncCrc32c() {
  return new FileChecksumFuncCrc32c();
}
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,117 @@ |
|||||||
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
#include <cassert> |
||||||
|
#include <unordered_map> |
||||||
|
#include "port/port.h" |
||||||
|
#include "rocksdb/file_checksum.h" |
||||||
|
#include "rocksdb/status.h" |
||||||
|
#include "util/crc32c.h" |
||||||
|
#include "util/string_util.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
// This is the class to generate the file checksum based on Crc32. It
|
||||||
|
// will be used as the default checksum method for SST file checksum
|
||||||
|
class FileChecksumFuncCrc32c : public FileChecksumFunc { |
||||||
|
public: |
||||||
|
std::string Extend(const std::string& init_checksum, const char* data, |
||||||
|
size_t n) override { |
||||||
|
assert(data != nullptr); |
||||||
|
uint32_t checksum_value = StringToUint32(init_checksum); |
||||||
|
return Uint32ToString(crc32c::Extend(checksum_value, data, n)); |
||||||
|
} |
||||||
|
|
||||||
|
std::string Value(const char* data, size_t n) override { |
||||||
|
assert(data != nullptr); |
||||||
|
return Uint32ToString(crc32c::Value(data, n)); |
||||||
|
} |
||||||
|
|
||||||
|
std::string ProcessChecksum(const std::string& checksum) override { |
||||||
|
uint32_t checksum_value = StringToUint32(checksum); |
||||||
|
return Uint32ToString(crc32c::Mask(checksum_value)); |
||||||
|
} |
||||||
|
|
||||||
|
const char* Name() const override { return "FileChecksumCrc32c"; } |
||||||
|
|
||||||
|
// Convert a uint32_t type data into a 4 bytes string.
|
||||||
|
static std::string Uint32ToString(uint32_t v) { |
||||||
|
std::string s; |
||||||
|
if (port::kLittleEndian) { |
||||||
|
s.append(reinterpret_cast<char*>(&v), sizeof(v)); |
||||||
|
} else { |
||||||
|
char buf[sizeof(v)]; |
||||||
|
buf[0] = v & 0xff; |
||||||
|
buf[1] = (v >> 8) & 0xff; |
||||||
|
buf[2] = (v >> 16) & 0xff; |
||||||
|
buf[3] = (v >> 24) & 0xff; |
||||||
|
s.append(buf, sizeof(v)); |
||||||
|
} |
||||||
|
size_t i = 0, j = s.size() - 1; |
||||||
|
while (i < j) { |
||||||
|
char tmp = s[i]; |
||||||
|
s[i] = s[j]; |
||||||
|
s[j] = tmp; |
||||||
|
++i; |
||||||
|
--j; |
||||||
|
} |
||||||
|
return s; |
||||||
|
} |
||||||
|
|
||||||
|
// Convert a 4 bytes size string into a uint32_t type data.
|
||||||
|
static uint32_t StringToUint32(std::string s) { |
||||||
|
assert(s.size() == sizeof(uint32_t)); |
||||||
|
size_t i = 0, j = s.size() - 1; |
||||||
|
while (i < j) { |
||||||
|
char tmp = s[i]; |
||||||
|
s[i] = s[j]; |
||||||
|
s[j] = tmp; |
||||||
|
++i; |
||||||
|
--j; |
||||||
|
} |
||||||
|
uint32_t v = 0; |
||||||
|
if (port::kLittleEndian) { |
||||||
|
memcpy(&v, s.c_str(), sizeof(uint32_t)); |
||||||
|
} else { |
||||||
|
const char* buf = s.c_str(); |
||||||
|
v |= static_cast<uint32_t>(buf[0]); |
||||||
|
v |= (static_cast<uint32_t>(buf[1]) << 8); |
||||||
|
v |= (static_cast<uint32_t>(buf[2]) << 16); |
||||||
|
v |= (static_cast<uint32_t>(buf[3]) << 24); |
||||||
|
} |
||||||
|
return v; |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
// The default implementaion of FileChecksumList
|
||||||
|
class FileChecksumListImpl : public FileChecksumList { |
||||||
|
public: |
||||||
|
FileChecksumListImpl() {} |
||||||
|
void reset() override; |
||||||
|
|
||||||
|
size_t size() const override; |
||||||
|
|
||||||
|
Status GetAllFileChecksums( |
||||||
|
std::vector<uint64_t>* file_numbers, std::vector<std::string>* checksums, |
||||||
|
std::vector<std::string>* checksum_func_names) override; |
||||||
|
|
||||||
|
Status SearchOneFileChecksum(uint64_t file_number, std::string* checksum, |
||||||
|
std::string* checksum_func_name) override; |
||||||
|
|
||||||
|
Status InsertOneFileChecksum(uint64_t file_number, |
||||||
|
const std::string& checksum, |
||||||
|
const std::string& checksum_func_name) override; |
||||||
|
|
||||||
|
Status RemoveOneFileChecksum(uint64_t file_number) override; |
||||||
|
|
||||||
|
private: |
||||||
|
// Key is the file number, the first portion of the value is checksum, the
|
||||||
|
// second portion of the value is checksum function name.
|
||||||
|
std::unordered_map<uint64_t, std::pair<std::string, std::string>> |
||||||
|
checksum_map_; |
||||||
|
}; |
||||||
|
|
||||||
|
} // namespace rocksdb
|
Loading…
Reference in new issue