Checksum for each SST file and stores in MANIFEST (#6216)
	
		
	
				
					
				
			Summary: In the current code base, RocksDB generates a checksum for each block and verifies the checksum at usage. This PR enables SST file checksums. After an SST file is generated by Flush or Compaction, RocksDB generates the SST file checksum and stores the checksum value and checksum method name in the vs_info and MANIFEST as part of the FileMetadata. Added enable_sst_file_checksum to Options to enable or disable the file checksum. Added sst_file_checksum to Options such that users can plug in their own SST file checksum calculation method by overriding the SstFileChecksum class. The checksum information includes a uint32_t checksum value and a checksum name (string). A new tool is added to LDB such that users can dump out a list of file checksum information from the MANIFEST. If the user enables the file checksum but does not provide the sst_file_checksum instance, RocksDB will use the default crc32checksum implemented in table/sst_file_checksum_crc32c.h. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6216 Test Plan: Added test cases in table_test and ldb_cmd_test to verify that the checksum is correct at different levels. Passes make asan_check. Differential Revision: D19171461 Pulled By: zhichao-cao fbshipit-source-id: b2e53479eefc5bb0437189eaa1941670e5ba8b87 main
							parent
							
								
									594e815e32
								
							
						
					
					
						commit
						4369f2c7bb
					
				| @ -0,0 +1,86 @@ | ||||
| // Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
 | ||||
| //  This source code is licensed under both the GPLv2 (found in the
 | ||||
| //  COPYING file in the root directory) and Apache 2.0 License
 | ||||
| //  (found in the LICENSE.Apache file in the root directory).
 | ||||
| // Copyright (c) 2013 The LevelDB Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style license that can be
 | ||||
| // found in the LICENSE file. See the AUTHORS file for names of contributors.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <cassert> | ||||
| #include <map> | ||||
| #include <memory> | ||||
| #include <string> | ||||
| #include <vector> | ||||
| 
 | ||||
| #include "rocksdb/status.h" | ||||
| 
 | ||||
| namespace rocksdb { | ||||
| 
 | ||||
| // FileChecksumFunc is the interface used to generate the checksum value
 | ||||
| // for each file when the file is written to the file system.
 | ||||
| class FileChecksumFunc { | ||||
|  public: | ||||
|   virtual ~FileChecksumFunc() {} | ||||
|   // Return the checksum of concat(A, data[0,n-1]), where init_checksum is the
 | ||||
|   // checksum previously returned for some string A. It is used to maintain the
 | ||||
|   // checksum of a stream of data incrementally.
 | ||||
|   virtual std::string Extend(const std::string& init_checksum, const char* data, | ||||
|                              size_t n) = 0; | ||||
| 
 | ||||
|   // Return the checksum value of data[0,n-1].
 | ||||
|   virtual std::string Value(const char* data, size_t n) = 0; | ||||
| 
 | ||||
|   // Return a processed form of the checksum that is suitable for being stored.
 | ||||
|   virtual std::string ProcessChecksum(const std::string& checksum) = 0; | ||||
| 
 | ||||
|   // Returns a name that identifies the current file checksum function.
 | ||||
|   virtual const char* Name() const = 0; | ||||
| }; | ||||
| 
 | ||||
| // FileChecksumList stores the checksum information of a list of files (e.g.,
 | ||||
| // SST files). The FileChecksumList can be used to store the checksum
 | ||||
| // information of all SST files read from the MANIFEST, which is
 | ||||
| // the checksum information of all valid SST files of a DB instance. It can
 | ||||
| // also be used to store the checksum information of a list of SST files to
 | ||||
| // be ingested.
 | ||||
| class FileChecksumList { | ||||
|  public: | ||||
|   virtual ~FileChecksumList() {} | ||||
| 
 | ||||
|   // Clear the previously stored file checksum information.
 | ||||
|   virtual void reset() = 0; | ||||
| 
 | ||||
|   // Get the number of checksums in the checksum list.
 | ||||
|   virtual size_t size() const = 0; | ||||
| 
 | ||||
|   // Return all the stored file checksum information through three output
 | ||||
|   // vectors. Entries at the same index are intended to describe the same file:
 | ||||
|   // its file number, its checksum value, and its checksum function name.
 | ||||
|   virtual Status GetAllFileChecksums( | ||||
|       std::vector<uint64_t>* file_numbers, std::vector<std::string>* checksums, | ||||
|       std::vector<std::string>* checksum_func_names) = 0; | ||||
| 
 | ||||
|   // Given the file_number, look up whether its checksum information is stored
 | ||||
|   // and, if so, return it through checksum and checksum_func_name.
 | ||||
|   virtual Status SearchOneFileChecksum(uint64_t file_number, | ||||
|                                        std::string* checksum, | ||||
|                                        std::string* checksum_func_name) = 0; | ||||
| 
 | ||||
|   // Insert the checksum information of one file into the FileChecksumList.
 | ||||
|   virtual Status InsertOneFileChecksum( | ||||
|       uint64_t file_number, const std::string& checksum, | ||||
|       const std::string& checksum_func_name) = 0; | ||||
| 
 | ||||
|   // Remove the checksum information of one SST file.
 | ||||
|   virtual Status RemoveOneFileChecksum(uint64_t file_number) = 0; | ||||
| }; | ||||
| 
 | ||||
| // Create a new file checksum list on the heap; presumably the caller owns the result (confirm).
 | ||||
| extern FileChecksumList* NewFileChecksumList(); | ||||
| 
 | ||||
| // Create a Crc32c based file checksum function on the heap; presumably the caller owns the result (confirm).
 | ||||
| extern FileChecksumFunc* CreateFileChecksumFuncCrc32c(); | ||||
| 
 | ||||
| }  // namespace rocksdb
 | ||||
| @ -0,0 +1,85 @@ | ||||
| //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
 | ||||
| //  This source code is licensed under both the GPLv2 (found in the
 | ||||
| //  COPYING file in the root directory) and Apache 2.0 License
 | ||||
| //  (found in the LICENSE.Apache file in the root directory).
 | ||||
| //
 | ||||
| //  Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 | ||||
| //  Use of this source code is governed by a BSD-style license that can be
 | ||||
| //  found in the LICENSE file. See the AUTHORS file for names of contributors.
 | ||||
| 
 | ||||
| #include "util/file_checksum_helper.h" | ||||
| 
 | ||||
| namespace rocksdb { | ||||
| 
 | ||||
| // Drops every stored checksum entry.
 | ||||
| void FileChecksumListImpl::reset() { | ||||
|   checksum_map_.clear(); | ||||
| } | ||||
| 
 | ||||
| // Returns the number of files that currently have checksum information stored.
 | ||||
| size_t FileChecksumListImpl::size() const { | ||||
|   return checksum_map_.size(); | ||||
| } | ||||
| 
 | ||||
| // Appends the file number, checksum and checksum function name of every stored
 | ||||
| // entry to the three output vectors, so that entries at the same index describe
 | ||||
| // the same file. Existing contents of the vectors are kept. Entries are emitted
 | ||||
| // in the iteration order of the underlying map. Returns InvalidArgument if any
 | ||||
| // output pointer is null, OK otherwise.
 | ||||
| Status FileChecksumListImpl::GetAllFileChecksums( | ||||
|     std::vector<uint64_t>* file_numbers, std::vector<std::string>* checksums, | ||||
|     std::vector<std::string>* checksum_func_names) { | ||||
|   if (file_numbers == nullptr || checksums == nullptr || | ||||
|       checksum_func_names == nullptr) { | ||||
|     return Status::InvalidArgument("Pointer has not been initiated"); | ||||
|   } | ||||
| 
 | ||||
|   // Iterate by const reference: iterating by value copies the key and both
 | ||||
|   // strings of every entry before they are copied again into the vectors.
 | ||||
|   for (const auto& entry : checksum_map_) { | ||||
|     file_numbers->push_back(entry.first); | ||||
|     checksums->push_back(entry.second.first); | ||||
|     checksum_func_names->push_back(entry.second.second); | ||||
|   } | ||||
|   return Status::OK(); | ||||
| } | ||||
| 
 | ||||
| // Looks up file_number and, when found, copies its checksum and checksum
 | ||||
| // function name into the output parameters. Returns InvalidArgument for a null
 | ||||
| // output pointer and NotFound when nothing is stored for file_number.
 | ||||
| Status FileChecksumListImpl::SearchOneFileChecksum( | ||||
|     uint64_t file_number, std::string* checksum, | ||||
|     std::string* checksum_func_name) { | ||||
|   if (!checksum || !checksum_func_name) { | ||||
|     return Status::InvalidArgument("Pointer has not been initiated"); | ||||
|   } | ||||
| 
 | ||||
|   const auto found = checksum_map_.find(file_number); | ||||
|   if (found == checksum_map_.end()) { | ||||
|     return Status::NotFound(); | ||||
|   } | ||||
|   const auto& stored = found->second; | ||||
|   *checksum = stored.first; | ||||
|   *checksum_func_name = stored.second; | ||||
|   return Status::OK(); | ||||
| } | ||||
| 
 | ||||
| // Stores the checksum and checksum function name for file_number, replacing
 | ||||
| // any values already stored for that file. Always returns OK.
 | ||||
| Status FileChecksumListImpl::InsertOneFileChecksum( | ||||
|     uint64_t file_number, const std::string& checksum, | ||||
|     const std::string& checksum_func_name) { | ||||
|   // operator[] creates the entry when file_number is new and yields the
 | ||||
|   // existing one otherwise, so insert and overwrite share one code path.
 | ||||
|   auto& slot = checksum_map_[file_number]; | ||||
|   slot.first = checksum; | ||||
|   slot.second = checksum_func_name; | ||||
|   return Status::OK(); | ||||
| } | ||||
| 
 | ||||
| // Removes the checksum information stored for file_number. Returns NotFound
 | ||||
| // when nothing is stored for that file, OK otherwise.
 | ||||
| Status FileChecksumListImpl::RemoveOneFileChecksum(uint64_t file_number) { | ||||
|   // erase(key) reports how many entries were removed; zero means the file
 | ||||
|   // number was not present.
 | ||||
|   if (checksum_map_.erase(file_number) == 0) { | ||||
|     return Status::NotFound(); | ||||
|   } | ||||
|   return Status::OK(); | ||||
| } | ||||
| 
 | ||||
| // Returns a newly allocated, empty FileChecksumListImpl through the
 | ||||
| // FileChecksumList interface.
 | ||||
| FileChecksumList* NewFileChecksumList() { | ||||
|   return new FileChecksumListImpl(); | ||||
| } | ||||
| 
 | ||||
| // Returns a newly allocated FileChecksumFuncCrc32c through the
 | ||||
| // FileChecksumFunc interface.
 | ||||
| FileChecksumFunc* CreateFileChecksumFuncCrc32c() { | ||||
|   return new FileChecksumFuncCrc32c(); | ||||
| } | ||||
| 
 | ||||
| }  // namespace rocksdb
 | ||||
| @ -0,0 +1,117 @@ | ||||
| //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
 | ||||
| //  This source code is licensed under both the GPLv2 (found in the
 | ||||
| //  COPYING file in the root directory) and Apache 2.0 License
 | ||||
| //  (found in the LICENSE.Apache file in the root directory).
 | ||||
| 
 | ||||
| #pragma once | ||||
| #include <cassert> | ||||
| #include <unordered_map> | ||||
| #include "port/port.h" | ||||
| #include "rocksdb/file_checksum.h" | ||||
| #include "rocksdb/status.h" | ||||
| #include "util/crc32c.h" | ||||
| #include "util/string_util.h" | ||||
| 
 | ||||
| namespace rocksdb { | ||||
| 
 | ||||
| // This is the class to generate the file checksum based on Crc32c. It
 | ||||
| // will be used as the default checksum method for SST file checksum.
 | ||||
| // Checksums are exchanged as 4-byte strings that hold the uint32_t value with
 | ||||
| // the most significant byte first (see Uint32ToString / StringToUint32).
 | ||||
| class FileChecksumFuncCrc32c : public FileChecksumFunc { | ||||
|  public: | ||||
|   // Extends the checksum encoded in init_checksum with data[0,n-1] via
 | ||||
|   // crc32c::Extend and returns the result as a 4-byte string. init_checksum
 | ||||
|   // must be a 4-byte string as produced by this class.
 | ||||
|   std::string Extend(const std::string& init_checksum, const char* data, | ||||
|                      size_t n) override { | ||||
|     assert(data != nullptr); | ||||
|     uint32_t checksum_value = StringToUint32(init_checksum); | ||||
|     return Uint32ToString(crc32c::Extend(checksum_value, data, n)); | ||||
|   } | ||||
| 
 | ||||
|   // Returns crc32c::Value of data[0,n-1] as a 4-byte string.
 | ||||
|   std::string Value(const char* data, size_t n) override { | ||||
|     assert(data != nullptr); | ||||
|     return Uint32ToString(crc32c::Value(data, n)); | ||||
|   } | ||||
| 
 | ||||
|   // Decodes the 4-byte checksum string, applies crc32c::Mask to the value and
 | ||||
|   // returns the masked value re-encoded as a 4-byte string.
 | ||||
|   std::string ProcessChecksum(const std::string& checksum) override { | ||||
|     uint32_t checksum_value = StringToUint32(checksum); | ||||
|     return Uint32ToString(crc32c::Mask(checksum_value)); | ||||
|   } | ||||
| 
 | ||||
|   const char* Name() const override { return "FileChecksumCrc32c"; } | ||||
| 
 | ||||
|   // Convert a uint32_t type data into a 4 bytes string. The most significant
 | ||||
|   // byte comes first, independent of the byte order of the host.
 | ||||
|   static std::string Uint32ToString(uint32_t v) { | ||||
|     std::string s(sizeof(v), '\0'); | ||||
|     s[0] = static_cast<char>((v >> 24) & 0xff); | ||||
|     s[1] = static_cast<char>((v >> 16) & 0xff); | ||||
|     s[2] = static_cast<char>((v >> 8) & 0xff); | ||||
|     s[3] = static_cast<char>(v & 0xff); | ||||
|     return s; | ||||
|   } | ||||
| 
 | ||||
|   // Convert a 4 bytes size string into a uint32_t type data. This is the
 | ||||
|   // inverse of Uint32ToString: s[0] is the most significant byte. A string of
 | ||||
|   // any other size violates the precondition (asserted) and yields 0.
 | ||||
|   static uint32_t StringToUint32(const std::string& s) { | ||||
|     assert(s.size() == sizeof(uint32_t)); | ||||
|     if (s.size() != sizeof(uint32_t)) { | ||||
|       // The assert is compiled out in release builds; refuse malformed input
 | ||||
|       // there instead of indexing past the end of the string.
 | ||||
|       return 0; | ||||
|     } | ||||
|     uint32_t v = 0; | ||||
|     for (size_t i = 0; i < sizeof(uint32_t); ++i) { | ||||
|       // Go through unsigned char: plain char may be signed, and a byte >= 0x80
 | ||||
|       // would otherwise sign-extend and set all higher bits of the result.
 | ||||
|       v = (v << 8) | static_cast<uint32_t>(static_cast<unsigned char>(s[i])); | ||||
|     } | ||||
|     return v; | ||||
|   } | ||||
| }; | ||||
| 
 | ||||
| // The default implementation of FileChecksumList
 | ||||
| class FileChecksumListImpl : public FileChecksumList { | ||||
|  public: | ||||
|   FileChecksumListImpl() {} | ||||
|   void reset() override; | ||||
| 
 | ||||
|   size_t size() const override; | ||||
| 
 | ||||
|   Status GetAllFileChecksums( | ||||
|       std::vector<uint64_t>* file_numbers, std::vector<std::string>* checksums, | ||||
|       std::vector<std::string>* checksum_func_names) override; | ||||
| 
 | ||||
|   Status SearchOneFileChecksum(uint64_t file_number, std::string* checksum, | ||||
|                                std::string* checksum_func_name) override; | ||||
| 
 | ||||
|   Status InsertOneFileChecksum(uint64_t file_number, | ||||
|                                const std::string& checksum, | ||||
|                                const std::string& checksum_func_name) override; | ||||
| 
 | ||||
|   Status RemoveOneFileChecksum(uint64_t file_number) override; | ||||
| 
 | ||||
|  private: | ||||
|   // Key is the file number, the first portion of the value is the checksum, the
 | ||||
|   // second portion of the value is the checksum function name.
 | ||||
|   std::unordered_map<uint64_t, std::pair<std::string, std::string>> | ||||
|       checksum_map_; | ||||
| }; | ||||
| 
 | ||||
| }  // namespace rocksdb
 | ||||
					Loading…
					
					
				
		Reference in new issue
	
	 Zhichao Cao
						Zhichao Cao