Add SstFileReader to read sst files (#4717)
Summary: A user-friendly sst file reader is useful when we want to access sst files outside of RocksDB. For example, we can generate an sst file with SstFileWriter, send it somewhere else, and then use SstFileReader to read the file and process its entries in other ways. This change also renames the original SstFileReader to SstFileDumper to avoid a name conflict; SstFileDumper also seems a more appropriate name for the tools. TODO: there is only a very simple test for now, because I want to get some feedback first. If the changes look good, I will add more tests soon. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4717 Differential Revision: D13212686 Pulled By: ajkr fbshipit-source-id: 737593383264c954b79e63edaf44aaae0d947e56 main
parent
3fa80f0e85
commit
5e72bc113a
@ -0,0 +1,45 @@ |
|||||||
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#ifndef ROCKSDB_LITE |
||||||
|
|
||||||
|
#include "rocksdb/slice.h" |
||||||
|
#include "rocksdb/options.h" |
||||||
|
#include "rocksdb/iterator.h" |
||||||
|
#include "rocksdb/table_properties.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
// SstFileReader is used to read sst files that are generated by DB or
|
||||||
|
// SstFileWriter.
|
||||||
|
class SstFileReader { |
||||||
|
public: |
||||||
|
SstFileReader(const Options& options); |
||||||
|
|
||||||
|
~SstFileReader(); |
||||||
|
|
||||||
|
// Prepares to read from the file located at "file_path".
|
||||||
|
Status Open(const std::string& file_path); |
||||||
|
|
||||||
|
// Returns a new iterator over the table contents.
|
||||||
|
// Most read options provide the same control as we read from DB.
|
||||||
|
// If "snapshot" is nullptr, the iterator returns only the latest keys.
|
||||||
|
Iterator* NewIterator(const ReadOptions& options); |
||||||
|
|
||||||
|
std::shared_ptr<const TableProperties> GetTableProperties() const; |
||||||
|
|
||||||
|
// Verifies whether there is corruption in this table.
|
||||||
|
Status VerifyChecksum(); |
||||||
|
|
||||||
|
private: |
||||||
|
struct Rep; |
||||||
|
std::unique_ptr<Rep> rep_; |
||||||
|
}; |
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
#endif // !ROCKSDB_LITE
|
@ -0,0 +1,84 @@ |
|||||||
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
|
||||||
|
#ifndef ROCKSDB_LITE |
||||||
|
|
||||||
|
#include "rocksdb/sst_file_reader.h" |
||||||
|
|
||||||
|
#include "db/db_iter.h" |
||||||
|
#include "options/cf_options.h" |
||||||
|
#include "table/get_context.h" |
||||||
|
#include "table/table_reader.h" |
||||||
|
#include "table/table_builder.h" |
||||||
|
#include "util/file_reader_writer.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
// Private state behind SstFileReader (the type forward-declared as "Rep" in
// the public header).
struct SstFileReader::Rep {
  // NOTE: the declaration order of the four option members is load-bearing.
  // Members are initialized in declaration order and the constructor derives
  // soptions, ioptions and moptions from the already-initialized "options"
  // member.
  Options options;              // Copy of the options given to the reader.
  EnvOptions soptions;          // Built from "options".
  ImmutableCFOptions ioptions;  // Built from "options".
  MutableCFOptions moptions;    // Built from the column family part of
                                // "options".

  // Populated by SstFileReader::Open().
  std::unique_ptr<TableReader> table_reader;

  Rep(const Options& user_options)
      : options(user_options),
        soptions(options),
        ioptions(options),
        moptions(ColumnFamilyOptions(options)) {}
};
||||||
|
|
||||||
|
SstFileReader::SstFileReader(const Options& options) |
||||||
|
: rep_(new Rep(options)) {} |
||||||
|
|
||||||
|
// Defined out of line, in the file where Rep is a complete type, because the
// header only forward-declares Rep and std::unique_ptr<Rep> needs the full
// definition to destroy it.
SstFileReader::~SstFileReader() = default;
||||||
|
|
||||||
|
// Opens "file_path" through the Env configured in the reader's options and
// asks the configured table factory to create a table reader for it.
// Returns the first non-OK status encountered, or the status of
// NewTableReader() if every earlier step succeeded.
Status SstFileReader::Open(const std::string& file_path) {
  Rep* const rep = rep_.get();
  Env* const env = rep->options.env;

  // The table reader needs to be told the file size up front.
  uint64_t file_size = 0;
  Status s = env->GetFileSize(file_path, &file_size);
  if (!s.ok()) {
    return s;
  }

  std::unique_ptr<RandomAccessFile> file;
  s = env->NewRandomAccessFile(file_path, &file, rep->soptions);
  if (!s.ok()) {
    return s;
  }

  std::unique_ptr<RandomAccessFileReader> file_reader(
      new RandomAccessFileReader(std::move(file), file_path));

  const TableReaderOptions reader_options(
      rep->ioptions, rep->moptions.prefix_extractor.get(), rep->soptions,
      rep->ioptions.internal_comparator);
  return rep->options.table_factory->NewTableReader(
      reader_options, std::move(file_reader), file_size, &rep->table_reader);
}
||||||
|
|
||||||
|
// Returns a new iterator over the opened table.
//
// The table reader's internal iterator is wrapped with NewDBIterator(). The
// visible sequence number is the snapshot's sequence number when
// "options.snapshot" is set, and kMaxSequenceNumber otherwise.
//
// If no table reader exists yet (Open() has not been called), an error
// iterator carrying an InvalidArgument status is returned instead of
// dereferencing a null table reader.
Iterator* SstFileReader::NewIterator(const ReadOptions& options) {
  auto r = rep_.get();
  if (r->table_reader == nullptr) {
    return NewErrorIterator(Status::InvalidArgument(
        "SstFileReader::NewIterator() called before a successful Open()"));
  }

  const SequenceNumber sequence =
      options.snapshot != nullptr ? options.snapshot->GetSequenceNumber()
                                  : kMaxSequenceNumber;
  auto internal_iter = r->table_reader->NewIterator(
      options, r->moptions.prefix_extractor.get());
  return NewDBIterator(r->options.env, options, r->ioptions, r->moptions,
                       r->ioptions.user_comparator, internal_iter, sequence,
                       r->moptions.max_sequential_skip_in_iterations,
                       nullptr /* read_callback */);
}
||||||
|
|
||||||
|
std::shared_ptr<const TableProperties> SstFileReader::GetTableProperties() const { |
||||||
|
return rep_->table_reader->GetTableProperties(); |
||||||
|
} |
||||||
|
|
||||||
|
// Delegates to the table reader's VerifyChecksum().
// Returns InvalidArgument if no table reader exists yet (Open() has not been
// called) instead of dereferencing a null table reader.
Status SstFileReader::VerifyChecksum() {
  const auto& table_reader = rep_->table_reader;
  if (table_reader == nullptr) {
    return Status::InvalidArgument(
        "SstFileReader::VerifyChecksum() called before a successful Open()");
  }
  return table_reader->VerifyChecksum();
}
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
#endif // !ROCKSDB_LITE
|
@ -0,0 +1,98 @@ |
|||||||
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
|
||||||
|
#ifndef ROCKSDB_LITE |
||||||
|
|
||||||
|
#include <inttypes.h> |
||||||
|
|
||||||
|
#include "rocksdb/sst_file_reader.h" |
||||||
|
#include "rocksdb/sst_file_writer.h" |
||||||
|
#include "util/testharness.h" |
||||||
|
#include "util/testutil.h" |
||||||
|
#include "utilities/merge_operators.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
// Formats "v" as a decimal string, zero-padded on the left to at least eight
// digits (e.g. 42 -> "00000042").
std::string EncodeAsString(uint64_t v) {
  // A uint64_t can need up to 20 decimal digits plus the terminating NUL.
  // A 16-byte buffer would make snprintf() silently truncate large values.
  char buf[24];
  snprintf(buf, sizeof(buf), "%08" PRIu64, v);
  return std::string(buf);
}
||||||
|
|
||||||
|
// Returns a string holding exactly what PutFixed64() appends for "v".
// NOTE(review): presumably the 8-byte fixed-width encoding that the
// Uint64Comparator and UInt64Add merge operator used by these tests expect;
// confirm against PutFixed64().
std::string EncodeAsUint64(uint64_t v) {
  std::string encoded;
  PutFixed64(&encoded, v);
  return encoded;
}
||||||
|
|
||||||
|
class SstFileReaderTest : public testing::Test { |
||||||
|
public: |
||||||
|
SstFileReaderTest() { |
||||||
|
options_.merge_operator = MergeOperators::CreateUInt64AddOperator(); |
||||||
|
sst_name_ = test::PerThreadDBPath("sst_file"); |
||||||
|
} |
||||||
|
|
||||||
|
void CreateFileAndCheck(const std::vector<std::string>& keys) { |
||||||
|
SstFileWriter writer(soptions_, options_); |
||||||
|
ASSERT_OK(writer.Open(sst_name_)); |
||||||
|
for (size_t i = 0; i + 2 < keys.size(); i += 3) { |
||||||
|
ASSERT_OK(writer.Put(keys[i], keys[i])); |
||||||
|
ASSERT_OK(writer.Merge(keys[i+1], EncodeAsUint64(i+1))); |
||||||
|
ASSERT_OK(writer.Delete(keys[i+2])); |
||||||
|
} |
||||||
|
ASSERT_OK(writer.Finish()); |
||||||
|
|
||||||
|
ReadOptions ropts; |
||||||
|
SstFileReader reader(options_); |
||||||
|
ASSERT_OK(reader.Open(sst_name_)); |
||||||
|
ASSERT_OK(reader.VerifyChecksum()); |
||||||
|
std::unique_ptr<Iterator> iter(reader.NewIterator(ropts)); |
||||||
|
iter->SeekToFirst(); |
||||||
|
for (size_t i = 0; i + 2 < keys.size(); i += 3) { |
||||||
|
ASSERT_TRUE(iter->Valid()); |
||||||
|
ASSERT_EQ(iter->key().compare(keys[i]), 0); |
||||||
|
ASSERT_EQ(iter->value().compare(keys[i]), 0); |
||||||
|
iter->Next(); |
||||||
|
ASSERT_TRUE(iter->Valid()); |
||||||
|
ASSERT_EQ(iter->key().compare(keys[i+1]), 0); |
||||||
|
ASSERT_EQ(iter->value().compare(EncodeAsUint64(i+1)), 0); |
||||||
|
iter->Next(); |
||||||
|
} |
||||||
|
ASSERT_FALSE(iter->Valid()); |
||||||
|
} |
||||||
|
|
||||||
|
protected: |
||||||
|
Options options_; |
||||||
|
EnvOptions soptions_; |
||||||
|
std::string sst_name_; |
||||||
|
}; |
||||||
|
|
||||||
|
// Number of keys generated by each test case below.
constexpr uint64_t kNumKeys = 100;
||||||
|
|
||||||
|
// Round-trips kNumKeys zero-padded decimal string keys using the fixture's
// options as set up in its constructor.
TEST_F(SstFileReaderTest, Basic) {
  std::vector<std::string> keys;
  keys.reserve(static_cast<size_t>(kNumKeys));
  for (uint64_t n = 0; n != kNumKeys; ++n) {
    keys.push_back(EncodeAsString(n));
  }
  CreateFileAndCheck(keys);
}
||||||
|
|
||||||
|
// Round-trips kNumKeys keys produced by EncodeAsUint64() with the test
// Uint64Comparator installed as the comparator.
TEST_F(SstFileReaderTest, Uint64Comparator) {
  options_.comparator = test::Uint64Comparator();

  std::vector<std::string> keys;
  keys.reserve(static_cast<size_t>(kNumKeys));
  for (uint64_t n = 0; n != kNumKeys; ++n) {
    keys.push_back(EncodeAsUint64(n));
  }
  CreateFileAndCheck(keys);
}
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
int main(int argc, char** argv) { |
||||||
|
::testing::InitGoogleTest(&argc, argv); |
||||||
|
return RUN_ALL_TESTS(); |
||||||
|
} |
||||||
|
|
||||||
|
#endif // ROCKSDB_LITE
|
Loading…
Reference in new issue