Follow-up Clean-up After D13521

Summary:
This patch is to address @haobo's comments on D13521:
1. rename Table to TableReader and rename its factory function to GetTableReader (see the interface sketch after this list)
2. move the compression type selection logic out of TableBuilder and into the compaction logic
3. make comments more accurate
4. move stat name constants into the BlockBasedTable implementation
5. remove some leftover code in simple_table_db_test
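
For orientation, here is a minimal stand-alone sketch of the factory interface after this change. The types below are simplified stand-ins, not the actual rocksdb headers; only the two entry-point signatures are meant to match the patch.

// Sketch of the renamed TableFactory entry points (stand-in types).
#include <cstdint>
#include <memory>

struct Options {};
struct EnvOptions {};
struct Status {};
class RandomAccessFile {};
class WritableFile {};
class TableBuilder { public: virtual ~TableBuilder() {} };
class TableReader { public: virtual ~TableReader() {} };  // was: class Table
enum CompressionType { kNoCompression, kSnappyCompression };

class TableFactory {
 public:
  virtual ~TableFactory() {}
  // Before this patch: OpenTable(..., unique_ptr<Table>* table).
  virtual Status GetTableReader(
      const Options& options, const EnvOptions& soptions,
      std::unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
      std::unique_ptr<TableReader>* table_reader) const = 0;
  // Before this patch: GetTableBuilder(options, file, level, enable_compression);
  // the compression decision is now made by the caller.
  virtual TableBuilder* GetTableBuilder(
      const Options& options, WritableFile* file,
      CompressionType compression_type) const = 0;
};

int main() { return 0; }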

Test Plan: pass test suites.

Reviewers: haobo, dhruba, kailiu

Reviewed By: haobo

CC: leveldb

Differential Revision: https://reviews.facebook.net/D13785
Branch: main
Author: Siying Dong (11 years ago)
Parent: 068a819ac9
Commit: f03b2df010
19 changed files:
  db/builder.cc (11)
  db/builder.h (4)
  db/db_impl.cc (29)
  db/db_impl.h (9)
  db/simple_table_db_test.cc (524)
  db/table_cache.cc (42)
  db/table_cache.h (2)
  db/table_stats_collector_test.cc (25)
  db/version_set.cc (8)
  include/rocksdb/options.h (1)
  include/rocksdb/table.h (89)
  table/block_based_table_builder.cc (52)
  table/block_based_table_builder.h (8)
  table/block_based_table_factory.cc (20)
  table/block_based_table_factory.h (9)
  table/block_based_table_reader.cc (76)
  table/block_based_table_reader.h (41)
  table/table_test.cc (42)
  tools/sst_dump.cc (11)

@ -18,6 +18,7 @@
#include "rocksdb/table.h"
#include "rocksdb/env.h"
#include "rocksdb/iterator.h"
#include "rocksdb/options.h"
#include "table/block_based_table_builder.h"
#include "util/stop_watch.h"
@ -26,9 +27,9 @@ namespace rocksdb {
class TableFactory;
TableBuilder* GetTableBuilder(const Options& options, WritableFile* file,
int level, const bool enable_compression) {
return options.table_factory->GetTableBuilder(options, file, level,
enable_compression);
CompressionType compression_type) {
return options.table_factory->GetTableBuilder(options, file,
compression_type);
}
Status BuildTable(const std::string& dbname,
@ -63,8 +64,8 @@ Status BuildTable(const std::string& dbname,
return s;
}
TableBuilder* builder = GetTableBuilder(options, file.get(), 0,
enable_compression);
TableBuilder* builder = GetTableBuilder(options, file.get(),
options.compression);
// the first key is the smallest key
Slice key = iter->key();

@ -9,6 +9,7 @@
#include "rocksdb/comparator.h"
#include "rocksdb/status.h"
#include "rocksdb/types.h"
#include "rocksdb/options.h"
namespace rocksdb {
@ -23,8 +24,9 @@ class VersionEdit;
class TableBuilder;
class WritableFile;
extern TableBuilder* GetTableBuilder(const Options& options, WritableFile* file,
int level, const bool enable_compression);
CompressionType compression_type);
// Build a Table file from the contents of *iter. The generated file
// will be named according to meta->number. On success, the rest of

@ -211,6 +211,27 @@ Options SanitizeOptions(const std::string& dbname,
return result;
}
CompressionType GetCompressionType(const Options& options, int level,
const bool enable_compression) {
if (!enable_compression) {
// disable compression
return kNoCompression;
}
// If the user has specified a different compression type for each level,
// then pick the compression for that level.
if (!options.compression_per_level.empty()) {
const int n = options.compression_per_level.size() - 1;
// It is possible for level to be -1; in that case, we use level
// 0's compression. This occurs mostly in backwards compatibility
// situations when the builder doesn't know what level the file
// belongs to. Likewise, if level is beyond the end of the
// specified compression levels, use the last value.
return options.compression_per_level[std::max(0, std::min(level, n))];
} else {
return options.compression;
}
}
DBImpl::DBImpl(const Options& options, const std::string& dbname)
: env_(options.env),
dbname_(dbname),
@ -1774,10 +1795,12 @@ Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) {
compact->outfile->SetPreallocationBlockSize(
1.1 * versions_->MaxFileSizeForLevel(compact->compaction->output_level()));
CompressionType compression_type = GetCompressionType(
options_, compact->compaction->output_level(),
compact->compaction->enable_compression());
compact->builder.reset(
GetTableBuilder(options_, compact->outfile.get(),
compact->compaction->output_level(),
compact->compaction->enable_compression()));
GetTableBuilder(options_, compact->outfile.get(), compression_type));
}
return s;
}

@ -444,4 +444,13 @@ extern Options SanitizeOptions(const std::string& db,
const InternalFilterPolicy* ipolicy,
const Options& src);
// Determine the compression type based on the user options, the level of the
// output file, and whether compression is enabled.
// If enable_compression is false, then compression is always disabled no
// matter what the values of the other two parameters are.
// Otherwise, the compression type is determined based on options and level.
CompressionType GetCompressionType(const Options& options, int level,
const bool enable_compression);
} // namespace rocksdb
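
For illustration only, the following self-contained sketch (simplified stand-in names, not the RocksDB function itself) mirrors the selection rule described above: when per-level compression is configured, the level is clamped into the configured range; otherwise the single default compression is used, and disabling compression always wins.

#include <algorithm>
#include <cassert>
#include <vector>

enum FakeCompression { kNone, kSnappy, kZlib };  // stand-ins, not rocksdb enums

FakeCompression PickCompression(const std::vector<FakeCompression>& per_level,
                                FakeCompression default_compression,
                                int level, bool enable_compression) {
  if (!enable_compression) return kNone;
  if (per_level.empty()) return default_compression;
  const int n = static_cast<int>(per_level.size()) - 1;
  // level may be -1 (unknown) -> use level 0; levels beyond the list use the
  // last configured value.
  return per_level[std::max(0, std::min(level, n))];
}

int main() {
  std::vector<FakeCompression> per_level = {kNone, kSnappy, kZlib};
  assert(PickCompression(per_level, kSnappy, -1, true) == kNone);   // clamped to 0
  assert(PickCompression(per_level, kSnappy, 5, true) == kZlib);    // clamped to 2
  assert(PickCompression(per_level, kSnappy, 1, false) == kNone);   // disabled
  assert(PickCompression({}, kSnappy, 1, true) == kSnappy);         // fallback
  return 0;
}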

@ -60,11 +60,10 @@ namespace rocksdb {
// | index_block_offset (8 bytes) |
// +------------------------------+
// SimpleTable is a simple table format used for UNIT TEST ONLY. It is not
// built to production quality.
class SimpleTable : public Table {
public:
class SimpleTableReader: public TableReader {
public:
// Attempt to open the table that is stored in bytes [0..file_size)
// of "file", and read the metadata entries necessary to allow
// retrieving data from the table.
@ -77,19 +76,16 @@ class SimpleTable : public Table {
// for the duration of the returned table's lifetime.
//
// *file must remain live while this Table is in use.
static Status Open(const Options& options,
const EnvOptions& soptions,
unique_ptr<RandomAccessFile>&& file,
uint64_t file_size,
unique_ptr<Table>* table);
static Status Open(const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
unique_ptr<TableReader>* table_reader);
bool PrefixMayMatch(const Slice& internal_prefix) override;
Iterator* NewIterator(const ReadOptions&) override;
Status Get(
const ReadOptions&, const Slice& key,
void* arg,
const ReadOptions&, const Slice& key, void* arg,
bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool),
void (*mark_key_may_exist)(void*) = nullptr) override;
@ -101,13 +97,13 @@ class SimpleTable : public Table {
TableStats& GetTableStats() override;
~SimpleTable();
~SimpleTableReader();
private:
private:
struct Rep;
Rep* rep_;
explicit SimpleTable(Rep* rep) {
explicit SimpleTableReader(Rep* rep) {
rep_ = rep;
}
friend class TableCache;
@ -116,51 +112,51 @@ class SimpleTable : public Table {
Status GetOffset(const Slice& target, uint64_t* offset);
// No copying allowed
explicit SimpleTable(const Table&) = delete;
void operator=(const Table&) = delete;
explicit SimpleTableReader(const TableReader&) = delete;
void operator=(const TableReader&) = delete;
};
// Iterator to iterate SimpleTable
class SimpleTableIterator: public Iterator {
public:
explicit SimpleTableIterator(SimpleTable* table);
explicit SimpleTableIterator(SimpleTableReader* table);
~SimpleTableIterator();
bool Valid() const;
bool Valid() const;
void SeekToFirst();
void SeekToFirst();
void SeekToLast();
void SeekToLast();
void Seek(const Slice& target);
void Seek(const Slice& target);
void Next();
void Next();
void Prev();
void Prev();
Slice key() const;
Slice key() const;
Slice value() const;
Slice value() const;
Status status() const;
Status status() const;
private:
SimpleTable* table_;
uint64_t offset_;
uint64_t next_offset_;
Slice key_;
Slice value_;
char tmp_str_[4];
char* key_str_;
char* value_str_;
int value_str_len_;
Status status_;
// No copying allowed
SimpleTableIterator(const SimpleTableIterator&) = delete;
void operator=(const Iterator&) = delete;
SimpleTableReader* table_;
uint64_t offset_;
uint64_t next_offset_;
Slice key_;
Slice value_;
char tmp_str_[4];
char* key_str_;
char* value_str_;
int value_str_len_;
Status status_;
// No copying allowed
SimpleTableIterator(const SimpleTableIterator&) = delete;
void operator=(const Iterator&) = delete;
};
struct SimpleTable::Rep {
struct SimpleTableReader::Rep {
~Rep() {
}
Rep(const EnvOptions& storage_options, uint64_t index_start_offset,
@ -186,13 +182,15 @@ struct SimpleTable::Rep {
}
};
SimpleTable::~SimpleTable() {
SimpleTableReader::~SimpleTableReader() {
delete rep_;
}
Status SimpleTable::Open(const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file, uint64_t size,
unique_ptr<Table>* table) {
Status SimpleTableReader::Open(const Options& options,
const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file,
uint64_t size,
unique_ptr<TableReader>* table_reader) {
char footer_space[Rep::offset_length];
Slice footer_input;
Status s = file->Read(size - Rep::offset_length, Rep::offset_length,
@ -202,33 +200,33 @@ Status SimpleTable::Open(const Options& options, const EnvOptions& soptions,
int num_entries = (size - Rep::offset_length - index_start_offset)
/ (Rep::GetInternalKeyLength() + Rep::offset_length);
SimpleTable::Rep* rep = new SimpleTable::Rep(soptions, index_start_offset,
num_entries);
SimpleTableReader::Rep* rep = new SimpleTableReader::Rep(soptions,
index_start_offset,
num_entries);
rep->file = std::move(file);
rep->options = options;
table->reset(new SimpleTable(rep));
table_reader->reset(new SimpleTableReader(rep));
}
return s;
}
void SimpleTable::SetupForCompaction() {
void SimpleTableReader::SetupForCompaction() {
}
TableStats& SimpleTable::GetTableStats() {
TableStats& SimpleTableReader::GetTableStats() {
return rep_->table_stats;
}
bool SimpleTable::PrefixMayMatch(const Slice& internal_prefix) {
bool SimpleTableReader::PrefixMayMatch(const Slice& internal_prefix) {
return true;
}
Iterator* SimpleTable::NewIterator(const ReadOptions& options) {
Iterator* SimpleTableReader::NewIterator(const ReadOptions& options) {
return new SimpleTableIterator(this);
}
Status SimpleTable::GetOffset(const Slice& target, uint64_t* offset) {
Status SimpleTableReader::GetOffset(const Slice& target, uint64_t* offset) {
uint32_t left = 0;
uint32_t right = rep_->num_entries - 1;
char key_chars[Rep::GetInternalKeyLength()];
@ -281,9 +279,10 @@ Status SimpleTable::GetOffset(const Slice& target, uint64_t* offset) {
return s;
}
Status SimpleTable::Get(const ReadOptions& options, const Slice& k, void* arg,
bool (*saver)(void*, const Slice&, const Slice&, bool),
void (*mark_key_may_exist)(void*)) {
Status SimpleTableReader::Get(
const ReadOptions& options, const Slice& k, void* arg,
bool (*saver)(void*, const Slice&, const Slice&, bool),
void (*mark_key_may_exist)(void*)) {
Status s;
SimpleTableIterator* iter = new SimpleTableIterator(this);
for (iter->Seek(k); iter->Valid(); iter->Next()) {
@ -296,18 +295,18 @@ Status SimpleTable::Get(const ReadOptions& options, const Slice& k, void* arg,
return s;
}
bool SimpleTable::TEST_KeyInCache(const ReadOptions& options,
const Slice& key) {
bool SimpleTableReader::TEST_KeyInCache(const ReadOptions& options,
const Slice& key) {
return false;
}
uint64_t SimpleTable::ApproximateOffsetOf(const Slice& key) {
uint64_t SimpleTableReader::ApproximateOffsetOf(const Slice& key) {
return 0;
}
SimpleTableIterator::SimpleTableIterator(SimpleTable* table) :
SimpleTableIterator::SimpleTableIterator(SimpleTableReader* table) :
table_(table) {
key_str_ = new char[table->rep_->GetInternalKeyLength()];
key_str_ = new char[SimpleTableReader::Rep::GetInternalKeyLength()];
value_str_len_ = -1;
SeekToFirst();
}
@ -346,7 +345,7 @@ void SimpleTableIterator::Next() {
return;
}
Slice result;
int internal_key_size = table_->rep_->GetInternalKeyLength();
int internal_key_size = SimpleTableReader::Rep::GetInternalKeyLength();
Status s = table_->rep_->file->Read(next_offset_, internal_key_size, &result,
key_str_);
@ -389,14 +388,15 @@ Status SimpleTableIterator::status() const {
return status_;
}
class SimpleTableBuilder : public TableBuilder {
public:
class SimpleTableBuilder: public TableBuilder {
public:
// Create a builder that will store the contents of the table it is
// building in *file. Does not close the file. It is up to the
// caller to close the file after calling Finish(). The output file
// will be part of level specified by 'level'. A value of -1 means
// that the caller does not know which level the output file will reside.
SimpleTableBuilder(const Options& options, WritableFile* file, int level=-1);
SimpleTableBuilder(const Options& options, WritableFile* file,
CompressionType compression_type);
// REQUIRES: Either Finish() or Abandon() has been called.
~SimpleTableBuilder();
@ -428,7 +428,7 @@ class SimpleTableBuilder : public TableBuilder {
// Finish() call, returns the size of the final generated file.
uint64_t FileSize() const override;
private:
private:
struct Rep;
Rep* rep_;
@ -457,25 +457,25 @@ struct SimpleTableBuilder::Rep {
std::string index;
Rep(const Options& opt, WritableFile* f)
: options(opt),
file(f) {
Rep(const Options& opt, WritableFile* f) :
options(opt), file(f) {
}
~Rep() {
}
};
SimpleTableBuilder::SimpleTableBuilder(const Options& options,
WritableFile* file, int level)
: TableBuilder(level), rep_(new SimpleTableBuilder::Rep(options, file)) {
WritableFile* file,
CompressionType compression_type) :
rep_(new SimpleTableBuilder::Rep(options, file)) {
}
SimpleTableBuilder::~SimpleTableBuilder() {
delete(rep_);
delete (rep_);
}
void SimpleTableBuilder::Add(const Slice& key, const Slice& value) {
assert((int) key.size() == Rep::GetInternalKeyLength());
assert((int ) key.size() == Rep::GetInternalKeyLength());
// Update index
rep_->index.append(key.data(), key.size());
@ -531,204 +531,50 @@ uint64_t SimpleTableBuilder::FileSize() const {
return rep_->offset;
}
class SimpleTableFactory : public TableFactory {
public:
~SimpleTableFactory() {}
SimpleTableFactory() {}
class SimpleTableFactory: public TableFactory {
public:
~SimpleTableFactory() {
}
SimpleTableFactory() {
}
const char* Name() const override {
return "SimpleTable";
}
Status OpenTable(const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
unique_ptr<Table>* table) const;
Status GetTableReader(const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file,
uint64_t file_size,
unique_ptr<TableReader>* table_reader) const;
TableBuilder* GetTableBuilder(const Options& options, WritableFile* file,
int level, const bool enable_compression) const;
CompressionType compression_type) const;
};
Status SimpleTableFactory::OpenTable(const Options& options,
const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file,
uint64_t file_size,
unique_ptr<Table>* table) const {
Status SimpleTableFactory::GetTableReader(
const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const {
return SimpleTable::Open(options, soptions, std::move(file), file_size,
table);
return SimpleTableReader::Open(options, soptions, std::move(file), file_size,
table_reader);
}
TableBuilder* SimpleTableFactory::GetTableBuilder(
const Options& options, WritableFile* file, int level,
const bool enable_compression) const {
return new SimpleTableBuilder(options, file, level);
const Options& options, WritableFile* file,
CompressionType compression_type) const {
return new SimpleTableBuilder(options, file, compression_type);
}
namespace anon {
class AtomicCounter {
private:
port::Mutex mu_;
int count_;
public:
AtomicCounter() : count_(0) { }
void Increment() {
MutexLock l(&mu_);
count_++;
}
int Read() {
MutexLock l(&mu_);
return count_;
}
void Reset() {
MutexLock l(&mu_);
count_ = 0;
}
};
}
// Special Env used to delay background operations
class SpecialEnv : public EnvWrapper {
public:
// sstable Sync() calls are blocked while this pointer is non-nullptr.
port::AtomicPointer delay_sstable_sync_;
// Simulate no-space errors while this pointer is non-nullptr.
port::AtomicPointer no_space_;
// Simulate non-writable file system while this pointer is non-nullptr
port::AtomicPointer non_writable_;
// Force sync of manifest files to fail while this pointer is non-nullptr
port::AtomicPointer manifest_sync_error_;
// Force write to manifest files to fail while this pointer is non-nullptr
port::AtomicPointer manifest_write_error_;
bool count_random_reads_;
anon::AtomicCounter random_read_counter_;
anon::AtomicCounter sleep_counter_;
explicit SpecialEnv(Env* base) : EnvWrapper(base) {
delay_sstable_sync_.Release_Store(nullptr);
no_space_.Release_Store(nullptr);
non_writable_.Release_Store(nullptr);
count_random_reads_ = false;
manifest_sync_error_.Release_Store(nullptr);
manifest_write_error_.Release_Store(nullptr);
}
Status NewWritableFile(const std::string& f, unique_ptr<WritableFile>* r,
const EnvOptions& soptions) {
class SSTableFile : public WritableFile {
private:
SpecialEnv* env_;
unique_ptr<WritableFile> base_;
public:
SSTableFile(SpecialEnv* env, unique_ptr<WritableFile>&& base)
: env_(env),
base_(std::move(base)) {
}
Status Append(const Slice& data) {
if (env_->no_space_.Acquire_Load() != nullptr) {
// Drop writes on the floor
return Status::OK();
} else {
return base_->Append(data);
}
}
Status Close() { return base_->Close(); }
Status Flush() { return base_->Flush(); }
Status Sync() {
while (env_->delay_sstable_sync_.Acquire_Load() != nullptr) {
env_->SleepForMicroseconds(100000);
}
return base_->Sync();
}
};
class ManifestFile : public WritableFile {
private:
SpecialEnv* env_;
unique_ptr<WritableFile> base_;
public:
ManifestFile(SpecialEnv* env, unique_ptr<WritableFile>&& b)
: env_(env), base_(std::move(b)) { }
Status Append(const Slice& data) {
if (env_->manifest_write_error_.Acquire_Load() != nullptr) {
return Status::IOError("simulated writer error");
} else {
return base_->Append(data);
}
}
Status Close() { return base_->Close(); }
Status Flush() { return base_->Flush(); }
Status Sync() {
if (env_->manifest_sync_error_.Acquire_Load() != nullptr) {
return Status::IOError("simulated sync error");
} else {
return base_->Sync();
}
}
};
if (non_writable_.Acquire_Load() != nullptr) {
return Status::IOError("simulated write error");
}
Status s = target()->NewWritableFile(f, r, soptions);
if (s.ok()) {
if (strstr(f.c_str(), ".sst") != nullptr) {
r->reset(new SSTableFile(this, std::move(*r)));
} else if (strstr(f.c_str(), "MANIFEST") != nullptr) {
r->reset(new ManifestFile(this, std::move(*r)));
}
}
return s;
}
Status NewRandomAccessFile(const std::string& f,
unique_ptr<RandomAccessFile>* r,
const EnvOptions& soptions) {
class CountingFile : public RandomAccessFile {
private:
unique_ptr<RandomAccessFile> target_;
anon::AtomicCounter* counter_;
public:
CountingFile(unique_ptr<RandomAccessFile>&& target,
anon::AtomicCounter* counter)
: target_(std::move(target)), counter_(counter) {
}
virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const {
counter_->Increment();
return target_->Read(offset, n, result, scratch);
}
};
Status s = target()->NewRandomAccessFile(f, r, soptions);
if (s.ok() && count_random_reads_) {
r->reset(new CountingFile(std::move(*r), &random_read_counter_));
}
return s;
}
virtual void SleepForMicroseconds(int micros) {
sleep_counter_.Increment();
target()->SleepForMicroseconds(micros);
}
};
class SimpleTableDBTest {
protected:
public:
protected:
public:
std::string dbname_;
SpecialEnv* env_;
Env* env_;
DB* db_;
Options last_options_;
SimpleTableDBTest() : env_(new SpecialEnv(Env::Default())) {
SimpleTableDBTest() :
env_(Env::Default()) {
dbname_ = test::TmpDir() + "/simple_table_db_test";
ASSERT_OK(DestroyDB(dbname_, Options()));
db_ = nullptr;
@ -738,7 +584,6 @@ class SimpleTableDBTest {
~SimpleTableDBTest() {
delete db_;
ASSERT_OK(DestroyDB(dbname_, Options()));
delete env_;
}
// Return the current option configuration.
@ -813,81 +658,6 @@ class SimpleTableDBTest {
return result;
}
// Return a string that contains all key,value pairs in order,
// formatted like "(k1->v1)(k2->v2)".
std::string Contents() {
std::vector<std::string> forward;
std::string result;
Iterator* iter = db_->NewIterator(ReadOptions());
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
std::string s = IterStatus(iter);
result.push_back('(');
result.append(s);
result.push_back(')');
forward.push_back(s);
}
// Check reverse iteration results are the reverse of forward results
unsigned int matched = 0;
for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
ASSERT_LT(matched, forward.size());
ASSERT_EQ(IterStatus(iter), forward[forward.size() - matched - 1]);
matched++;
}
ASSERT_EQ(matched, forward.size());
delete iter;
return result;
}
std::string AllEntriesFor(const Slice& user_key) {
Iterator* iter = dbfull()->TEST_NewInternalIterator();
InternalKey target(user_key, kMaxSequenceNumber, kTypeValue);
iter->Seek(target.Encode());
std::string result;
if (!iter->status().ok()) {
result = iter->status().ToString();
} else {
result = "[ ";
bool first = true;
while (iter->Valid()) {
ParsedInternalKey ikey;
if (!ParseInternalKey(iter->key(), &ikey)) {
result += "CORRUPTED";
} else {
if (last_options_.comparator->Compare(ikey.user_key, user_key) != 0) {
break;
}
if (!first) {
result += ", ";
}
first = false;
switch (ikey.type) {
case kTypeValue:
result += iter->value().ToString();
break;
case kTypeMerge:
// keep it the same as kTypeValue for testing kMergePut
result += iter->value().ToString();
break;
case kTypeDeletion:
result += "DEL";
break;
case kTypeLogData:
assert(false);
break;
}
}
iter->Next();
}
if (!first) {
result += " ";
}
result += "]";
}
delete iter;
return result;
}
int NumTableFilesAtLevel(int level) {
std::string property;
@ -897,14 +667,6 @@ class SimpleTableDBTest {
return atoi(property.c_str());
}
int TotalTableFiles() {
int result = 0;
for (int level = 0; level < db_->NumberLevels(); level++) {
result += NumTableFilesAtLevel(level);
}
return result;
}
// Return spread of files per level
std::string FilesPerLevel() {
std::string result;
@ -922,71 +684,6 @@ class SimpleTableDBTest {
return result;
}
int CountFiles() {
std::vector<std::string> files;
env_->GetChildren(dbname_, &files);
std::vector<std::string> logfiles;
if (dbname_ != last_options_.wal_dir) {
env_->GetChildren(last_options_.wal_dir, &logfiles);
}
return static_cast<int>(files.size() + logfiles.size());
}
int CountLiveFiles() {
std::vector<std::string> files;
uint64_t manifest_file_size;
db_->GetLiveFiles(files, &manifest_file_size);
return files.size();
}
uint64_t Size(const Slice& start, const Slice& limit) {
Range r(start, limit);
uint64_t size;
db_->GetApproximateSizes(&r, 1, &size);
return size;
}
void Compact(const Slice& start, const Slice& limit) {
db_->CompactRange(&start, &limit);
}
// Do n memtable compactions, each of which produces an sstable
// covering the range [small,large].
void MakeTables(int n, const std::string& small, const std::string& large) {
for (int i = 0; i < n; i++) {
Put(small, "begin");
Put(large, "end");
dbfull()->TEST_FlushMemTable();
}
}
// Prevent pushing of new sstables into deeper levels by adding
// tables that cover a specified range to all levels.
void FillLevels(const std::string& smallest, const std::string& largest) {
MakeTables(db_->NumberLevels(), smallest, largest);
}
void DumpFileCounts(const char* label) {
fprintf(stderr, "---\n%s:\n", label);
fprintf(stderr, "maxoverlap: %lld\n",
static_cast<long long>(
dbfull()->TEST_MaxNextLevelOverlappingBytes()));
for (int level = 0; level < db_->NumberLevels(); level++) {
int num = NumTableFilesAtLevel(level);
if (num > 0) {
fprintf(stderr, " level %3d : %d files\n", level, num);
}
}
}
std::string DumpSSTableList() {
std::string property;
db_->GetProperty("rocksdb.sstables", &property);
return property;
}
std::string IterStatus(Iterator* iter) {
std::string result;
if (iter->Valid()) {
@ -996,26 +693,6 @@ class SimpleTableDBTest {
}
return result;
}
Options OptionsForLogIterTest() {
Options options = CurrentOptions();
options.create_if_missing = true;
options.WAL_ttl_seconds = 1000;
return options;
}
std::unique_ptr<TransactionLogIterator> OpenTransactionLogIter(
const SequenceNumber seq) {
unique_ptr<TransactionLogIterator> iter;
Status status = dbfull()->GetUpdatesSince(seq, &iter);
ASSERT_OK(status);
ASSERT_TRUE(iter->Valid());
return std::move(iter);
}
std::string DummyString(size_t len, char c = 'a') {
return std::string(len, c);
}
};
TEST(SimpleTableDBTest, Empty) {
@ -1077,7 +754,7 @@ static std::string RandomString(Random* rnd, int len) {
TEST(SimpleTableDBTest, CompactionTrigger) {
Options options = CurrentOptions();
options.write_buffer_size = 100<<10; //100KB
options.write_buffer_size = 100 << 10; //100KB
options.num_levels = 3;
options.max_mem_compaction_level = 0;
options.level0_file_num_compaction_trigger = 3;
@ -1085,9 +762,8 @@ TEST(SimpleTableDBTest, CompactionTrigger) {
Random rnd(301);
for (int num = 0;
num < options.level0_file_num_compaction_trigger - 1;
num++) {
for (int num = 0; num < options.level0_file_num_compaction_trigger - 1;
num++) {
std::vector<std::string> values;
// Write 120KB (12 values, each 10K)
for (int i = 0; i < 12; i++) {

@ -19,8 +19,8 @@
namespace rocksdb {
static void DeleteEntry(const Slice& key, void* value) {
Table* table = reinterpret_cast<Table*>(value);
delete table;
TableReader* table_reader = reinterpret_cast<TableReader*>(value);
delete table_reader;
}
static void UnrefEntry(void* arg1, void* arg2) {
@ -63,7 +63,7 @@ Status TableCache::FindTable(const EnvOptions& toptions,
}
std::string fname = TableFileName(dbname_, file_number);
unique_ptr<RandomAccessFile> file;
unique_ptr<Table> table;
unique_ptr<TableReader> table_reader;
s = env_->NewRandomAccessFile(fname, &file, toptions);
RecordTick(options_->statistics, NO_FILE_OPENS);
if (s.ok()) {
@ -71,19 +71,19 @@ Status TableCache::FindTable(const EnvOptions& toptions,
file->Hint(RandomAccessFile::RANDOM);
}
StopWatch sw(env_, options_->statistics, TABLE_OPEN_IO_MICROS);
s = options_->table_factory->OpenTable(*options_, toptions,
std::move(file),
file_size, &table);
s = options_->table_factory->GetTableReader(*options_, toptions,
std::move(file), file_size,
&table_reader);
}
if (!s.ok()) {
assert(table == nullptr);
assert(table_reader == nullptr);
RecordTick(options_->statistics, NO_FILE_ERRORS);
// We do not cache error results so that if the error is transient,
// or somebody repairs the file, we recover automatically.
} else {
assert(file.get() == nullptr);
*handle = cache_->Insert(key, table.release(), 1, &DeleteEntry);
*handle = cache_->Insert(key, table_reader.release(), 1, &DeleteEntry);
}
}
return s;
@ -93,10 +93,10 @@ Iterator* TableCache::NewIterator(const ReadOptions& options,
const EnvOptions& toptions,
uint64_t file_number,
uint64_t file_size,
Table** tableptr,
TableReader** table_reader_ptr,
bool for_compaction) {
if (tableptr != nullptr) {
*tableptr = nullptr;
if (table_reader_ptr != nullptr) {
*table_reader_ptr = nullptr;
}
Cache::Handle* handle = nullptr;
@ -106,16 +106,16 @@ Iterator* TableCache::NewIterator(const ReadOptions& options,
return NewErrorIterator(s);
}
Table* table =
reinterpret_cast<Table*>(cache_->Value(handle));
Iterator* result = table->NewIterator(options);
TableReader* table_reader =
reinterpret_cast<TableReader*>(cache_->Value(handle));
Iterator* result = table_reader->NewIterator(options);
result->RegisterCleanup(&UnrefEntry, cache_.get(), handle);
if (tableptr != nullptr) {
*tableptr = table;
if (table_reader_ptr != nullptr) {
*table_reader_ptr = table_reader;
}
if (for_compaction) {
table->SetupForCompaction();
table_reader->SetupForCompaction();
}
return result;
@ -134,8 +134,8 @@ Status TableCache::Get(const ReadOptions& options,
&handle, table_io,
options.read_tier == kBlockCacheTier);
if (s.ok()) {
Table* t =
reinterpret_cast<Table*>(cache_->Value(handle));
TableReader* t =
reinterpret_cast<TableReader*>(cache_->Value(handle));
s = t->Get(options, k, arg, saver, mark_key_may_exist);
cache_->Release(handle);
} else if (options.read_tier && s.IsIncomplete()) {
@ -156,8 +156,8 @@ bool TableCache::PrefixMayMatch(const ReadOptions& options,
file_size, &handle, table_io);
bool may_match = true;
if (s.ok()) {
Table* t =
reinterpret_cast<Table*>(cache_->Value(handle));
TableReader* t =
reinterpret_cast<TableReader*>(cache_->Value(handle));
may_match = t->PrefixMayMatch(internal_prefix);
cache_->Release(handle);
}

@ -39,7 +39,7 @@ class TableCache {
const EnvOptions& toptions,
uint64_t file_number,
uint64_t file_size,
Table** tableptr = nullptr,
TableReader** table_reader_ptr = nullptr,
bool for_compaction = false);
// If a seek to internal key "k" in specified file finds an entry,

@ -20,7 +20,7 @@ namespace rocksdb {
class TableStatsTest {
private:
unique_ptr<Table> table_;
unique_ptr<TableReader> table_reader_;
};
// TODO(kailiu) the following classes should be moved to some more general
@ -88,22 +88,21 @@ void MakeBuilder(
std::unique_ptr<TableBuilder>* builder) {
writable->reset(new FakeWritableFile);
builder->reset(
options.table_factory->GetTableBuilder(options, writable->get(), 0,
true)
);
options.table_factory->GetTableBuilder(options, writable->get(),
options.compression));
}
void OpenTable(
const Options& options,
const std::string& contents,
std::unique_ptr<Table>* table) {
std::unique_ptr<TableReader>* table_reader) {
std::unique_ptr<RandomAccessFile> file(new FakeRandomeAccessFile(contents));
auto s = options.table_factory->OpenTable(
auto s = options.table_factory->GetTableReader(
options,
EnvOptions(),
std::move(file),
contents.size(),
table
table_reader
);
ASSERT_OK(s);
}
@ -176,9 +175,9 @@ TEST(TableStatsTest, CustomizedTableStatsCollector) {
ASSERT_OK(builder->Finish());
// -- Step 2: Open table
std::unique_ptr<Table> table;
OpenTable(options, writable->contents(), &table);
const auto& stats = table->GetTableStats().user_collected_stats;
std::unique_ptr<TableReader> table_reader;
OpenTable(options, writable->contents(), &table_reader);
const auto& stats = table_reader->GetTableStats().user_collected_stats;
ASSERT_EQ("Rocksdb", stats.at("TableStatsTest"));
@ -234,9 +233,9 @@ TEST(TableStatsTest, InternalKeyStatsCollector) {
ASSERT_OK(builder->Finish());
std::unique_ptr<Table> table;
OpenTable(options, writable->contents(), &table);
const auto& stats = table->GetTableStats().user_collected_stats;
std::unique_ptr<TableReader> table_reader;
OpenTable(options, writable->contents(), &table_reader);
const auto& stats = table_reader->GetTableStats().user_collected_stats;
uint64_t deleted = 0;
Slice key(stats.at(InternalKeyTableStatsNames::kDeletedKeys));

@ -1920,12 +1920,12 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
} else {
// "ikey" falls in the range for this table. Add the
// approximate offset of "ikey" within the table.
Table* tableptr;
TableReader* table_reader_ptr;
Iterator* iter = table_cache_->NewIterator(
ReadOptions(), storage_options_, files[i]->number,
files[i]->file_size, &tableptr);
if (tableptr != nullptr) {
result += tableptr->ApproximateOffsetOf(ikey.Encode());
files[i]->file_size, &table_reader_ptr);
if (table_reader_ptr != nullptr) {
result += table_reader_ptr->ApproximateOffsetOf(ikey.Encode());
}
delete iter;
}

@ -19,7 +19,6 @@
#include "rocksdb/universal_compaction.h"
#include "rocksdb/memtablerep.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/table.h"
namespace rocksdb {

@ -13,6 +13,7 @@
#include "rocksdb/env.h"
#include "rocksdb/iterator.h"
#include "rocksdb/table_stats.h"
#include "rocksdb/options.h"
namespace rocksdb {
@ -31,22 +32,8 @@ using std::unique_ptr;
// external synchronization, but if any of the threads may call a
// non-const method, all threads accessing the same TableBuilder must use
// external synchronization.
class TableBuilder {
public:
// Create a builder that will store the contents of the table it is
// building in *file. Does not close the file. It is up to the
// caller to close the file after calling Finish(). The output file
// will be part of level specified by 'level'. A value of -1 means
// that the caller does not know which level the output file will reside.
//
// If enable_compression=true, this table will follow the compression
// setting given in parameter options. If enable_compression=false, the
// table will not be compressed.
explicit TableBuilder(int level = -1, const bool enable_compression = true) :
level_(level) {
}
// REQUIRES: Either Finish() or Abandon() has been called.
virtual ~TableBuilder() {}
@ -74,17 +61,14 @@ class TableBuilder {
// Size of the file generated so far. If invoked after a successful
// Finish() call, returns the size of the final generated file.
virtual uint64_t FileSize() const = 0;
protected:
int level_;
};
// A Table is a sorted map from strings to strings. Tables are
// immutable and persistent. A Table may be safely accessed from
// multiple threads without external synchronization.
class Table {
class TableReader {
public:
virtual ~Table() {}
virtual ~TableReader() {}
// Determine whether there is a chance that the current table file
// contains a key starting with internal_prefix. The specific
@ -116,29 +100,25 @@ class Table {
virtual TableStats& GetTableStats() = 0;
// Get function issued to look for specific key.
// The table will search the first entry in the table whose user key
// matches key, and pass it to the call back function handle_result,
// with the first argument to be parameter arg, and the last bool
// parameter to be whether an I/O is issued.
// mark_key_may_exist call back is called when it is configured to be
// Calls (*result_handler)(handle_context, ...) repeatedly, starting with
// the entry found after a call to Seek(key), until result_handler returns
// false, where k is the actual internal key of the entry found and v is its
// value. didIO is true if I/O was involved in the operation. The callback may
// not be invoked at all if the filter policy says the key is not present.
//
// mark_key_may_exist_handler needs to be called when the read is configured
// to be memory-only and the key is not found in the block cache, with
// handle_context as the parameter.
//
// readOptions is the options for the read
// key is the key to search for
virtual Status Get(
const ReadOptions&, const Slice& key,
void* arg,
bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool),
void (*mark_key_may_exist)(void*) = nullptr) = 0;
};
struct TableStatsNames {
static const std::string kDataSize;
static const std::string kIndexSize;
static const std::string kRawKeySize;
static const std::string kRawValueSize;
static const std::string kNumDataBlocks;
static const std::string kNumEntries;
static const std::string kFilterPolicy;
const ReadOptions& readOptions,
const Slice& key,
void* handle_context,
bool (*result_handler)(void* handle_context, const Slice& k,
const Slice& v, bool didIO),
void (*mark_key_may_exist_handler)(void* handle_context) = nullptr) = 0;
};
// A base class for table factories
@ -146,7 +126,7 @@ class TableFactory {
public:
virtual ~TableFactory() {}
// The name of the comparator.
// The type of the table.
//
// The client of this package should switch to a new name whenever
// the table format implementation changes.
@ -159,16 +139,21 @@ class TableFactory {
// in parameter file. It's the caller's responsibility to make sure
// file is in the correct format.
//
// OpenTable() is called in two places:
// GetTableReader() is called in two places:
// (1) TableCache::FindTable() calls the function on a table cache miss and
// caches the table object returned.
// (2) SstFileReader (for SST dump) opens the table and dumps the table
// contents using the iterator of the table.
virtual Status OpenTable(const Options& options,
const EnvOptions& soptions,
unique_ptr<RandomAccessFile>&& file,
uint64_t file_size,
unique_ptr<Table>* table) const = 0;
// options and soptions are the general and environment options, respectively.
// Multiple configuration settings can be accessed from them, including but
// not limited to the block cache and key comparators.
// file is a file handle for the file containing the table.
// file_size is the physical size of the file.
// table_reader is the output table reader.
virtual Status GetTableReader(
const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const = 0;
// Return a table builder to write to a file for this table type.
//
@ -182,8 +167,14 @@ class TableFactory {
// by calling BuildTable())
// (4) When running Repairer, it creates a table builder to convert logs to
// SST files (In Repairer::ConvertLogToTable() by calling BuildTable())
//
// options is the general options. Multiple configuration settings can be
// accessed from there, including but not limited to compression options.
// file is a handle of a writable file. It is the caller's responsibility to
// keep the file open and close the file after closing the table builder.
// compression_type is the compression type to use in this table.
virtual TableBuilder* GetTableBuilder(
const Options& options, WritableFile* file, int level,
const bool enable_compression) const = 0;
const Options& options, WritableFile* file,
CompressionType compression_type) const = 0;
};
} // namespace rocksdb
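
As a side note, the callback contract documented for Get() above can be sketched with simplified stand-in types (std::string instead of Slice, a toy driver instead of a real table reader). This is an illustration of the protocol only, not RocksDB code: the reader keeps invoking the handler until it returns false.

#include <cassert>
#include <string>
#include <utility>
#include <vector>

struct SaverContext {
  std::string wanted_user_key;
  std::string value;
  bool found = false;
};

// Returns false to tell the caller to stop scanning further entries.
bool SaveValue(void* handle_context, const std::string& k,
               const std::string& v, bool /*didIO*/) {
  auto* ctx = static_cast<SaverContext*>(handle_context);
  if (k == ctx->wanted_user_key) {
    ctx->value = v;
    ctx->found = true;
    return false;  // stop: we found the key
  }
  return true;     // keep scanning
}

// A toy "Get" that drives the callback the way a table reader would.
void ToyGet(const std::vector<std::pair<std::string, std::string>>& entries,
            void* ctx,
            bool (*handler)(void*, const std::string&, const std::string&, bool)) {
  for (const auto& kv : entries) {
    if (!handler(ctx, kv.first, kv.second, /*didIO=*/false)) break;
  }
}

int main() {
  SaverContext ctx;
  ctx.wanted_user_key = "banana";
  ToyGet({{"apple", "1"}, {"banana", "2"}, {"cherry", "3"}}, &ctx, &SaveValue);
  assert(ctx.found && ctx.value == "2");
  return 0;
}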

@ -17,7 +17,7 @@
#include "rocksdb/env.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/options.h"
#include "table/block_based_table.h"
#include "table/block_based_table_reader.h"
#include "table/block_builder.h"
#include "table/filter_block.h"
#include "table/format.h"
@ -81,8 +81,7 @@ struct BlockBasedTableBuilder::Rep {
BlockBuilder data_block;
BlockBuilder index_block;
std::string last_key;
// Whether enable compression in this table.
bool enable_compression;
CompressionType compression_type;
uint64_t num_entries = 0;
uint64_t num_data_blocks = 0;
@ -107,13 +106,13 @@ struct BlockBasedTableBuilder::Rep {
std::string compressed_output;
Rep(const Options& opt, WritableFile* f, bool enable_compression)
Rep(const Options& opt, WritableFile* f, CompressionType compression_type)
: options(opt),
index_block_options(opt),
file(f),
data_block(&options),
index_block(1, index_block_options.comparator),
enable_compression(enable_compression),
compression_type(compression_type),
filter_block(opt.filter_policy == nullptr ? nullptr
: new FilterBlockBuilder(opt)),
pending_index_entry(false) {
@ -121,9 +120,9 @@ struct BlockBasedTableBuilder::Rep {
};
BlockBasedTableBuilder::BlockBasedTableBuilder(const Options& options,
WritableFile* file, int level,
const bool enable_compression)
: TableBuilder(level), rep_(new Rep(options, file, enable_compression)) {
WritableFile* file,
CompressionType compression_type)
: rep_(new Rep(options, file, compression_type)) {
if (rep_->filter_block != nullptr) {
rep_->filter_block->StartBlock(0);
}
@ -220,26 +219,7 @@ void BlockBasedTableBuilder::WriteBlock(BlockBuilder* block,
Slice block_contents;
std::string* compressed = &r->compressed_output;
CompressionType type;
if (!r->enable_compression) {
// disable compression
type = kNoCompression;
} else {
// If the use has specified a different compression level for each level,
// then pick the compresison for that level.
if (!r->options.compression_per_level.empty()) {
const int n = r->options.compression_per_level.size();
// It is possible for level_ to be -1; in that case, we use level
// 0's compression. This occurs mostly in backwards compatibility
// situations when the builder doesn't know what level the file
// belongs to. Likewise, if level_ is beyond the end of the
// specified compression levels, use the last value.
type = r->options.compression_per_level[std::max(0,
std::min(level_, n))];
} else {
type = r->options.compression;
}
}
CompressionType type = r->compression_type;
switch (type) {
case kNoCompression:
block_contents = raw;
@ -376,19 +356,21 @@ Status BlockBasedTableBuilder::Finish() {
BytewiseSortedMap stats;
// Add basic stats
AddStats(stats, TableStatsNames::kRawKeySize, r->raw_key_size);
AddStats(stats, TableStatsNames::kRawValueSize, r->raw_value_size);
AddStats(stats, TableStatsNames::kDataSize, r->data_size);
AddStats(stats, BlockBasedTableStatsNames::kRawKeySize, r->raw_key_size);
AddStats(stats, BlockBasedTableStatsNames::kRawValueSize,
r->raw_value_size);
AddStats(stats, BlockBasedTableStatsNames::kDataSize, r->data_size);
AddStats(
stats,
TableStatsNames::kIndexSize,
BlockBasedTableStatsNames::kIndexSize,
r->index_block.CurrentSizeEstimate() + kBlockTrailerSize
);
AddStats(stats, TableStatsNames::kNumEntries, r->num_entries);
AddStats(stats, TableStatsNames::kNumDataBlocks, r->num_data_blocks);
AddStats(stats, BlockBasedTableStatsNames::kNumEntries, r->num_entries);
AddStats(stats, BlockBasedTableStatsNames::kNumDataBlocks,
r->num_data_blocks);
if (r->filter_block != nullptr) {
stats.insert(std::make_pair(
TableStatsNames::kFilterPolicy,
BlockBasedTableStatsNames::kFilterPolicy,
r->options.filter_policy->Name()
));
}

@ -11,24 +11,22 @@
#include <stdint.h>
#include "rocksdb/options.h"
#include "rocksdb/status.h"
#include "rocksdb/table.h"
namespace rocksdb {
class BlockBuilder;
class BlockHandle;
class WritableFile;
class TableBuilder;
class BlockBasedTableBuilder : public TableBuilder {
public:
// Create a builder that will store the contents of the table it is
// building in *file. Does not close the file. It is up to the
// caller to close the file after calling Finish(). The output file
// will be part of level specified by 'level'. A value of -1 means
// that the caller does not know which level the output file will reside.
// caller to close the file after calling Finish().
BlockBasedTableBuilder(const Options& options, WritableFile* file,
int level = -1, const bool enable_compression = true);
CompressionType compression_type);
// REQUIRES: Either Finish() or Abandon() has been called.
~BlockBasedTableBuilder();

@ -13,24 +13,22 @@
#include <memory>
#include <stdint.h>
#include "table/block_based_table_builder.h"
#include "table/block_based_table.h"
#include "table/block_based_table_reader.h"
#include "port/port.h"
namespace rocksdb {
Status BlockBasedTableFactory::OpenTable(const Options& options,
const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file,
uint64_t file_size,
unique_ptr<Table>* table) const {
Status BlockBasedTableFactory::GetTableReader(
const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const {
return BlockBasedTable::Open(options, soptions, std::move(file), file_size,
table);
table_reader);
}
TableBuilder* BlockBasedTableFactory::GetTableBuilder(
const Options& options, WritableFile* file, int level,
const bool enable_compression) const {
return new BlockBasedTableBuilder(options, file, level, enable_compression);
const Options& options, WritableFile* file,
CompressionType compression_type) const {
return new BlockBasedTableBuilder(options, file, compression_type);
}
} // namespace rocksdb

@ -36,12 +36,13 @@ public:
const char* Name() const override {
return "BlockBasedTable";
}
Status OpenTable(const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
unique_ptr<Table>* table) const override;
Status GetTableReader(const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file,
uint64_t file_size,
unique_ptr<TableReader>* table_reader) const override;
TableBuilder* GetTableBuilder(const Options& options, WritableFile* file,
int level, const bool enable_compression) const
CompressionType compression_type) const
override;
};

@ -7,7 +7,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "table/block_based_table.h"
#include "table/block_based_table_reader.h"
#include "db/dbformat.h"
@ -113,8 +113,8 @@ Status BlockBasedTable::Open(const Options& options,
const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file,
uint64_t size,
unique_ptr<Table>* table) {
table->reset();
unique_ptr<TableReader>* table_reader) {
table_reader->reset();
if (size < Footer::kEncodedLength) {
return Status::InvalidArgument("file is too short to be an sstable");
}
@ -151,8 +151,8 @@ Status BlockBasedTable::Open(const Options& options,
SetupCacheKeyPrefix(rep);
rep->filter_data = nullptr;
rep->filter = nullptr;
table->reset(new BlockBasedTable(rep));
((BlockBasedTable*) (table->get()))->ReadMeta(footer);
table_reader->reset(new BlockBasedTable(rep));
((BlockBasedTable*) (table_reader->get()))->ReadMeta(footer);
} else {
if (index_block) delete index_block;
}
@ -275,12 +275,12 @@ Status BlockBasedTable::ReadStats(const Slice& handle_value, Rep* rep) {
auto& table_stats = rep->table_stats;
// All pre-defined stats of type uint64_t
std::unordered_map<std::string, uint64_t*> predefined_uint64_stats = {
{ TableStatsNames::kDataSize, &table_stats.data_size },
{ TableStatsNames::kIndexSize, &table_stats.index_size },
{ TableStatsNames::kRawKeySize, &table_stats.raw_key_size },
{ TableStatsNames::kRawValueSize, &table_stats.raw_value_size },
{ TableStatsNames::kNumDataBlocks, &table_stats.num_data_blocks },
{ TableStatsNames::kNumEntries, &table_stats.num_entries },
{ BlockBasedTableStatsNames::kDataSize, &table_stats.data_size },
{ BlockBasedTableStatsNames::kIndexSize, &table_stats.index_size },
{ BlockBasedTableStatsNames::kRawKeySize, &table_stats.raw_key_size },
{ BlockBasedTableStatsNames::kRawValueSize, &table_stats.raw_value_size },
{ BlockBasedTableStatsNames::kNumDataBlocks, &table_stats.num_data_blocks},
{ BlockBasedTableStatsNames::kNumEntries, &table_stats.num_entries },
};
std::string last_key;
@ -313,7 +313,7 @@ Status BlockBasedTable::ReadStats(const Slice& handle_value, Rep* rep) {
continue;
}
*(pos->second) = val;
} else if (key == TableStatsNames::kFilterPolicy) {
} else if (key == BlockBasedTableStatsNames::kFilterPolicy) {
table_stats.filter_policy_name = raw_val.ToString();
} else {
// handle user-collected
@ -464,7 +464,7 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_prefix) {
// we're past end of file
may_match = false;
} else if (ExtractUserKey(iiter->key()).starts_with(
ExtractUserKey(internal_prefix))) {
ExtractUserKey(internal_prefix))) {
// we need to check for this subtle case because our only
// guarantee is that "the key is a string >= last key in that data
// block" according to the doc/table_format.txt spec.
@ -497,10 +497,6 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_prefix) {
return may_match;
}
Iterator* Table::NewIterator(const ReadOptions& options) {
return nullptr;
}
Iterator* BlockBasedTable::NewIterator(const ReadOptions& options) {
if (options.prefix) {
InternalKey internal_prefix(*options.prefix, 0, kTypeValue);
@ -517,21 +513,23 @@ Iterator* BlockBasedTable::NewIterator(const ReadOptions& options) {
options, rep_->soptions);
}
Status BlockBasedTable::Get(const ReadOptions& options, const Slice& k,
void* arg,
bool (*saver)(void*, const Slice&, const Slice&,
bool),
void (*mark_key_may_exist)(void*)) {
Status BlockBasedTable::Get(
const ReadOptions& readOptions,
const Slice& key,
void* handle_context,
bool (*result_handler)(void* handle_context, const Slice& k,
const Slice& v, bool didIO),
void (*mark_key_may_exist_handler)(void* handle_context)) {
Status s;
Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
bool done = false;
for (iiter->Seek(k); iiter->Valid() && !done; iiter->Next()) {
for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) {
Slice handle_value = iiter->value();
FilterBlockReader* filter = rep_->filter;
BlockHandle handle;
if (filter != nullptr &&
handle.DecodeFrom(&handle_value).ok() &&
!filter->KeyMayMatch(handle.offset(), k)) {
!filter->KeyMayMatch(handle.offset(), key)) {
// Not found
// TODO: think about interaction with Merge. If a user key cannot
// cross one data block, we should be fine.
@ -540,19 +538,20 @@ Status BlockBasedTable::Get(const ReadOptions& options, const Slice& k,
} else {
bool didIO = false;
std::unique_ptr<Iterator> block_iter(
BlockReader(this, options, iiter->value(), &didIO));
BlockReader(this, readOptions, iiter->value(), &didIO));
if (options.read_tier && block_iter->status().IsIncomplete()) {
if (readOptions.read_tier && block_iter->status().IsIncomplete()) {
// couldn't get block from block_cache
// Update Saver.state to Found because we are only looking for whether
// we can guarantee the key is not there when "no_io" is set
(*mark_key_may_exist)(arg);
(*mark_key_may_exist_handler)(handle_context);
break;
}
// Call the *saver function on each entry/block until it returns false
for (block_iter->Seek(k); block_iter->Valid(); block_iter->Next()) {
if (!(*saver)(arg, block_iter->key(), block_iter->value(), didIO)) {
for (block_iter->Seek(key); block_iter->Valid(); block_iter->Next()) {
if (!(*result_handler)(handle_context, block_iter->key(),
block_iter->value(), didIO)) {
done = true;
break;
}
@ -611,12 +610,17 @@ uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key) {
const std::string BlockBasedTable::kFilterBlockPrefix = "filter.";
const std::string BlockBasedTable::kStatsBlock = "rocksdb.stats";
const std::string TableStatsNames::kDataSize = "rocksdb.data.size";
const std::string TableStatsNames::kIndexSize = "rocksdb.index.size";
const std::string TableStatsNames::kRawKeySize = "rocksdb.raw.key.size";
const std::string TableStatsNames::kRawValueSize = "rocksdb.raw.value.size";
const std::string TableStatsNames::kNumDataBlocks = "rocksdb.num.data.blocks";
const std::string TableStatsNames::kNumEntries = "rocksdb.num.entries";
const std::string TableStatsNames::kFilterPolicy = "rocksdb.filter.policy";
const std::string BlockBasedTableStatsNames::kDataSize = "rocksdb.data.size";
const std::string BlockBasedTableStatsNames::kIndexSize = "rocksdb.index.size";
const std::string BlockBasedTableStatsNames::kRawKeySize =
"rocksdb.raw.key.size";
const std::string BlockBasedTableStatsNames::kRawValueSize =
"rocksdb.raw.value.size";
const std::string BlockBasedTableStatsNames::kNumDataBlocks =
"rocksdb.num.data.blocks";
const std::string BlockBasedTableStatsNames::kNumEntries =
"rocksdb.num.entries";
const std::string BlockBasedTableStatsNames::kFilterPolicy =
"rocksdb.filter.policy";
} // namespace rocksdb
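
As an aside, the predefined_uint64_stats dispatch used in ReadStats() above follows a simple name-to-field-pointer pattern: each predefined stat key maps to a pointer into the stats struct, so decoded values can be stored without a chain of string comparisons. A self-contained sketch with assumed toy names:

#include <cassert>
#include <cstdint>
#include <string>
#include <unordered_map>

struct ToyTableStats {
  uint64_t data_size = 0;
  uint64_t index_size = 0;
  uint64_t num_entries = 0;
};

int main() {
  ToyTableStats stats;
  std::unordered_map<std::string, uint64_t*> predefined = {
      {"rocksdb.data.size", &stats.data_size},
      {"rocksdb.index.size", &stats.index_size},
      {"rocksdb.num.entries", &stats.num_entries},
  };
  // Pretend these key/value pairs were decoded from the stats block.
  std::unordered_map<std::string, uint64_t> decoded = {
      {"rocksdb.data.size", 4096}, {"rocksdb.num.entries", 100}};
  for (const auto& kv : decoded) {
    auto pos = predefined.find(kv.first);
    if (pos != predefined.end()) {
      *(pos->second) = kv.second;  // store into the matching stats field
    }
  }
  assert(stats.data_size == 4096 && stats.num_entries == 100);
  return 0;
}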

@ -24,14 +24,14 @@ struct Options;
class RandomAccessFile;
struct ReadOptions;
class TableCache;
class Table;
class TableReader;
using std::unique_ptr;
// A Table is a sorted map from strings to strings. Tables are
// immutable and persistent. A Table may be safely accessed from
// multiple threads without external synchronization.
class BlockBasedTable : public Table {
class BlockBasedTable : public TableReader {
public:
static const std::string kFilterBlockPrefix;
static const std::string kStatsBlock;
@ -40,19 +40,17 @@ class BlockBasedTable : public Table {
// of "file", and read the metadata entries necessary to allow
// retrieving data from the table.
//
// If successful, returns ok and sets "*table" to the newly opened
// table. The client should delete "*table" when no longer needed.
// If there was an error while initializing the table, sets "*table"
// to nullptr and returns a non-ok status. Does not take ownership of
// "*source", but the client must ensure that "source" remains live
// for the duration of the returned table's lifetime.
// If successful, returns ok and sets "*table_reader" to the newly opened
// table. The client should delete "*table_reader" when no longer needed.
// If there was an error while initializing the table, sets "*table_reader"
// to nullptr and returns a non-ok status.
//
// *file must remain live while this Table is in use.
static Status Open(const Options& options,
const EnvOptions& soptions,
unique_ptr<RandomAccessFile>&& file,
uint64_t file_size,
unique_ptr<Table>* table);
unique_ptr<TableReader>* table_reader);
bool PrefixMayMatch(const Slice& internal_prefix) override;
@ -62,10 +60,13 @@ class BlockBasedTable : public Table {
Iterator* NewIterator(const ReadOptions&) override;
Status Get(
const ReadOptions&, const Slice& key,
void* arg,
bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool),
void (*mark_key_may_exist)(void*) = nullptr) override;
const ReadOptions& readOptions,
const Slice& key,
void* handle_context,
bool (*result_handler)(void* handle_context, const Slice& k,
const Slice& v, bool didIO),
void (*mark_key_may_exist_handler)(void* handle_context) = nullptr)
override;
// Given a key, return an approximate byte offset in the file where
// the data for that key begins (or would begin if the key were
@ -115,8 +116,18 @@ class BlockBasedTable : public Table {
}
// No copying allowed
explicit BlockBasedTable(const Table&) = delete;
void operator=(const Table&) = delete;
explicit BlockBasedTable(const TableReader&) = delete;
void operator=(const TableReader&) = delete;
};
struct BlockBasedTableStatsNames {
static const std::string kDataSize;
static const std::string kIndexSize;
static const std::string kRawKeySize;
static const std::string kRawValueSize;
static const std::string kNumDataBlocks;
static const std::string kNumEntries;
static const std::string kFilterPolicy;
};
} // namespace rocksdb

@ -22,7 +22,7 @@
#include "table/block.h"
#include "table/block_builder.h"
#include "table/format.h"
#include "table/block_based_table.h"
#include "table/block_based_table_reader.h"
#include "table/block_based_table_builder.h"
#include "util/random.h"
#include "util/testharness.h"
@ -250,7 +250,7 @@ class BlockBasedTableConstructor: public Constructor {
virtual Status FinishImpl(const Options& options, const KVMap& data) {
Reset();
sink_.reset(new StringSink());
BlockBasedTableBuilder builder(options, sink_.get());
BlockBasedTableBuilder builder(options, sink_.get(), options.compression);
for (KVMap::const_iterator it = data.begin();
it != data.end();
@ -267,36 +267,36 @@ class BlockBasedTableConstructor: public Constructor {
uniq_id_ = cur_uniq_id_++;
source_.reset(new StringSource(sink_->contents(), uniq_id_));
unique_ptr<TableFactory> table_factory;
return options.table_factory->OpenTable(options, soptions,
std::move(source_),
sink_->contents().size(),
&table_);
return options.table_factory->GetTableReader(options, soptions,
std::move(source_),
sink_->contents().size(),
&table_reader_);
}
virtual Iterator* NewIterator() const {
return table_->NewIterator(ReadOptions());
return table_reader_->NewIterator(ReadOptions());
}
uint64_t ApproximateOffsetOf(const Slice& key) const {
return table_->ApproximateOffsetOf(key);
return table_reader_->ApproximateOffsetOf(key);
}
virtual Status Reopen(const Options& options) {
source_.reset(new StringSource(sink_->contents(), uniq_id_));
return options.table_factory->OpenTable(options, soptions,
std::move(source_),
sink_->contents().size(),
&table_);
return options.table_factory->GetTableReader(options, soptions,
std::move(source_),
sink_->contents().size(),
&table_reader_);
}
virtual Table* table() {
return table_.get();
virtual TableReader* table_reader() {
return table_reader_.get();
}
private:
void Reset() {
uniq_id_ = 0;
table_.reset();
table_reader_.reset();
sink_.reset();
source_.reset();
}
@ -304,7 +304,7 @@ class BlockBasedTableConstructor: public Constructor {
uint64_t uniq_id_;
unique_ptr<StringSink> sink_;
unique_ptr<StringSource> source_;
unique_ptr<Table> table_;
unique_ptr<TableReader> table_reader_;
BlockBasedTableConstructor();
@ -883,7 +883,7 @@ TEST(TableTest, BasicTableStats) {
c.Finish(options, &keys, &kvmap);
auto& stats = c.table()->GetTableStats();
auto& stats = c.table_reader()->GetTableStats();
ASSERT_EQ(kvmap.size(), stats.num_entries);
auto raw_key_size = kvmap.size() * 2ul;
@ -918,7 +918,7 @@ TEST(TableTest, FilterPolicyNameStats) {
options.filter_policy = filter_policy.get();
c.Finish(options, &keys, &kvmap);
auto& stats = c.table()->GetTableStats();
auto& stats = c.table_reader()->GetTableStats();
ASSERT_EQ("rocksdb.BuiltinBloomFilter", stats.filter_policy_name);
}
@ -960,7 +960,7 @@ TEST(TableTest, IndexSizeStat) {
c.Finish(options, &ks, &kvmap);
auto index_size =
c.table()->GetTableStats().index_size;
c.table_reader()->GetTableStats().index_size;
ASSERT_GT(index_size, last_index_size);
last_index_size = index_size;
}
@ -985,7 +985,7 @@ TEST(TableTest, NumBlockStat) {
c.Finish(options, &ks, &kvmap);
ASSERT_EQ(
kvmap.size(),
c.table()->GetTableStats().num_data_blocks
c.table_reader()->GetTableStats().num_data_blocks
);
}
@ -1100,7 +1100,7 @@ TEST(TableTest, BlockCacheLeak) {
ASSERT_OK(c.Reopen(opt));
for (const std::string& key: keys) {
ASSERT_TRUE(c.table()->TEST_KeyInCache(ReadOptions(), key));
ASSERT_TRUE(c.table_reader()->TEST_KeyInCache(ReadOptions(), key));
}
}

@ -63,7 +63,7 @@ Status SstFileReader::ReadSequential(bool print_kv,
bool has_to,
const std::string& to_key)
{
unique_ptr<Table> table;
unique_ptr<TableReader> table_reader;
InternalKeyComparator internal_comparator_(BytewiseComparator());
Options table_options;
table_options.comparator = &internal_comparator_;
@ -76,14 +76,15 @@ Status SstFileReader::ReadSequential(bool print_kv,
uint64_t file_size;
table_options.env->GetFileSize(file_name_, &file_size);
unique_ptr<TableFactory> table_factory;
s = table_options.table_factory->OpenTable(table_options, soptions_,
std::move(file), file_size,
&table);
s = table_options.table_factory->GetTableReader(table_options, soptions_,
std::move(file), file_size,
&table_reader);
if(!s.ok()) {
return s;
}
Iterator* iter = table->NewIterator(ReadOptions(verify_checksum_, false));
Iterator* iter = table_reader->NewIterator(ReadOptions(verify_checksum_,
false));
uint64_t i = 0;
if (has_from) {
InternalKey ikey(from_key, kMaxSequenceNumber, kValueTypeForSeek);
