Make "Table" pluggable

Summary: This patch makes Table and TableBuilder a abstract class and make all the implementation of the current table into BlockedBasedTable and BlockedBasedTable Builder.

Test Plan: Make db_test.cc to work with block based table. Add a new test simple_table_db_test.cc where a different simple table format is implemented.

Reviewers: dhruba, haobo, kailiu, emayanke, vamsi

Reviewed By: dhruba

CC: leveldb

Differential Revision: https://reviews.facebook.net/D13521
main
Siying Dong 11 years ago
parent 8ace6b0f91
commit d4eec30ed0
  1. 4
      Makefile
  2. 15
      db/builder.cc
  3. 5
      db/builder.h
  4. 2
      db/corruption_test.cc
  5. 17
      db/db_impl.cc
  6. 3
      db/db_impl_readonly.cc
  7. 2
      db/db_test.cc
  8. 2
      db/dbformat.h
  9. 1118
      db/simple_table_db_test.cc
  10. 8
      db/table_cache.cc
  11. 2
      db/table_cache.h
  12. 8
      db/table_stats_collector_test.cc
  13. 12
      db/version_set.cc
  14. 9
      include/rocksdb/options.h
  15. 189
      include/rocksdb/table.h
  16. 100
      table/block_based_table.cc
  17. 52
      table/block_based_table.h
  18. 59
      table/block_based_table_builder.cc
  19. 69
      table/block_based_table_builder.h
  20. 36
      table/block_based_table_factory.cc
  21. 48
      table/block_based_table_factory.h
  22. 1
      table/block_builder.cc
  23. 3
      table/block_test.cc
  24. 2
      table/format.h
  25. 46
      table/table_test.cc
  26. 3
      table/two_level_iterator.cc
  27. 8
      tools/sst_dump.cc
  28. 4
      util/options.cc

@ -60,6 +60,7 @@ TESTS = \
merge_test \ merge_test \
redis_test \ redis_test \
reduce_levels_test \ reduce_levels_test \
simple_table_db_test \
skiplist_test \ skiplist_test \
stringappend_test \ stringappend_test \
table_test \ table_test \
@ -236,6 +237,9 @@ crc32c_test: util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS)
db_test: db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) db_test: db/db_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) $(CXX) db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
simple_table_db_test: db/simple_table_db_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) db/simple_table_db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
perf_context_test: db/perf_context_test.o $(LIBOBJECTS) $(TESTHARNESS) perf_context_test: db/perf_context_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) db/perf_context_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(CXX) db/perf_context_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS)

@ -15,12 +15,22 @@
#include "db/table_cache.h" #include "db/table_cache.h"
#include "db/version_edit.h" #include "db/version_edit.h"
#include "rocksdb/db.h" #include "rocksdb/db.h"
#include "rocksdb/table.h"
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/iterator.h" #include "rocksdb/iterator.h"
#include "table/block_based_table_builder.h"
#include "util/stop_watch.h" #include "util/stop_watch.h"
namespace rocksdb { namespace rocksdb {
class TableFactory;
TableBuilder* GetTableBuilder(const Options& options, WritableFile* file,
int level, const bool enable_compression) {
return options.table_factory->GetTableBuilder(options, file, level,
enable_compression);
}
Status BuildTable(const std::string& dbname, Status BuildTable(const std::string& dbname,
Env* env, Env* env,
const Options& options, const Options& options,
@ -52,8 +62,9 @@ Status BuildTable(const std::string& dbname,
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
TableBuilder* builder = new TableBuilder(options, file.get(), 0,
enable_compression); TableBuilder* builder = GetTableBuilder(options, file.get(), 0,
enable_compression);
// the first key is the smallest key // the first key is the smallest key
Slice key = iter->key(); Slice key = iter->key();

@ -20,6 +20,11 @@ class EnvOptions;
class Iterator; class Iterator;
class TableCache; class TableCache;
class VersionEdit; class VersionEdit;
class TableBuilder;
class WritableFile;
extern TableBuilder* GetTableBuilder(const Options& options, WritableFile* file,
int level, const bool enable_compression);
// Build a Table file from the contents of *iter. The generated file // Build a Table file from the contents of *iter. The generated file
// will be named according to meta->number. On success, the rest of // will be named according to meta->number. On success, the rest of

@ -15,12 +15,12 @@
#include <sys/types.h> #include <sys/types.h>
#include "rocksdb/cache.h" #include "rocksdb/cache.h"
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/table.h"
#include "rocksdb/write_batch.h" #include "rocksdb/write_batch.h"
#include "db/db_impl.h" #include "db/db_impl.h"
#include "db/filename.h" #include "db/filename.h"
#include "db/log_format.h" #include "db/log_format.h"
#include "db/version_set.h" #include "db/version_set.h"
#include "table/table.h"
#include "util/logging.h" #include "util/logging.h"
#include "util/testharness.h" #include "util/testharness.h"
#include "util/testutil.h" #include "util/testutil.h"

@ -41,10 +41,10 @@
#include "rocksdb/merge_operator.h" #include "rocksdb/merge_operator.h"
#include "rocksdb/statistics.h" #include "rocksdb/statistics.h"
#include "rocksdb/status.h" #include "rocksdb/status.h"
#include "rocksdb/table_builder.h" #include "rocksdb/table.h"
#include "port/port.h"
#include "table/block.h" #include "table/block.h"
#include "table/merger.h" #include "table/merger.h"
#include "table/table.h"
#include "table/two_level_iterator.h" #include "table/two_level_iterator.h"
#include "util/auto_roll_logger.h" #include "util/auto_roll_logger.h"
#include "util/build_version.h" #include "util/build_version.h"
@ -1774,9 +1774,10 @@ Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) {
compact->outfile->SetPreallocationBlockSize( compact->outfile->SetPreallocationBlockSize(
1.1 * versions_->MaxFileSizeForLevel(compact->compaction->output_level())); 1.1 * versions_->MaxFileSizeForLevel(compact->compaction->output_level()));
compact->builder.reset(new TableBuilder(options_, compact->outfile.get(), compact->builder.reset(
compact->compaction->output_level(), GetTableBuilder(options_, compact->outfile.get(),
compact->compaction->enable_compression())); compact->compaction->output_level(),
compact->compaction->enable_compression()));
} }
return s; return s;
} }
@ -2026,9 +2027,9 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
compaction_filter_value.clear(); compaction_filter_value.clear();
bool to_delete = bool to_delete =
compaction_filter->Filter(compact->compaction->level(), compaction_filter->Filter(compact->compaction->level(),
ikey.user_key, value, ikey.user_key, value,
&compaction_filter_value, &compaction_filter_value,
&value_changed); &value_changed);
if (to_delete) { if (to_delete) {
// make a copy of the original key // make a copy of the original key
delete_key.assign(key.data(), key.data() + key.size()); delete_key.assign(key.data(), key.data() + key.size());

@ -29,11 +29,10 @@
#include "rocksdb/db.h" #include "rocksdb/db.h"
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/status.h" #include "rocksdb/status.h"
#include "rocksdb/table_builder.h" #include "rocksdb/table.h"
#include "port/port.h" #include "port/port.h"
#include "table/block.h" #include "table/block.h"
#include "table/merger.h" #include "table/merger.h"
#include "table/table.h"
#include "table/two_level_iterator.h" #include "table/two_level_iterator.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/logging.h" #include "util/logging.h"

@ -20,7 +20,7 @@
#include "rocksdb/cache.h" #include "rocksdb/cache.h"
#include "rocksdb/compaction_filter.h" #include "rocksdb/compaction_filter.h"
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "table/table.h" #include "rocksdb/table.h"
#include "util/hash.h" #include "util/hash.h"
#include "util/logging.h" #include "util/logging.h"
#include "util/mutexlock.h" #include "util/mutexlock.h"

@ -13,7 +13,7 @@
#include "rocksdb/db.h" #include "rocksdb/db.h"
#include "rocksdb/filter_policy.h" #include "rocksdb/filter_policy.h"
#include "rocksdb/slice.h" #include "rocksdb/slice.h"
#include "rocksdb/table_builder.h" #include "rocksdb/table.h"
#include "rocksdb/types.h" #include "rocksdb/types.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/logging.h" #include "util/logging.h"

File diff suppressed because it is too large Load Diff

@ -12,7 +12,7 @@
#include "db/filename.h" #include "db/filename.h"
#include "rocksdb/statistics.h" #include "rocksdb/statistics.h"
#include "table/table.h" #include "rocksdb/table.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/stop_watch.h" #include "util/stop_watch.h"
@ -71,7 +71,9 @@ Status TableCache::FindTable(const EnvOptions& toptions,
file->Hint(RandomAccessFile::RANDOM); file->Hint(RandomAccessFile::RANDOM);
} }
StopWatch sw(env_, options_->statistics, TABLE_OPEN_IO_MICROS); StopWatch sw(env_, options_->statistics, TABLE_OPEN_IO_MICROS);
s = Table::Open(*options_, toptions, std::move(file), file_size, &table); s = options_->table_factory->OpenTable(*options_, toptions,
std::move(file),
file_size, &table);
} }
if (!s.ok()) { if (!s.ok()) {
@ -134,7 +136,7 @@ Status TableCache::Get(const ReadOptions& options,
if (s.ok()) { if (s.ok()) {
Table* t = Table* t =
reinterpret_cast<Table*>(cache_->Value(handle)); reinterpret_cast<Table*>(cache_->Value(handle));
s = t->InternalGet(options, k, arg, saver, mark_key_may_exist); s = t->Get(options, k, arg, saver, mark_key_may_exist);
cache_->Release(handle); cache_->Release(handle);
} else if (options.read_tier && s.IsIncomplete()) { } else if (options.read_tier && s.IsIncomplete()) {
// Couldnt find Table in cache but treat as kFound if no_io set // Couldnt find Table in cache but treat as kFound if no_io set

@ -16,7 +16,7 @@
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/cache.h" #include "rocksdb/cache.h"
#include "port/port.h" #include "port/port.h"
#include "table/table.h" #include "rocksdb/table.h"
namespace rocksdb { namespace rocksdb {

@ -10,9 +10,8 @@
#include "db/dbformat.h" #include "db/dbformat.h"
#include "db/db_impl.h" #include "db/db_impl.h"
#include "db/table_stats_collector.h" #include "db/table_stats_collector.h"
#include "rocksdb/table_builder.h"
#include "rocksdb/table_stats.h" #include "rocksdb/table_stats.h"
#include "table/table.h" #include "rocksdb/table.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/testharness.h" #include "util/testharness.h"
#include "util/testutil.h" #include "util/testutil.h"
@ -89,7 +88,8 @@ void MakeBuilder(
std::unique_ptr<TableBuilder>* builder) { std::unique_ptr<TableBuilder>* builder) {
writable->reset(new FakeWritableFile); writable->reset(new FakeWritableFile);
builder->reset( builder->reset(
new TableBuilder(options, writable->get()) options.table_factory->GetTableBuilder(options, writable->get(), 0,
true)
); );
} }
@ -98,7 +98,7 @@ void OpenTable(
const std::string& contents, const std::string& contents,
std::unique_ptr<Table>* table) { std::unique_ptr<Table>* table) {
std::unique_ptr<RandomAccessFile> file(new FakeRandomeAccessFile(contents)); std::unique_ptr<RandomAccessFile> file(new FakeRandomeAccessFile(contents));
auto s = Table::Open( auto s = options.table_factory->OpenTable(
options, options,
EnvOptions(), EnvOptions(),
std::move(file), std::move(file),

@ -19,7 +19,7 @@
#include "db/table_cache.h" #include "db/table_cache.h"
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/merge_operator.h" #include "rocksdb/merge_operator.h"
#include "rocksdb/table_builder.h" #include "rocksdb/table.h"
#include "table/merger.h" #include "table/merger.h"
#include "table/two_level_iterator.h" #include "table/two_level_iterator.h"
#include "util/coding.h" #include "util/coding.h"
@ -294,11 +294,11 @@ struct Saver {
}; };
} }
// Called from TableCache::Get and InternalGet when file/block in which key may // Called from TableCache::Get and Table::Get when file/block in which
// exist are not there in TableCache/BlockCache respectively. In this case we // key may exist are not there in TableCache/BlockCache respectively. In this
// can't guarantee that key does not exist and are not permitted to do IO to be // case we can't guarantee that key does not exist and are not permitted to do
// certain.Set the status=kFound and value_found=false to let the caller know // IO to be certain.Set the status=kFound and value_found=false to let the
// that key may exist but is not there in memory // caller know that key may exist but is not there in memory
static void MarkKeyMayExist(void* arg) { static void MarkKeyMayExist(void* arg) {
Saver* s = reinterpret_cast<Saver*>(arg); Saver* s = reinterpret_cast<Saver*>(arg);
s->state = kFound; s->state = kFound;

@ -17,6 +17,9 @@
#include "rocksdb/statistics.h" #include "rocksdb/statistics.h"
#include "rocksdb/table_stats.h" #include "rocksdb/table_stats.h"
#include "rocksdb/universal_compaction.h" #include "rocksdb/universal_compaction.h"
#include "rocksdb/memtablerep.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/table.h"
namespace rocksdb { namespace rocksdb {
@ -29,6 +32,7 @@ class MergeOperator;
class Snapshot; class Snapshot;
class CompactionFilter; class CompactionFilter;
class CompactionFilterFactory; class CompactionFilterFactory;
class TableFactory;
using std::shared_ptr; using std::shared_ptr;
@ -579,6 +583,11 @@ struct Options {
// MemTableRep. // MemTableRep.
std::shared_ptr<MemTableRepFactory> memtable_factory; std::shared_ptr<MemTableRepFactory> memtable_factory;
// This is a factory that provides TableFactory objects.
// Default: a factory that provides a default implementation of
// Table and TableBuilder.
std::shared_ptr<TableFactory> table_factory;
// This is a factory that provides compaction filter objects which allow // This is a factory that provides compaction filter objects which allow
// an application to modify/delete a key-value during background compaction. // an application to modify/delete a key-value during background compaction.
// Default: a factory that doesn't provide any object // Default: a factory that doesn't provide any object

@ -0,0 +1,189 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <memory>
#include <stdint.h>
#include "rocksdb/env.h"
#include "rocksdb/iterator.h"
#include "rocksdb/table_stats.h"
namespace rocksdb {
struct Options;
class RandomAccessFile;
struct ReadOptions;
class TableCache;
class WritableFile;
using std::unique_ptr;
// TableBuilder provides the interface used to build a Table
// (an immutable and sorted map from keys to values).
//
// Multiple threads can invoke const methods on a TableBuilder without
// external synchronization, but if any of the threads may call a
// non-const method, all threads accessing the same TableBuilder must use
// external synchronization.
class TableBuilder {
public:
// Create a builder that will store the contents of the table it is
// building in *file. Does not close the file. It is up to the
// caller to close the file after calling Finish(). The output file
// will be part of level specified by 'level'. A value of -1 means
// that the caller does not know which level the output file will reside.
//
// If enable_compression=true, this table will follow the compression
// setting given in parameter options. If enable_compression=false, the
// table will not be compressed.
explicit TableBuilder(int level = -1, const bool enable_compression = true) :
level_(level) {
}
// REQUIRES: Either Finish() or Abandon() has been called.
virtual ~TableBuilder() {}
// Add key,value to the table being constructed.
// REQUIRES: key is after any previously added key according to comparator.
// REQUIRES: Finish(), Abandon() have not been called
virtual void Add(const Slice& key, const Slice& value) = 0;
// Return non-ok iff some error has been detected.
virtual Status status() const = 0;
// Finish building the table.
// REQUIRES: Finish(), Abandon() have not been called
virtual Status Finish() = 0;
// Indicate that the contents of this builder should be abandoned.
// If the caller is not going to call Finish(), it must call Abandon()
// before destroying this builder.
// REQUIRES: Finish(), Abandon() have not been called
virtual void Abandon() = 0;
// Number of calls to Add() so far.
virtual uint64_t NumEntries() const = 0;
// Size of the file generated so far. If invoked after a successful
// Finish() call, returns the size of the final generated file.
virtual uint64_t FileSize() const = 0;
protected:
int level_;
};
// A Table is a sorted map from strings to strings. Tables are
// immutable and persistent. A Table may be safely accessed from
// multiple threads without external synchronization.
class Table {
public:
virtual ~Table() {}
// Determine whether there is a chance that the current table file
// contains the key a key starting with iternal_prefix. The specific
// table implementation can use bloom filter and/or other heuristic
// to filter out this table as a whole.
virtual bool PrefixMayMatch(const Slice& internal_prefix) = 0;
// Returns a new iterator over the table contents.
// The result of NewIterator() is initially invalid (caller must
// call one of the Seek methods on the iterator before using it).
virtual Iterator* NewIterator(const ReadOptions&) = 0;
// Given a key, return an approximate byte offset in the file where
// the data for that key begins (or would begin if the key were
// present in the file). The returned value is in terms of file
// bytes, and so includes effects like compression of the underlying data.
// E.g., the approximate offset of the last key in the table will
// be close to the file length.
virtual uint64_t ApproximateOffsetOf(const Slice& key) = 0;
// Returns true if the block for the specified key is in cache.
// REQUIRES: key is in this table.
virtual bool TEST_KeyInCache(const ReadOptions& options,
const Slice& key) = 0;
// Set up the table for Compaction. Might change some parameters with
// posix_fadvise
virtual void SetupForCompaction() = 0;
virtual TableStats& GetTableStats() = 0;
// Get function issued to look for specific key.
// The table will search the first entry in the table whose user key
// matches key, and pass it to the call back function handle_result,
// with the first argument to be parameter arg, and the last bool
// parameter to be whether an I/O is issued.
// mark_key_may_exist call back is called when it is configured to be
// memory only and the key is not found in the block cache, with
// the parameter to be arg.
virtual Status Get(
const ReadOptions&, const Slice& key,
void* arg,
bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool),
void (*mark_key_may_exist)(void*) = nullptr) = 0;
};
struct TableStatsNames {
static const std::string kDataSize;
static const std::string kIndexSize;
static const std::string kRawKeySize;
static const std::string kRawValueSize;
static const std::string kNumDataBlocks;
static const std::string kNumEntries;
static const std::string kFilterPolicy;
};
// A base class for table factories
class TableFactory {
public:
virtual ~TableFactory() {}
// The name of the comparator.
//
// The client of this package should switch to a new name whenever
// the table format implementation changes.
//
// Names starting with "rocksdb." are reserved and should not be used
// by any clients of this package.
virtual const char* Name() const = 0;
// Returns a Table object table that can fetch data from file specified
// in parameter file. It's the caller's responsibility to make sure
// file is in the correct format.
//
// OpenTable() is called in two places:
// (1) TableCache::FindTable() calls the function when table cache miss
// and cache the table object returned.
// (1) SstFileReader (for SST Dump) opens the table and dump the table
// contents using the interator of the table.
virtual Status OpenTable(const Options& options,
const EnvOptions& soptions,
unique_ptr<RandomAccessFile>&& file,
uint64_t file_size,
unique_ptr<Table>* table) const = 0;
// Return a table builder to write to a file for this table type.
//
// It is called in several places:
// (1) When flushing memtable to a level-0 output file, it creates a table
// builder (In DBImpl::WriteLevel0Table(), by calling BuildTable())
// (2) During compaction, it gets the builder for writing compaction output
// files in DBImpl::OpenCompactionOutputFile().
// (3) When recovering from transaction logs, it creates a table builder to
// write to a level-0 output file (In DBImpl::WriteLevel0TableForRecovery,
// by calling BuildTable())
// (4) When running Repairer, it creates a table builder to convert logs to
// SST files (In Repairer::ConvertLogToTable() by calling BuildTable())
virtual TableBuilder* GetTableBuilder(
const Options& options, WritableFile* file, int level,
const bool enable_compression) const = 0;
};
} // namespace rocksdb

@ -7,7 +7,7 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors. // found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "table/table.h" #include "table/block_based_table.h"
#include "db/dbformat.h" #include "db/dbformat.h"
@ -17,11 +17,11 @@
#include "rocksdb/filter_policy.h" #include "rocksdb/filter_policy.h"
#include "rocksdb/options.h" #include "rocksdb/options.h"
#include "rocksdb/statistics.h" #include "rocksdb/statistics.h"
#include "rocksdb/table.h"
#include "table/block.h" #include "table/block.h"
#include "table/filter_block.h" #include "table/filter_block.h"
#include "table/format.h" #include "table/format.h"
#include "table/table.h"
#include "table/two_level_iterator.h" #include "table/two_level_iterator.h"
#include "util/coding.h" #include "util/coding.h"
@ -35,7 +35,7 @@ namespace rocksdb {
// varints. // varints.
const size_t kMaxCacheKeyPrefixSize = kMaxVarint64Length*3+1; const size_t kMaxCacheKeyPrefixSize = kMaxVarint64Length*3+1;
struct Table::Rep { struct BlockBasedTable::Rep {
~Rep() { ~Rep() {
delete filter; delete filter;
delete [] filter_data; delete [] filter_data;
@ -59,8 +59,12 @@ struct Table::Rep {
TableStats table_stats; TableStats table_stats;
}; };
BlockBasedTable::~BlockBasedTable() {
delete rep_;
}
// Helper function to setup the cache key's prefix for the Table. // Helper function to setup the cache key's prefix for the Table.
void Table::SetupCacheKeyPrefix(Rep* rep) { void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep) {
assert(kMaxCacheKeyPrefixSize >= 10); assert(kMaxCacheKeyPrefixSize >= 10);
rep->cache_key_prefix_size = 0; rep->cache_key_prefix_size = 0;
if (rep->options.block_cache) { if (rep->options.block_cache) {
@ -105,11 +109,11 @@ Status ReadBlock(RandomAccessFile* file,
} // end of anonymous namespace } // end of anonymous namespace
Status Table::Open(const Options& options, Status BlockBasedTable::Open(const Options& options,
const EnvOptions& soptions, const EnvOptions& soptions,
unique_ptr<RandomAccessFile>&& file, unique_ptr<RandomAccessFile> && file,
uint64_t size, uint64_t size,
unique_ptr<Table>* table) { unique_ptr<Table>* table) {
table->reset(); table->reset();
if (size < Footer::kEncodedLength) { if (size < Footer::kEncodedLength) {
return Status::InvalidArgument("file is too short to be an sstable"); return Status::InvalidArgument("file is too short to be an sstable");
@ -139,7 +143,7 @@ Status Table::Open(const Options& options,
if (s.ok()) { if (s.ok()) {
// We've successfully read the footer and the index block: we're // We've successfully read the footer and the index block: we're
// ready to serve requests. // ready to serve requests.
Rep* rep = new Table::Rep(soptions); BlockBasedTable::Rep* rep = new BlockBasedTable::Rep(soptions);
rep->options = options; rep->options = options;
rep->file = std::move(file); rep->file = std::move(file);
rep->metaindex_handle = footer.metaindex_handle(); rep->metaindex_handle = footer.metaindex_handle();
@ -147,8 +151,8 @@ Status Table::Open(const Options& options,
SetupCacheKeyPrefix(rep); SetupCacheKeyPrefix(rep);
rep->filter_data = nullptr; rep->filter_data = nullptr;
rep->filter = nullptr; rep->filter = nullptr;
table->reset(new Table(rep)); table->reset(new BlockBasedTable(rep));
(*table)->ReadMeta(footer); ((BlockBasedTable*) (table->get()))->ReadMeta(footer);
} else { } else {
if (index_block) delete index_block; if (index_block) delete index_block;
} }
@ -156,7 +160,7 @@ Status Table::Open(const Options& options,
return s; return s;
} }
void Table::SetupForCompaction() { void BlockBasedTable::SetupForCompaction() {
switch (rep_->options.access_hint_on_compaction_start) { switch (rep_->options.access_hint_on_compaction_start) {
case Options::NONE: case Options::NONE:
break; break;
@ -175,11 +179,11 @@ void Table::SetupForCompaction() {
compaction_optimized_ = true; compaction_optimized_ = true;
} }
const TableStats& Table::GetTableStats() const { TableStats& BlockBasedTable::GetTableStats() {
return rep_->table_stats; return rep_->table_stats;
} }
void Table::ReadMeta(const Footer& footer) { void BlockBasedTable::ReadMeta(const Footer& footer) {
// TODO(sanjay): Skip this if footer.metaindex_handle() size indicates // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates
// it is an empty block. // it is an empty block.
// TODO: we never really verify check sum for meta index block // TODO: we never really verify check sum for meta index block
@ -222,7 +226,7 @@ void Table::ReadMeta(const Footer& footer) {
delete meta; delete meta;
} }
void Table::ReadFilter(const Slice& filter_handle_value) { void BlockBasedTable::ReadFilter(const Slice& filter_handle_value) {
Slice v = filter_handle_value; Slice v = filter_handle_value;
BlockHandle filter_handle; BlockHandle filter_handle;
if (!filter_handle.DecodeFrom(&v).ok()) { if (!filter_handle.DecodeFrom(&v).ok()) {
@ -230,7 +234,7 @@ void Table::ReadFilter(const Slice& filter_handle_value) {
} }
// TODO: We might want to unify with ReadBlock() if we start // TODO: We might want to unify with ReadBlock() if we start
// requiring checksum verification in Table::Open. // requiring checksum verification in BlockBasedTable::Open.
ReadOptions opt; ReadOptions opt;
BlockContents block; BlockContents block;
if (!ReadBlockContents(rep_->file.get(), opt, filter_handle, &block, if (!ReadBlockContents(rep_->file.get(), opt, filter_handle, &block,
@ -243,7 +247,7 @@ void Table::ReadFilter(const Slice& filter_handle_value) {
rep_->filter = new FilterBlockReader(rep_->options, block.data); rep_->filter = new FilterBlockReader(rep_->options, block.data);
} }
Status Table::ReadStats(const Slice& handle_value, Rep* rep) { Status BlockBasedTable::ReadStats(const Slice& handle_value, Rep* rep) {
Slice v = handle_value; Slice v = handle_value;
BlockHandle handle; BlockHandle handle;
if (!handle.DecodeFrom(&v).ok()) { if (!handle.DecodeFrom(&v).ok()) {
@ -322,10 +326,6 @@ Status Table::ReadStats(const Slice& handle_value, Rep* rep) {
return s; return s;
} }
Table::~Table() {
delete rep_;
}
static void DeleteBlock(void* arg, void* ignored) { static void DeleteBlock(void* arg, void* ignored) {
delete reinterpret_cast<Block*>(arg); delete reinterpret_cast<Block*>(arg);
} }
@ -343,13 +343,13 @@ static void ReleaseBlock(void* arg, void* h) {
// Convert an index iterator value (i.e., an encoded BlockHandle) // Convert an index iterator value (i.e., an encoded BlockHandle)
// into an iterator over the contents of the corresponding block. // into an iterator over the contents of the corresponding block.
Iterator* Table::BlockReader(void* arg, Iterator* BlockBasedTable::BlockReader(void* arg,
const ReadOptions& options, const ReadOptions& options,
const Slice& index_value, const Slice& index_value,
bool* didIO, bool* didIO,
bool for_compaction) { bool for_compaction) {
const bool no_io = (options.read_tier == kBlockCacheTier); const bool no_io = (options.read_tier == kBlockCacheTier);
Table* table = reinterpret_cast<Table*>(arg); BlockBasedTable* table = reinterpret_cast<BlockBasedTable*>(arg);
Cache* block_cache = table->rep_->options.block_cache.get(); Cache* block_cache = table->rep_->options.block_cache.get();
std::shared_ptr<Statistics> statistics = table->rep_->options.statistics; std::shared_ptr<Statistics> statistics = table->rep_->options.statistics;
Block* block = nullptr; Block* block = nullptr;
@ -427,11 +427,11 @@ Iterator* Table::BlockReader(void* arg,
return iter; return iter;
} }
Iterator* Table::BlockReader(void* arg, Iterator* BlockBasedTable::BlockReader(void* arg,
const ReadOptions& options, const ReadOptions& options,
const EnvOptions& soptions, const EnvOptions& soptions,
const Slice& index_value, const Slice& index_value,
bool for_compaction) { bool for_compaction) {
return BlockReader(arg, options, index_value, nullptr, for_compaction); return BlockReader(arg, options, index_value, nullptr, for_compaction);
} }
@ -448,7 +448,7 @@ Iterator* Table::BlockReader(void* arg,
// in memory. When blooms may need to be paged in, we should refactor so that // in memory. When blooms may need to be paged in, we should refactor so that
// this is only ever called lazily. In particular, this shouldn't be called // this is only ever called lazily. In particular, this shouldn't be called
// while the DB lock is held like it is now. // while the DB lock is held like it is now.
bool Table::PrefixMayMatch(const Slice& internal_prefix) const { bool BlockBasedTable::PrefixMayMatch(const Slice& internal_prefix) {
FilterBlockReader* filter = rep_->filter; FilterBlockReader* filter = rep_->filter;
bool may_match = true; bool may_match = true;
Status s; Status s;
@ -497,7 +497,11 @@ bool Table::PrefixMayMatch(const Slice& internal_prefix) const {
return may_match; return may_match;
} }
Iterator* Table::NewIterator(const ReadOptions& options) const { Iterator* Table::NewIterator(const ReadOptions& options) {
return nullptr;
}
Iterator* BlockBasedTable::NewIterator(const ReadOptions& options) {
if (options.prefix) { if (options.prefix) {
InternalKey internal_prefix(*options.prefix, 0, kTypeValue); InternalKey internal_prefix(*options.prefix, 0, kTypeValue);
if (!PrefixMayMatch(internal_prefix.Encode())) { if (!PrefixMayMatch(internal_prefix.Encode())) {
@ -509,14 +513,15 @@ Iterator* Table::NewIterator(const ReadOptions& options) const {
return NewTwoLevelIterator( return NewTwoLevelIterator(
rep_->index_block->NewIterator(rep_->options.comparator), rep_->index_block->NewIterator(rep_->options.comparator),
&Table::BlockReader, const_cast<Table*>(this), options, rep_->soptions); &BlockBasedTable::BlockReader, const_cast<BlockBasedTable*>(this),
options, rep_->soptions);
} }
Status Table::InternalGet(const ReadOptions& options, const Slice& k, Status BlockBasedTable::Get(const ReadOptions& options, const Slice& k,
void* arg, void* arg,
bool (*saver)(void*, const Slice&, const Slice&, bool (*saver)(void*, const Slice&, const Slice&,
bool), bool),
void (*mark_key_may_exist)(void*)) { void (*mark_key_may_exist)(void*)) {
Status s; Status s;
Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator); Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
bool done = false; bool done = false;
@ -566,16 +571,17 @@ bool SaveDidIO(void* arg, const Slice& key, const Slice& value, bool didIO) {
*reinterpret_cast<bool*>(arg) = didIO; *reinterpret_cast<bool*>(arg) = didIO;
return false; return false;
} }
bool Table::TEST_KeyInCache(const ReadOptions& options, const Slice& key) { bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options,
// We use InternalGet() as it has logic that checks whether we read the const Slice& key) {
// We use Get() as it has logic that checks whether we read the
// block from the disk or not. // block from the disk or not.
bool didIO = false; bool didIO = false;
Status s = InternalGet(options, key, &didIO, SaveDidIO); Status s = Get(options, key, &didIO, SaveDidIO);
assert(s.ok()); assert(s.ok());
return !didIO; return !didIO;
} }
uint64_t Table::ApproximateOffsetOf(const Slice& key) const { uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key) {
Iterator* index_iter = Iterator* index_iter =
rep_->index_block->NewIterator(rep_->options.comparator); rep_->index_block->NewIterator(rep_->options.comparator);
index_iter->Seek(key); index_iter->Seek(key);
@ -602,8 +608,8 @@ uint64_t Table::ApproximateOffsetOf(const Slice& key) const {
return result; return result;
} }
const std::string Table::kFilterBlockPrefix = "filter."; const std::string BlockBasedTable::kFilterBlockPrefix = "filter.";
const std::string Table::kStatsBlock = "rocksdb.stats"; const std::string BlockBasedTable::kStatsBlock = "rocksdb.stats";
const std::string TableStatsNames::kDataSize = "rocksdb.data.size"; const std::string TableStatsNames::kDataSize = "rocksdb.data.size";
const std::string TableStatsNames::kIndexSize = "rocksdb.index.size"; const std::string TableStatsNames::kIndexSize = "rocksdb.index.size";

@ -13,6 +13,7 @@
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/iterator.h" #include "rocksdb/iterator.h"
#include "rocksdb/table_stats.h" #include "rocksdb/table_stats.h"
#include "rocksdb/table.h"
namespace rocksdb { namespace rocksdb {
@ -23,13 +24,14 @@ struct Options;
class RandomAccessFile; class RandomAccessFile;
struct ReadOptions; struct ReadOptions;
class TableCache; class TableCache;
class Table;
using std::unique_ptr; using std::unique_ptr;
// A Table is a sorted map from strings to strings. Tables are // A Table is a sorted map from strings to strings. Tables are
// immutable and persistent. A Table may be safely accessed from // immutable and persistent. A Table may be safely accessed from
// multiple threads without external synchronization. // multiple threads without external synchronization.
class Table { class BlockBasedTable : public Table {
public: public:
static const std::string kFilterBlockPrefix; static const std::string kFilterBlockPrefix;
static const std::string kStatsBlock; static const std::string kStatsBlock;
@ -52,14 +54,18 @@ class Table {
uint64_t file_size, uint64_t file_size,
unique_ptr<Table>* table); unique_ptr<Table>* table);
~Table(); bool PrefixMayMatch(const Slice& internal_prefix) override;
bool PrefixMayMatch(const Slice& internal_prefix) const;
// Returns a new iterator over the table contents. // Returns a new iterator over the table contents.
// The result of NewIterator() is initially invalid (caller must // The result of NewIterator() is initially invalid (caller must
// call one of the Seek methods on the iterator before using it). // call one of the Seek methods on the iterator before using it).
Iterator* NewIterator(const ReadOptions&) const; Iterator* NewIterator(const ReadOptions&) override;
Status Get(
const ReadOptions&, const Slice& key,
void* arg,
bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool),
void (*mark_key_may_exist)(void*) = nullptr) override;
// Given a key, return an approximate byte offset in the file where // Given a key, return an approximate byte offset in the file where
// the data for that key begins (or would begin if the key were // the data for that key begins (or would begin if the key were
@ -67,24 +73,25 @@ class Table {
// bytes, and so includes effects like compression of the underlying data. // bytes, and so includes effects like compression of the underlying data.
// E.g., the approximate offset of the last key in the table will // E.g., the approximate offset of the last key in the table will
// be close to the file length. // be close to the file length.
uint64_t ApproximateOffsetOf(const Slice& key) const; uint64_t ApproximateOffsetOf(const Slice& key) override;
// Returns true if the block for the specified key is in cache. // Returns true if the block for the specified key is in cache.
// REQUIRES: key is in this table. // REQUIRES: key is in this table.
bool TEST_KeyInCache(const ReadOptions& options, const Slice& key); bool TEST_KeyInCache(const ReadOptions& options, const Slice& key) override;
// Set up the table for Compaction. Might change some parameters with // Set up the table for Compaction. Might change some parameters with
// posix_fadvise // posix_fadvise
void SetupForCompaction(); void SetupForCompaction() override;
TableStats& GetTableStats() override;
const TableStats& GetTableStats() const; ~BlockBasedTable();
private: private:
struct Rep; struct Rep;
Rep* rep_; Rep* rep_;
bool compaction_optimized_; bool compaction_optimized_;
explicit Table(Rep* rep) : compaction_optimized_(false) { rep_ = rep; }
static Iterator* BlockReader(void*, const ReadOptions&, static Iterator* BlockReader(void*, const ReadOptions&,
const EnvOptions& soptions, const Slice&, const EnvOptions& soptions, const Slice&,
bool for_compaction); bool for_compaction);
@ -95,12 +102,6 @@ class Table {
// after a call to Seek(key), until handle_result returns false. // after a call to Seek(key), until handle_result returns false.
// May not make such a call if filter policy says that key is not present. // May not make such a call if filter policy says that key is not present.
friend class TableCache; friend class TableCache;
Status InternalGet(
const ReadOptions&, const Slice& key,
void* arg,
bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool),
void (*mark_key_may_exist)(void*) = nullptr);
void ReadMeta(const Footer& footer); void ReadMeta(const Footer& footer);
void ReadFilter(const Slice& filter_handle_value); void ReadFilter(const Slice& filter_handle_value);
@ -108,19 +109,14 @@ class Table {
static void SetupCacheKeyPrefix(Rep* rep); static void SetupCacheKeyPrefix(Rep* rep);
// No copying allowed explicit BlockBasedTable(Rep* rep) :
Table(const Table&); compaction_optimized_(false) {
void operator=(const Table&); rep_ = rep;
}; }
struct TableStatsNames { // No copying allowed
static const std::string kDataSize; explicit BlockBasedTable(const Table&) = delete;
static const std::string kIndexSize; void operator=(const Table&) = delete;
static const std::string kRawKeySize;
static const std::string kRawValueSize;
static const std::string kNumDataBlocks;
static const std::string kNumEntries;
static const std::string kFilterPolicy;
}; };
} // namespace rocksdb } // namespace rocksdb

@ -7,19 +7,20 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors. // found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "rocksdb/table_builder.h" #include "table/block_based_table_builder.h"
#include <assert.h> #include <assert.h>
#include <map> #include <map>
#include "rocksdb/comparator.h" #include "rocksdb/comparator.h"
#include "rocksdb/table.h"
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/filter_policy.h" #include "rocksdb/filter_policy.h"
#include "rocksdb/options.h" #include "rocksdb/options.h"
#include "table/block_based_table.h"
#include "table/block_builder.h" #include "table/block_builder.h"
#include "table/filter_block.h" #include "table/filter_block.h"
#include "table/format.h" #include "table/format.h"
#include "table/table.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/crc32c.h" #include "util/crc32c.h"
#include "util/stop_watch.h" #include "util/stop_watch.h"
@ -71,7 +72,7 @@ void LogStatsCollectionError(
} // anonymous namespace } // anonymous namespace
struct TableBuilder::Rep { struct BlockBasedTableBuilder::Rep {
Options options; Options options;
Options index_block_options; Options index_block_options;
WritableFile* file; WritableFile* file;
@ -119,37 +120,22 @@ struct TableBuilder::Rep {
} }
}; };
TableBuilder::TableBuilder(const Options& options, WritableFile* file, BlockBasedTableBuilder::BlockBasedTableBuilder(const Options& options,
int level, const bool enable_compression) WritableFile* file, int level,
: rep_(new Rep(options, file, enable_compression)), level_(level) { const bool enable_compression)
: TableBuilder(level), rep_(new Rep(options, file, enable_compression)) {
if (rep_->filter_block != nullptr) { if (rep_->filter_block != nullptr) {
rep_->filter_block->StartBlock(0); rep_->filter_block->StartBlock(0);
} }
} }
TableBuilder::~TableBuilder() { BlockBasedTableBuilder::~BlockBasedTableBuilder() {
assert(rep_->closed); // Catch errors where caller forgot to call Finish() assert(rep_->closed); // Catch errors where caller forgot to call Finish()
delete rep_->filter_block; delete rep_->filter_block;
delete rep_; delete rep_;
} }
Status TableBuilder::ChangeOptions(const Options& options) { void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) {
// Note: if more fields are added to Options, update
// this function to catch changes that should not be allowed to
// change in the middle of building a Table.
if (options.comparator != rep_->options.comparator) {
return Status::InvalidArgument("changing comparator while building table");
}
// Note that any live BlockBuilders point to rep_->options and therefore
// will automatically pick up the updated options.
rep_->options = options;
rep_->index_block_options = options;
rep_->index_block_options.block_restart_interval = 1;
return Status::OK();
}
void TableBuilder::Add(const Slice& key, const Slice& value) {
Rep* r = rep_; Rep* r = rep_;
assert(!r->closed); assert(!r->closed);
if (!ok()) return; if (!ok()) return;
@ -204,7 +190,7 @@ void TableBuilder::Add(const Slice& key, const Slice& value) {
} }
} }
void TableBuilder::Flush() { void BlockBasedTableBuilder::Flush() {
Rep* r = rep_; Rep* r = rep_;
assert(!r->closed); assert(!r->closed);
if (!ok()) return; if (!ok()) return;
@ -222,7 +208,8 @@ void TableBuilder::Flush() {
++r->num_data_blocks; ++r->num_data_blocks;
} }
void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) { void BlockBasedTableBuilder::WriteBlock(BlockBuilder* block,
BlockHandle* handle) {
// File format contains a sequence of blocks where each block has: // File format contains a sequence of blocks where each block has:
// block_data: uint8[n] // block_data: uint8[n]
// type: uint8 // type: uint8
@ -302,9 +289,9 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
block->Reset(); block->Reset();
} }
void TableBuilder::WriteRawBlock(const Slice& block_contents, void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents,
CompressionType type, CompressionType type,
BlockHandle* handle) { BlockHandle* handle) {
Rep* r = rep_; Rep* r = rep_;
StopWatch sw(r->options.env, r->options.statistics, WRITE_RAW_BLOCK_MICROS); StopWatch sw(r->options.env, r->options.statistics, WRITE_RAW_BLOCK_MICROS);
handle->set_offset(r->offset); handle->set_offset(r->offset);
@ -323,11 +310,11 @@ void TableBuilder::WriteRawBlock(const Slice& block_contents,
} }
} }
Status TableBuilder::status() const { Status BlockBasedTableBuilder::status() const {
return rep_->status; return rep_->status;
} }
Status TableBuilder::Finish() { Status BlockBasedTableBuilder::Finish() {
Rep* r = rep_; Rep* r = rep_;
Flush(); Flush();
assert(!r->closed); assert(!r->closed);
@ -370,7 +357,7 @@ Status TableBuilder::Finish() {
if (r->filter_block != nullptr) { if (r->filter_block != nullptr) {
// Add mapping from "<filter_block_prefix>.Name" to location // Add mapping from "<filter_block_prefix>.Name" to location
// of filter data. // of filter data.
std::string key = Table::kFilterBlockPrefix; std::string key = BlockBasedTable::kFilterBlockPrefix;
key.append(r->options.filter_policy->Name()); key.append(r->options.filter_policy->Name());
std::string handle_encoding; std::string handle_encoding;
filter_block_handle.EncodeTo(&handle_encoding); filter_block_handle.EncodeTo(&handle_encoding);
@ -435,7 +422,7 @@ Status TableBuilder::Finish() {
std::string handle_encoding; std::string handle_encoding;
stats_block_handle.EncodeTo(&handle_encoding); stats_block_handle.EncodeTo(&handle_encoding);
meta_block_handles.insert( meta_block_handles.insert(
std::make_pair(Table::kStatsBlock, handle_encoding) std::make_pair(BlockBasedTable::kStatsBlock, handle_encoding)
); );
} // end of stats block writing } // end of stats block writing
@ -466,17 +453,17 @@ Status TableBuilder::Finish() {
return r->status; return r->status;
} }
void TableBuilder::Abandon() { void BlockBasedTableBuilder::Abandon() {
Rep* r = rep_; Rep* r = rep_;
assert(!r->closed); assert(!r->closed);
r->closed = true; r->closed = true;
} }
uint64_t TableBuilder::NumEntries() const { uint64_t BlockBasedTableBuilder::NumEntries() const {
return rep_->num_entries; return rep_->num_entries;
} }
uint64_t TableBuilder::FileSize() const { uint64_t BlockBasedTableBuilder::FileSize() const {
return rep_->offset; return rep_->offset;
} }

@ -1,18 +1,13 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors. // found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// TableBuilder provides the interface used to build a Table
// (an immutable and sorted map from keys to values).
//
// Multiple threads can invoke const methods on a TableBuilder without
// external synchronization, but if any of the threads may call a
// non-const method, all threads accessing the same TableBuilder must use
// external synchronization.
#ifndef STORAGE_ROCKSDB_INCLUDE_TABLE_BUILDER_H_
#define STORAGE_ROCKSDB_INCLUDE_TABLE_BUILDER_H_
#pragma once
#include <stdint.h> #include <stdint.h>
#include "rocksdb/options.h" #include "rocksdb/options.h"
#include "rocksdb/status.h" #include "rocksdb/status.h"
@ -22,64 +17,48 @@ namespace rocksdb {
class BlockBuilder; class BlockBuilder;
class BlockHandle; class BlockHandle;
class WritableFile; class WritableFile;
class TableBuilder;
class TableBuilder { class BlockBasedTableBuilder : public TableBuilder {
public: public:
// Create a builder that will store the contents of the table it is // Create a builder that will store the contents of the table it is
// building in *file. Does not close the file. It is up to the // building in *file. Does not close the file. It is up to the
// caller to close the file after calling Finish(). The output file // caller to close the file after calling Finish(). The output file
// will be part of level specified by 'level'. A value of -1 means // will be part of level specified by 'level'. A value of -1 means
// that the caller does not know which level the output file will reside. // that the caller does not know which level the output file will reside.
// BlockBasedTableBuilder(const Options& options, WritableFile* file,
// If enable_compression=true, this table will follow the compression int level = -1, const bool enable_compression = true);
// setting given in parameter options. If enable_compression=false, the
// table will not be compressed.
TableBuilder(const Options& options, WritableFile* file, int level=-1,
const bool enable_compression=true);
// REQUIRES: Either Finish() or Abandon() has been called. // REQUIRES: Either Finish() or Abandon() has been called.
~TableBuilder(); ~BlockBasedTableBuilder();
// Change the options used by this builder. Note: only some of the
// option fields can be changed after construction. If a field is
// not allowed to change dynamically and its value in the structure
// passed to the constructor is different from its value in the
// structure passed to this method, this method will return an error
// without changing any fields.
Status ChangeOptions(const Options& options);
// Add key,value to the table being constructed. // Add key,value to the table being constructed.
// REQUIRES: key is after any previously added key according to comparator. // REQUIRES: key is after any previously added key according to comparator.
// REQUIRES: Finish(), Abandon() have not been called // REQUIRES: Finish(), Abandon() have not been called
void Add(const Slice& key, const Slice& value); void Add(const Slice& key, const Slice& value) override;
// Advanced operation: flush any buffered key/value pairs to file.
// Can be used to ensure that two adjacent entries never live in
// the same data block. Most clients should not need to use this method.
// REQUIRES: Finish(), Abandon() have not been called
void Flush();
// Return non-ok iff some error has been detected. // Return non-ok iff some error has been detected.
Status status() const; Status status() const override;
// Finish building the table. Stops using the file passed to the // Finish building the table. Stops using the file passed to the
// constructor after this function returns. // constructor after this function returns.
// REQUIRES: Finish(), Abandon() have not been called // REQUIRES: Finish(), Abandon() have not been called
Status Finish(); Status Finish() override;
// Indicate that the contents of this builder should be abandoned. Stops // Indicate that the contents of this builder should be abandoned. Stops
// using the file passed to the constructor after this function returns. // using the file passed to the constructor after this function returns.
// If the caller is not going to call Finish(), it must call Abandon() // If the caller is not going to call Finish(), it must call Abandon()
// before destroying this builder. // before destroying this builder.
// REQUIRES: Finish(), Abandon() have not been called // REQUIRES: Finish(), Abandon() have not been called
void Abandon(); void Abandon() override;
// Number of calls to Add() so far. // Number of calls to Add() so far.
uint64_t NumEntries() const; uint64_t NumEntries() const override;
// Size of the file generated so far. If invoked after a successful // Size of the file generated so far. If invoked after a successful
// Finish() call, returns the size of the final generated file. // Finish() call, returns the size of the final generated file.
uint64_t FileSize() const; uint64_t FileSize() const override;
private: private:
bool ok() const { return status().ok(); } bool ok() const { return status().ok(); }
@ -88,13 +67,17 @@ class TableBuilder {
struct Rep; struct Rep;
Rep* rep_; Rep* rep_;
int level_;
// Advanced operation: flush any buffered key/value pairs to file.
// Can be used to ensure that two adjacent entries never live in
// the same data block. Most clients should not need to use this method.
// REQUIRES: Finish(), Abandon() have not been called
void Flush();
// No copying allowed // No copying allowed
TableBuilder(const TableBuilder&); BlockBasedTableBuilder(const BlockBasedTableBuilder&) = delete;
void operator=(const TableBuilder&); void operator=(const BlockBasedTableBuilder&) = delete;
}; };
} // namespace rocksdb } // namespace rocksdb
#endif // STORAGE_ROCKSDB_INCLUDE_TABLE_BUILDER_H_

@ -0,0 +1,36 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "table/block_based_table_factory.h"
#include <memory>
#include <stdint.h>
#include "table/block_based_table_builder.h"
#include "table/block_based_table.h"
#include "port/port.h"
namespace rocksdb {
Status BlockBasedTableFactory::OpenTable(const Options& options,
const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file,
uint64_t file_size,
unique_ptr<Table>* table) const {
return BlockBasedTable::Open(options, soptions, std::move(file), file_size,
table);
}
TableBuilder* BlockBasedTableFactory::GetTableBuilder(
const Options& options, WritableFile* file, int level,
const bool enable_compression) const {
return new BlockBasedTableBuilder(options, file, level, enable_compression);
}
} // namespace rocksdb

@ -0,0 +1,48 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <memory>
#include <stdint.h>
#include "rocksdb/table.h"
namespace rocksdb {
struct Options;
struct EnvOptions;
using std::unique_ptr;
class Status;
class RandomAccessFile;
class WritableFile;
class Table;
class TableBuilder;
class BlockBasedTable;
class BlockBasedTableBuilder;
class BlockBasedTableFactory: public TableFactory {
public:
~BlockBasedTableFactory() {
}
BlockBasedTableFactory() {
}
const char* Name() const override {
return "BlockBasedTable";
}
Status OpenTable(const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
unique_ptr<Table>* table) const override;
TableBuilder* GetTableBuilder(const Options& options, WritableFile* file,
int level, const bool enable_compression) const
override;
};
} // namespace rocksdb

@ -36,7 +36,6 @@
#include <algorithm> #include <algorithm>
#include <assert.h> #include <assert.h>
#include "rocksdb/comparator.h" #include "rocksdb/comparator.h"
#include "rocksdb/table_builder.h"
#include "util/coding.h" #include "util/coding.h"
namespace rocksdb { namespace rocksdb {

@ -10,10 +10,9 @@
#include "rocksdb/db.h" #include "rocksdb/db.h"
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/iterator.h" #include "rocksdb/iterator.h"
#include "rocksdb/table_builder.h" #include "rocksdb/table.h"
#include "table/block.h" #include "table/block.h"
#include "table/block_builder.h" #include "table/block_builder.h"
#include "table/table.h"
#include "table/format.h" #include "table/format.h"
#include "util/random.h" #include "util/random.h"
#include "util/testharness.h" #include "util/testharness.h"

@ -12,7 +12,7 @@
#include <stdint.h> #include <stdint.h>
#include "rocksdb/slice.h" #include "rocksdb/slice.h"
#include "rocksdb/status.h" #include "rocksdb/status.h"
#include "rocksdb/table_builder.h" #include "rocksdb/table.h"
namespace rocksdb { namespace rocksdb {

@ -17,12 +17,13 @@
#include "rocksdb/db.h" #include "rocksdb/db.h"
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/iterator.h" #include "rocksdb/iterator.h"
#include "rocksdb/table_builder.h" #include "rocksdb/table.h"
#include "rocksdb/memtablerep.h" #include "rocksdb/memtablerep.h"
#include "table/block.h" #include "table/block.h"
#include "table/block_builder.h" #include "table/block_builder.h"
#include "table/format.h" #include "table/format.h"
#include "table/table.h" #include "table/block_based_table.h"
#include "table/block_based_table_builder.h"
#include "util/random.h" #include "util/random.h"
#include "util/testharness.h" #include "util/testharness.h"
#include "util/testutil.h" #include "util/testutil.h"
@ -237,19 +238,19 @@ class BlockConstructor: public Constructor {
BlockConstructor(); BlockConstructor();
}; };
class TableConstructor: public Constructor { class BlockBasedTableConstructor: public Constructor {
public: public:
explicit TableConstructor( explicit BlockBasedTableConstructor(
const Comparator* cmp) const Comparator* cmp)
: Constructor(cmp) { : Constructor(cmp) {
} }
~TableConstructor() { ~BlockBasedTableConstructor() {
Reset(); Reset();
} }
virtual Status FinishImpl(const Options& options, const KVMap& data) { virtual Status FinishImpl(const Options& options, const KVMap& data) {
Reset(); Reset();
sink_.reset(new StringSink()); sink_.reset(new StringSink());
TableBuilder builder(options, sink_.get()); BlockBasedTableBuilder builder(options, sink_.get());
for (KVMap::const_iterator it = data.begin(); for (KVMap::const_iterator it = data.begin();
it != data.end(); it != data.end();
@ -265,8 +266,11 @@ class TableConstructor: public Constructor {
// Open the table // Open the table
uniq_id_ = cur_uniq_id_++; uniq_id_ = cur_uniq_id_++;
source_.reset(new StringSource(sink_->contents(), uniq_id_)); source_.reset(new StringSource(sink_->contents(), uniq_id_));
return Table::Open(options, soptions, std::move(source_), unique_ptr<TableFactory> table_factory;
sink_->contents().size(), &table_); return options.table_factory->OpenTable(options, soptions,
std::move(source_),
sink_->contents().size(),
&table_);
} }
virtual Iterator* NewIterator() const { virtual Iterator* NewIterator() const {
@ -279,8 +283,10 @@ class TableConstructor: public Constructor {
virtual Status Reopen(const Options& options) { virtual Status Reopen(const Options& options) {
source_.reset(new StringSource(sink_->contents(), uniq_id_)); source_.reset(new StringSource(sink_->contents(), uniq_id_));
return Table::Open(options, soptions, std::move(source_), return options.table_factory->OpenTable(options, soptions,
sink_->contents().size(), &table_); std::move(source_),
sink_->contents().size(),
&table_);
} }
virtual Table* table() { virtual Table* table() {
@ -300,12 +306,12 @@ class TableConstructor: public Constructor {
unique_ptr<StringSource> source_; unique_ptr<StringSource> source_;
unique_ptr<Table> table_; unique_ptr<Table> table_;
TableConstructor(); BlockBasedTableConstructor();
static uint64_t cur_uniq_id_; static uint64_t cur_uniq_id_;
const EnvOptions soptions; const EnvOptions soptions;
}; };
uint64_t TableConstructor::cur_uniq_id_ = 1; uint64_t BlockBasedTableConstructor::cur_uniq_id_ = 1;
// A helper class that converts internal format keys into user keys // A helper class that converts internal format keys into user keys
class KeyConvertingIterator: public Iterator { class KeyConvertingIterator: public Iterator {
@ -533,7 +539,7 @@ class Harness {
} }
switch (args.type) { switch (args.type) {
case TABLE_TEST: case TABLE_TEST:
constructor_ = new TableConstructor(options_.comparator); constructor_ = new BlockBasedTableConstructor(options_.comparator);
break; break;
case BLOCK_TEST: case BLOCK_TEST:
constructor_ = new BlockConstructor(options_.comparator); constructor_ = new BlockConstructor(options_.comparator);
@ -857,7 +863,7 @@ class TableTest { };
// This test include all the basic checks except those for index size and block // This test include all the basic checks except those for index size and block
// size, which will be conducted in separated unit tests. // size, which will be conducted in separated unit tests.
TEST(TableTest, BasicTableStats) { TEST(TableTest, BasicTableStats) {
TableConstructor c(BytewiseComparator()); BlockBasedTableConstructor c(BytewiseComparator());
c.Add("a1", "val1"); c.Add("a1", "val1");
c.Add("b2", "val2"); c.Add("b2", "val2");
@ -901,7 +907,7 @@ TEST(TableTest, BasicTableStats) {
} }
TEST(TableTest, FilterPolicyNameStats) { TEST(TableTest, FilterPolicyNameStats) {
TableConstructor c(BytewiseComparator()); BlockBasedTableConstructor c(BytewiseComparator());
c.Add("a1", "val1"); c.Add("a1", "val1");
std::vector<std::string> keys; std::vector<std::string> keys;
KVMap kvmap; KVMap kvmap;
@ -941,7 +947,7 @@ TEST(TableTest, IndexSizeStat) {
// Each time we load one more key to the table. the table index block // Each time we load one more key to the table. the table index block
// size is expected to be larger than last time's. // size is expected to be larger than last time's.
for (size_t i = 1; i < keys.size(); ++i) { for (size_t i = 1; i < keys.size(); ++i) {
TableConstructor c(BytewiseComparator()); BlockBasedTableConstructor c(BytewiseComparator());
for (size_t j = 0; j < i; ++j) { for (size_t j = 0; j < i; ++j) {
c.Add(keys[j], "val"); c.Add(keys[j], "val");
} }
@ -962,7 +968,7 @@ TEST(TableTest, IndexSizeStat) {
TEST(TableTest, NumBlockStat) { TEST(TableTest, NumBlockStat) {
Random rnd(test::RandomSeed()); Random rnd(test::RandomSeed());
TableConstructor c(BytewiseComparator()); BlockBasedTableConstructor c(BytewiseComparator());
Options options; Options options;
options.compression = kNoCompression; options.compression = kNoCompression;
options.block_restart_interval = 1; options.block_restart_interval = 1;
@ -984,7 +990,7 @@ TEST(TableTest, NumBlockStat) {
} }
TEST(TableTest, ApproximateOffsetOfPlain) { TEST(TableTest, ApproximateOffsetOfPlain) {
TableConstructor c(BytewiseComparator()); BlockBasedTableConstructor c(BytewiseComparator());
c.Add("k01", "hello"); c.Add("k01", "hello");
c.Add("k02", "hello2"); c.Add("k02", "hello2");
c.Add("k03", std::string(10000, 'x')); c.Add("k03", std::string(10000, 'x'));
@ -1015,7 +1021,7 @@ TEST(TableTest, ApproximateOffsetOfPlain) {
static void Do_Compression_Test(CompressionType comp) { static void Do_Compression_Test(CompressionType comp) {
Random rnd(301); Random rnd(301);
TableConstructor c(BytewiseComparator()); BlockBasedTableConstructor c(BytewiseComparator());
std::string tmp; std::string tmp;
c.Add("k01", "hello"); c.Add("k01", "hello");
c.Add("k02", test::CompressibleString(&rnd, 0.25, 10000, &tmp)); c.Add("k02", test::CompressibleString(&rnd, 0.25, 10000, &tmp));
@ -1071,7 +1077,7 @@ TEST(TableTest, BlockCacheLeak) {
opt.block_cache = NewLRUCache(16*1024*1024); // big enough so we don't ever opt.block_cache = NewLRUCache(16*1024*1024); // big enough so we don't ever
// lose cached values. // lose cached values.
TableConstructor c(BytewiseComparator()); BlockBasedTableConstructor c(BytewiseComparator());
c.Add("k01", "hello"); c.Add("k01", "hello");
c.Add("k02", "hello2"); c.Add("k02", "hello2");
c.Add("k03", std::string(10000, 'x')); c.Add("k03", std::string(10000, 'x'));

@ -9,9 +9,10 @@
#include "table/two_level_iterator.h" #include "table/two_level_iterator.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"
#include "table/block.h" #include "table/block.h"
#include "table/format.h" #include "table/format.h"
#include "table/table.h"
#include "table/iterator_wrapper.h" #include "table/iterator_wrapper.h"
namespace rocksdb { namespace rocksdb {

@ -3,7 +3,6 @@
// LICENSE file in the root directory of this source tree. An additional grant // LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory. // of patent rights can be found in the PATENTS file in the same directory.
// //
#include "table/table.h"
#include <map> #include <map>
#include <string> #include <string>
@ -15,7 +14,7 @@
#include "rocksdb/db.h" #include "rocksdb/db.h"
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/iterator.h" #include "rocksdb/iterator.h"
#include "rocksdb/table_builder.h" #include "rocksdb/table.h"
#include "table/block.h" #include "table/block.h"
#include "table/block_builder.h" #include "table/block_builder.h"
#include "table/format.h" #include "table/format.h"
@ -76,7 +75,10 @@ Status SstFileReader::ReadSequential(bool print_kv,
} }
uint64_t file_size; uint64_t file_size;
table_options.env->GetFileSize(file_name_, &file_size); table_options.env->GetFileSize(file_name_, &file_size);
s = Table::Open(table_options, soptions_, std::move(file), file_size, &table); unique_ptr<TableFactory> table_factory;
s = table_options.table_factory->OpenTable(table_options, soptions_,
std::move(file), file_size,
&table);
if(!s.ok()) { if(!s.ok()) {
return s; return s;
} }

@ -17,6 +17,7 @@
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/filter_policy.h" #include "rocksdb/filter_policy.h"
#include "rocksdb/merge_operator.h" #include "rocksdb/merge_operator.h"
#include "table/block_based_table_factory.h"
namespace rocksdb { namespace rocksdb {
@ -91,6 +92,8 @@ Options::Options()
filter_deletes(false), filter_deletes(false),
max_sequential_skip_in_iterations(8), max_sequential_skip_in_iterations(8),
memtable_factory(std::shared_ptr<SkipListFactory>(new SkipListFactory)), memtable_factory(std::shared_ptr<SkipListFactory>(new SkipListFactory)),
table_factory(
std::shared_ptr<TableFactory>(new BlockBasedTableFactory())),
compaction_filter_factory( compaction_filter_factory(
std::shared_ptr<CompactionFilterFactory>( std::shared_ptr<CompactionFilterFactory>(
new DefaultCompactionFilterFactory())), new DefaultCompactionFilterFactory())),
@ -114,6 +117,7 @@ Options::Dump(Logger* log) const
compaction_filter_factory->Name()); compaction_filter_factory->Name());
Log(log," Options.memtable_factory: %s", Log(log," Options.memtable_factory: %s",
memtable_factory->Name()); memtable_factory->Name());
Log(log," Options.table_factory: %s", table_factory->Name());
Log(log," Options.error_if_exists: %d", error_if_exists); Log(log," Options.error_if_exists: %d", error_if_exists);
Log(log," Options.create_if_missing: %d", create_if_missing); Log(log," Options.create_if_missing: %d", create_if_missing);
Log(log," Options.paranoid_checks: %d", paranoid_checks); Log(log," Options.paranoid_checks: %d", paranoid_checks);

Loading…
Cancel
Save