Conflicts: HISTORY.md db/db_impl.cc db/db_impl.h db/db_iter.cc db/db_test.cc db/dbformat.h db/memtable.cc db/memtable_list.cc db/memtable_list.h db/table_cache.cc db/table_cache.h db/version_edit.h db/version_set.cc db/version_set.h db/write_batch.cc db/write_batch_test.cc include/rocksdb/options.h util/options.ccmain
commit
0143abdbb0
@ -0,0 +1,337 @@ |
|||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
#include <algorithm> |
||||||
|
#include <set> |
||||||
|
|
||||||
|
#include "db/db_impl.h" |
||||||
|
#include "db/filename.h" |
||||||
|
#include "db/version_set.h" |
||||||
|
#include "db/write_batch_internal.h" |
||||||
|
#include "rocksdb/cache.h" |
||||||
|
#include "rocksdb/compaction_filter.h" |
||||||
|
#include "rocksdb/db.h" |
||||||
|
#include "rocksdb/env.h" |
||||||
|
#include "rocksdb/filter_policy.h" |
||||||
|
#include "rocksdb/slice_transform.h" |
||||||
|
#include "rocksdb/table.h" |
||||||
|
#include "table/plain_table_factory.h" |
||||||
|
#include "util/hash.h" |
||||||
|
#include "util/logging.h" |
||||||
|
#include "util/mutexlock.h" |
||||||
|
#include "util/testharness.h" |
||||||
|
#include "util/testutil.h" |
||||||
|
#include "utilities/merge_operators.h" |
||||||
|
|
||||||
|
using std::unique_ptr; |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class PlainTableDBTest { |
||||||
|
protected: |
||||||
|
private: |
||||||
|
std::string dbname_; |
||||||
|
Env* env_; |
||||||
|
DB* db_; |
||||||
|
|
||||||
|
Options last_options_; |
||||||
|
static std::unique_ptr<const SliceTransform> prefix_transform; |
||||||
|
|
||||||
|
public: |
||||||
|
PlainTableDBTest() : env_(Env::Default()) { |
||||||
|
dbname_ = test::TmpDir() + "/plain_table_db_test"; |
||||||
|
ASSERT_OK(DestroyDB(dbname_, Options())); |
||||||
|
db_ = nullptr; |
||||||
|
Reopen(); |
||||||
|
} |
||||||
|
|
||||||
|
~PlainTableDBTest() { |
||||||
|
delete db_; |
||||||
|
ASSERT_OK(DestroyDB(dbname_, Options())); |
||||||
|
} |
||||||
|
|
||||||
|
// Return the current option configuration.
|
||||||
|
Options CurrentOptions() { |
||||||
|
Options options; |
||||||
|
options.table_factory.reset(new PlainTableFactory(16, 2, 0.8)); |
||||||
|
options.prefix_extractor = prefix_transform.get(); |
||||||
|
options.allow_mmap_reads = true; |
||||||
|
return options; |
||||||
|
} |
||||||
|
|
||||||
|
DBImpl* dbfull() { |
||||||
|
return reinterpret_cast<DBImpl*>(db_); |
||||||
|
} |
||||||
|
|
||||||
|
void Reopen(Options* options = nullptr) { |
||||||
|
ASSERT_OK(TryReopen(options)); |
||||||
|
} |
||||||
|
|
||||||
|
void Close() { |
||||||
|
delete db_; |
||||||
|
db_ = nullptr; |
||||||
|
} |
||||||
|
|
||||||
|
void DestroyAndReopen(Options* options = nullptr) { |
||||||
|
//Destroy using last options
|
||||||
|
Destroy(&last_options_); |
||||||
|
ASSERT_OK(TryReopen(options)); |
||||||
|
} |
||||||
|
|
||||||
|
void Destroy(Options* options) { |
||||||
|
delete db_; |
||||||
|
db_ = nullptr; |
||||||
|
ASSERT_OK(DestroyDB(dbname_, *options)); |
||||||
|
} |
||||||
|
|
||||||
|
Status PureReopen(Options* options, DB** db) { |
||||||
|
return DB::Open(*options, dbname_, db); |
||||||
|
} |
||||||
|
|
||||||
|
Status TryReopen(Options* options = nullptr) { |
||||||
|
delete db_; |
||||||
|
db_ = nullptr; |
||||||
|
Options opts; |
||||||
|
if (options != nullptr) { |
||||||
|
opts = *options; |
||||||
|
} else { |
||||||
|
opts = CurrentOptions(); |
||||||
|
opts.create_if_missing = true; |
||||||
|
} |
||||||
|
last_options_ = opts; |
||||||
|
|
||||||
|
return DB::Open(opts, dbname_, &db_); |
||||||
|
} |
||||||
|
|
||||||
|
Status Put(const Slice& k, const Slice& v) { |
||||||
|
return db_->Put(WriteOptions(), k, v); |
||||||
|
} |
||||||
|
|
||||||
|
Status Delete(const std::string& k) { |
||||||
|
return db_->Delete(WriteOptions(), k); |
||||||
|
} |
||||||
|
|
||||||
|
std::string Get(const std::string& k, const Snapshot* snapshot = nullptr) { |
||||||
|
ReadOptions options; |
||||||
|
options.snapshot = snapshot; |
||||||
|
std::string result; |
||||||
|
Status s = db_->Get(options, k, &result); |
||||||
|
if (s.IsNotFound()) { |
||||||
|
result = "NOT_FOUND"; |
||||||
|
} else if (!s.ok()) { |
||||||
|
result = s.ToString(); |
||||||
|
} |
||||||
|
return result; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
int NumTableFilesAtLevel(int level) { |
||||||
|
std::string property; |
||||||
|
ASSERT_TRUE( |
||||||
|
db_->GetProperty("rocksdb.num-files-at-level" + NumberToString(level), |
||||||
|
&property)); |
||||||
|
return atoi(property.c_str()); |
||||||
|
} |
||||||
|
|
||||||
|
// Return spread of files per level
|
||||||
|
std::string FilesPerLevel() { |
||||||
|
std::string result; |
||||||
|
int last_non_zero_offset = 0; |
||||||
|
for (int level = 0; level < db_->NumberLevels(); level++) { |
||||||
|
int f = NumTableFilesAtLevel(level); |
||||||
|
char buf[100]; |
||||||
|
snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f); |
||||||
|
result += buf; |
||||||
|
if (f > 0) { |
||||||
|
last_non_zero_offset = result.size(); |
||||||
|
} |
||||||
|
} |
||||||
|
result.resize(last_non_zero_offset); |
||||||
|
return result; |
||||||
|
} |
||||||
|
|
||||||
|
std::string IterStatus(Iterator* iter) { |
||||||
|
std::string result; |
||||||
|
if (iter->Valid()) { |
||||||
|
result = iter->key().ToString() + "->" + iter->value().ToString(); |
||||||
|
} else { |
||||||
|
result = "(invalid)"; |
||||||
|
} |
||||||
|
return result; |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
std::unique_ptr<const SliceTransform> PlainTableDBTest::prefix_transform( |
||||||
|
NewFixedPrefixTransform(8)); |
||||||
|
|
||||||
|
TEST(PlainTableDBTest, Empty) { |
||||||
|
ASSERT_TRUE(dbfull() != nullptr); |
||||||
|
ASSERT_EQ("NOT_FOUND", Get("0000000000000foo")); |
||||||
|
} |
||||||
|
|
||||||
|
TEST(PlainTableDBTest, ReadWrite) { |
||||||
|
ASSERT_OK(Put("1000000000000foo", "v1")); |
||||||
|
ASSERT_EQ("v1", Get("1000000000000foo")); |
||||||
|
ASSERT_OK(Put("0000000000000bar", "v2")); |
||||||
|
ASSERT_OK(Put("1000000000000foo", "v3")); |
||||||
|
ASSERT_EQ("v3", Get("1000000000000foo")); |
||||||
|
ASSERT_EQ("v2", Get("0000000000000bar")); |
||||||
|
} |
||||||
|
|
||||||
|
TEST(PlainTableDBTest, Flush) { |
||||||
|
ASSERT_OK(Put("1000000000000foo", "v1")); |
||||||
|
ASSERT_OK(Put("0000000000000bar", "v2")); |
||||||
|
ASSERT_OK(Put("1000000000000foo", "v3")); |
||||||
|
dbfull()->TEST_FlushMemTable(); |
||||||
|
ASSERT_EQ("v3", Get("1000000000000foo")); |
||||||
|
ASSERT_EQ("v2", Get("0000000000000bar")); |
||||||
|
} |
||||||
|
|
||||||
|
TEST(PlainTableDBTest, Iterator) { |
||||||
|
ASSERT_OK(Put("1000000000foo002", "v_2")); |
||||||
|
ASSERT_OK(Put("0000000000000bar", "random")); |
||||||
|
ASSERT_OK(Put("1000000000foo001", "v1")); |
||||||
|
ASSERT_OK(Put("3000000000000bar", "bar_v")); |
||||||
|
ASSERT_OK(Put("1000000000foo003", "v__3")); |
||||||
|
ASSERT_OK(Put("1000000000foo004", "v__4")); |
||||||
|
ASSERT_OK(Put("1000000000foo005", "v__5")); |
||||||
|
ASSERT_OK(Put("1000000000foo007", "v__7")); |
||||||
|
ASSERT_OK(Put("1000000000foo008", "v__8")); |
||||||
|
dbfull()->TEST_FlushMemTable(); |
||||||
|
ASSERT_EQ("v1", Get("1000000000foo001")); |
||||||
|
ASSERT_EQ("v__3", Get("1000000000foo003")); |
||||||
|
ReadOptions ro; |
||||||
|
Iterator* iter = dbfull()->NewIterator(ro); |
||||||
|
iter->Seek("1000000000foo001"); |
||||||
|
ASSERT_TRUE(iter->Valid()); |
||||||
|
ASSERT_EQ("1000000000foo001", iter->key().ToString()); |
||||||
|
ASSERT_EQ("v1", iter->value().ToString()); |
||||||
|
|
||||||
|
iter->Next(); |
||||||
|
ASSERT_TRUE(iter->Valid()); |
||||||
|
ASSERT_EQ("1000000000foo002", iter->key().ToString()); |
||||||
|
ASSERT_EQ("v_2", iter->value().ToString()); |
||||||
|
|
||||||
|
iter->Next(); |
||||||
|
ASSERT_TRUE(iter->Valid()); |
||||||
|
ASSERT_EQ("1000000000foo003", iter->key().ToString()); |
||||||
|
ASSERT_EQ("v__3", iter->value().ToString()); |
||||||
|
|
||||||
|
iter->Next(); |
||||||
|
ASSERT_TRUE(iter->Valid()); |
||||||
|
ASSERT_EQ("1000000000foo004", iter->key().ToString()); |
||||||
|
ASSERT_EQ("v__4", iter->value().ToString()); |
||||||
|
|
||||||
|
iter->Seek("3000000000000bar"); |
||||||
|
ASSERT_TRUE(iter->Valid()); |
||||||
|
ASSERT_EQ("3000000000000bar", iter->key().ToString()); |
||||||
|
ASSERT_EQ("bar_v", iter->value().ToString()); |
||||||
|
|
||||||
|
iter->Seek("1000000000foo000"); |
||||||
|
ASSERT_TRUE(iter->Valid()); |
||||||
|
ASSERT_EQ("1000000000foo001", iter->key().ToString()); |
||||||
|
ASSERT_EQ("v1", iter->value().ToString()); |
||||||
|
|
||||||
|
iter->Seek("1000000000foo005"); |
||||||
|
ASSERT_TRUE(iter->Valid()); |
||||||
|
ASSERT_EQ("1000000000foo005", iter->key().ToString()); |
||||||
|
ASSERT_EQ("v__5", iter->value().ToString()); |
||||||
|
|
||||||
|
iter->Seek("1000000000foo006"); |
||||||
|
ASSERT_TRUE(iter->Valid()); |
||||||
|
ASSERT_EQ("1000000000foo007", iter->key().ToString()); |
||||||
|
ASSERT_EQ("v__7", iter->value().ToString()); |
||||||
|
|
||||||
|
iter->Seek("1000000000foo008"); |
||||||
|
ASSERT_TRUE(iter->Valid()); |
||||||
|
ASSERT_EQ("1000000000foo008", iter->key().ToString()); |
||||||
|
ASSERT_EQ("v__8", iter->value().ToString()); |
||||||
|
|
||||||
|
iter->Seek("1000000000foo009"); |
||||||
|
ASSERT_TRUE(iter->Valid()); |
||||||
|
ASSERT_EQ("3000000000000bar", iter->key().ToString()); |
||||||
|
|
||||||
|
|
||||||
|
delete iter; |
||||||
|
} |
||||||
|
|
||||||
|
TEST(PlainTableDBTest, Flush2) { |
||||||
|
ASSERT_OK(Put("0000000000000bar", "b")); |
||||||
|
ASSERT_OK(Put("1000000000000foo", "v1")); |
||||||
|
dbfull()->TEST_FlushMemTable(); |
||||||
|
|
||||||
|
ASSERT_OK(Put("1000000000000foo", "v2")); |
||||||
|
dbfull()->TEST_FlushMemTable(); |
||||||
|
ASSERT_EQ("v2", Get("1000000000000foo")); |
||||||
|
|
||||||
|
ASSERT_OK(Put("0000000000000eee", "v3")); |
||||||
|
dbfull()->TEST_FlushMemTable(); |
||||||
|
ASSERT_EQ("v3", Get("0000000000000eee")); |
||||||
|
|
||||||
|
ASSERT_OK(Delete("0000000000000bar")); |
||||||
|
dbfull()->TEST_FlushMemTable(); |
||||||
|
ASSERT_EQ("NOT_FOUND", Get("0000000000000bar")); |
||||||
|
|
||||||
|
ASSERT_OK(Put("0000000000000eee", "v5")); |
||||||
|
dbfull()->TEST_FlushMemTable(); |
||||||
|
ASSERT_EQ("v5", Get("0000000000000eee")); |
||||||
|
} |
||||||
|
|
||||||
|
static std::string Key(int i) { |
||||||
|
char buf[100]; |
||||||
|
snprintf(buf, sizeof(buf), "key_______%06d", i); |
||||||
|
return std::string(buf); |
||||||
|
} |
||||||
|
|
||||||
|
static std::string RandomString(Random* rnd, int len) { |
||||||
|
std::string r; |
||||||
|
test::RandomString(rnd, len, &r); |
||||||
|
return r; |
||||||
|
} |
||||||
|
|
||||||
|
TEST(PlainTableDBTest, CompactionTrigger) { |
||||||
|
Options options = CurrentOptions(); |
||||||
|
options.write_buffer_size = 100 << 10; //100KB
|
||||||
|
options.num_levels = 3; |
||||||
|
options.max_mem_compaction_level = 0; |
||||||
|
options.level0_file_num_compaction_trigger = 3; |
||||||
|
Reopen(&options); |
||||||
|
|
||||||
|
Random rnd(301); |
||||||
|
|
||||||
|
for (int num = 0; num < options.level0_file_num_compaction_trigger - 1; |
||||||
|
num++) { |
||||||
|
std::vector<std::string> values; |
||||||
|
// Write 120KB (12 values, each 10K)
|
||||||
|
for (int i = 0; i < 12; i++) { |
||||||
|
values.push_back(RandomString(&rnd, 10000)); |
||||||
|
ASSERT_OK(Put(Key(i), values[i])); |
||||||
|
} |
||||||
|
dbfull()->TEST_WaitForFlushMemTable(); |
||||||
|
ASSERT_EQ(NumTableFilesAtLevel(0), num + 1); |
||||||
|
} |
||||||
|
|
||||||
|
//generate one more file in level-0, and should trigger level-0 compaction
|
||||||
|
std::vector<std::string> values; |
||||||
|
for (int i = 0; i < 12; i++) { |
||||||
|
values.push_back(RandomString(&rnd, 10000)); |
||||||
|
ASSERT_OK(Put(Key(i), values[i])); |
||||||
|
} |
||||||
|
dbfull()->TEST_WaitForCompact(); |
||||||
|
|
||||||
|
ASSERT_EQ(NumTableFilesAtLevel(0), 0); |
||||||
|
ASSERT_EQ(NumTableFilesAtLevel(1), 1); |
||||||
|
} |
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
int main(int argc, char** argv) { |
||||||
|
return rocksdb::test::RunAllTests(); |
||||||
|
} |
@ -1,45 +0,0 @@ |
|||||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
||||||
// This source code is licensed under the BSD-style license found in the
|
|
||||||
// LICENSE file in the root directory of this source tree. An additional grant
|
|
||||||
// of patent rights can be found in the PATENTS file in the same directory.
|
|
||||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style license that can be
|
|
||||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
||||||
//
|
|
||||||
// Arena class defines memory allocation methods. It's used by memtable and
|
|
||||||
// skiplist.
|
|
||||||
|
|
||||||
#ifndef STORAGE_ROCKSDB_INCLUDE_ARENA_H_ |
|
||||||
#define STORAGE_ROCKSDB_INCLUDE_ARENA_H_ |
|
||||||
|
|
||||||
#include <limits> |
|
||||||
#include <memory> |
|
||||||
|
|
||||||
namespace rocksdb { |
|
||||||
|
|
||||||
class Arena { |
|
||||||
public: |
|
||||||
Arena() {}; |
|
||||||
virtual ~Arena() {}; |
|
||||||
|
|
||||||
// Return a pointer to a newly allocated memory block of "bytes" bytes.
|
|
||||||
virtual char* Allocate(size_t bytes) = 0; |
|
||||||
|
|
||||||
// Allocate memory with the normal alignment guarantees provided by malloc.
|
|
||||||
virtual char* AllocateAligned(size_t bytes) = 0; |
|
||||||
|
|
||||||
// Returns an estimate of the total memory used by arena.
|
|
||||||
virtual const size_t ApproximateMemoryUsage() = 0; |
|
||||||
|
|
||||||
// Returns the total number of bytes in all blocks allocated so far.
|
|
||||||
virtual const size_t MemoryAllocatedBytes() = 0; |
|
||||||
|
|
||||||
private: |
|
||||||
// No copying allowed
|
|
||||||
Arena(const Arena&); |
|
||||||
void operator=(const Arena&); |
|
||||||
}; |
|
||||||
|
|
||||||
} // namespace rocksdb
|
|
||||||
|
|
||||||
#endif // STORAGE_ROCKSDB_INCLUDE_ARENA_H_
|
|
@ -1,31 +0,0 @@ |
|||||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
||||||
// This source code is licensed under the BSD-style license found in the
|
|
||||||
// LICENSE file in the root directory of this source tree. An additional grant
|
|
||||||
// of patent rights can be found in the PATENTS file in the same directory.
|
|
||||||
|
|
||||||
#pragma once |
|
||||||
#include <memory> |
|
||||||
|
|
||||||
namespace rocksdb { |
|
||||||
|
|
||||||
class FlushBlockPolicyFactory; |
|
||||||
|
|
||||||
struct BlockBasedTableOptions { |
|
||||||
// @flush_block_policy_factory creates the instances of flush block policy.
|
|
||||||
// which provides a configurable way to determine when to flush a block in
|
|
||||||
// the block based tables. If not set, table builder will use the default
|
|
||||||
// block flush policy, which cut blocks by block size (please refer to
|
|
||||||
// `FlushBlockBySizePolicy`).
|
|
||||||
std::shared_ptr<FlushBlockPolicyFactory> flush_block_policy_factory; |
|
||||||
|
|
||||||
// TODO(kailiu) Temporarily disable this feature by making the default value
|
|
||||||
// to be false. Also in master branch, this file is non-public so no user
|
|
||||||
// will be able to change the value of `cache_index_and_filter_blocks`.
|
|
||||||
//
|
|
||||||
// Indicating if we'd put index/filter blocks to the block cache.
|
|
||||||
// If not specified, each "table reader" object will pre-load index/filter
|
|
||||||
// block during table initialization.
|
|
||||||
bool cache_index_and_filter_blocks = false; |
|
||||||
}; |
|
||||||
|
|
||||||
} // namespace rocksdb
|
|
@ -0,0 +1,286 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#include "table/meta_blocks.h" |
||||||
|
|
||||||
|
#include <map> |
||||||
|
|
||||||
|
#include "rocksdb/table.h" |
||||||
|
#include "table/block.h" |
||||||
|
#include "table/format.h" |
||||||
|
#include "util/coding.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
MetaIndexBuilder::MetaIndexBuilder() |
||||||
|
: meta_index_block_( |
||||||
|
new BlockBuilder(1 /* restart interval */, BytewiseComparator())) { |
||||||
|
} |
||||||
|
|
||||||
|
void MetaIndexBuilder::Add(const std::string& key, |
||||||
|
const BlockHandle& handle) { |
||||||
|
std::string handle_encoding; |
||||||
|
handle.EncodeTo(&handle_encoding); |
||||||
|
meta_block_handles_.insert({key, handle_encoding}); |
||||||
|
} |
||||||
|
|
||||||
|
Slice MetaIndexBuilder::Finish() { |
||||||
|
for (const auto& metablock : meta_block_handles_) { |
||||||
|
meta_index_block_->Add(metablock.first, metablock.second); |
||||||
|
} |
||||||
|
return meta_index_block_->Finish(); |
||||||
|
} |
||||||
|
|
||||||
|
PropertyBlockBuilder::PropertyBlockBuilder() |
||||||
|
: properties_block_( |
||||||
|
new BlockBuilder(1 /* restart interval */, BytewiseComparator())) { |
||||||
|
} |
||||||
|
|
||||||
|
void PropertyBlockBuilder::Add(const std::string& name, |
||||||
|
const std::string& val) { |
||||||
|
props_.insert({name, val}); |
||||||
|
} |
||||||
|
|
||||||
|
void PropertyBlockBuilder::Add(const std::string& name, uint64_t val) { |
||||||
|
assert(props_.find(name) == props_.end()); |
||||||
|
|
||||||
|
std::string dst; |
||||||
|
PutVarint64(&dst, val); |
||||||
|
|
||||||
|
Add(name, dst); |
||||||
|
} |
||||||
|
|
||||||
|
void PropertyBlockBuilder::Add( |
||||||
|
const UserCollectedProperties& user_collected_properties) { |
||||||
|
for (const auto& prop : user_collected_properties) { |
||||||
|
Add(prop.first, prop.second); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) { |
||||||
|
Add(TablePropertiesNames::kRawKeySize, props.raw_key_size); |
||||||
|
Add(TablePropertiesNames::kRawValueSize, props.raw_value_size); |
||||||
|
Add(TablePropertiesNames::kDataSize, props.data_size); |
||||||
|
Add(TablePropertiesNames::kIndexSize, props.index_size); |
||||||
|
Add(TablePropertiesNames::kNumEntries, props.num_entries); |
||||||
|
Add(TablePropertiesNames::kNumDataBlocks, props.num_data_blocks); |
||||||
|
Add(TablePropertiesNames::kFilterSize, props.filter_size); |
||||||
|
Add(TablePropertiesNames::kFormatVersion, props.format_version); |
||||||
|
Add(TablePropertiesNames::kFixedKeyLen, props.fixed_key_len); |
||||||
|
|
||||||
|
if (!props.filter_policy_name.empty()) { |
||||||
|
Add(TablePropertiesNames::kFilterPolicy, |
||||||
|
props.filter_policy_name); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
Slice PropertyBlockBuilder::Finish() { |
||||||
|
for (const auto& prop : props_) { |
||||||
|
properties_block_->Add(prop.first, prop.second); |
||||||
|
} |
||||||
|
|
||||||
|
return properties_block_->Finish(); |
||||||
|
} |
||||||
|
|
||||||
|
void LogPropertiesCollectionError( |
||||||
|
Logger* info_log, const std::string& method, const std::string& name) { |
||||||
|
assert(method == "Add" || method == "Finish"); |
||||||
|
|
||||||
|
std::string msg = |
||||||
|
"[Warning] encountered error when calling TablePropertiesCollector::" + |
||||||
|
method + "() with collector name: " + name; |
||||||
|
Log(info_log, "%s", msg.c_str()); |
||||||
|
} |
||||||
|
|
||||||
|
bool NotifyCollectTableCollectorsOnAdd( |
||||||
|
const Slice& key, |
||||||
|
const Slice& value, |
||||||
|
const Options::TablePropertiesCollectors& collectors, |
||||||
|
Logger* info_log) { |
||||||
|
bool all_succeeded = true; |
||||||
|
for (auto collector : collectors) { |
||||||
|
Status s = collector->Add(key, value); |
||||||
|
all_succeeded = all_succeeded && s.ok(); |
||||||
|
if (!s.ok()) { |
||||||
|
LogPropertiesCollectionError( |
||||||
|
info_log, "Add", /* method */ collector->Name() |
||||||
|
); |
||||||
|
} |
||||||
|
} |
||||||
|
return all_succeeded; |
||||||
|
} |
||||||
|
|
||||||
|
bool NotifyCollectTableCollectorsOnFinish( |
||||||
|
const Options::TablePropertiesCollectors& collectors, |
||||||
|
Logger* info_log, |
||||||
|
PropertyBlockBuilder* builder) { |
||||||
|
bool all_succeeded = true; |
||||||
|
for (auto collector : collectors) { |
||||||
|
UserCollectedProperties user_collected_properties; |
||||||
|
Status s = collector->Finish(&user_collected_properties); |
||||||
|
|
||||||
|
all_succeeded = all_succeeded && s.ok(); |
||||||
|
if (!s.ok()) { |
||||||
|
LogPropertiesCollectionError( |
||||||
|
info_log, "Finish", /* method */ collector->Name() |
||||||
|
); |
||||||
|
} else { |
||||||
|
builder->Add(user_collected_properties); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return all_succeeded; |
||||||
|
} |
||||||
|
|
||||||
|
Status ReadProperties( |
||||||
|
const Slice& handle_value, |
||||||
|
RandomAccessFile* file, |
||||||
|
Env* env, |
||||||
|
Logger* logger, |
||||||
|
TableProperties* table_properties) { |
||||||
|
assert(table_properties); |
||||||
|
|
||||||
|
Slice v = handle_value; |
||||||
|
BlockHandle handle; |
||||||
|
if (!handle.DecodeFrom(&v).ok()) { |
||||||
|
return Status::InvalidArgument("Failed to decode properties block handle"); |
||||||
|
} |
||||||
|
|
||||||
|
BlockContents block_contents; |
||||||
|
ReadOptions read_options; |
||||||
|
read_options.verify_checksums = false; |
||||||
|
Status s = ReadBlockContents( |
||||||
|
file, |
||||||
|
read_options, |
||||||
|
handle, |
||||||
|
&block_contents, |
||||||
|
env, |
||||||
|
false |
||||||
|
); |
||||||
|
|
||||||
|
if (!s.ok()) { |
||||||
|
return s; |
||||||
|
} |
||||||
|
|
||||||
|
Block properties_block(block_contents); |
||||||
|
std::unique_ptr<Iterator> iter( |
||||||
|
properties_block.NewIterator(BytewiseComparator()) |
||||||
|
); |
||||||
|
|
||||||
|
// All pre-defined properties of type uint64_t
|
||||||
|
std::unordered_map<std::string, uint64_t*> predefined_uint64_properties = { |
||||||
|
{ TablePropertiesNames::kDataSize, &table_properties->data_size }, |
||||||
|
{ TablePropertiesNames::kIndexSize, &table_properties->index_size }, |
||||||
|
{ TablePropertiesNames::kFilterSize, &table_properties->filter_size }, |
||||||
|
{ TablePropertiesNames::kRawKeySize, &table_properties->raw_key_size }, |
||||||
|
{ TablePropertiesNames::kRawValueSize, &table_properties->raw_value_size }, |
||||||
|
{ TablePropertiesNames::kNumDataBlocks, |
||||||
|
&table_properties->num_data_blocks }, |
||||||
|
{ TablePropertiesNames::kNumEntries, &table_properties->num_entries }, |
||||||
|
{ TablePropertiesNames::kFormatVersion, &table_properties->format_version }, |
||||||
|
{ TablePropertiesNames::kFixedKeyLen, &table_properties->fixed_key_len }, |
||||||
|
}; |
||||||
|
|
||||||
|
std::string last_key; |
||||||
|
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { |
||||||
|
s = iter->status(); |
||||||
|
if (!s.ok()) { |
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
auto key = iter->key().ToString(); |
||||||
|
// properties block is strictly sorted with no duplicate key.
|
||||||
|
assert( |
||||||
|
last_key.empty() || |
||||||
|
BytewiseComparator()->Compare(key, last_key) > 0 |
||||||
|
); |
||||||
|
last_key = key; |
||||||
|
|
||||||
|
auto raw_val = iter->value(); |
||||||
|
auto pos = predefined_uint64_properties.find(key); |
||||||
|
|
||||||
|
if (pos != predefined_uint64_properties.end()) { |
||||||
|
// handle predefined rocksdb properties
|
||||||
|
uint64_t val; |
||||||
|
if (!GetVarint64(&raw_val, &val)) { |
||||||
|
// skip malformed value
|
||||||
|
auto error_msg = |
||||||
|
"[Warning] detect malformed value in properties meta-block:" |
||||||
|
"\tkey: " + key + "\tval: " + raw_val.ToString(); |
||||||
|
Log(logger, "%s", error_msg.c_str()); |
||||||
|
continue; |
||||||
|
} |
||||||
|
*(pos->second) = val; |
||||||
|
} else if (key == TablePropertiesNames::kFilterPolicy) { |
||||||
|
table_properties->filter_policy_name = raw_val.ToString(); |
||||||
|
} else { |
||||||
|
// handle user-collected properties
|
||||||
|
table_properties->user_collected_properties.insert( |
||||||
|
std::make_pair(key, raw_val.ToString()) |
||||||
|
); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return s; |
||||||
|
} |
||||||
|
|
||||||
|
Status ReadTableProperties( |
||||||
|
RandomAccessFile* file, |
||||||
|
uint64_t file_size, |
||||||
|
uint64_t table_magic_number, |
||||||
|
Env* env, |
||||||
|
Logger* info_log, |
||||||
|
TableProperties* properties) { |
||||||
|
// -- Read metaindex block
|
||||||
|
Footer footer(table_magic_number); |
||||||
|
auto s = ReadFooterFromFile(file, file_size, &footer); |
||||||
|
if (!s.ok()) { |
||||||
|
return s; |
||||||
|
} |
||||||
|
|
||||||
|
auto metaindex_handle = footer.metaindex_handle(); |
||||||
|
BlockContents metaindex_contents; |
||||||
|
ReadOptions read_options; |
||||||
|
read_options.verify_checksums = false; |
||||||
|
s = ReadBlockContents( |
||||||
|
file, |
||||||
|
read_options, |
||||||
|
metaindex_handle, |
||||||
|
&metaindex_contents, |
||||||
|
env, |
||||||
|
false |
||||||
|
); |
||||||
|
if (!s.ok()) { |
||||||
|
return s; |
||||||
|
} |
||||||
|
Block metaindex_block(metaindex_contents); |
||||||
|
std::unique_ptr<Iterator> meta_iter( |
||||||
|
metaindex_block.NewIterator(BytewiseComparator()) |
||||||
|
); |
||||||
|
|
||||||
|
// -- Read property block
|
||||||
|
meta_iter->Seek(kPropertiesBlock); |
||||||
|
TableProperties table_properties; |
||||||
|
if (meta_iter->Valid() && |
||||||
|
meta_iter->key() == kPropertiesBlock && |
||||||
|
meta_iter->status().ok()) { |
||||||
|
s = ReadProperties( |
||||||
|
meta_iter->value(), |
||||||
|
file, |
||||||
|
env, |
||||||
|
info_log, |
||||||
|
properties |
||||||
|
); |
||||||
|
} else { |
||||||
|
s = Status::Corruption( |
||||||
|
"Unable to read the property block from the plain table" |
||||||
|
); |
||||||
|
} |
||||||
|
|
||||||
|
return s; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,121 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#include <map> |
||||||
|
#include <memory> |
||||||
|
#include <string> |
||||||
|
|
||||||
|
#include "rocksdb/comparator.h" |
||||||
|
#include "rocksdb/options.h" |
||||||
|
#include "rocksdb/slice.h" |
||||||
|
#include "rocksdb/table_properties.h" |
||||||
|
#include "table/block_builder.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class BlockBuilder; |
||||||
|
class BlockHandle; |
||||||
|
class Env; |
||||||
|
class Logger; |
||||||
|
class RandomAccessFile; |
||||||
|
struct TableProperties; |
||||||
|
|
||||||
|
// An STL style comparator that does the bytewise comparator comparasion
|
||||||
|
// internally.
|
||||||
|
struct BytewiseLessThan { |
||||||
|
bool operator()(const std::string& key1, const std::string& key2) const { |
||||||
|
// smaller entries will be placed in front.
|
||||||
|
return comparator->Compare(key1, key2) <= 0; |
||||||
|
} |
||||||
|
|
||||||
|
const Comparator* comparator = BytewiseComparator(); |
||||||
|
}; |
||||||
|
|
||||||
|
// When writing to a block that requires entries to be sorted by
|
||||||
|
// `BytewiseComparator`, we can buffer the content to `BytewiseSortedMap`
|
||||||
|
// before writng to store.
|
||||||
|
typedef std::map<std::string, std::string, BytewiseLessThan> BytewiseSortedMap; |
||||||
|
|
||||||
|
class MetaIndexBuilder { |
||||||
|
public: |
||||||
|
MetaIndexBuilder(const MetaIndexBuilder&) = delete; |
||||||
|
MetaIndexBuilder& operator=(const MetaIndexBuilder&) = delete; |
||||||
|
|
||||||
|
MetaIndexBuilder(); |
||||||
|
void Add(const std::string& key, const BlockHandle& handle); |
||||||
|
|
||||||
|
// Write all the added key/value pairs to the block and return the contents
|
||||||
|
// of the block.
|
||||||
|
Slice Finish(); |
||||||
|
|
||||||
|
private: |
||||||
|
// store the sorted key/handle of the metablocks.
|
||||||
|
BytewiseSortedMap meta_block_handles_; |
||||||
|
std::unique_ptr<BlockBuilder> meta_index_block_; |
||||||
|
}; |
||||||
|
|
||||||
|
class PropertyBlockBuilder { |
||||||
|
public: |
||||||
|
PropertyBlockBuilder(const PropertyBlockBuilder&) = delete; |
||||||
|
PropertyBlockBuilder& operator=(const PropertyBlockBuilder&) = delete; |
||||||
|
|
||||||
|
PropertyBlockBuilder(); |
||||||
|
|
||||||
|
void AddTableProperty(const TableProperties& props); |
||||||
|
void Add(const std::string& key, uint64_t value); |
||||||
|
void Add(const std::string& key, const std::string& value); |
||||||
|
void Add(const UserCollectedProperties& user_collected_properties); |
||||||
|
|
||||||
|
// Write all the added entries to the block and return the block contents
|
||||||
|
Slice Finish(); |
||||||
|
|
||||||
|
private: |
||||||
|
std::unique_ptr<BlockBuilder> properties_block_; |
||||||
|
BytewiseSortedMap props_; |
||||||
|
}; |
||||||
|
|
||||||
|
// Were we encounter any error occurs during user-defined statistics collection,
|
||||||
|
// we'll write the warning message to info log.
|
||||||
|
void LogPropertiesCollectionError( |
||||||
|
Logger* info_log, const std::string& method, const std::string& name); |
||||||
|
|
||||||
|
// Utility functions help table builder to trigger batch events for user
|
||||||
|
// defined property collectors.
|
||||||
|
// Return value indicates if there is any error occurred; if error occurred,
|
||||||
|
// the warning message will be logged.
|
||||||
|
// NotifyCollectTableCollectorsOnAdd() triggers the `Add` event for all
|
||||||
|
// property collectors.
|
||||||
|
bool NotifyCollectTableCollectorsOnAdd( |
||||||
|
const Slice& key, |
||||||
|
const Slice& value, |
||||||
|
const Options::TablePropertiesCollectors& collectors, |
||||||
|
Logger* info_log); |
||||||
|
|
||||||
|
// NotifyCollectTableCollectorsOnAdd() triggers the `Finish` event for all
|
||||||
|
// property collectors. The collected properties will be added to `builder`.
|
||||||
|
bool NotifyCollectTableCollectorsOnFinish( |
||||||
|
const Options::TablePropertiesCollectors& collectors, |
||||||
|
Logger* info_log, |
||||||
|
PropertyBlockBuilder* builder); |
||||||
|
|
||||||
|
// Read the properties from the table.
|
||||||
|
Status ReadProperties( |
||||||
|
const Slice& handle_value, |
||||||
|
RandomAccessFile* file, |
||||||
|
Env* env, |
||||||
|
Logger* logger, |
||||||
|
TableProperties* table_properties); |
||||||
|
|
||||||
|
// Directly read the properties from the properties block of a plain table.
|
||||||
|
Status ReadTableProperties( |
||||||
|
RandomAccessFile* file, |
||||||
|
uint64_t file_size, |
||||||
|
uint64_t table_magic_number, |
||||||
|
Env* env, |
||||||
|
Logger* info_log, |
||||||
|
TableProperties* properties); |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,198 @@ |
|||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#include "table/plain_table_builder.h" |
||||||
|
|
||||||
|
#include <assert.h> |
||||||
|
#include <map> |
||||||
|
|
||||||
|
#include "rocksdb/comparator.h" |
||||||
|
#include "rocksdb/env.h" |
||||||
|
#include "rocksdb/filter_policy.h" |
||||||
|
#include "rocksdb/options.h" |
||||||
|
#include "table/plain_table_factory.h" |
||||||
|
#include "db/dbformat.h" |
||||||
|
#include "table/block_builder.h" |
||||||
|
#include "table/filter_block.h" |
||||||
|
#include "table/format.h" |
||||||
|
#include "table/meta_blocks.h" |
||||||
|
#include "util/coding.h" |
||||||
|
#include "util/crc32c.h" |
||||||
|
#include "util/stop_watch.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
namespace { |
||||||
|
|
||||||
|
// a utility that helps writing block content to the file
|
||||||
|
// @offset will advance if @block_contents was successfully written.
|
||||||
|
// @block_handle the block handle this particular block.
|
||||||
|
Status WriteBlock( |
||||||
|
const Slice& block_contents, |
||||||
|
WritableFile* file, |
||||||
|
uint64_t* offset, |
||||||
|
BlockHandle* block_handle) { |
||||||
|
block_handle->set_offset(*offset); |
||||||
|
block_handle->set_size(block_contents.size()); |
||||||
|
Status s = file->Append(block_contents); |
||||||
|
|
||||||
|
if (s.ok()) { |
||||||
|
*offset += block_contents.size(); |
||||||
|
} |
||||||
|
return s; |
||||||
|
} |
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// kPlainTableMagicNumber was picked by running
|
||||||
|
// echo rocksdb.plain.table | sha1sum
|
||||||
|
// and taking the leading 64 bits.
|
||||||
|
extern const uint64_t kPlainTableMagicNumber = 0x4f3418eb7a8f13b8ull; |
||||||
|
|
||||||
|
PlainTableBuilder::PlainTableBuilder(const Options& options, |
||||||
|
WritableFile* file, |
||||||
|
uint32_t user_key_len) : |
||||||
|
options_(options), file_(file), user_key_len_(user_key_len) { |
||||||
|
properties_.fixed_key_len = user_key_len; |
||||||
|
|
||||||
|
// for plain table, we put all the data in a big chuck.
|
||||||
|
properties_.num_data_blocks = 1; |
||||||
|
// emphasize that currently plain table doesn't have persistent index or
|
||||||
|
// filter block.
|
||||||
|
properties_.index_size = 0; |
||||||
|
properties_.filter_size = 0; |
||||||
|
properties_.format_version = 0; |
||||||
|
} |
||||||
|
|
||||||
|
PlainTableBuilder::~PlainTableBuilder() { |
||||||
|
} |
||||||
|
|
||||||
|
void PlainTableBuilder::Add(const Slice& key, const Slice& value) { |
||||||
|
size_t user_key_size = key.size() - 8; |
||||||
|
assert(user_key_len_ == 0 || user_key_size == user_key_len_); |
||||||
|
|
||||||
|
if (!IsFixedLength()) { |
||||||
|
// Write key length
|
||||||
|
key_size_str_.clear(); |
||||||
|
PutVarint32(&key_size_str_, user_key_size); |
||||||
|
file_->Append(key_size_str_); |
||||||
|
offset_ += key_size_str_.length(); |
||||||
|
} |
||||||
|
|
||||||
|
// Write key
|
||||||
|
ParsedInternalKey parsed_key; |
||||||
|
if (!ParseInternalKey(key, &parsed_key)) { |
||||||
|
status_ = Status::Corruption(Slice()); |
||||||
|
return; |
||||||
|
} |
||||||
|
if (parsed_key.sequence == 0 && parsed_key.type == kTypeValue) { |
||||||
|
file_->Append(Slice(key.data(), user_key_size)); |
||||||
|
char tmp_char = PlainTableFactory::kValueTypeSeqId0; |
||||||
|
file_->Append(Slice(&tmp_char, 1)); |
||||||
|
offset_ += key.size() - 7; |
||||||
|
} else { |
||||||
|
file_->Append(key); |
||||||
|
offset_ += key.size(); |
||||||
|
} |
||||||
|
|
||||||
|
// Write value length
|
||||||
|
value_size_str_.clear(); |
||||||
|
int value_size = value.size(); |
||||||
|
PutVarint32(&value_size_str_, value_size); |
||||||
|
file_->Append(value_size_str_); |
||||||
|
|
||||||
|
// Write value
|
||||||
|
file_->Append(value); |
||||||
|
offset_ += value_size + value_size_str_.length(); |
||||||
|
|
||||||
|
properties_.num_entries++; |
||||||
|
properties_.raw_key_size += key.size(); |
||||||
|
properties_.raw_value_size += value.size(); |
||||||
|
|
||||||
|
// notify property collectors
|
||||||
|
NotifyCollectTableCollectorsOnAdd( |
||||||
|
key, |
||||||
|
value, |
||||||
|
options_.table_properties_collectors, |
||||||
|
options_.info_log.get() |
||||||
|
); |
||||||
|
} |
||||||
|
|
||||||
|
Status PlainTableBuilder::status() const { return status_; } |
||||||
|
|
||||||
|
Status PlainTableBuilder::Finish() { |
||||||
|
assert(!closed_); |
||||||
|
closed_ = true; |
||||||
|
|
||||||
|
properties_.data_size = offset_; |
||||||
|
|
||||||
|
// Write the following blocks
|
||||||
|
// 1. [meta block: properties]
|
||||||
|
// 2. [metaindex block]
|
||||||
|
// 3. [footer]
|
||||||
|
MetaIndexBuilder meta_index_builer; |
||||||
|
|
||||||
|
PropertyBlockBuilder property_block_builder; |
||||||
|
// -- Add basic properties
|
||||||
|
property_block_builder.AddTableProperty(properties_); |
||||||
|
|
||||||
|
// -- Add user collected properties
|
||||||
|
NotifyCollectTableCollectorsOnFinish( |
||||||
|
options_.table_properties_collectors, |
||||||
|
options_.info_log.get(), |
||||||
|
&property_block_builder |
||||||
|
); |
||||||
|
|
||||||
|
// -- Write property block
|
||||||
|
BlockHandle property_block_handle; |
||||||
|
auto s = WriteBlock( |
||||||
|
property_block_builder.Finish(), |
||||||
|
file_, |
||||||
|
&offset_, |
||||||
|
&property_block_handle |
||||||
|
); |
||||||
|
if (!s.ok()) { |
||||||
|
return s; |
||||||
|
} |
||||||
|
meta_index_builer.Add(kPropertiesBlock, property_block_handle); |
||||||
|
|
||||||
|
// -- write metaindex block
|
||||||
|
BlockHandle metaindex_block_handle; |
||||||
|
s = WriteBlock( |
||||||
|
meta_index_builer.Finish(), |
||||||
|
file_, |
||||||
|
&offset_, |
||||||
|
&metaindex_block_handle |
||||||
|
); |
||||||
|
if (!s.ok()) { |
||||||
|
return s; |
||||||
|
} |
||||||
|
|
||||||
|
// Write Footer
|
||||||
|
Footer footer(kPlainTableMagicNumber); |
||||||
|
footer.set_metaindex_handle(metaindex_block_handle); |
||||||
|
footer.set_index_handle(BlockHandle::NullBlockHandle()); |
||||||
|
std::string footer_encoding; |
||||||
|
footer.EncodeTo(&footer_encoding); |
||||||
|
s = file_->Append(footer_encoding); |
||||||
|
if (s.ok()) { |
||||||
|
offset_ += footer_encoding.size(); |
||||||
|
} |
||||||
|
|
||||||
|
return s; |
||||||
|
} |
||||||
|
|
||||||
|
void PlainTableBuilder::Abandon() { |
||||||
|
closed_ = true; |
||||||
|
} |
||||||
|
|
||||||
|
uint64_t PlainTableBuilder::NumEntries() const { |
||||||
|
return properties_.num_entries; |
||||||
|
} |
||||||
|
|
||||||
|
uint64_t PlainTableBuilder::FileSize() const { |
||||||
|
return offset_; |
||||||
|
} |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,85 @@ |
|||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
//
|
||||||
|
// IndexedTable is a simple table format for UNIT TEST ONLY. It is not built
|
||||||
|
// as production quality.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
#include <stdint.h> |
||||||
|
#include "rocksdb/options.h" |
||||||
|
#include "rocksdb/status.h" |
||||||
|
#include "table/table_builder.h" |
||||||
|
#include "rocksdb/table_properties.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class BlockBuilder; |
||||||
|
class BlockHandle; |
||||||
|
class WritableFile; |
||||||
|
class TableBuilder; |
||||||
|
|
||||||
|
class PlainTableBuilder: public TableBuilder { |
||||||
|
public: |
||||||
|
// Create a builder that will store the contents of the table it is
|
||||||
|
// building in *file. Does not close the file. It is up to the
|
||||||
|
// caller to close the file after calling Finish(). The output file
|
||||||
|
// will be part of level specified by 'level'. A value of -1 means
|
||||||
|
// that the caller does not know which level the output file will reside.
|
||||||
|
PlainTableBuilder(const Options& options, WritableFile* file, |
||||||
|
uint32_t user_key_size); |
||||||
|
|
||||||
|
// REQUIRES: Either Finish() or Abandon() has been called.
|
||||||
|
~PlainTableBuilder(); |
||||||
|
|
||||||
|
// Add key,value to the table being constructed.
|
||||||
|
// REQUIRES: key is after any previously added key according to comparator.
|
||||||
|
// REQUIRES: Finish(), Abandon() have not been called
|
||||||
|
void Add(const Slice& key, const Slice& value) override; |
||||||
|
|
||||||
|
// Return non-ok iff some error has been detected.
|
||||||
|
Status status() const override; |
||||||
|
|
||||||
|
// Finish building the table. Stops using the file passed to the
|
||||||
|
// constructor after this function returns.
|
||||||
|
// REQUIRES: Finish(), Abandon() have not been called
|
||||||
|
Status Finish() override; |
||||||
|
|
||||||
|
// Indicate that the contents of this builder should be abandoned. Stops
|
||||||
|
// using the file passed to the constructor after this function returns.
|
||||||
|
// If the caller is not going to call Finish(), it must call Abandon()
|
||||||
|
// before destroying this builder.
|
||||||
|
// REQUIRES: Finish(), Abandon() have not been called
|
||||||
|
void Abandon() override; |
||||||
|
|
||||||
|
// Number of calls to Add() so far.
|
||||||
|
uint64_t NumEntries() const override; |
||||||
|
|
||||||
|
// Size of the file generated so far. If invoked after a successful
|
||||||
|
// Finish() call, returns the size of the final generated file.
|
||||||
|
uint64_t FileSize() const override; |
||||||
|
|
||||||
|
private: |
||||||
|
Options options_; |
||||||
|
WritableFile* file_; |
||||||
|
uint64_t offset_ = 0; |
||||||
|
Status status_; |
||||||
|
TableProperties properties_; |
||||||
|
|
||||||
|
const size_t user_key_len_; |
||||||
|
bool closed_ = false; // Either Finish() or Abandon() has been called.
|
||||||
|
|
||||||
|
std::string key_size_str_; |
||||||
|
std::string value_size_str_; |
||||||
|
|
||||||
|
bool IsFixedLength() const { |
||||||
|
return user_key_len_ > 0; |
||||||
|
} |
||||||
|
|
||||||
|
// No copying allowed
|
||||||
|
PlainTableBuilder(const PlainTableBuilder&) = delete; |
||||||
|
void operator=(const PlainTableBuilder&) = delete; |
||||||
|
}; |
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
@ -0,0 +1,40 @@ |
|||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#include "table/plain_table_factory.h" |
||||||
|
|
||||||
|
#include <memory> |
||||||
|
#include <stdint.h> |
||||||
|
#include "db/dbformat.h" |
||||||
|
#include "table/plain_table_builder.h" |
||||||
|
#include "table/plain_table_reader.h" |
||||||
|
#include "port/port.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
Status PlainTableFactory::NewTableReader(const Options& options, |
||||||
|
const EnvOptions& soptions, |
||||||
|
const InternalKeyComparator& icomp, |
||||||
|
unique_ptr<RandomAccessFile>&& file, |
||||||
|
uint64_t file_size, |
||||||
|
unique_ptr<TableReader>* table) const { |
||||||
|
return PlainTableReader::Open(options, soptions, icomp, std::move(file), |
||||||
|
file_size, table, bloom_bits_per_key_, |
||||||
|
hash_table_ratio_); |
||||||
|
} |
||||||
|
|
||||||
|
TableBuilder* PlainTableFactory::NewTableBuilder( |
||||||
|
const Options& options, const InternalKeyComparator& internal_comparator, |
||||||
|
WritableFile* file, CompressionType compression_type) const { |
||||||
|
return new PlainTableBuilder(options, file, user_key_len_); |
||||||
|
} |
||||||
|
|
||||||
|
extern TableFactory* NewPlainTableFactory(uint32_t user_key_len, |
||||||
|
int bloom_bits_per_key, |
||||||
|
double hash_table_ratio) { |
||||||
|
return new PlainTableFactory(user_key_len, bloom_bits_per_key, |
||||||
|
hash_table_ratio); |
||||||
|
} |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,76 @@ |
|||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
#include <memory> |
||||||
|
#include <stdint.h> |
||||||
|
|
||||||
|
#include "rocksdb/options.h" |
||||||
|
#include "rocksdb/table.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
struct Options; |
||||||
|
struct EnvOptions; |
||||||
|
|
||||||
|
using std::unique_ptr; |
||||||
|
class Status; |
||||||
|
class RandomAccessFile; |
||||||
|
class WritableFile; |
||||||
|
class Table; |
||||||
|
class TableBuilder; |
||||||
|
|
||||||
|
// IndexedTable requires fixed length key, configured as a constructor
|
||||||
|
// parameter of the factory class. Output file format:
|
||||||
|
// +-------------+-----------------+
|
||||||
|
// | version | user_key_length |
|
||||||
|
// +------------++------------------------------+ <= key1 offset
|
||||||
|
// | [key_size] | key1 | value_size | |
|
||||||
|
// +------------+-------------+-------------+ |
|
||||||
|
// | value1 |
|
||||||
|
// | |
|
||||||
|
// +----------------------------------------+---+ <= key2 offset
|
||||||
|
// | [key_size] | key2 | value_size | |
|
||||||
|
// +------------+-------------+-------------+ |
|
||||||
|
// | value2 |
|
||||||
|
// | |
|
||||||
|
// | ...... |
|
||||||
|
// +-----------------+--------------------------+
|
||||||
|
// If user_key_length = kPlainTableVariableLength, it means the key is variable
|
||||||
|
// length, there will be an extra field for key size encoded before every key.
|
||||||
|
class PlainTableFactory : public TableFactory { |
||||||
|
public: |
||||||
|
~PlainTableFactory() {} |
||||||
|
// user_key_size is the length of the user key. If it is set to be
|
||||||
|
// kPlainTableVariableLength, then it means variable length. Otherwise, all
|
||||||
|
// the keys need to have the fix length of this value. bloom_bits_per_key is
|
||||||
|
// number of bits used for bloom filer per key. hash_table_ratio is
|
||||||
|
// the desired utilization of the hash table used for prefix hashing.
|
||||||
|
// hash_table_ratio = number of prefixes / #buckets in the hash table
|
||||||
|
explicit PlainTableFactory(uint32_t user_key_len = kPlainTableVariableLength, |
||||||
|
int bloom_bits_per_key = 0, |
||||||
|
double hash_table_ratio = 0.75) |
||||||
|
: user_key_len_(user_key_len), |
||||||
|
bloom_bits_per_key_(bloom_bits_per_key), |
||||||
|
hash_table_ratio_(hash_table_ratio) {} |
||||||
|
const char* Name() const override { return "PlainTable"; } |
||||||
|
Status NewTableReader(const Options& options, const EnvOptions& soptions, |
||||||
|
const InternalKeyComparator& internal_comparator, |
||||||
|
unique_ptr<RandomAccessFile>&& file, uint64_t file_size, |
||||||
|
unique_ptr<TableReader>* table) const override; |
||||||
|
TableBuilder* NewTableBuilder(const Options& options, |
||||||
|
const InternalKeyComparator& icomparator, |
||||||
|
WritableFile* file, |
||||||
|
CompressionType compression_type) const |
||||||
|
override; |
||||||
|
|
||||||
|
static const char kValueTypeSeqId0 = 0xFF; |
||||||
|
|
||||||
|
private: |
||||||
|
uint32_t user_key_len_; |
||||||
|
int bloom_bits_per_key_; |
||||||
|
double hash_table_ratio_; |
||||||
|
}; |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,695 @@ |
|||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#include "table/plain_table_reader.h" |
||||||
|
|
||||||
|
#include <string> |
||||||
|
|
||||||
|
#include "db/dbformat.h" |
||||||
|
|
||||||
|
#include "rocksdb/cache.h" |
||||||
|
#include "rocksdb/comparator.h" |
||||||
|
#include "rocksdb/env.h" |
||||||
|
#include "rocksdb/filter_policy.h" |
||||||
|
#include "rocksdb/options.h" |
||||||
|
#include "rocksdb/statistics.h" |
||||||
|
|
||||||
|
#include "table/block.h" |
||||||
|
#include "table/filter_block.h" |
||||||
|
#include "table/format.h" |
||||||
|
#include "table/meta_blocks.h" |
||||||
|
#include "table/two_level_iterator.h" |
||||||
|
#include "table/plain_table_factory.h" |
||||||
|
|
||||||
|
#include "util/coding.h" |
||||||
|
#include "util/dynamic_bloom.h" |
||||||
|
#include "util/hash.h" |
||||||
|
#include "util/histogram.h" |
||||||
|
#include "util/murmurhash.h" |
||||||
|
#include "util/perf_context_imp.h" |
||||||
|
#include "util/stop_watch.h" |
||||||
|
|
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
namespace { |
||||||
|
|
||||||
|
inline uint32_t GetSliceHash(Slice const& s) { |
||||||
|
return Hash(s.data(), s.size(), 397) ; |
||||||
|
} |
||||||
|
|
||||||
|
inline uint32_t GetBucketIdFromHash(uint32_t hash, uint32_t num_buckets) { |
||||||
|
return hash % num_buckets; |
||||||
|
} |
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Iterator to iterate IndexedTable
|
||||||
|
class PlainTableIterator : public Iterator { |
||||||
|
public: |
||||||
|
explicit PlainTableIterator(PlainTableReader* table); |
||||||
|
~PlainTableIterator(); |
||||||
|
|
||||||
|
bool Valid() const; |
||||||
|
|
||||||
|
void SeekToFirst(); |
||||||
|
|
||||||
|
void SeekToLast(); |
||||||
|
|
||||||
|
void Seek(const Slice& target); |
||||||
|
|
||||||
|
void Next(); |
||||||
|
|
||||||
|
void Prev(); |
||||||
|
|
||||||
|
Slice key() const; |
||||||
|
|
||||||
|
Slice value() const; |
||||||
|
|
||||||
|
Status status() const; |
||||||
|
|
||||||
|
private: |
||||||
|
PlainTableReader* table_; |
||||||
|
uint32_t offset_; |
||||||
|
uint32_t next_offset_; |
||||||
|
Slice key_; |
||||||
|
Slice value_; |
||||||
|
Status status_; |
||||||
|
std::string tmp_str_; |
||||||
|
// No copying allowed
|
||||||
|
PlainTableIterator(const PlainTableIterator&) = delete; |
||||||
|
void operator=(const Iterator&) = delete; |
||||||
|
}; |
||||||
|
|
||||||
|
extern const uint64_t kPlainTableMagicNumber; |
||||||
|
PlainTableReader::PlainTableReader(const EnvOptions& storage_options, |
||||||
|
const InternalKeyComparator& icomparator, |
||||||
|
uint64_t file_size, int bloom_bits_per_key, |
||||||
|
double hash_table_ratio, |
||||||
|
const TableProperties& table_properties) |
||||||
|
: soptions_(storage_options), |
||||||
|
internal_comparator_(icomparator), |
||||||
|
file_size_(file_size), |
||||||
|
kHashTableRatio(hash_table_ratio), |
||||||
|
kBloomBitsPerKey(bloom_bits_per_key), |
||||||
|
table_properties_(table_properties), |
||||||
|
data_end_offset_(table_properties_.data_size), |
||||||
|
user_key_len_(table_properties.fixed_key_len) {} |
||||||
|
|
||||||
|
PlainTableReader::~PlainTableReader() { |
||||||
|
delete[] hash_table_; |
||||||
|
delete[] sub_index_; |
||||||
|
delete bloom_; |
||||||
|
} |
||||||
|
|
||||||
|
Status PlainTableReader::Open(const Options& options, |
||||||
|
const EnvOptions& soptions, |
||||||
|
const InternalKeyComparator& internal_comparator, |
||||||
|
unique_ptr<RandomAccessFile>&& file, |
||||||
|
uint64_t file_size, |
||||||
|
unique_ptr<TableReader>* table_reader, |
||||||
|
const int bloom_bits_per_key, |
||||||
|
double hash_table_ratio) { |
||||||
|
assert(options.allow_mmap_reads); |
||||||
|
|
||||||
|
if (file_size > kMaxFileSize) { |
||||||
|
return Status::NotSupported("File is too large for PlainTableReader!"); |
||||||
|
} |
||||||
|
|
||||||
|
TableProperties table_properties; |
||||||
|
auto s = ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber, |
||||||
|
options.env, options.info_log.get(), |
||||||
|
&table_properties); |
||||||
|
if (!s.ok()) { |
||||||
|
return s; |
||||||
|
} |
||||||
|
|
||||||
|
std::unique_ptr<PlainTableReader> new_reader(new PlainTableReader( |
||||||
|
soptions, internal_comparator, file_size, bloom_bits_per_key, |
||||||
|
hash_table_ratio, table_properties)); |
||||||
|
new_reader->file_ = std::move(file); |
||||||
|
new_reader->options_ = options; |
||||||
|
|
||||||
|
// -- Populate Index
|
||||||
|
s = new_reader->PopulateIndex(); |
||||||
|
if (!s.ok()) { |
||||||
|
return s; |
||||||
|
} |
||||||
|
|
||||||
|
*table_reader = std::move(new_reader); |
||||||
|
return s; |
||||||
|
} |
||||||
|
|
||||||
|
void PlainTableReader::SetupForCompaction() { |
||||||
|
} |
||||||
|
|
||||||
|
bool PlainTableReader::PrefixMayMatch(const Slice& internal_prefix) { |
||||||
|
return true; |
||||||
|
} |
||||||
|
|
||||||
|
Iterator* PlainTableReader::NewIterator(const ReadOptions& options) { |
||||||
|
return new PlainTableIterator(this); |
||||||
|
} |
||||||
|
|
||||||
|
struct PlainTableReader::IndexRecord { |
||||||
|
uint32_t hash; // hash of the prefix
|
||||||
|
uint32_t offset; // offset of a row
|
||||||
|
IndexRecord* next; |
||||||
|
}; |
||||||
|
|
||||||
|
// Helper class to track all the index records
|
||||||
|
class PlainTableReader::IndexRecordList { |
||||||
|
public: |
||||||
|
explicit IndexRecordList(size_t num_records_per_group) |
||||||
|
: kNumRecordsPerGroup(num_records_per_group), |
||||||
|
current_group_(nullptr), |
||||||
|
num_records_in_current_group_(num_records_per_group) {} |
||||||
|
|
||||||
|
~IndexRecordList() { |
||||||
|
for (size_t i = 0; i < groups_.size(); i++) { |
||||||
|
delete[] groups_[i]; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
void AddRecord(murmur_t hash, uint32_t offset) { |
||||||
|
if (num_records_in_current_group_ == kNumRecordsPerGroup) { |
||||||
|
current_group_ = AllocateNewGroup(); |
||||||
|
num_records_in_current_group_ = 0; |
||||||
|
} |
||||||
|
auto& new_record = current_group_[num_records_in_current_group_++]; |
||||||
|
new_record.hash = hash; |
||||||
|
new_record.offset = offset; |
||||||
|
new_record.next = nullptr; |
||||||
|
} |
||||||
|
|
||||||
|
size_t GetNumRecords() const { |
||||||
|
return (groups_.size() - 1) * kNumRecordsPerGroup + |
||||||
|
num_records_in_current_group_; |
||||||
|
} |
||||||
|
IndexRecord* At(size_t index) { |
||||||
|
return &(groups_[index / kNumRecordsPerGroup][index % kNumRecordsPerGroup]); |
||||||
|
} |
||||||
|
|
||||||
|
private: |
||||||
|
IndexRecord* AllocateNewGroup() { |
||||||
|
IndexRecord* result = new IndexRecord[kNumRecordsPerGroup]; |
||||||
|
groups_.push_back(result); |
||||||
|
return result; |
||||||
|
} |
||||||
|
|
||||||
|
const size_t kNumRecordsPerGroup; |
||||||
|
IndexRecord* current_group_; |
||||||
|
// List of arrays allocated
|
||||||
|
std::vector<IndexRecord*> groups_; |
||||||
|
size_t num_records_in_current_group_; |
||||||
|
}; |
||||||
|
|
||||||
|
int PlainTableReader::PopulateIndexRecordList(IndexRecordList* record_list) { |
||||||
|
Slice prev_key_prefix_slice; |
||||||
|
uint32_t prev_key_prefix_hash = 0; |
||||||
|
uint32_t pos = data_start_offset_; |
||||||
|
int key_index_within_prefix = 0; |
||||||
|
bool is_first_record = true; |
||||||
|
HistogramImpl keys_per_prefix_hist; |
||||||
|
// Need map to be ordered to make sure sub indexes generated
|
||||||
|
// are in order.
|
||||||
|
|
||||||
|
int num_prefixes = 0; |
||||||
|
while (pos < data_end_offset_) { |
||||||
|
uint32_t key_offset = pos; |
||||||
|
ParsedInternalKey key; |
||||||
|
Slice value_slice; |
||||||
|
status_ = Next(pos, &key, &value_slice, pos); |
||||||
|
Slice key_prefix_slice = GetPrefix(key); |
||||||
|
|
||||||
|
if (is_first_record || prev_key_prefix_slice != key_prefix_slice) { |
||||||
|
++num_prefixes; |
||||||
|
if (!is_first_record) { |
||||||
|
keys_per_prefix_hist.Add(key_index_within_prefix); |
||||||
|
} |
||||||
|
key_index_within_prefix = 0; |
||||||
|
prev_key_prefix_slice = key_prefix_slice; |
||||||
|
prev_key_prefix_hash = GetSliceHash(key_prefix_slice); |
||||||
|
} |
||||||
|
|
||||||
|
if (key_index_within_prefix++ % kIndexIntervalForSamePrefixKeys == 0) { |
||||||
|
// Add an index key for every kIndexIntervalForSamePrefixKeys keys
|
||||||
|
record_list->AddRecord(prev_key_prefix_hash, key_offset); |
||||||
|
} |
||||||
|
is_first_record = false; |
||||||
|
} |
||||||
|
|
||||||
|
keys_per_prefix_hist.Add(key_index_within_prefix); |
||||||
|
Log(options_.info_log, "Number of Keys per prefix Histogram: %s", |
||||||
|
keys_per_prefix_hist.ToString().c_str()); |
||||||
|
|
||||||
|
return num_prefixes; |
||||||
|
} |
||||||
|
|
||||||
|
void PlainTableReader::AllocateIndexAndBloom(int num_prefixes) { |
||||||
|
delete[] hash_table_; |
||||||
|
|
||||||
|
if (kBloomBitsPerKey > 0) { |
||||||
|
bloom_ = new DynamicBloom(num_prefixes * kBloomBitsPerKey); |
||||||
|
} |
||||||
|
double hash_table_size_multipier = |
||||||
|
(kHashTableRatio > 1.0) ? 1.0 : 1.0 / kHashTableRatio; |
||||||
|
hash_table_size_ = num_prefixes * hash_table_size_multipier + 1; |
||||||
|
hash_table_ = new uint32_t[hash_table_size_]; |
||||||
|
} |
||||||
|
|
||||||
|
size_t PlainTableReader::BucketizeIndexesAndFillBloom( |
||||||
|
IndexRecordList& record_list, int num_prefixes, |
||||||
|
std::vector<IndexRecord*>* hash_to_offsets, |
||||||
|
std::vector<uint32_t>* bucket_count) { |
||||||
|
size_t sub_index_size_needed = 0; |
||||||
|
bool first = true; |
||||||
|
uint32_t prev_hash = 0; |
||||||
|
size_t num_records = record_list.GetNumRecords(); |
||||||
|
for (size_t i = 0; i < num_records; i++) { |
||||||
|
IndexRecord* index_record = record_list.At(i); |
||||||
|
uint32_t cur_hash = index_record->hash; |
||||||
|
if (first || prev_hash != cur_hash) { |
||||||
|
prev_hash = cur_hash; |
||||||
|
first = false; |
||||||
|
if (bloom_) { |
||||||
|
bloom_->AddHash(cur_hash); |
||||||
|
} |
||||||
|
} |
||||||
|
uint32_t bucket = GetBucketIdFromHash(cur_hash, hash_table_size_); |
||||||
|
IndexRecord* prev_bucket_head = (*hash_to_offsets)[bucket]; |
||||||
|
index_record->next = prev_bucket_head; |
||||||
|
(*hash_to_offsets)[bucket] = index_record; |
||||||
|
auto& item_count = (*bucket_count)[bucket]; |
||||||
|
if (item_count > 0) { |
||||||
|
if (item_count == 1) { |
||||||
|
sub_index_size_needed += kOffsetLen + 1; |
||||||
|
} |
||||||
|
if (item_count == 127) { |
||||||
|
// Need more than one byte for length
|
||||||
|
sub_index_size_needed++; |
||||||
|
} |
||||||
|
sub_index_size_needed += kOffsetLen; |
||||||
|
} |
||||||
|
item_count++; |
||||||
|
} |
||||||
|
return sub_index_size_needed; |
||||||
|
} |
||||||
|
|
||||||
|
void PlainTableReader::FillIndexes( |
||||||
|
size_t sub_index_size_needed, |
||||||
|
const std::vector<IndexRecord*>& hash_to_offsets, |
||||||
|
const std::vector<uint32_t>& bucket_count) { |
||||||
|
Log(options_.info_log, "Reserving %zu bytes for sub index", |
||||||
|
sub_index_size_needed); |
||||||
|
// 8 bytes buffer for variable length size
|
||||||
|
size_t buffer_size = 8 * 8; |
||||||
|
size_t buffer_used = 0; |
||||||
|
sub_index_size_needed += buffer_size; |
||||||
|
sub_index_ = new char[sub_index_size_needed]; |
||||||
|
size_t sub_index_offset = 0; |
||||||
|
char* prev_ptr; |
||||||
|
char* cur_ptr; |
||||||
|
uint32_t* sub_index_ptr; |
||||||
|
for (int i = 0; i < hash_table_size_; i++) { |
||||||
|
uint32_t num_keys_for_bucket = bucket_count[i]; |
||||||
|
switch (num_keys_for_bucket) { |
||||||
|
case 0: |
||||||
|
// No key for bucket
|
||||||
|
hash_table_[i] = data_end_offset_; |
||||||
|
break; |
||||||
|
case 1: |
||||||
|
// point directly to the file offset
|
||||||
|
hash_table_[i] = hash_to_offsets[i]->offset; |
||||||
|
break; |
||||||
|
default: |
||||||
|
// point to second level indexes.
|
||||||
|
hash_table_[i] = sub_index_offset | kSubIndexMask; |
||||||
|
prev_ptr = sub_index_ + sub_index_offset; |
||||||
|
cur_ptr = EncodeVarint32(prev_ptr, num_keys_for_bucket); |
||||||
|
sub_index_offset += (cur_ptr - prev_ptr); |
||||||
|
if (cur_ptr - prev_ptr > 2 |
||||||
|
|| (cur_ptr - prev_ptr == 2 && num_keys_for_bucket <= 127)) { |
||||||
|
// Need to resize sub_index. Exponentially grow buffer.
|
||||||
|
buffer_used += cur_ptr - prev_ptr - 1; |
||||||
|
if (buffer_used + 4 > buffer_size) { |
||||||
|
Log(options_.info_log, "Recalculate suffix_map length to %zu", |
||||||
|
sub_index_size_needed); |
||||||
|
|
||||||
|
sub_index_size_needed += buffer_size; |
||||||
|
buffer_size *= 2; |
||||||
|
char* new_sub_index = new char[sub_index_size_needed]; |
||||||
|
memcpy(new_sub_index, sub_index_, sub_index_offset); |
||||||
|
delete[] sub_index_; |
||||||
|
sub_index_ = new_sub_index; |
||||||
|
} |
||||||
|
} |
||||||
|
sub_index_ptr = (uint32_t*) (sub_index_ + sub_index_offset); |
||||||
|
IndexRecord* record = hash_to_offsets[i]; |
||||||
|
int j; |
||||||
|
for (j = num_keys_for_bucket - 1; j >= 0 && record; |
||||||
|
j--, record = record->next) { |
||||||
|
sub_index_ptr[j] = record->offset; |
||||||
|
} |
||||||
|
assert(j == -1 && record == nullptr); |
||||||
|
sub_index_offset += kOffsetLen * num_keys_for_bucket; |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
Log(options_.info_log, "hash table size: %d, suffix_map length %zu", |
||||||
|
hash_table_size_, sub_index_size_needed); |
||||||
|
} |
||||||
|
|
||||||
|
Status PlainTableReader::PopulateIndex() { |
||||||
|
// Get mmapped memory to file_data_.
|
||||||
|
Status s = file_->Read(0, file_size_, &file_data_, nullptr); |
||||||
|
if (!s.ok()) { |
||||||
|
return s; |
||||||
|
} |
||||||
|
|
||||||
|
IndexRecordList record_list(kRecordsPerGroup); |
||||||
|
// First, read the whole file, for every kIndexIntervalForSamePrefixKeys rows
|
||||||
|
// for a prefix (starting from the first one), generate a record of (hash,
|
||||||
|
// offset) and append it to IndexRecordList, which is a data structure created
|
||||||
|
// to store them.
|
||||||
|
int num_prefixes = PopulateIndexRecordList(&record_list); |
||||||
|
// Calculated hash table and bloom filter size and allocate memory for indexes
|
||||||
|
// and bloom filter based on the number of prefixes.
|
||||||
|
AllocateIndexAndBloom(num_prefixes); |
||||||
|
|
||||||
|
// Bucketize all the index records to a temp data structure, in which for
|
||||||
|
// each bucket, we generate a linked list of IndexRecord, in reversed order.
|
||||||
|
std::vector<IndexRecord*> hash_to_offsets(hash_table_size_, nullptr); |
||||||
|
std::vector<uint32_t> bucket_count(hash_table_size_, 0); |
||||||
|
size_t sub_index_size_needed = BucketizeIndexesAndFillBloom( |
||||||
|
record_list, num_prefixes, &hash_to_offsets, &bucket_count); |
||||||
|
// From the temp data structure, populate indexes.
|
||||||
|
FillIndexes(sub_index_size_needed, hash_to_offsets, bucket_count); |
||||||
|
|
||||||
|
return Status::OK(); |
||||||
|
} |
||||||
|
|
||||||
|
Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix, |
||||||
|
uint32_t prefix_hash, bool& prefix_matched, |
||||||
|
uint32_t& ret_offset) { |
||||||
|
prefix_matched = false; |
||||||
|
int bucket = GetBucketIdFromHash(prefix_hash, hash_table_size_); |
||||||
|
uint32_t bucket_value = hash_table_[bucket]; |
||||||
|
if (bucket_value == data_end_offset_) { |
||||||
|
ret_offset = data_end_offset_; |
||||||
|
return Status::OK(); |
||||||
|
} else if ((bucket_value & kSubIndexMask) == 0) { |
||||||
|
// point directly to the file
|
||||||
|
ret_offset = bucket_value; |
||||||
|
return Status::OK(); |
||||||
|
} |
||||||
|
|
||||||
|
// point to sub-index, need to do a binary search
|
||||||
|
uint32_t low = 0; |
||||||
|
uint64_t prefix_index_offset = bucket_value ^ kSubIndexMask; |
||||||
|
|
||||||
|
const char* index_ptr = sub_index_ + prefix_index_offset; |
||||||
|
uint32_t upper_bound = 0; |
||||||
|
const uint32_t* base_ptr = (const uint32_t*) GetVarint32Ptr(index_ptr, |
||||||
|
index_ptr + 4, |
||||||
|
&upper_bound); |
||||||
|
uint32_t high = upper_bound; |
||||||
|
ParsedInternalKey mid_key; |
||||||
|
ParsedInternalKey parsed_target; |
||||||
|
if (!ParseInternalKey(target, &parsed_target)) { |
||||||
|
return Status::Corruption(Slice()); |
||||||
|
} |
||||||
|
|
||||||
|
// The key is between [low, high). Do a binary search between it.
|
||||||
|
while (high - low > 1) { |
||||||
|
uint32_t mid = (high + low) / 2; |
||||||
|
uint32_t file_offset = base_ptr[mid]; |
||||||
|
size_t tmp; |
||||||
|
Status s = ReadKey(file_data_.data() + file_offset, &mid_key, tmp); |
||||||
|
if (!s.ok()) { |
||||||
|
return s; |
||||||
|
} |
||||||
|
int cmp_result = internal_comparator_.Compare(mid_key, parsed_target); |
||||||
|
if (cmp_result < 0) { |
||||||
|
low = mid; |
||||||
|
} else { |
||||||
|
if (cmp_result == 0) { |
||||||
|
// Happen to have found the exact key or target is smaller than the
|
||||||
|
// first key after base_offset.
|
||||||
|
prefix_matched = true; |
||||||
|
ret_offset = file_offset; |
||||||
|
return Status::OK(); |
||||||
|
} else { |
||||||
|
high = mid; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
// Both of the key at the position low or low+1 could share the same
|
||||||
|
// prefix as target. We need to rule out one of them to avoid to go
|
||||||
|
// to the wrong prefix.
|
||||||
|
ParsedInternalKey low_key; |
||||||
|
size_t tmp; |
||||||
|
uint32_t low_key_offset = base_ptr[low]; |
||||||
|
Status s = ReadKey(file_data_.data() + low_key_offset, &low_key, tmp); |
||||||
|
if (GetPrefix(low_key) == prefix) { |
||||||
|
prefix_matched = true; |
||||||
|
ret_offset = low_key_offset; |
||||||
|
} else if (low + 1 < upper_bound) { |
||||||
|
// There is possible a next prefix, return it
|
||||||
|
prefix_matched = false; |
||||||
|
ret_offset = base_ptr[low + 1]; |
||||||
|
} else { |
||||||
|
// target is larger than a key of the last prefix in this bucket
|
||||||
|
// but with a different prefix. Key does not exist.
|
||||||
|
ret_offset = data_end_offset_; |
||||||
|
} |
||||||
|
return Status::OK(); |
||||||
|
} |
||||||
|
|
||||||
|
bool PlainTableReader::MayHavePrefix(uint32_t hash) { |
||||||
|
return bloom_ == nullptr || bloom_->MayContainHash(hash); |
||||||
|
} |
||||||
|
|
||||||
|
Slice PlainTableReader::GetPrefix(const ParsedInternalKey& target) { |
||||||
|
return options_.prefix_extractor->Transform(target.user_key); |
||||||
|
} |
||||||
|
|
||||||
|
Status PlainTableReader::ReadKey(const char* row_ptr, ParsedInternalKey* key, |
||||||
|
size_t& bytes_read) { |
||||||
|
const char* key_ptr = nullptr; |
||||||
|
bytes_read = 0; |
||||||
|
size_t user_key_size = 0; |
||||||
|
if (IsFixedLength()) { |
||||||
|
user_key_size = user_key_len_; |
||||||
|
key_ptr = row_ptr; |
||||||
|
} else { |
||||||
|
uint32_t tmp_size = 0; |
||||||
|
key_ptr = GetVarint32Ptr(row_ptr, file_data_.data() + data_end_offset_, |
||||||
|
&tmp_size); |
||||||
|
if (key_ptr == nullptr) { |
||||||
|
return Status::Corruption("Unable to read the next key"); |
||||||
|
} |
||||||
|
user_key_size = (size_t)tmp_size; |
||||||
|
bytes_read = key_ptr - row_ptr; |
||||||
|
} |
||||||
|
if (key_ptr + user_key_size + 1 >= file_data_.data() + data_end_offset_) { |
||||||
|
return Status::Corruption("Unable to read the next key"); |
||||||
|
} |
||||||
|
|
||||||
|
if (*(key_ptr + user_key_size) == PlainTableFactory::kValueTypeSeqId0) { |
||||||
|
// Special encoding for the row with seqID=0
|
||||||
|
key->user_key = Slice(key_ptr, user_key_size); |
||||||
|
key->sequence = 0; |
||||||
|
key->type = kTypeValue; |
||||||
|
bytes_read += user_key_size + 1; |
||||||
|
} else { |
||||||
|
if (row_ptr + user_key_size + 8 >= file_data_.data() + data_end_offset_) { |
||||||
|
return Status::Corruption("Unable to read the next key"); |
||||||
|
} |
||||||
|
if (!ParseInternalKey(Slice(key_ptr, user_key_size + 8), key)) { |
||||||
|
return Status::Corruption(Slice()); |
||||||
|
} |
||||||
|
bytes_read += user_key_size + 8; |
||||||
|
} |
||||||
|
|
||||||
|
return Status::OK(); |
||||||
|
} |
||||||
|
|
||||||
|
Status PlainTableReader::Next(uint32_t offset, ParsedInternalKey* key, |
||||||
|
Slice* value, uint32_t& next_offset) { |
||||||
|
if (offset == data_end_offset_) { |
||||||
|
next_offset = data_end_offset_; |
||||||
|
return Status::OK(); |
||||||
|
} |
||||||
|
|
||||||
|
if (offset > data_end_offset_) { |
||||||
|
return Status::Corruption("Offset is out of file size"); |
||||||
|
} |
||||||
|
|
||||||
|
const char* row_ptr = file_data_.data() + offset; |
||||||
|
size_t bytes_for_key; |
||||||
|
Status s = ReadKey(row_ptr, key, bytes_for_key); |
||||||
|
uint32_t value_size; |
||||||
|
const char* value_ptr = GetVarint32Ptr(row_ptr + bytes_for_key, |
||||||
|
file_data_.data() + data_end_offset_, |
||||||
|
&value_size); |
||||||
|
if (value_ptr == nullptr) { |
||||||
|
return Status::Corruption("Error reading value length."); |
||||||
|
} |
||||||
|
next_offset = offset + (value_ptr - row_ptr) + value_size; |
||||||
|
if (next_offset > data_end_offset_) { |
||||||
|
return Status::Corruption("Reach end of file when reading value"); |
||||||
|
} |
||||||
|
*value = Slice(value_ptr, value_size); |
||||||
|
|
||||||
|
return Status::OK(); |
||||||
|
} |
||||||
|
|
||||||
|
Status PlainTableReader::Get(const ReadOptions& ro, const Slice& target, |
||||||
|
void* arg, |
||||||
|
bool (*saver)(void*, const ParsedInternalKey&, |
||||||
|
const Slice&, bool), |
||||||
|
void (*mark_key_may_exist)(void*)) { |
||||||
|
// Check bloom filter first.
|
||||||
|
Slice prefix_slice = GetPrefix(target); |
||||||
|
uint32_t prefix_hash = GetSliceHash(prefix_slice); |
||||||
|
if (!MayHavePrefix(prefix_hash)) { |
||||||
|
return Status::OK(); |
||||||
|
} |
||||||
|
uint32_t offset; |
||||||
|
bool prefix_match; |
||||||
|
Status s = GetOffset(target, prefix_slice, prefix_hash, prefix_match, offset); |
||||||
|
if (!s.ok()) { |
||||||
|
return s; |
||||||
|
} |
||||||
|
ParsedInternalKey found_key; |
||||||
|
ParsedInternalKey parsed_target; |
||||||
|
if (!ParseInternalKey(target, &parsed_target)) { |
||||||
|
return Status::Corruption(Slice()); |
||||||
|
} |
||||||
|
|
||||||
|
Slice found_value; |
||||||
|
while (offset < data_end_offset_) { |
||||||
|
Status s = Next(offset, &found_key, &found_value, offset); |
||||||
|
if (!s.ok()) { |
||||||
|
return s; |
||||||
|
} |
||||||
|
if (!prefix_match) { |
||||||
|
// Need to verify prefix for the first key found if it is not yet
|
||||||
|
// checked.
|
||||||
|
if (GetPrefix(found_key) != prefix_slice) { |
||||||
|
return Status::OK(); |
||||||
|
} |
||||||
|
prefix_match = true; |
||||||
|
} |
||||||
|
if (internal_comparator_.Compare(found_key, parsed_target) >= 0) { |
||||||
|
if (!(*saver)(arg, found_key, found_value, true)) { |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
return Status::OK(); |
||||||
|
} |
||||||
|
|
||||||
|
uint64_t PlainTableReader::ApproximateOffsetOf(const Slice& key) { |
||||||
|
return 0; |
||||||
|
} |
||||||
|
|
||||||
|
PlainTableIterator::PlainTableIterator(PlainTableReader* table) : |
||||||
|
table_(table) { |
||||||
|
next_offset_ = offset_ = table_->data_end_offset_; |
||||||
|
} |
||||||
|
|
||||||
|
PlainTableIterator::~PlainTableIterator() { |
||||||
|
} |
||||||
|
|
||||||
|
bool PlainTableIterator::Valid() const { |
||||||
|
return offset_ < table_->data_end_offset_ |
||||||
|
&& offset_ >= table_->data_start_offset_; |
||||||
|
} |
||||||
|
|
||||||
|
void PlainTableIterator::SeekToFirst() { |
||||||
|
next_offset_ = table_->data_start_offset_; |
||||||
|
if (next_offset_ >= table_->data_end_offset_) { |
||||||
|
next_offset_ = offset_ = table_->data_end_offset_; |
||||||
|
} else { |
||||||
|
Next(); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
void PlainTableIterator::SeekToLast() { |
||||||
|
assert(false); |
||||||
|
} |
||||||
|
|
||||||
|
void PlainTableIterator::Seek(const Slice& target) { |
||||||
|
Slice prefix_slice = table_->GetPrefix(target); |
||||||
|
uint32_t prefix_hash = GetSliceHash(prefix_slice); |
||||||
|
if (!table_->MayHavePrefix(prefix_hash)) { |
||||||
|
offset_ = next_offset_ = table_->data_end_offset_; |
||||||
|
return; |
||||||
|
} |
||||||
|
bool prefix_match; |
||||||
|
status_ = table_->GetOffset(target, prefix_slice, prefix_hash, prefix_match, |
||||||
|
next_offset_); |
||||||
|
if (!status_.ok()) { |
||||||
|
offset_ = next_offset_ = table_->data_end_offset_; |
||||||
|
return; |
||||||
|
} |
||||||
|
|
||||||
|
if (next_offset_ < table_-> data_end_offset_) { |
||||||
|
for (Next(); status_.ok() && Valid(); Next()) { |
||||||
|
if (!prefix_match) { |
||||||
|
// Need to verify the first key's prefix
|
||||||
|
if (table_->GetPrefix(key()) != prefix_slice) { |
||||||
|
offset_ = next_offset_ = table_->data_end_offset_; |
||||||
|
break; |
||||||
|
} |
||||||
|
prefix_match = true; |
||||||
|
} |
||||||
|
if (table_->internal_comparator_.Compare(key(), target) >= 0) { |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
} else { |
||||||
|
offset_ = table_->data_end_offset_; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
void PlainTableIterator::Next() { |
||||||
|
offset_ = next_offset_; |
||||||
|
if (offset_ < table_->data_end_offset_) { |
||||||
|
Slice tmp_slice; |
||||||
|
ParsedInternalKey parsed_key; |
||||||
|
status_ = table_->Next(next_offset_, &parsed_key, &value_, next_offset_); |
||||||
|
if (status_.ok()) { |
||||||
|
// Make a copy in this case. TODO optimize.
|
||||||
|
tmp_str_.clear(); |
||||||
|
AppendInternalKey(&tmp_str_, parsed_key); |
||||||
|
key_ = Slice(tmp_str_); |
||||||
|
} else { |
||||||
|
offset_ = next_offset_ = table_->data_end_offset_; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
void PlainTableIterator::Prev() { |
||||||
|
assert(false); |
||||||
|
} |
||||||
|
|
||||||
|
Slice PlainTableIterator::key() const { |
||||||
|
assert(Valid()); |
||||||
|
return key_; |
||||||
|
} |
||||||
|
|
||||||
|
Slice PlainTableIterator::value() const { |
||||||
|
assert(Valid()); |
||||||
|
return value_; |
||||||
|
} |
||||||
|
|
||||||
|
Status PlainTableIterator::status() const { |
||||||
|
return status_; |
||||||
|
} |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,220 @@ |
|||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
#include <unordered_map> |
||||||
|
#include <memory> |
||||||
|
#include <vector> |
||||||
|
#include <string> |
||||||
|
#include <stdint.h> |
||||||
|
|
||||||
|
#include "db/dbformat.h" |
||||||
|
#include "rocksdb/env.h" |
||||||
|
#include "rocksdb/iterator.h" |
||||||
|
#include "rocksdb/slice_transform.h" |
||||||
|
#include "rocksdb/table.h" |
||||||
|
#include "rocksdb/table_properties.h" |
||||||
|
#include "table/table_reader.h" |
||||||
|
#include "table/plain_table_factory.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class Block; |
||||||
|
class BlockHandle; |
||||||
|
class Footer; |
||||||
|
struct Options; |
||||||
|
class RandomAccessFile; |
||||||
|
struct ReadOptions; |
||||||
|
class TableCache; |
||||||
|
class TableReader; |
||||||
|
class DynamicBloom; |
||||||
|
class InternalKeyComparator; |
||||||
|
|
||||||
|
using std::unique_ptr; |
||||||
|
using std::unordered_map; |
||||||
|
extern const uint32_t kPlainTableVariableLength; |
||||||
|
|
||||||
|
// Based on following output file format shown in plain_table_factory.h
|
||||||
|
// When opening the output file, IndexedTableReader creates a hash table
|
||||||
|
// from key prefixes to offset of the output file. IndexedTable will decide
|
||||||
|
// whether it points to the data offset of the first key with the key prefix
|
||||||
|
// or the offset of it. If there are too many keys share this prefix, it will
|
||||||
|
// create a binary search-able index from the suffix to offset on disk.
|
||||||
|
//
|
||||||
|
// The implementation of IndexedTableReader requires output file is mmaped
|
||||||
|
class PlainTableReader: public TableReader { |
||||||
|
public: |
||||||
|
static Status Open(const Options& options, const EnvOptions& soptions, |
||||||
|
const InternalKeyComparator& internal_comparator, |
||||||
|
unique_ptr<RandomAccessFile>&& file, uint64_t file_size, |
||||||
|
unique_ptr<TableReader>* table, |
||||||
|
const int bloom_bits_per_key, double hash_table_ratio); |
||||||
|
|
||||||
|
bool PrefixMayMatch(const Slice& internal_prefix); |
||||||
|
|
||||||
|
Iterator* NewIterator(const ReadOptions&); |
||||||
|
|
||||||
|
Status Get(const ReadOptions&, const Slice& key, void* arg, |
||||||
|
bool (*result_handler)(void* arg, const ParsedInternalKey& k, |
||||||
|
const Slice& v, bool), |
||||||
|
void (*mark_key_may_exist)(void*) = nullptr); |
||||||
|
|
||||||
|
uint64_t ApproximateOffsetOf(const Slice& key); |
||||||
|
|
||||||
|
void SetupForCompaction(); |
||||||
|
|
||||||
|
const TableProperties& GetTableProperties() { return table_properties_; } |
||||||
|
|
||||||
|
PlainTableReader(const EnvOptions& storage_options, |
||||||
|
const InternalKeyComparator& internal_comparator, |
||||||
|
uint64_t file_size, int bloom_num_bits, |
||||||
|
double hash_table_ratio, |
||||||
|
const TableProperties& table_properties); |
||||||
|
~PlainTableReader(); |
||||||
|
|
||||||
|
private: |
||||||
|
struct IndexRecord; |
||||||
|
class IndexRecordList; |
||||||
|
|
||||||
|
uint32_t* hash_table_ = nullptr; |
||||||
|
int hash_table_size_ = 0; |
||||||
|
char* sub_index_ = nullptr; |
||||||
|
|
||||||
|
Options options_; |
||||||
|
const EnvOptions& soptions_; |
||||||
|
const InternalKeyComparator internal_comparator_; |
||||||
|
Status status_; |
||||||
|
unique_ptr<RandomAccessFile> file_; |
||||||
|
|
||||||
|
Slice file_data_; |
||||||
|
uint32_t version_; |
||||||
|
uint32_t file_size_; |
||||||
|
|
||||||
|
const double kHashTableRatio; |
||||||
|
const int kBloomBitsPerKey; |
||||||
|
DynamicBloom* bloom_ = nullptr; |
||||||
|
|
||||||
|
TableProperties table_properties_; |
||||||
|
const uint32_t data_start_offset_ = 0; |
||||||
|
const uint32_t data_end_offset_; |
||||||
|
const size_t user_key_len_; |
||||||
|
|
||||||
|
static const size_t kNumInternalBytes = 8; |
||||||
|
static const uint32_t kSubIndexMask = 0x80000000; |
||||||
|
static const size_t kOffsetLen = sizeof(uint32_t); |
||||||
|
static const uint64_t kMaxFileSize = 1u << 31; |
||||||
|
static const size_t kRecordsPerGroup = 256; |
||||||
|
// To speed up the search for keys with same prefix, we'll add index key for
|
||||||
|
// every N keys, where the "N" is determined by
|
||||||
|
// kIndexIntervalForSamePrefixKeys
|
||||||
|
static const size_t kIndexIntervalForSamePrefixKeys = 16; |
||||||
|
|
||||||
|
bool IsFixedLength() const { |
||||||
|
return user_key_len_ != kPlainTableVariableLength; |
||||||
|
} |
||||||
|
|
||||||
|
size_t GetFixedInternalKeyLength() const { |
||||||
|
return user_key_len_ + kNumInternalBytes; |
||||||
|
} |
||||||
|
|
||||||
|
friend class TableCache; |
||||||
|
friend class PlainTableIterator; |
||||||
|
|
||||||
|
// Internal helper function to generate an IndexRecordList object from all
|
||||||
|
// the rows, which contains index records as a list.
|
||||||
|
int PopulateIndexRecordList(IndexRecordList* record_list); |
||||||
|
|
||||||
|
// Internal helper function to allocate memory for indexes and bloom filters
|
||||||
|
void AllocateIndexAndBloom(int num_prefixes); |
||||||
|
|
||||||
|
// Internal helper function to bucket index record list to hash buckets.
|
||||||
|
// hash_to_offsets is sized of of hash_table_size_, each contains a linked
|
||||||
|
// list
|
||||||
|
// of offsets for the hash, in reversed order.
|
||||||
|
// bucket_count is sized of hash_table_size_. The value is how many index
|
||||||
|
// records are there in hash_to_offsets for the same bucket.
|
||||||
|
size_t BucketizeIndexesAndFillBloom( |
||||||
|
IndexRecordList& record_list, int num_prefixes, |
||||||
|
std::vector<IndexRecord*>* hash_to_offsets, |
||||||
|
std::vector<uint32_t>* bucket_count); |
||||||
|
|
||||||
|
// Internal helper class to fill the indexes and bloom filters to internal
|
||||||
|
// data structures. hash_to_offsets and bucket_count are bucketized indexes
|
||||||
|
// and counts generated by BucketizeIndexesAndFillBloom().
|
||||||
|
void FillIndexes(size_t sub_index_size_needed, |
||||||
|
const std::vector<IndexRecord*>& hash_to_offsets, |
||||||
|
const std::vector<uint32_t>& bucket_count); |
||||||
|
|
||||||
|
// PopulateIndex() builds index of keys. It must be called before any query
|
||||||
|
// to the table.
|
||||||
|
//
|
||||||
|
// hash_table_ contains buckets size of hash_table_size_, each is a 32-bit
|
||||||
|
// integer. The lower 31 bits contain an offset value (explained below) and
|
||||||
|
// the first bit of the integer indicates type of the offset.
|
||||||
|
//
|
||||||
|
// +--------------+------------------------------------------------------+
|
||||||
|
// | Flag (1 bit) | Offset to binary search buffer or file (31 bits) +
|
||||||
|
// +--------------+------------------------------------------------------+
|
||||||
|
//
|
||||||
|
// Explanation for the "flag bit":
|
||||||
|
//
|
||||||
|
// 0 indicates that the bucket contains only one prefix (no conflict when
|
||||||
|
// hashing this prefix), whose first row starts from this offset of the
|
||||||
|
// file.
|
||||||
|
// 1 indicates that the bucket contains more than one prefixes, or there
|
||||||
|
// are too many rows for one prefix so we need a binary search for it. In
|
||||||
|
// this case, the offset indicates the offset of sub_index_ holding the
|
||||||
|
// binary search indexes of keys for those rows. Those binary search indexes
|
||||||
|
// are organized in this way:
|
||||||
|
//
|
||||||
|
// The first 4 bytes, indicate how many indexes (N) are stored after it. After
|
||||||
|
// it, there are N 32-bit integers, each points of an offset of the file,
|
||||||
|
// which
|
||||||
|
// points to starting of a row. Those offsets need to be guaranteed to be in
|
||||||
|
// ascending order so the keys they are pointing to are also in ascending
|
||||||
|
// order
|
||||||
|
// to make sure we can use them to do binary searches. Below is visual
|
||||||
|
// presentation of a bucket.
|
||||||
|
//
|
||||||
|
// <begin>
|
||||||
|
// number_of_records: varint32
|
||||||
|
// record 1 file offset: fixedint32
|
||||||
|
// record 2 file offset: fixedint32
|
||||||
|
// ....
|
||||||
|
// record N file offset: fixedint32
|
||||||
|
// <end>
|
||||||
|
Status PopulateIndex(); |
||||||
|
|
||||||
|
// Check bloom filter to see whether it might contain this prefix.
|
||||||
|
// The hash of the prefix is given, since it can be reused for index lookup
|
||||||
|
// too.
|
||||||
|
bool MayHavePrefix(uint32_t hash); |
||||||
|
|
||||||
|
Status ReadKey(const char* row_ptr, ParsedInternalKey* key, |
||||||
|
size_t& bytes_read); |
||||||
|
// Read the key and value at offset to key and value.
|
||||||
|
// tmp_slice is a tmp slice.
|
||||||
|
// return next_offset as the offset for the next key.
|
||||||
|
Status Next(uint32_t offset, ParsedInternalKey* key, Slice* value, |
||||||
|
uint32_t& next_offset); |
||||||
|
// Get file offset for key target.
|
||||||
|
// return value prefix_matched is set to true if the offset is confirmed
|
||||||
|
// for a key with the same prefix as target.
|
||||||
|
Status GetOffset(const Slice& target, const Slice& prefix, |
||||||
|
uint32_t prefix_hash, bool& prefix_matched, |
||||||
|
uint32_t& ret_offset); |
||||||
|
|
||||||
|
Slice GetPrefix(const Slice& target) { |
||||||
|
assert(target.size() >= 8); // target is internal key
|
||||||
|
return options_.prefix_extractor->Transform( |
||||||
|
Slice(target.data(), target.size() - 8)); |
||||||
|
} |
||||||
|
|
||||||
|
Slice GetPrefix(const ParsedInternalKey& target); |
||||||
|
|
||||||
|
// No copying allowed
|
||||||
|
explicit PlainTableReader(const TableReader&) = delete; |
||||||
|
void operator=(const TableReader&) = delete; |
||||||
|
}; |
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,55 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class Slice; |
||||||
|
class Status; |
||||||
|
|
||||||
|
// TableBuilder provides the interface used to build a Table
|
||||||
|
// (an immutable and sorted map from keys to values).
|
||||||
|
//
|
||||||
|
// Multiple threads can invoke const methods on a TableBuilder without
|
||||||
|
// external synchronization, but if any of the threads may call a
|
||||||
|
// non-const method, all threads accessing the same TableBuilder must use
|
||||||
|
// external synchronization.
|
||||||
|
class TableBuilder { |
||||||
|
public: |
||||||
|
// REQUIRES: Either Finish() or Abandon() has been called.
|
||||||
|
virtual ~TableBuilder() {} |
||||||
|
|
||||||
|
// Add key,value to the table being constructed.
|
||||||
|
// REQUIRES: key is after any previously added key according to comparator.
|
||||||
|
// REQUIRES: Finish(), Abandon() have not been called
|
||||||
|
virtual void Add(const Slice& key, const Slice& value) = 0; |
||||||
|
|
||||||
|
// Return non-ok iff some error has been detected.
|
||||||
|
virtual Status status() const = 0; |
||||||
|
|
||||||
|
// Finish building the table.
|
||||||
|
// REQUIRES: Finish(), Abandon() have not been called
|
||||||
|
virtual Status Finish() = 0; |
||||||
|
|
||||||
|
// Indicate that the contents of this builder should be abandoned.
|
||||||
|
// If the caller is not going to call Finish(), it must call Abandon()
|
||||||
|
// before destroying this builder.
|
||||||
|
// REQUIRES: Finish(), Abandon() have not been called
|
||||||
|
virtual void Abandon() = 0; |
||||||
|
|
||||||
|
// Number of calls to Add() so far.
|
||||||
|
virtual uint64_t NumEntries() const = 0; |
||||||
|
|
||||||
|
// Size of the file generated so far. If invoked after a successful
|
||||||
|
// Finish() call, returns the size of the final generated file.
|
||||||
|
virtual uint64_t FileSize() const = 0; |
||||||
|
}; |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,114 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#include "rocksdb/table_properties.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
namespace { |
||||||
|
void AppendProperty( |
||||||
|
std::string& props, |
||||||
|
const std::string& key, |
||||||
|
const std::string& value, |
||||||
|
const std::string& prop_delim, |
||||||
|
const std::string& kv_delim) { |
||||||
|
props.append(key); |
||||||
|
props.append(kv_delim); |
||||||
|
props.append(value); |
||||||
|
props.append(prop_delim); |
||||||
|
} |
||||||
|
|
||||||
|
template <class TValue> |
||||||
|
void AppendProperty( |
||||||
|
std::string& props, |
||||||
|
const std::string& key, |
||||||
|
const TValue& value, |
||||||
|
const std::string& prop_delim, |
||||||
|
const std::string& kv_delim) { |
||||||
|
AppendProperty( |
||||||
|
props, key, std::to_string(value), prop_delim, kv_delim |
||||||
|
); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
std::string TableProperties::ToString( |
||||||
|
const std::string& prop_delim, |
||||||
|
const std::string& kv_delim) const { |
||||||
|
std::string result; |
||||||
|
result.reserve(1024); |
||||||
|
|
||||||
|
// Basic Info
|
||||||
|
AppendProperty( |
||||||
|
result, "# data blocks", num_data_blocks, prop_delim, kv_delim |
||||||
|
); |
||||||
|
AppendProperty(result, "# entries", num_entries, prop_delim, kv_delim); |
||||||
|
|
||||||
|
AppendProperty(result, "raw key size", raw_key_size, prop_delim, kv_delim); |
||||||
|
AppendProperty( |
||||||
|
result, |
||||||
|
"raw average key size", |
||||||
|
num_entries != 0 ? 1.0 * raw_key_size / num_entries : 0.0, |
||||||
|
prop_delim, |
||||||
|
kv_delim |
||||||
|
); |
||||||
|
AppendProperty( |
||||||
|
result, "raw value size", raw_value_size, prop_delim, kv_delim |
||||||
|
); |
||||||
|
AppendProperty( |
||||||
|
result, |
||||||
|
"raw average value size", |
||||||
|
num_entries != 0 ? 1.0 * raw_value_size / num_entries : 0.0, |
||||||
|
prop_delim, |
||||||
|
kv_delim |
||||||
|
); |
||||||
|
|
||||||
|
AppendProperty(result, "data block size", data_size, prop_delim, kv_delim); |
||||||
|
AppendProperty(result, "index block size", index_size, prop_delim, kv_delim); |
||||||
|
AppendProperty( |
||||||
|
result, "filter block size", filter_size, prop_delim, kv_delim |
||||||
|
); |
||||||
|
AppendProperty( |
||||||
|
result, |
||||||
|
"(estimated) table size", |
||||||
|
data_size + index_size + filter_size, |
||||||
|
prop_delim, |
||||||
|
kv_delim |
||||||
|
); |
||||||
|
|
||||||
|
AppendProperty( |
||||||
|
result, |
||||||
|
"filter policy name", |
||||||
|
filter_policy_name.empty() ? std::string("N/A") : filter_policy_name, |
||||||
|
prop_delim, |
||||||
|
kv_delim |
||||||
|
); |
||||||
|
|
||||||
|
return result; |
||||||
|
} |
||||||
|
|
||||||
|
const std::string TablePropertiesNames::kDataSize = |
||||||
|
"rocksdb.data.size"; |
||||||
|
const std::string TablePropertiesNames::kIndexSize = |
||||||
|
"rocksdb.index.size"; |
||||||
|
const std::string TablePropertiesNames::kFilterSize = |
||||||
|
"rocksdb.filter.size"; |
||||||
|
const std::string TablePropertiesNames::kRawKeySize = |
||||||
|
"rocksdb.raw.key.size"; |
||||||
|
const std::string TablePropertiesNames::kRawValueSize = |
||||||
|
"rocksdb.raw.value.size"; |
||||||
|
const std::string TablePropertiesNames::kNumDataBlocks = |
||||||
|
"rocksdb.num.data.blocks"; |
||||||
|
const std::string TablePropertiesNames::kNumEntries = |
||||||
|
"rocksdb.num.entries"; |
||||||
|
const std::string TablePropertiesNames::kFilterPolicy = |
||||||
|
"rocksdb.filter.policy"; |
||||||
|
const std::string TablePropertiesNames::kFormatVersion = |
||||||
|
"rocksdb.format.version"; |
||||||
|
const std::string TablePropertiesNames::kFixedKeyLen = |
||||||
|
"rocksdb.fixed.key.length"; |
||||||
|
|
||||||
|
extern const std::string kPropertiesBlock = "rocksdb.properties"; |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,71 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class Iterator; |
||||||
|
struct ParsedInternalKey; |
||||||
|
class Slice; |
||||||
|
struct ReadOptions; |
||||||
|
struct TableProperties; |
||||||
|
|
||||||
|
// A Table is a sorted map from strings to strings. Tables are
|
||||||
|
// immutable and persistent. A Table may be safely accessed from
|
||||||
|
// multiple threads without external synchronization.
|
||||||
|
class TableReader { |
||||||
|
public: |
||||||
|
virtual ~TableReader() {} |
||||||
|
|
||||||
|
// Determine whether there is a chance that the current table file
|
||||||
|
// contains the key a key starting with iternal_prefix. The specific
|
||||||
|
// table implementation can use bloom filter and/or other heuristic
|
||||||
|
// to filter out this table as a whole.
|
||||||
|
virtual bool PrefixMayMatch(const Slice& internal_prefix) = 0; |
||||||
|
|
||||||
|
// Returns a new iterator over the table contents.
|
||||||
|
// The result of NewIterator() is initially invalid (caller must
|
||||||
|
// call one of the Seek methods on the iterator before using it).
|
||||||
|
virtual Iterator* NewIterator(const ReadOptions&) = 0; |
||||||
|
|
||||||
|
// Given a key, return an approximate byte offset in the file where
|
||||||
|
// the data for that key begins (or would begin if the key were
|
||||||
|
// present in the file). The returned value is in terms of file
|
||||||
|
// bytes, and so includes effects like compression of the underlying data.
|
||||||
|
// E.g., the approximate offset of the last key in the table will
|
||||||
|
// be close to the file length.
|
||||||
|
virtual uint64_t ApproximateOffsetOf(const Slice& key) = 0; |
||||||
|
|
||||||
|
// Set up the table for Compaction. Might change some parameters with
|
||||||
|
// posix_fadvise
|
||||||
|
virtual void SetupForCompaction() = 0; |
||||||
|
|
||||||
|
virtual const TableProperties& GetTableProperties() = 0; |
||||||
|
|
||||||
|
// Calls (*result_handler)(handle_context, ...) repeatedly, starting with
|
||||||
|
// the entry found after a call to Seek(key), until result_handler returns
|
||||||
|
// false, where k is the actual internal key for a row found and v as the
|
||||||
|
// value of the key. didIO is true if I/O is involved in the operation. May
|
||||||
|
// not make such a call if filter policy says that key is not present.
|
||||||
|
//
|
||||||
|
// mark_key_may_exist_handler needs to be called when it is configured to be
|
||||||
|
// memory only and the key is not found in the block cache, with
|
||||||
|
// the parameter to be handle_context.
|
||||||
|
//
|
||||||
|
// readOptions is the options for the read
|
||||||
|
// key is the key to search for
|
||||||
|
virtual Status Get( |
||||||
|
const ReadOptions& readOptions, const Slice& key, void* handle_context, |
||||||
|
bool (*result_handler)(void* arg, const ParsedInternalKey& k, |
||||||
|
const Slice& v, bool didIO), |
||||||
|
void (*mark_key_may_exist_handler)(void* handle_context) = nullptr) = 0; |
||||||
|
}; |
||||||
|
|
||||||
|
} // namespace rocksdb
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,36 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#include "dynamic_bloom.h" |
||||||
|
|
||||||
|
#include "rocksdb/slice.h" |
||||||
|
#include "util/hash.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
namespace { |
||||||
|
static uint32_t BloomHash(const Slice& key) { |
||||||
|
return Hash(key.data(), key.size(), 0xbc9f1d34); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
DynamicBloom::DynamicBloom(uint32_t total_bits, |
||||||
|
uint32_t (*hash_func)(const Slice& key), |
||||||
|
uint32_t num_probes) |
||||||
|
: hash_func_(hash_func), |
||||||
|
kTotalBits((total_bits + 7) / 8 * 8), |
||||||
|
kNumProbes(num_probes) { |
||||||
|
assert(hash_func_); |
||||||
|
assert(kNumProbes > 0); |
||||||
|
assert(kTotalBits > 0); |
||||||
|
data_.reset(new unsigned char[kTotalBits / 8]()); |
||||||
|
} |
||||||
|
|
||||||
|
DynamicBloom::DynamicBloom(uint32_t total_bits, |
||||||
|
uint32_t num_probes) |
||||||
|
: DynamicBloom(total_bits, &BloomHash, num_probes) { |
||||||
|
} |
||||||
|
|
||||||
|
} // rocksdb
|
@ -0,0 +1,72 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#include <atomic> |
||||||
|
#include <memory> |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class Slice; |
||||||
|
|
||||||
|
class DynamicBloom { |
||||||
|
public: |
||||||
|
// total_bits: fixed total bits for the bloom
|
||||||
|
// hash_func: customized hash function
|
||||||
|
// num_probes: number of hash probes for a single key
|
||||||
|
DynamicBloom(uint32_t total_bits, |
||||||
|
uint32_t (*hash_func)(const Slice& key), |
||||||
|
uint32_t num_probes = 6); |
||||||
|
|
||||||
|
explicit DynamicBloom(uint32_t total_bits, uint32_t num_probes = 6); |
||||||
|
|
||||||
|
// Assuming single threaded access to this function.
|
||||||
|
void Add(const Slice& key); |
||||||
|
|
||||||
|
// Assuming single threaded access to this function.
|
||||||
|
void AddHash(uint32_t hash); |
||||||
|
|
||||||
|
// Multithreaded access to this function is OK
|
||||||
|
bool MayContain(const Slice& key); |
||||||
|
|
||||||
|
// Multithreaded access to this function is OK
|
||||||
|
bool MayContainHash(uint32_t hash); |
||||||
|
|
||||||
|
private: |
||||||
|
uint32_t (*hash_func_)(const Slice& key); |
||||||
|
const uint32_t kTotalBits; |
||||||
|
const uint32_t kNumProbes; |
||||||
|
std::unique_ptr<unsigned char[]> data_; |
||||||
|
}; |
||||||
|
|
||||||
|
inline void DynamicBloom::Add(const Slice& key) { AddHash(hash_func_(key)); } |
||||||
|
|
||||||
|
inline bool DynamicBloom::MayContain(const Slice& key) { |
||||||
|
return (MayContainHash(hash_func_(key))); |
||||||
|
} |
||||||
|
|
||||||
|
inline bool DynamicBloom::MayContainHash(uint32_t h) { |
||||||
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
||||||
|
for (uint32_t i = 0; i < kNumProbes; i++) { |
||||||
|
const uint32_t bitpos = h % kTotalBits; |
||||||
|
if (((data_[bitpos / 8]) & (1 << (bitpos % 8))) == 0) { |
||||||
|
return false; |
||||||
|
} |
||||||
|
h += delta; |
||||||
|
} |
||||||
|
return true; |
||||||
|
} |
||||||
|
|
||||||
|
inline void DynamicBloom::AddHash(uint32_t h) { |
||||||
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
||||||
|
for (uint32_t i = 0; i < kNumProbes; i++) { |
||||||
|
const uint32_t bitpos = h % kTotalBits; |
||||||
|
data_[bitpos / 8] |= (1 << (bitpos % 8)); |
||||||
|
h += delta; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
} // rocksdb
|
@ -0,0 +1,113 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#include <gflags/gflags.h> |
||||||
|
|
||||||
|
#include "dynamic_bloom.h" |
||||||
|
#include "util/logging.h" |
||||||
|
#include "util/testharness.h" |
||||||
|
#include "util/testutil.h" |
||||||
|
|
||||||
|
DEFINE_int32(bits_per_key, 10, ""); |
||||||
|
DEFINE_int32(num_probes, 6, ""); |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
static Slice Key(int i, char* buffer) { |
||||||
|
memcpy(buffer, &i, sizeof(i)); |
||||||
|
return Slice(buffer, sizeof(i)); |
||||||
|
} |
||||||
|
|
||||||
|
class DynamicBloomTest { |
||||||
|
}; |
||||||
|
|
||||||
|
TEST(DynamicBloomTest, EmptyFilter) { |
||||||
|
DynamicBloom bloom(100, 2); |
||||||
|
ASSERT_TRUE(! bloom.MayContain("hello")); |
||||||
|
ASSERT_TRUE(! bloom.MayContain("world")); |
||||||
|
} |
||||||
|
|
||||||
|
TEST(DynamicBloomTest, Small) { |
||||||
|
DynamicBloom bloom(100, 2); |
||||||
|
bloom.Add("hello"); |
||||||
|
bloom.Add("world"); |
||||||
|
ASSERT_TRUE(bloom.MayContain("hello")); |
||||||
|
ASSERT_TRUE(bloom.MayContain("world")); |
||||||
|
ASSERT_TRUE(! bloom.MayContain("x")); |
||||||
|
ASSERT_TRUE(! bloom.MayContain("foo")); |
||||||
|
} |
||||||
|
|
||||||
|
static int NextLength(int length) { |
||||||
|
if (length < 10) { |
||||||
|
length += 1; |
||||||
|
} else if (length < 100) { |
||||||
|
length += 10; |
||||||
|
} else if (length < 1000) { |
||||||
|
length += 100; |
||||||
|
} else { |
||||||
|
length += 1000; |
||||||
|
} |
||||||
|
return length; |
||||||
|
} |
||||||
|
|
||||||
|
TEST(DynamicBloomTest, VaryingLengths) { |
||||||
|
char buffer[sizeof(int)]; |
||||||
|
|
||||||
|
// Count number of filters that significantly exceed the false positive rate
|
||||||
|
int mediocre_filters = 0; |
||||||
|
int good_filters = 0; |
||||||
|
|
||||||
|
fprintf(stderr, "bits_per_key: %d num_probes: %d\n", |
||||||
|
FLAGS_bits_per_key, FLAGS_num_probes); |
||||||
|
|
||||||
|
for (int length = 1; length <= 10000; length = NextLength(length)) { |
||||||
|
uint32_t bloom_bits = std::max(length * FLAGS_bits_per_key, 64); |
||||||
|
DynamicBloom bloom(bloom_bits, FLAGS_num_probes); |
||||||
|
for (int i = 0; i < length; i++) { |
||||||
|
bloom.Add(Key(i, buffer)); |
||||||
|
ASSERT_TRUE(bloom.MayContain(Key(i, buffer))); |
||||||
|
} |
||||||
|
|
||||||
|
// All added keys must match
|
||||||
|
for (int i = 0; i < length; i++) { |
||||||
|
ASSERT_TRUE(bloom.MayContain(Key(i, buffer))) |
||||||
|
<< "Length " << length << "; key " << i; |
||||||
|
} |
||||||
|
|
||||||
|
// Check false positive rate
|
||||||
|
|
||||||
|
int result = 0; |
||||||
|
for (int i = 0; i < 10000; i++) { |
||||||
|
if (bloom.MayContain(Key(i + 1000000000, buffer))) { |
||||||
|
result++; |
||||||
|
} |
||||||
|
} |
||||||
|
double rate = result / 10000.0; |
||||||
|
|
||||||
|
fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; \n", |
||||||
|
rate*100.0, length); |
||||||
|
|
||||||
|
//ASSERT_LE(rate, 0.02); // Must not be over 2%
|
||||||
|
if (rate > 0.0125) |
||||||
|
mediocre_filters++; // Allowed, but not too often
|
||||||
|
else |
||||||
|
good_filters++; |
||||||
|
} |
||||||
|
|
||||||
|
fprintf(stderr, "Filters: %d good, %d mediocre\n", |
||||||
|
good_filters, mediocre_filters); |
||||||
|
|
||||||
|
ASSERT_LE(mediocre_filters, good_filters/5); |
||||||
|
} |
||||||
|
|
||||||
|
// Different bits-per-byte
|
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
int main(int argc, char** argv) { |
||||||
|
google::ParseCommandLineFlags(&argc, &argv, true); |
||||||
|
|
||||||
|
return rocksdb::test::RunAllTests(); |
||||||
|
} |
@ -0,0 +1,470 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "util/hash_linklist_rep.h" |
||||||
|
|
||||||
|
#include "rocksdb/memtablerep.h" |
||||||
|
#include "util/arena.h" |
||||||
|
#include "rocksdb/slice.h" |
||||||
|
#include "rocksdb/slice_transform.h" |
||||||
|
#include "port/port.h" |
||||||
|
#include "port/atomic_pointer.h" |
||||||
|
#include "util/murmurhash.h" |
||||||
|
#include "db/memtable.h" |
||||||
|
#include "db/skiplist.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
namespace { |
||||||
|
|
||||||
|
typedef const char* Key; |
||||||
|
|
||||||
|
struct Node { |
||||||
|
explicit Node(const Key& k) : |
||||||
|
key(k) { |
||||||
|
} |
||||||
|
|
||||||
|
Key const key; |
||||||
|
|
||||||
|
// Accessors/mutators for links. Wrapped in methods so we can
|
||||||
|
// add the appropriate barriers as necessary.
|
||||||
|
Node* Next() { |
||||||
|
// Use an 'acquire load' so that we observe a fully initialized
|
||||||
|
// version of the returned Node.
|
||||||
|
return reinterpret_cast<Node*>(next_.Acquire_Load()); |
||||||
|
} |
||||||
|
void SetNext(Node* x) { |
||||||
|
// Use a 'release store' so that anybody who reads through this
|
||||||
|
// pointer observes a fully initialized version of the inserted node.
|
||||||
|
next_.Release_Store(x); |
||||||
|
} |
||||||
|
|
||||||
|
// No-barrier variants that can be safely used in a few locations.
|
||||||
|
Node* NoBarrier_Next() { |
||||||
|
return reinterpret_cast<Node*>(next_.NoBarrier_Load()); |
||||||
|
} |
||||||
|
void NoBarrier_SetNext(Node* x) { |
||||||
|
next_.NoBarrier_Store(x); |
||||||
|
} |
||||||
|
|
||||||
|
private: |
||||||
|
port::AtomicPointer next_; |
||||||
|
}; |
||||||
|
|
||||||
|
class HashLinkListRep : public MemTableRep { |
||||||
|
public: |
||||||
|
HashLinkListRep(MemTableRep::KeyComparator& compare, Arena* arena, |
||||||
|
const SliceTransform* transform, size_t bucket_size); |
||||||
|
|
||||||
|
virtual void Insert(const char* key) override; |
||||||
|
|
||||||
|
virtual bool Contains(const char* key) const override; |
||||||
|
|
||||||
|
virtual size_t ApproximateMemoryUsage() override; |
||||||
|
|
||||||
|
virtual ~HashLinkListRep(); |
||||||
|
|
||||||
|
virtual MemTableRep::Iterator* GetIterator() override; |
||||||
|
|
||||||
|
virtual MemTableRep::Iterator* GetIterator(const Slice& slice) override; |
||||||
|
|
||||||
|
virtual MemTableRep::Iterator* GetPrefixIterator(const Slice& prefix) |
||||||
|
override; |
||||||
|
|
||||||
|
virtual MemTableRep::Iterator* GetDynamicPrefixIterator() override; |
||||||
|
|
||||||
|
private: |
||||||
|
friend class DynamicIterator; |
||||||
|
typedef SkipList<const char*, MemTableRep::KeyComparator&> FullList; |
||||||
|
|
||||||
|
size_t bucket_size_; |
||||||
|
|
||||||
|
// Maps slices (which are transformed user keys) to buckets of keys sharing
|
||||||
|
// the same transform.
|
||||||
|
port::AtomicPointer* buckets_; |
||||||
|
|
||||||
|
// The user-supplied transform whose domain is the user keys.
|
||||||
|
const SliceTransform* transform_; |
||||||
|
|
||||||
|
MemTableRep::KeyComparator& compare_; |
||||||
|
// immutable after construction
|
||||||
|
Arena* const arena_; |
||||||
|
|
||||||
|
bool BucketContains(Node* head, const Slice& key) const; |
||||||
|
|
||||||
|
Slice GetPrefix(const Slice& internal_key) const { |
||||||
|
return transform_->Transform(ExtractUserKey(internal_key)); |
||||||
|
} |
||||||
|
|
||||||
|
size_t GetHash(const Slice& slice) const { |
||||||
|
return MurmurHash(slice.data(), slice.size(), 0) % bucket_size_; |
||||||
|
} |
||||||
|
|
||||||
|
Node* GetBucket(size_t i) const { |
||||||
|
return static_cast<Node*>(buckets_[i].Acquire_Load()); |
||||||
|
} |
||||||
|
|
||||||
|
Node* GetBucket(const Slice& slice) const { |
||||||
|
return GetBucket(GetHash(slice)); |
||||||
|
} |
||||||
|
|
||||||
|
Node* NewNode(const Key& key) { |
||||||
|
char* mem = arena_->AllocateAligned(sizeof(Node)); |
||||||
|
return new (mem) Node(key); |
||||||
|
} |
||||||
|
|
||||||
|
bool Equal(const Slice& a, const Key& b) const { |
||||||
|
return (compare_(b, a) == 0); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); } |
||||||
|
|
||||||
|
bool KeyIsAfterNode(const Slice& internal_key, const Node* n) const { |
||||||
|
// nullptr n is considered infinite
|
||||||
|
return (n != nullptr) && (compare_(n->key, internal_key) < 0); |
||||||
|
} |
||||||
|
|
||||||
|
bool KeyIsAfterNode(const Key& key, const Node* n) const { |
||||||
|
// nullptr n is considered infinite
|
||||||
|
return (n != nullptr) && (compare_(n->key, key) < 0); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
Node* FindGreaterOrEqualInBucket(Node* head, const Slice& key) const; |
||||||
|
|
||||||
|
class FullListIterator : public MemTableRep::Iterator { |
||||||
|
public: |
||||||
|
explicit FullListIterator(FullList* list) |
||||||
|
: iter_(list), full_list_(list) {} |
||||||
|
|
||||||
|
virtual ~FullListIterator() { |
||||||
|
} |
||||||
|
|
||||||
|
// Returns true iff the iterator is positioned at a valid node.
|
||||||
|
virtual bool Valid() const { |
||||||
|
return iter_.Valid(); |
||||||
|
} |
||||||
|
|
||||||
|
// Returns the key at the current position.
|
||||||
|
// REQUIRES: Valid()
|
||||||
|
virtual const char* key() const { |
||||||
|
assert(Valid()); |
||||||
|
return iter_.key(); |
||||||
|
} |
||||||
|
|
||||||
|
// Advances to the next position.
|
||||||
|
// REQUIRES: Valid()
|
||||||
|
virtual void Next() { |
||||||
|
assert(Valid()); |
||||||
|
iter_.Next(); |
||||||
|
} |
||||||
|
|
||||||
|
// Advances to the previous position.
|
||||||
|
// REQUIRES: Valid()
|
||||||
|
virtual void Prev() { |
||||||
|
assert(Valid()); |
||||||
|
iter_.Prev(); |
||||||
|
} |
||||||
|
|
||||||
|
// Advance to the first entry with a key >= target
|
||||||
|
virtual void Seek(const Slice& internal_key, const char* memtable_key) { |
||||||
|
const char* encoded_key = |
||||||
|
(memtable_key != nullptr) ? |
||||||
|
memtable_key : EncodeKey(&tmp_, internal_key); |
||||||
|
iter_.Seek(encoded_key); |
||||||
|
} |
||||||
|
|
||||||
|
// Position at the first entry in collection.
|
||||||
|
// Final state of iterator is Valid() iff collection is not empty.
|
||||||
|
virtual void SeekToFirst() { |
||||||
|
iter_.SeekToFirst(); |
||||||
|
} |
||||||
|
|
||||||
|
// Position at the last entry in collection.
|
||||||
|
// Final state of iterator is Valid() iff collection is not empty.
|
||||||
|
virtual void SeekToLast() { |
||||||
|
iter_.SeekToLast(); |
||||||
|
} |
||||||
|
private: |
||||||
|
FullList::Iterator iter_; |
||||||
|
// To destruct with the iterator.
|
||||||
|
std::unique_ptr<FullList> full_list_; |
||||||
|
std::string tmp_; // For passing to EncodeKey
|
||||||
|
}; |
||||||
|
|
||||||
|
class Iterator : public MemTableRep::Iterator { |
||||||
|
public: |
||||||
|
explicit Iterator(const HashLinkListRep* const hash_link_list_rep, |
||||||
|
Node* head) : |
||||||
|
hash_link_list_rep_(hash_link_list_rep), head_(head), node_(nullptr) { |
||||||
|
} |
||||||
|
|
||||||
|
virtual ~Iterator() { |
||||||
|
} |
||||||
|
|
||||||
|
// Returns true iff the iterator is positioned at a valid node.
|
||||||
|
virtual bool Valid() const { |
||||||
|
return node_ != nullptr; |
||||||
|
} |
||||||
|
|
||||||
|
// Returns the key at the current position.
|
||||||
|
// REQUIRES: Valid()
|
||||||
|
virtual const char* key() const { |
||||||
|
assert(Valid()); |
||||||
|
return node_->key; |
||||||
|
} |
||||||
|
|
||||||
|
// Advances to the next position.
|
||||||
|
// REQUIRES: Valid()
|
||||||
|
virtual void Next() { |
||||||
|
assert(Valid()); |
||||||
|
node_ = node_->Next(); |
||||||
|
} |
||||||
|
|
||||||
|
// Advances to the previous position.
|
||||||
|
// REQUIRES: Valid()
|
||||||
|
virtual void Prev() { |
||||||
|
// Prefix iterator does not support total order.
|
||||||
|
// We simply set the iterator to invalid state
|
||||||
|
Reset(nullptr); |
||||||
|
} |
||||||
|
|
||||||
|
// Advance to the first entry with a key >= target
|
||||||
|
virtual void Seek(const Slice& internal_key, const char* memtable_key) { |
||||||
|
node_ = hash_link_list_rep_->FindGreaterOrEqualInBucket(head_, |
||||||
|
internal_key); |
||||||
|
} |
||||||
|
|
||||||
|
// Position at the first entry in collection.
|
||||||
|
// Final state of iterator is Valid() iff collection is not empty.
|
||||||
|
virtual void SeekToFirst() { |
||||||
|
// Prefix iterator does not support total order.
|
||||||
|
// We simply set the iterator to invalid state
|
||||||
|
Reset(nullptr); |
||||||
|
} |
||||||
|
|
||||||
|
// Position at the last entry in collection.
|
||||||
|
// Final state of iterator is Valid() iff collection is not empty.
|
||||||
|
virtual void SeekToLast() { |
||||||
|
// Prefix iterator does not support total order.
|
||||||
|
// We simply set the iterator to invalid state
|
||||||
|
Reset(nullptr); |
||||||
|
} |
||||||
|
|
||||||
|
protected: |
||||||
|
void Reset(Node* head) { |
||||||
|
head_ = head; |
||||||
|
node_ = nullptr; |
||||||
|
} |
||||||
|
private: |
||||||
|
friend class HashLinkListRep; |
||||||
|
const HashLinkListRep* const hash_link_list_rep_; |
||||||
|
Node* head_; |
||||||
|
Node* node_; |
||||||
|
std::string tmp_; // For passing to EncodeKey
|
||||||
|
|
||||||
|
virtual void SeekToHead() { |
||||||
|
node_ = head_; |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
class DynamicIterator : public HashLinkListRep::Iterator { |
||||||
|
public: |
||||||
|
explicit DynamicIterator(HashLinkListRep& memtable_rep) |
||||||
|
: HashLinkListRep::Iterator(&memtable_rep, nullptr), |
||||||
|
memtable_rep_(memtable_rep) {} |
||||||
|
|
||||||
|
// Advance to the first entry with a key >= target
|
||||||
|
virtual void Seek(const Slice& k, const char* memtable_key) { |
||||||
|
auto transformed = memtable_rep_.GetPrefix(k); |
||||||
|
Reset(memtable_rep_.GetBucket(transformed)); |
||||||
|
HashLinkListRep::Iterator::Seek(k, memtable_key); |
||||||
|
} |
||||||
|
|
||||||
|
private: |
||||||
|
// the underlying memtable
|
||||||
|
const HashLinkListRep& memtable_rep_; |
||||||
|
}; |
||||||
|
|
||||||
|
class EmptyIterator : public MemTableRep::Iterator { |
||||||
|
// This is used when there wasn't a bucket. It is cheaper than
|
||||||
|
// instantiating an empty bucket over which to iterate.
|
||||||
|
public: |
||||||
|
EmptyIterator() { } |
||||||
|
virtual bool Valid() const { |
||||||
|
return false; |
||||||
|
} |
||||||
|
virtual const char* key() const { |
||||||
|
assert(false); |
||||||
|
return nullptr; |
||||||
|
} |
||||||
|
virtual void Next() { } |
||||||
|
virtual void Prev() { } |
||||||
|
virtual void Seek(const Slice& user_key, const char* memtable_key) { } |
||||||
|
virtual void SeekToFirst() { } |
||||||
|
virtual void SeekToLast() { } |
||||||
|
private: |
||||||
|
}; |
||||||
|
}; |
||||||
|
|
||||||
|
HashLinkListRep::HashLinkListRep(MemTableRep::KeyComparator& compare, |
||||||
|
Arena* arena, const SliceTransform* transform, |
||||||
|
size_t bucket_size) |
||||||
|
: bucket_size_(bucket_size), |
||||||
|
transform_(transform), |
||||||
|
compare_(compare), |
||||||
|
arena_(arena) { |
||||||
|
char* mem = arena_->AllocateAligned( |
||||||
|
sizeof(port::AtomicPointer) * bucket_size); |
||||||
|
|
||||||
|
buckets_ = new (mem) port::AtomicPointer[bucket_size]; |
||||||
|
|
||||||
|
for (size_t i = 0; i < bucket_size_; ++i) { |
||||||
|
buckets_[i].NoBarrier_Store(nullptr); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
HashLinkListRep::~HashLinkListRep() { |
||||||
|
} |
||||||
|
|
||||||
|
void HashLinkListRep::Insert(const char* key) { |
||||||
|
assert(!Contains(key)); |
||||||
|
Slice internal_key = GetLengthPrefixedSlice(key); |
||||||
|
auto transformed = GetPrefix(internal_key); |
||||||
|
auto& bucket = buckets_[GetHash(transformed)]; |
||||||
|
Node* head = static_cast<Node*>(bucket.Acquire_Load()); |
||||||
|
|
||||||
|
if (!head) { |
||||||
|
Node* x = NewNode(key); |
||||||
|
// NoBarrier_SetNext() suffices since we will add a barrier when
|
||||||
|
// we publish a pointer to "x" in prev[i].
|
||||||
|
x->NoBarrier_SetNext(nullptr); |
||||||
|
bucket.Release_Store(static_cast<void*>(x)); |
||||||
|
return; |
||||||
|
} |
||||||
|
|
||||||
|
Node* cur = head; |
||||||
|
Node* prev = nullptr; |
||||||
|
while (true) { |
||||||
|
if (cur == nullptr) { |
||||||
|
break; |
||||||
|
} |
||||||
|
Node* next = cur->Next(); |
||||||
|
// Make sure the lists are sorted.
|
||||||
|
// If x points to head_ or next points nullptr, it is trivially satisfied.
|
||||||
|
assert((cur == head) || (next == nullptr) || |
||||||
|
KeyIsAfterNode(next->key, cur)); |
||||||
|
if (KeyIsAfterNode(internal_key, cur)) { |
||||||
|
// Keep searching in this list
|
||||||
|
prev = cur; |
||||||
|
cur = next; |
||||||
|
} else { |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Our data structure does not allow duplicate insertion
|
||||||
|
assert(cur == nullptr || !Equal(key, cur->key)); |
||||||
|
|
||||||
|
Node* x = NewNode(key); |
||||||
|
|
||||||
|
// NoBarrier_SetNext() suffices since we will add a barrier when
|
||||||
|
// we publish a pointer to "x" in prev[i].
|
||||||
|
x->NoBarrier_SetNext(cur); |
||||||
|
|
||||||
|
if (prev) { |
||||||
|
prev->SetNext(x); |
||||||
|
} else { |
||||||
|
bucket.Release_Store(static_cast<void*>(x)); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
bool HashLinkListRep::Contains(const char* key) const { |
||||||
|
Slice internal_key = GetLengthPrefixedSlice(key); |
||||||
|
|
||||||
|
auto transformed = GetPrefix(internal_key); |
||||||
|
auto bucket = GetBucket(transformed); |
||||||
|
if (bucket == nullptr) { |
||||||
|
return false; |
||||||
|
} |
||||||
|
return BucketContains(bucket, internal_key); |
||||||
|
} |
||||||
|
|
||||||
|
size_t HashLinkListRep::ApproximateMemoryUsage() { |
||||||
|
// Memory is always allocated from the arena.
|
||||||
|
return 0; |
||||||
|
} |
||||||
|
|
||||||
|
MemTableRep::Iterator* HashLinkListRep::GetIterator() { |
||||||
|
auto list = new FullList(compare_, arena_); |
||||||
|
for (size_t i = 0; i < bucket_size_; ++i) { |
||||||
|
auto bucket = GetBucket(i); |
||||||
|
if (bucket != nullptr) { |
||||||
|
Iterator itr(this, bucket); |
||||||
|
for (itr.SeekToHead(); itr.Valid(); itr.Next()) { |
||||||
|
list->Insert(itr.key()); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
return new FullListIterator(list); |
||||||
|
} |
||||||
|
|
||||||
|
MemTableRep::Iterator* HashLinkListRep::GetPrefixIterator( |
||||||
|
const Slice& prefix) { |
||||||
|
auto bucket = GetBucket(prefix); |
||||||
|
if (bucket == nullptr) { |
||||||
|
return new EmptyIterator(); |
||||||
|
} |
||||||
|
return new Iterator(this, bucket); |
||||||
|
} |
||||||
|
|
||||||
|
MemTableRep::Iterator* HashLinkListRep::GetIterator(const Slice& slice) { |
||||||
|
return GetPrefixIterator(transform_->Transform(slice)); |
||||||
|
} |
||||||
|
|
||||||
|
MemTableRep::Iterator* HashLinkListRep::GetDynamicPrefixIterator() { |
||||||
|
return new DynamicIterator(*this); |
||||||
|
} |
||||||
|
|
||||||
|
bool HashLinkListRep::BucketContains(Node* head, const Slice& user_key) const { |
||||||
|
Node* x = FindGreaterOrEqualInBucket(head, user_key); |
||||||
|
return (x != nullptr && Equal(user_key, x->key)); |
||||||
|
} |
||||||
|
|
||||||
|
Node* HashLinkListRep::FindGreaterOrEqualInBucket(Node* head, |
||||||
|
const Slice& key) const { |
||||||
|
Node* x = head; |
||||||
|
while (true) { |
||||||
|
if (x == nullptr) { |
||||||
|
return x; |
||||||
|
} |
||||||
|
Node* next = x->Next(); |
||||||
|
// Make sure the lists are sorted.
|
||||||
|
// If x points to head_ or next points nullptr, it is trivially satisfied.
|
||||||
|
assert((x == head) || (next == nullptr) || KeyIsAfterNode(next->key, x)); |
||||||
|
if (KeyIsAfterNode(key, x)) { |
||||||
|
// Keep searching in this list
|
||||||
|
x = next; |
||||||
|
} else { |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
return x; |
||||||
|
} |
||||||
|
|
||||||
|
} // anon namespace
|
||||||
|
|
||||||
|
MemTableRep* HashLinkListRepFactory::CreateMemTableRep( |
||||||
|
MemTableRep::KeyComparator& compare, Arena* arena) { |
||||||
|
return new HashLinkListRep(compare, arena, transform_, bucket_count_); |
||||||
|
} |
||||||
|
|
||||||
|
MemTableRepFactory* NewHashLinkListRepFactory( |
||||||
|
const SliceTransform* transform, size_t bucket_count) { |
||||||
|
return new HashLinkListRepFactory(transform, bucket_count); |
||||||
|
} |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,39 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
#include "rocksdb/slice_transform.h" |
||||||
|
#include "rocksdb/memtablerep.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class HashLinkListRepFactory : public MemTableRepFactory { |
||||||
|
public: |
||||||
|
explicit HashLinkListRepFactory( |
||||||
|
const SliceTransform* transform, |
||||||
|
size_t bucket_count) |
||||||
|
: transform_(transform), |
||||||
|
bucket_count_(bucket_count) { } |
||||||
|
|
||||||
|
virtual ~HashLinkListRepFactory() { delete transform_; } |
||||||
|
|
||||||
|
virtual MemTableRep* CreateMemTableRep(MemTableRep::KeyComparator& compare, |
||||||
|
Arena* arena) override; |
||||||
|
|
||||||
|
virtual const char* Name() const override { |
||||||
|
return "HashLinkListRepFactory"; |
||||||
|
} |
||||||
|
|
||||||
|
const SliceTransform* GetTransform() { return transform_; } |
||||||
|
|
||||||
|
private: |
||||||
|
const SliceTransform* transform_; |
||||||
|
const size_t bucket_count_; |
||||||
|
}; |
||||||
|
|
||||||
|
} |
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue