Enable hash index for block-based table

Summary: Building on the previous patches, this diff finally provides the end-to-end mechanism for users to enable the hash index.

Test Plan: Wrote several new unit tests.

Reviewers: sdong, haobo, dhruba

Reviewed By: sdong

CC: leveldb

Differential Revision: https://reviews.facebook.net/D16539
main
Kai Liu 11 years ago
parent 7a92537fc4
commit 75b59d5146
  1. 42
      db/db_test.cc
  2. 31
      db/dbformat.h
  3. 6
      include/rocksdb/table.h
  4. 113
      table/block.cc
  5. 18
      table/block.h
  6. 36
      table/block_based_table_builder.cc
  7. 76
      table/block_based_table_reader.cc
  8. 4
      table/block_based_table_reader.h
  9. 164
      table/block_test.cc
  10. 110
      table/table_test.cc

@ -266,6 +266,8 @@ class DBTest {
// Sequence of option configurations to try // Sequence of option configurations to try
enum OptionConfig { enum OptionConfig {
kDefault, kDefault,
kBlockBasedTableWithPrefixHashIndex,
kBlockBasedTableWithWholeKeyHashIndex,
kPlainTableFirstBytePrefix, kPlainTableFirstBytePrefix,
kPlainTableAllBytesPrefix, kPlainTableAllBytesPrefix,
kVectorRep, kVectorRep,
@ -303,7 +305,8 @@ class DBTest {
kSkipDeletesFilterFirst = 1, kSkipDeletesFilterFirst = 1,
kSkipUniversalCompaction = 2, kSkipUniversalCompaction = 2,
kSkipMergePut = 4, kSkipMergePut = 4,
kSkipPlainTable = 8 kSkipPlainTable = 8,
kSkipHashIndex = 16
}; };
DBTest() : option_config_(kDefault), DBTest() : option_config_(kDefault),
@ -343,6 +346,12 @@ class DBTest {
|| option_config_ == kPlainTableFirstBytePrefix)) { || option_config_ == kPlainTableFirstBytePrefix)) {
continue; continue;
} }
// Skip both hash-index flavours of the block-based table when the caller
// set kSkipHashIndex in skip_mask. (The plain-table configurations are
// handled by the kSkipPlainTable check; they must not share a flag, otherwise
// kSkipHashIndex would have no effect.)
if ((skip_mask & kSkipHashIndex) &&
    (option_config_ == kBlockBasedTableWithPrefixHashIndex ||
     option_config_ == kBlockBasedTableWithWholeKeyHashIndex)) {
  continue;
}
break; break;
} }
@ -439,6 +448,20 @@ class DBTest {
case kInfiniteMaxOpenFiles: case kInfiniteMaxOpenFiles:
options.max_open_files = -1; options.max_open_files = -1;
break; break;
case kBlockBasedTableWithPrefixHashIndex: {
BlockBasedTableOptions table_options;
table_options.index_type = BlockBasedTableOptions::kHashSearch;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
options.prefix_extractor.reset(NewFixedPrefixTransform(1));
break;
}
case kBlockBasedTableWithWholeKeyHashIndex: {
BlockBasedTableOptions table_options;
table_options.index_type = BlockBasedTableOptions::kHashSearch;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
options.prefix_extractor.reset(NewNoopTransform());
break;
}
default: default:
break; break;
} }
@ -1363,7 +1386,7 @@ TEST(DBTest, KeyMayExist) {
// KeyMayExist function only checks data in block caches, which is not used // KeyMayExist function only checks data in block caches, which is not used
// by plain table format. // by plain table format.
} while (ChangeOptions(kSkipPlainTable)); } while (ChangeOptions(kSkipPlainTable | kSkipHashIndex));
} }
TEST(DBTest, NonBlockingIteration) { TEST(DBTest, NonBlockingIteration) {
@ -6184,7 +6207,9 @@ TEST(DBTest, Randomized) {
int minimum = 0; int minimum = 0;
if (option_config_ == kHashSkipList || if (option_config_ == kHashSkipList ||
option_config_ == kHashLinkList || option_config_ == kHashLinkList ||
option_config_ == kPlainTableFirstBytePrefix) { option_config_ == kPlainTableFirstBytePrefix ||
option_config_ == kBlockBasedTableWithWholeKeyHashIndex ||
option_config_ == kBlockBasedTableWithPrefixHashIndex) {
minimum = 1; minimum = 1;
} }
if (p < 45) { // Put if (p < 45) { // Put
@ -6224,8 +6249,15 @@ TEST(DBTest, Randomized) {
} }
if ((step % 100) == 0) { if ((step % 100) == 0) {
ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr)); // For DB instances that use the hash index + block-based table, the
ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap)); // iterator will be invalid right when seeking a non-existent key, rather
// than returning a key that is close to it.
if (option_config_ != kBlockBasedTableWithWholeKeyHashIndex &&
option_config_ != kBlockBasedTableWithPrefixHashIndex) {
ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
}
// Save a snapshot from each DB this time that we'll use next // Save a snapshot from each DB this time that we'll use next
// time we compare things, to make sure the current state is // time we compare things, to make sure the current state is
// preserved with the snapshot // preserved with the snapshot

@ -13,6 +13,7 @@
#include "rocksdb/db.h" #include "rocksdb/db.h"
#include "rocksdb/filter_policy.h" #include "rocksdb/filter_policy.h"
#include "rocksdb/slice.h" #include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/table.h" #include "rocksdb/table.h"
#include "rocksdb/types.h" #include "rocksdb/types.h"
#include "util/coding.h" #include "util/coding.h"
@ -304,4 +305,34 @@ class IterKey {
void operator=(const IterKey&) = delete; void operator=(const IterKey&) = delete;
}; };
class InternalKeySliceTransform : public SliceTransform {
public:
explicit InternalKeySliceTransform(const SliceTransform* transform)
: transform_(transform) {}
virtual const char* Name() const { return transform_->Name(); }
virtual Slice Transform(const Slice& src) const {
auto user_key = ExtractUserKey(src);
return transform_->Transform(user_key);
}
virtual bool InDomain(const Slice& src) const {
auto user_key = ExtractUserKey(src);
return transform_->InDomain(user_key);
}
virtual bool InRange(const Slice& dst) const {
auto user_key = ExtractUserKey(dst);
return transform_->InRange(user_key);
}
const SliceTransform* user_prefix_extractor() const { return transform_; }
private:
// Like comparator, InternalKeySliceTransform will not take care of the
// deletion of transform_
const SliceTransform* const transform_;
};
} // namespace rocksdb } // namespace rocksdb

@ -60,6 +60,12 @@ struct BlockBasedTableOptions {
// A space efficient index block that is optimized for // A space efficient index block that is optimized for
// binary-search-based index. // binary-search-based index.
kBinarySearch, kBinarySearch,
// The hash index, if enabled, will do the hash lookup when
// `ReadOptions.prefix_seek == true`. Users should also specify
// `Options.prefix_extractor` to allow the index block to correctly
// extract the prefix of the given key and perform the hash table lookup.
kHashSearch,
}; };
IndexType index_type = kBinarySearch; IndexType index_type = kBinarySearch;

@ -11,16 +11,20 @@
#include "table/block.h" #include "table/block.h"
#include <vector>
#include <algorithm> #include <algorithm>
#include <string>
#include <unordered_map>
#include <vector>
#include "rocksdb/comparator.h" #include "rocksdb/comparator.h"
#include "table/block_hash_index.h"
#include "table/format.h" #include "table/format.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/logging.h" #include "util/logging.h"
namespace rocksdb { namespace rocksdb {
inline uint32_t Block::NumRestarts() const { uint32_t Block::NumRestarts() const {
assert(size_ >= 2*sizeof(uint32_t)); assert(size_ >= 2*sizeof(uint32_t));
return DecodeFixed32(data_ + size_ - sizeof(uint32_t)); return DecodeFixed32(data_ + size_ - sizeof(uint32_t));
} }
@ -92,6 +96,7 @@ class Block::Iter : public Iterator {
std::string key_; std::string key_;
Slice value_; Slice value_;
Status status_; Status status_;
BlockHashIndex* hash_index_;
inline int Compare(const Slice& a, const Slice& b) const { inline int Compare(const Slice& a, const Slice& b) const {
return comparator_->Compare(a, b); return comparator_->Compare(a, b);
@ -118,16 +123,15 @@ class Block::Iter : public Iterator {
} }
public: public:
Iter(const Comparator* comparator, Iter(const Comparator* comparator, const char* data, uint32_t restarts,
const char* data, uint32_t num_restarts, BlockHashIndex* hash_index)
uint32_t restarts,
uint32_t num_restarts)
: comparator_(comparator), : comparator_(comparator),
data_(data), data_(data),
restarts_(restarts), restarts_(restarts),
num_restarts_(num_restarts), num_restarts_(num_restarts),
current_(restarts_), current_(restarts_),
restart_index_(num_restarts_) { restart_index_(num_restarts_),
hash_index_(hash_index) {
assert(num_restarts_ > 0); assert(num_restarts_ > 0);
} }
@ -169,45 +173,22 @@ class Block::Iter : public Iterator {
} }
virtual void Seek(const Slice& target) { virtual void Seek(const Slice& target) {
// Binary search in restart array to find the first restart point uint32_t index = 0;
// with a key >= target bool ok = hash_index_ ? HashSeek(target, &index)
uint32_t left = 0; : BinarySeek(target, 0, num_restarts_ - 1, &index);
uint32_t right = num_restarts_ - 1;
while (left < right) {
uint32_t mid = (left + right + 1) / 2;
uint32_t region_offset = GetRestartPoint(mid);
uint32_t shared, non_shared, value_length;
const char* key_ptr = DecodeEntry(data_ + region_offset,
data_ + restarts_,
&shared, &non_shared, &value_length);
if (key_ptr == nullptr || (shared != 0)) {
CorruptionError();
return;
}
Slice mid_key(key_ptr, non_shared);
if (Compare(mid_key, target) < 0) {
// Key at "mid" is smaller than "target". Therefore all
// blocks before "mid" are uninteresting.
left = mid;
} else {
// Key at "mid" is >= "target". Therefore all blocks at or
// after "mid" are uninteresting.
right = mid - 1;
}
}
if (!ok) {
return;
}
SeekToRestartPoint(index);
// Linear search (within restart block) for first key >= target // Linear search (within restart block) for first key >= target
SeekToRestartPoint(left);
while (true) { while (true) {
if (!ParseNextKey()) { if (!ParseNextKey() || Compare(key_, target) >= 0) {
return;
}
if (Compare(key_, target) >= 0) {
return; return;
} }
} }
} }
virtual void SeekToFirst() { virtual void SeekToFirst() {
SeekToRestartPoint(0); SeekToRestartPoint(0);
ParseNextKey(); ParseNextKey();
@ -257,6 +238,53 @@ class Block::Iter : public Iterator {
return true; return true;
} }
} }
// Binary search over the restart points in [left, right].
//
// On success returns true and stores in *index the last restart point in
// (left, right] whose key compares less than `target`, or `left` itself when
// there is none (the key stored at `left` is never examined). The caller is
// expected to continue with a linear scan from that restart point.
//
// Returns false, after calling CorruptionError(), when an entry at a probed
// restart point cannot be decoded or reports a non-zero shared length.
bool BinarySeek(const Slice& target, uint32_t left, uint32_t right,
                uint32_t* index) {
  assert(left <= right);

  uint32_t low = left;
  uint32_t high = right;
  while (low < high) {
    // Round up so that `probe` is always strictly greater than `low`.
    const uint32_t probe = (low + high + 1) / 2;

    uint32_t shared_len, unshared_len, value_len;
    const char* entry =
        DecodeEntry(data_ + GetRestartPoint(probe), data_ + restarts_,
                    &shared_len, &unshared_len, &value_len);
    if (entry == nullptr || shared_len != 0) {
      CorruptionError();
      return false;
    }

    if (Compare(Slice(entry, unshared_len), target) < 0) {
      // The probed key is smaller than the target: nothing before `probe`
      // is interesting any more.
      low = probe;
    } else {
      // The probed key is >= the target: `probe` and everything after it
      // can be discarded.
      high = probe - 1;
    }
  }

  *index = low;
  return true;
}
// Uses the block hash index to narrow down the restart points that may hold
// `target`, then binary-searches only that range.
//
// Returns false when the hash index has no entry for `target` (current_ is
// then reset to restarts_, the same value the constructor gives an iterator
// that has not been positioned yet) or when BinarySeek() fails. On success
// *index receives the restart point chosen by BinarySeek().
bool HashSeek(const Slice& target, uint32_t* index) {
  assert(hash_index_);
  auto restart_index = hash_index_->GetRestartIndex(target);
  if (restart_index == nullptr) {
    current_ = restarts_;
    return false;
  }

  // The restart points in [first_index, first_index + num_blocks) all share
  // the same prefix, so the binary search only has to cover that small range.
  auto left = restart_index->first_index;
  auto right = restart_index->first_index + restart_index->num_blocks - 1;
  return BinarySeek(target, left, right, index);
}
}; };
Iterator* Block::NewIterator(const Comparator* cmp) { Iterator* Block::NewIterator(const Comparator* cmp) {
@ -267,8 +295,13 @@ Iterator* Block::NewIterator(const Comparator* cmp) {
if (num_restarts == 0) { if (num_restarts == 0) {
return NewEmptyIterator(); return NewEmptyIterator();
} else { } else {
return new Iter(cmp, data_, restart_offset_, num_restarts); return new Iter(cmp, data_, restart_offset_, num_restarts,
hash_index_.get());
} }
} }
void Block::SetBlockHashIndex(BlockHashIndex* hash_index) {
hash_index_.reset(hash_index);
}
} // namespace rocksdb } // namespace rocksdb

@ -10,6 +10,7 @@
#pragma once #pragma once
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include "rocksdb/iterator.h" #include "rocksdb/iterator.h"
#include "rocksdb/options.h" #include "rocksdb/options.h"
@ -17,6 +18,7 @@ namespace rocksdb {
struct BlockContents; struct BlockContents;
class Comparator; class Comparator;
class BlockHashIndex;
class Block { class Block {
public: public:
@ -26,20 +28,28 @@ class Block {
~Block(); ~Block();
size_t size() const { return size_; } size_t size() const { return size_; }
bool cachable() const { return cachable_; } const char* data() const { return data_; }
bool cachable() const { return cachable_; }
uint32_t NumRestarts() const;
CompressionType compression_type() const { return compression_type_; } CompressionType compression_type() const { return compression_type_; }
// If a hash index has been attached with SetBlockHashIndex(), the returned
// iterator will do a hash lookup on the key prefix when seeking.
//
// NOTE: for the hash based lookup, if a key prefix doesn't match any key,
// the iterator will simply be set as "invalid", rather than returning
// the key that is just past the target key.
Iterator* NewIterator(const Comparator* comparator); Iterator* NewIterator(const Comparator* comparator);
const char* data() { return data_; } void SetBlockHashIndex(BlockHashIndex* hash_index);
private: private:
uint32_t NumRestarts() const;
const char* data_; const char* data_;
size_t size_; size_t size_;
uint32_t restart_offset_; // Offset in data_ of restart array uint32_t restart_offset_; // Offset in data_ of restart array
bool owned_; // Block owns data_[] bool owned_; // Block owns data_[]
bool cachable_; bool cachable_;
CompressionType compression_type_; CompressionType compression_type_;
std::unique_ptr<BlockHashIndex> hash_index_;
// No copying allowed // No copying allowed
Block(const Block&); Block(const Block&);

@ -97,9 +97,9 @@ class IndexBuilder {
// 2. Shorten the key length for index block. Other than honestly using the // 2. Shorten the key length for index block. Other than honestly using the
// last key in the data block as the index key, we instead find a shortest // last key in the data block as the index key, we instead find a shortest
// substitute key that serves the same function. // substitute key that serves the same function.
class BinarySearchIndexBuilder : public IndexBuilder { class ShortenedIndexBuilder : public IndexBuilder {
public: public:
explicit BinarySearchIndexBuilder(const Comparator* comparator) explicit ShortenedIndexBuilder(const Comparator* comparator)
: IndexBuilder(comparator), : IndexBuilder(comparator),
index_block_builder_(1 /* block_restart_interval == 1 */, comparator) {} index_block_builder_(1 /* block_restart_interval == 1 */, comparator) {}
@ -128,11 +128,41 @@ class BinarySearchIndexBuilder : public IndexBuilder {
BlockBuilder index_block_builder_; BlockBuilder index_block_builder_;
}; };
// FullKeyIndexBuilder is also based on BlockBuilder. It works pretty much like
// ShortenedIndexBuilder, but preserves the full key instead the substitude key.
// with the reason being that hash index is based on "prefix".
class FullKeyIndexBuilder : public IndexBuilder {
public:
explicit FullKeyIndexBuilder(const Comparator* comparator)
: IndexBuilder(comparator),
index_block_builder_(1 /* block_restart_interval == 1 */, comparator) {}
virtual void AddEntry(std::string* last_key_in_current_block,
const Slice* first_key_in_next_block,
const BlockHandle& block_handle) override {
std::string handle_encoding;
block_handle.EncodeTo(&handle_encoding);
index_block_builder_.Add(*last_key_in_current_block, handle_encoding);
}
virtual Slice Finish() override { return index_block_builder_.Finish(); }
virtual size_t EstimatedSize() const {
return index_block_builder_.CurrentSizeEstimate();
}
private:
BlockBuilder index_block_builder_;
};
// Create a index builder based on its type. // Create a index builder based on its type.
IndexBuilder* CreateIndexBuilder(IndexType type, const Comparator* comparator) { IndexBuilder* CreateIndexBuilder(IndexType type, const Comparator* comparator) {
switch (type) { switch (type) {
case BlockBasedTableOptions::kBinarySearch: { case BlockBasedTableOptions::kBinarySearch: {
return new BinarySearchIndexBuilder(comparator); return new ShortenedIndexBuilder(comparator);
}
case BlockBasedTableOptions::kHashSearch: {
return new FullKeyIndexBuilder(comparator);
} }
default: { default: {
assert(!"Do not recognize the index type "); assert(!"Do not recognize the index type ");

@ -25,6 +25,7 @@
#include "table/block.h" #include "table/block.h"
#include "table/filter_block.h" #include "table/filter_block.h"
#include "table/block_hash_index.h"
#include "table/format.h" #include "table/format.h"
#include "table/meta_blocks.h" #include "table/meta_blocks.h"
#include "table/two_level_iterator.h" #include "table/two_level_iterator.h"
@ -180,19 +181,51 @@ class BinarySearchIndexReader : public IndexReader {
std::unique_ptr<Block> index_block_; std::unique_ptr<Block> index_block_;
}; };
// TODO(kailiu) This class is only a stub for now. And the comment below is also
// not completed.
// Index that leverages an internal hash table to quicken the lookup for a given // Index that leverages an internal hash table to quicken the lookup for a given
// key. // key.
// @param data_iter_gen, equivalent to BlockBasedTable::NewIterator(). But that
// function requires the index to be initialized. To avoid this problem, the
// external caller passes a function that can create an iterator over the
// entries without the table being fully initialized.
class HashIndexReader : public IndexReader { class HashIndexReader : public IndexReader {
public: public:
static Status Create(RandomAccessFile* file, const BlockHandle& index_handle, static Status Create(RandomAccessFile* file, const BlockHandle& index_handle,
Env* env, const Comparator* comparator, Env* env, const Comparator* comparator,
BlockBasedTable* table, std::function<Iterator*(Iterator*)> data_iter_gen,
const SliceTransform* prefix_extractor, const SliceTransform* prefix_extractor,
IndexReader** index_reader) { IndexReader** index_reader) {
return Status::NotSupported("not implemented yet!"); assert(prefix_extractor);
Block* index_block = nullptr;
auto s =
ReadBlockFromFile(file, ReadOptions(), index_handle, &index_block, env);
if (!s.ok()) {
return s;
}
*index_reader = new HashIndexReader(comparator, index_block);
std::unique_ptr<Iterator> index_iter(index_block->NewIterator(nullptr));
std::unique_ptr<Iterator> data_iter(
data_iter_gen(index_block->NewIterator(nullptr)));
auto hash_index = CreateBlockHashIndex(index_iter.get(), data_iter.get(),
index_block->NumRestarts(),
comparator, prefix_extractor);
index_block->SetBlockHashIndex(hash_index);
return s;
}
virtual Iterator* NewIterator() override {
return index_block_->NewIterator(comparator_);
} }
virtual size_t size() const override { return index_block_->size(); }
private:
HashIndexReader(const Comparator* comparator, Block* index_block)
: IndexReader(comparator), index_block_(index_block) {
assert(index_block_ != nullptr);
}
std::unique_ptr<Block> index_block_;
}; };
@ -223,6 +256,11 @@ struct BlockBasedTable::Rep {
std::shared_ptr<const TableProperties> table_properties; std::shared_ptr<const TableProperties> table_properties;
BlockBasedTableOptions::IndexType index_type; BlockBasedTableOptions::IndexType index_type;
// TODO(kailiu) It is very ugly to use internal key in table, since table
// module should not be relying on db module. However to make things easier
// and compatible with existing code, we introduce a wrapper that allows
// block to extract prefix without knowing if a key is internal or not.
unique_ptr<SliceTransform> internal_prefix_transform;
}; };
BlockBasedTable::~BlockBasedTable() { BlockBasedTable::~BlockBasedTable() {
@ -747,8 +785,7 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
return { filter, cache_handle }; return { filter, cache_handle };
} }
Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options) Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options) {
const {
// index reader has already been pre-populated. // index reader has already been pre-populated.
if (rep_->index_reader) { if (rep_->index_reader) {
return rep_->index_reader->NewIterator(); return rep_->index_reader->NewIterator();
@ -978,7 +1015,7 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options,
// 3. options // 3. options
// 4. internal_comparator // 4. internal_comparator
// 5. index_type // 5. index_type
Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) const { Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) {
// Some old version of block-based tables don't have index type present in // Some old version of block-based tables don't have index type present in
// table properties. If that's the case we can safely use the kBinarySearch. // table properties. If that's the case we can safely use the kBinarySearch.
auto index_type = BlockBasedTableOptions::kBinarySearch; auto index_type = BlockBasedTableOptions::kBinarySearch;
@ -989,11 +1026,30 @@ Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) const {
DecodeFixed32(pos->second.c_str())); DecodeFixed32(pos->second.c_str()));
} }
auto file = rep_->file.get();
const auto& index_handle = rep_->index_handle;
auto env = rep_->options.env;
auto comparator = &rep_->internal_comparator;
switch (index_type) { switch (index_type) {
case BlockBasedTableOptions::kBinarySearch: { case BlockBasedTableOptions::kBinarySearch: {
return BinarySearchIndexReader::Create( return BinarySearchIndexReader::Create(file, index_handle, env,
rep_->file.get(), rep_->index_handle, rep_->options.env, comparator, index_reader);
&rep_->internal_comparator, index_reader); }
case BlockBasedTableOptions::kHashSearch: {
// We need to wrap data with internal_prefix_transform to make sure it can
// handle prefix correctly.
rep_->internal_prefix_transform.reset(
new InternalKeySliceTransform(rep_->options.prefix_extractor.get()));
return HashIndexReader::Create(
file, index_handle, env, comparator,
[&](Iterator* index_iter) {
return NewTwoLevelIterator(
index_iter, &BlockBasedTable::DataBlockReader,
const_cast<BlockBasedTable*>(this), ReadOptions(),
rep_->soptions, rep_->internal_comparator);
},
rep_->internal_prefix_transform.get(), index_reader);
} }
default: { default: {
std::string error_message = std::string error_message =

@ -131,7 +131,7 @@ class BlockBasedTable : public TableReader {
// 2. index is not present in block cache. // 2. index is not present in block cache.
// 3. We disallowed any io to be performed, that is, read_options == // 3. We disallowed any io to be performed, that is, read_options ==
// kBlockCacheTier // kBlockCacheTier
Iterator* NewIndexIterator(const ReadOptions& read_options) const; Iterator* NewIndexIterator(const ReadOptions& read_options);
// Read block cache from block caches (if set): block_cache and // Read block cache from block caches (if set): block_cache and
// block_cache_compressed. // block_cache_compressed.
@ -164,7 +164,7 @@ class BlockBasedTable : public TableReader {
void ReadMeta(const Footer& footer); void ReadMeta(const Footer& footer);
void ReadFilter(const Slice& filter_handle_value); void ReadFilter(const Slice& filter_handle_value);
Status CreateIndexReader(IndexReader** index_reader) const; Status CreateIndexReader(IndexReader** index_reader);
// Read the meta block from sst. // Read the meta block from sst.
static Status ReadMetaBlock( static Status ReadMetaBlock(

@ -3,7 +3,10 @@
// LICENSE file in the root directory of this source tree. An additional grant // LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory. // of patent rights can be found in the PATENTS file in the same directory.
// //
#include <stdio.h>
#include <string> #include <string>
#include <vector>
#include "db/dbformat.h" #include "db/dbformat.h"
#include "db/memtable.h" #include "db/memtable.h"
#include "db/write_batch_internal.h" #include "db/write_batch_internal.h"
@ -11,9 +14,11 @@
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/iterator.h" #include "rocksdb/iterator.h"
#include "rocksdb/table.h" #include "rocksdb/table.h"
#include "rocksdb/slice_transform.h"
#include "table/block.h" #include "table/block.h"
#include "table/block_builder.h" #include "table/block_builder.h"
#include "table/format.h" #include "table/format.h"
#include "table/block_hash_index.h"
#include "util/random.h" #include "util/random.h"
#include "util/testharness.h" #include "util/testharness.h"
#include "util/testutil.h" #include "util/testutil.h"
@ -25,6 +30,40 @@ static std::string RandomString(Random* rnd, int len) {
test::RandomString(rnd, len, &r); test::RandomString(rnd, len, &r);
return r; return r;
} }
// Formats a test key as primary_key printed with "%6d" immediately followed
// by secondary_key printed with "%4d". When padding_size is non-zero, the
// string returned by RandomString(rnd, padding_size) is appended; `rnd` is
// not touched otherwise.
std::string GenerateKey(int primary_key, int secondary_key, int padding_size,
                        Random *rnd) {
  char formatted[50];
  snprintf(formatted, sizeof(formatted), "%6d%4d", primary_key, secondary_key);

  std::string key(formatted);
  if (padding_size != 0) {
    key.append(RandomString(rnd, padding_size));
  }
  return key;
}
// Generate random key value pairs.
// The generated key will be sorted. You can tune the parameters to generated
// different kinds of test key/value pairs for different scenario.
void GenerateRandomKVs(std::vector<std::string> *keys,
std::vector<std::string> *values, const int from,
const int len, const int step = 1,
const int padding_size = 0,
const int keys_share_prefix = 1) {
Random rnd(302);
// generate different prefix
for (int i = from; i < from + len; i += step) {
// generating keys that shares the prefix
for (int j = 0; j < keys_share_prefix; ++j) {
keys->emplace_back(GenerateKey(i, j, padding_size, &rnd));
// 100 bytes values
values->emplace_back(RandomString(&rnd, 100));
}
}
}
class BlockTest {}; class BlockTest {};
@ -39,24 +78,11 @@ TEST(BlockTest, SimpleTest) {
std::vector<std::string> values; std::vector<std::string> values;
BlockBuilder builder(options, ic.get()); BlockBuilder builder(options, ic.get());
int num_records = 100000; int num_records = 100000;
char buf[10];
char* p = &buf[0];
GenerateRandomKVs(&keys, &values, 0, num_records);
// add a bunch of records to a block // add a bunch of records to a block
for (int i = 0; i < num_records; i++) { for (int i = 0; i < num_records; i++) {
// generate random kvs builder.Add(keys[i], values[i]);
sprintf(p, "%6d", i);
std::string k(p);
std::string v = RandomString(&rnd, 100); // 100 byte values
// write kvs to the block
Slice key(k);
Slice value(v);
builder.Add(key, value);
// remember kvs in a lookaside array
keys.push_back(k);
values.push_back(v);
} }
// read serialized contents of the block // read serialized contents of the block
@ -101,6 +127,114 @@ TEST(BlockTest, SimpleTest) {
delete iter; delete iter;
} }
// Builds a block holding every (keys[i], values[i]) pair and returns its
// contents.
//
// *builder is reset to a fresh BlockBuilder (restart interval 1, bytewise
// comparator) and handed back to the caller; the returned BlockContents is
// marked as neither cachable nor heap allocated.
// NOTE(review): presumably the builder is returned so that the buffer behind
// the contents outlives this call -- confirm.
// `prefix_group_size` is accepted but not used.
BlockContents GetBlockContents(std::unique_ptr<BlockBuilder> *builder,
                               const std::vector<std::string> &keys,
                               const std::vector<std::string> &values,
                               const int prefix_group_size = 1) {
  builder->reset(
      new BlockBuilder(1 /* restart interval */, BytewiseComparator()));
  BlockBuilder &block_builder = **builder;

  // Add all the keys, in the order given.
  const size_t num_entries = keys.size();
  for (size_t pos = 0; pos != num_entries; ++pos) {
    block_builder.Add(keys[pos], values[pos]);
  }

  BlockContents result;
  result.data = block_builder.Finish();
  result.cachable = false;
  result.heap_allocated = false;
  return result;
}
// Verifies seeks on a block with and without a hash index attached.
//
// Two Block readers are created over `contents`; a hash index with a fixed
// 6-byte prefix is attached to the first one only. Every key in `keys` must
// be found through the hash-indexed iterator with the matching entry of
// `values`. Keys generated for the odd primary ids in [1, max_key - 1) must
// make the hash-indexed iterator invalid while the plain iterator stays
// valid (callers in this file generate only even primary ids).
void CheckBlockContents(BlockContents contents, const int max_key,
                        const std::vector<std::string> &keys,
                        const std::vector<std::string> &values) {
  const size_t prefix_size = 6;
  // create block reader
  Block reader1(contents);
  Block reader2(contents);

  std::unique_ptr<const SliceTransform> prefix_extractor(
      NewFixedPrefixTransform(prefix_size));

  {
    // The two iterators are only needed while the hash index is being built;
    // unique_ptr releases them at the end of this scope.
    std::unique_ptr<Iterator> iter1(reader1.NewIterator(nullptr));
    std::unique_ptr<Iterator> iter2(reader1.NewIterator(nullptr));
    reader1.SetBlockHashIndex(
        CreateBlockHashIndex(iter1.get(), iter2.get(), keys.size(),
                             BytewiseComparator(), prefix_extractor.get()));
  }

  std::unique_ptr<Iterator> hash_iter(
      reader1.NewIterator(BytewiseComparator()));

  std::unique_ptr<Iterator> regular_iter(
      reader2.NewIterator(BytewiseComparator()));

  // Seek existent keys
  for (size_t i = 0; i < keys.size(); i++) {
    hash_iter->Seek(keys[i]);
    ASSERT_OK(hash_iter->status());
    ASSERT_TRUE(hash_iter->Valid());

    Slice v = hash_iter->value();
    ASSERT_EQ(v.ToString().compare(values[i]), 0);
  }

  // Seek non-existent keys.
  // For the hash index, if no key with the given prefix is found, the
  // iterator is simply set as invalid; whereas the binary search based
  // iterator will return the one that is closest.
  for (int i = 1; i < max_key - 1; i += 2) {
    auto key = GenerateKey(i, 0, 0, nullptr);
    hash_iter->Seek(key);
    ASSERT_TRUE(!hash_iter->Valid());

    regular_iter->Seek(key);
    ASSERT_TRUE(regular_iter->Valid());
  }
}
// Hash-index lookup where no two keys share the same prefix: one key is
// generated per primary id.
TEST(BlockTest, SimpleIndexHash) {
  const int kKeyRange = 100000;

  std::vector<std::string> keys;
  std::vector<std::string> values;
  GenerateRandomKVs(&keys, &values, 0 /* from */, kKeyRange /* len */,
                    2 /* step */, 8 /* padding size */);

  std::unique_ptr<BlockBuilder> builder;
  auto block_contents = GetBlockContents(&builder, keys, values);

  CheckBlockContents(block_contents, kKeyRange, keys, values);
}
// Hash-index lookup where several keys share each prefix.
TEST(BlockTest, IndexHashWithSharedPrefix) {
  const int kKeyRange = 100000;
  // Number of keys generated for every primary id.
  const int kKeysPerPrefix = 5;

  std::vector<std::string> keys;
  std::vector<std::string> values;
  GenerateRandomKVs(&keys, &values, 0 /* from */, kKeyRange /* len */,
                    2 /* step */, 10 /* padding size */,
                    kKeysPerPrefix /* keys_share_prefix */);

  std::unique_ptr<BlockBuilder> builder;
  auto block_contents = GetBlockContents(&builder, keys, values, kKeysPerPrefix);

  CheckBlockContents(block_contents, kKeyRange, keys, values);
}
} // namespace rocksdb } // namespace rocksdb
int main(int argc, char** argv) { int main(int argc, char** argv) {

@ -1055,6 +1055,116 @@ static std::string RandomString(Random* rnd, int len) {
return r; return r;
} }
// Adds one entry to `c` whose user key is `prefix` followed by the string
// returned by RandomString(&rnd, suffix_len), wrapped in an InternalKey with
// sequence number 0 and type kTypeValue. The value is always "v".
// All calls share one function-local Random seeded with 1023.
void AddInternalKey(TableConstructor* c, const std::string prefix,
                    int suffix_len = 800) {
  static Random rnd(1023);
  // Honour the requested suffix length (it defaults to 800).
  InternalKey k(prefix + RandomString(&rnd, suffix_len), 0, kTypeValue);
  c->Add(k.Encode().ToString(), "v");
}
// End-to-end check of the hash index on a block-based table: builds a table
// from ten internal keys spread over five 3-byte prefixes, then seeks through
// a single iterator for (a) bare prefixes, (b) every stored key, (c) keys past
// the last entry of each prefix and (d) prefixes that were never inserted.
TEST(TableTest, HashIndexTest) {
TableConstructor c(BytewiseComparator());
// Ten keys whose first 3 bytes form the prefixes "001", "003", "005", "007"
// and "009"; AddInternalKey() appends a long random suffix so that the
// key/value is big enough to fill one block.
AddInternalKey(&c, "0015");
AddInternalKey(&c, "0035");
AddInternalKey(&c, "0054");
AddInternalKey(&c, "0055");
AddInternalKey(&c, "0056");
AddInternalKey(&c, "0057");
AddInternalKey(&c, "0058");
AddInternalKey(&c, "0075");
AddInternalKey(&c, "0076");
AddInternalKey(&c, "0095");
std::vector<std::string> keys;
KVMap kvmap;
Options options;
// Select the hash index and a fixed 3-byte prefix extractor.
BlockBasedTableOptions table_options;
table_options.index_type = BlockBasedTableOptions::kHashSearch;
options.table_factory.reset(new BlockBasedTableFactory(table_options));
options.prefix_extractor.reset(NewFixedPrefixTransform(3));
options.block_cache = NewLRUCache(1024);
options.block_size = 1700;
std::unique_ptr<InternalKeyComparator> comparator(
new InternalKeyComparator(BytewiseComparator()));
c.Finish(options, *comparator, &keys, &kvmap);
auto reader = c.table_reader();
// The rest of the test relies on the ten keys ending up in five data blocks.
auto props = c.table_reader()->GetTableProperties();
ASSERT_EQ(5u, props->num_data_blocks);
std::unique_ptr<Iterator> hash_iter(reader->NewIterator(ReadOptions()));
// -- Seek keys that do not exist themselves, but whose prefix does.
std::vector<std::string> prefixes = {"001", "003", "005", "007", "009"};
// lower_bound[i] is the key expected when seeking the bare prefixes[i].
std::vector<std::string> lower_bound = {keys[0], keys[1], keys[2],
keys[7], keys[9], };
// find the lower bound of the prefix
for (size_t i = 0; i < prefixes.size(); ++i) {
hash_iter->Seek(InternalKey(prefixes[i], 0, kTypeValue).Encode());
ASSERT_OK(hash_iter->status());
ASSERT_TRUE(hash_iter->Valid());
// seek the first element in the block
ASSERT_EQ(lower_bound[i], hash_iter->key().ToString());
ASSERT_EQ("v", hash_iter->value().ToString());
}
// upper_bound[i] is the key expected when seeking prefixes[i] + "9" in the
// loop further below; the last prefix has no entry because that seek is
// expected to leave the iterator invalid.
std::vector<std::string> upper_bound = {keys[1], keys[2], keys[7], keys[9], };
// find existing keys
// NOTE(review): this loop seeks with the bare user key, while every other
// seek in this test passes an encoded InternalKey -- confirm this is intended.
for (const auto& item : kvmap) {
auto ukey = ExtractUserKey(item.first).ToString();
hash_iter->Seek(ukey);
// ASSERT_OK(regular_iter->status());
ASSERT_OK(hash_iter->status());
// ASSERT_TRUE(regular_iter->Valid());
ASSERT_TRUE(hash_iter->Valid());
ASSERT_EQ(item.first, hash_iter->key().ToString());
ASSERT_EQ(item.second, hash_iter->value().ToString());
}
// Seek just past the last key of each existing prefix.
for (size_t i = 0; i < prefixes.size(); ++i) {
// the key is greater than any existing keys.
auto key = prefixes[i] + "9";
hash_iter->Seek(InternalKey(key, 0, kTypeValue).Encode());
ASSERT_OK(hash_iter->status());
if (i == prefixes.size() - 1) {
// last key
ASSERT_TRUE(!hash_iter->Valid());
} else {
ASSERT_TRUE(hash_iter->Valid());
// seek the first element in the block
ASSERT_EQ(upper_bound[i], hash_iter->key().ToString());
ASSERT_EQ("v", hash_iter->value().ToString());
}
}
// find keys with prefix that don't match any of the existing prefixes.
// The iterator is expected to become invalid without reporting an error.
std::vector<std::string> non_exist_prefixes = {"002", "004", "006", "008"};
for (const auto& prefix : non_exist_prefixes) {
hash_iter->Seek(InternalKey(prefix, 0, kTypeValue).Encode());
// regular_iter->Seek(prefix);
ASSERT_OK(hash_iter->status());
ASSERT_TRUE(!hash_iter->Valid());
}
}
// It's very hard to figure out the index block size of a block accurately. // It's very hard to figure out the index block size of a block accurately.
// To make sure we get the index size, we just make sure as key number // To make sure we get the index size, we just make sure as key number
// grows, the filter block size also grows. // grows, the filter block size also grows.

Loading…
Cancel
Save