Summary: Deprecate this one option and delete code and tests that are now superfluous. Test Plan: all tests pass Reviewers: igor, yhchiang, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: msalib, leveldb, andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D55317 main
parent
0e77246ba9
commit
1d725ca51d
@ -1,159 +0,0 @@ |
|||||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
||||||
// This source code is licensed under the BSD-style license found in the
|
|
||||||
// LICENSE file in the root directory of this source tree. An additional grant
|
|
||||||
// of patent rights can be found in the PATENTS file in the same directory.
|
|
||||||
|
|
||||||
#include "table/block_hash_index.h" |
|
||||||
|
|
||||||
#include <algorithm> |
|
||||||
|
|
||||||
#include "rocksdb/comparator.h" |
|
||||||
#include "rocksdb/iterator.h" |
|
||||||
#include "rocksdb/slice_transform.h" |
|
||||||
#include "table/internal_iterator.h" |
|
||||||
#include "util/coding.h" |
|
||||||
|
|
||||||
namespace rocksdb { |
|
||||||
|
|
||||||
// Builds a BlockHashIndex from serialized metadata.
//
// `prefix_meta` is decoded as a sequence of varint32 triples
// (prefix_size, entry_index, num_blocks). Each triple describes the next
// `prefix_size` bytes of `prefixes`. The index is created with
// own_prefixes == false, so the Slices handed to Add() point into `prefixes`
// rather than being copied.
//
// On success *hash_index points to a newly allocated index that the caller
// must delete. On failure a Corruption status is returned, the partially
// built index is destroyed and *hash_index is reset to nullptr.
Status CreateBlockHashIndex(const SliceTransform* hash_key_extractor,
                            const Slice& prefixes, const Slice& prefix_meta,
                            BlockHashIndex** hash_index) {
  uint64_t pos = 0;
  auto meta_pos = prefix_meta;
  Status s;
  *hash_index = new BlockHashIndex(
      hash_key_extractor,
      false /* external module manages memory space for prefixes */);

  while (!meta_pos.empty()) {
    uint32_t prefix_size = 0;
    uint32_t entry_index = 0;
    uint32_t num_blocks = 0;
    if (!GetVarint32(&meta_pos, &prefix_size) ||
        !GetVarint32(&meta_pos, &entry_index) ||
        !GetVarint32(&meta_pos, &num_blocks)) {
      s = Status::Corruption(
          "Corrupted prefix meta block: unable to read from it.");
      break;
    }

    // Reject metadata that claims more prefix bytes than `prefixes` holds,
    // so we never build a Slice that extends past the end of the buffer.
    // `pos` never exceeds prefixes.size() here, so the subtraction is safe.
    if (prefix_size > prefixes.size() - pos) {
      s = Status::Corruption("Corrupted prefix meta block");
      break;
    }

    Slice prefix(prefixes.data() + pos, prefix_size);
    // The result of Add() (false when the prefix is already present) is
    // intentionally not inspected here.
    (*hash_index)->Add(prefix, entry_index, num_blocks);

    pos += prefix_size;
  }

  // Every byte of `prefixes` must be accounted for by the metadata.
  if (s.ok() && pos != prefixes.size()) {
    s = Status::Corruption("Corrupted prefix meta block");
  }

  if (!s.ok()) {
    delete *hash_index;
    // Do not leave the caller holding a dangling pointer.
    *hash_index = nullptr;
  }

  return s;
}
|
||||||
|
|
||||||
// Builds a BlockHashIndex by walking the index entries and the data entries
// side by side.
//
// Each index entry's key is treated as the last key of one data block. All
// data entries whose key compares <= that key (per `comparator`) are assigned
// to the block. Consecutive data entries that share the prefix produced by
// `hash_key_extractor` form a run; each run is added to the index together
// with the restart index of its first block and the number of blocks it
// spans. At most `num_restarts` index entries are visited.
//
// Returns a newly allocated index owned by the caller, or nullptr if any
// Add() call fails.
BlockHashIndex* CreateBlockHashIndexOnTheFly(
    InternalIterator* index_iter, InternalIterator* data_iter,
    const uint32_t num_restarts, const Comparator* comparator,
    const SliceTransform* hash_key_extractor) {
  assert(hash_key_extractor);
  BlockHashIndex* const index = new BlockHashIndex(
      hash_key_extractor,
      true /* hash_index will copy prefix when Add() is called */);

  // The prefix run that has been observed but not yet added to the index.
  // run_num_blocks == 0 means that no entry has been seen at all so far.
  std::string run_prefix;
  uint32_t run_first_restart = 0;
  uint32_t run_num_blocks = 0;

  uint32_t restart = 0;

  // Walk every data entry, one data block (index entry) at a time.
  data_iter->SeekToFirst();
  for (index_iter->SeekToFirst();
       index_iter->Valid() && restart < num_restarts; index_iter->Next()) {
    const Slice block_last_key = index_iter->key();
    assert(data_iter->Valid() && data_iter->status().ok());

    // Consume all entries that belong to the current data block.
    for (; data_iter->Valid() &&
           comparator->Compare(data_iter->key(), block_last_key) <= 0;
         data_iter->Next()) {
      const Slice prefix = hash_key_extractor->Transform(data_iter->key());
      const bool nothing_pending = (run_num_blocks == 0);
      const bool starts_new_run = nothing_pending || run_prefix != prefix;

      if (!starts_new_run) {
        // Same prefix as the pending run: the run only grows when this entry
        // lives in a later data block than the run's current last block.
        const uint32_t run_last_restart =
            run_first_restart + run_num_blocks - 1;
        assert(run_last_restart <= restart);
        if (run_last_restart != restart) {
          ++run_num_blocks;
        }
        continue;
      }

      // A different prefix shows up: flush the previous run, if there is one.
      if (!nothing_pending &&
          !index->Add(run_prefix, run_first_restart, run_num_blocks)) {
        delete index;
        return nullptr;
      }

      // Start tracking the new run. The prefix is copied into a std::string
      // because the Slice's underlying data changes as the iterator advances.
      run_prefix = prefix.ToString();
      run_num_blocks = 1;
      run_first_restart = restart;
    }

    ++restart;
  }

  // Make sure all entries have been scanned.
  assert(!index_iter->Valid());
  assert(!data_iter->Valid());

  // Flush the final run.
  if (run_num_blocks > 0 &&
      !index->Add(run_prefix, run_first_restart, run_num_blocks)) {
    delete index;
    return nullptr;
  }

  return index;
}
|
||||||
|
|
||||||
bool BlockHashIndex::Add(const Slice& prefix, uint32_t restart_index, |
|
||||||
uint32_t num_blocks) { |
|
||||||
auto prefix_to_insert = prefix; |
|
||||||
if (kOwnPrefixes) { |
|
||||||
auto prefix_ptr = arena_.Allocate(prefix.size()); |
|
||||||
// MSVC reports C4996 Function call with parameters that may be
|
|
||||||
// unsafe when using std::copy with a output iterator - pointer
|
|
||||||
memcpy(prefix_ptr, prefix.data(), prefix.size()); |
|
||||||
prefix_to_insert = Slice(prefix_ptr, prefix.size()); |
|
||||||
} |
|
||||||
auto result = restart_indices_.insert( |
|
||||||
{prefix_to_insert, RestartIndex(restart_index, num_blocks)}); |
|
||||||
return result.second; |
|
||||||
} |
|
||||||
|
|
||||||
// Looks up the entry registered for the prefix of `key`.
//
// The prefix is obtained by applying hash_key_extractor_ to `key`. Returns a
// pointer to the entry stored inside restart_indices_, or nullptr when no
// entry exists for that prefix.
const BlockHashIndex::RestartIndex* BlockHashIndex::GetRestartIndex(
    const Slice& key) {
  const auto entry =
      restart_indices_.find(hash_key_extractor_->Transform(key));
  return entry != restart_indices_.end() ? &entry->second : nullptr;
}
|
||||||
|
|
||||||
} // namespace rocksdb
|
|
@ -1,86 +0,0 @@ |
|||||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
||||||
// This source code is licensed under the BSD-style license found in the
|
|
||||||
// LICENSE file in the root directory of this source tree. An additional grant
|
|
||||||
// of patent rights can be found in the PATENTS file in the same directory.
|
|
||||||
#pragma once |
|
||||||
|
|
||||||
#include <string> |
|
||||||
#include <unordered_map> |
|
||||||
|
|
||||||
#include "rocksdb/status.h" |
|
||||||
#include "util/arena.h" |
|
||||||
#include "util/murmurhash.h" |
|
||||||
|
|
||||||
namespace rocksdb { |
|
||||||
|
|
||||||
class Comparator; |
|
||||||
class InternalIterator; |
|
||||||
class Slice; |
|
||||||
class SliceTransform; |
|
||||||
|
|
||||||
// Build a hash-based index to speed up the lookup for "index block".
|
|
||||||
// BlockHashIndex accepts a key and, if found, returns its restart index within
|
|
||||||
// that index block.
|
|
||||||
class BlockHashIndex { |
|
||||||
public: |
|
||||||
// Represents a restart index in the index block's restart array.
|
|
||||||
struct RestartIndex { |
|
||||||
explicit RestartIndex(uint32_t _first_index, uint32_t _num_blocks = 1) |
|
||||||
: first_index(_first_index), num_blocks(_num_blocks) {} |
|
||||||
|
|
||||||
// For a given prefix, what is the restart index for the first data block
|
|
||||||
// that contains it.
|
|
||||||
uint32_t first_index = 0; |
|
||||||
|
|
||||||
// How many data blocks contains this prefix?
|
|
||||||
uint32_t num_blocks = 1; |
|
||||||
}; |
|
||||||
|
|
||||||
// @params own_prefixes indicate if we should take care the memory space for
|
|
||||||
// the `key_prefix`
|
|
||||||
// passed by Add()
|
|
||||||
explicit BlockHashIndex(const SliceTransform* hash_key_extractor, |
|
||||||
bool own_prefixes) |
|
||||||
: hash_key_extractor_(hash_key_extractor), kOwnPrefixes(own_prefixes) {} |
|
||||||
|
|
||||||
// Maps a key to its restart first_index.
|
|
||||||
// Returns nullptr if the restart first_index is found
|
|
||||||
const RestartIndex* GetRestartIndex(const Slice& key); |
|
||||||
|
|
||||||
bool Add(const Slice& key_prefix, uint32_t restart_index, |
|
||||||
uint32_t num_blocks); |
|
||||||
|
|
||||||
size_t ApproximateMemoryUsage() const { |
|
||||||
return arena_.ApproximateMemoryUsage(); |
|
||||||
} |
|
||||||
|
|
||||||
private: |
|
||||||
const SliceTransform* hash_key_extractor_; |
|
||||||
std::unordered_map<Slice, RestartIndex, murmur_hash> restart_indices_; |
|
||||||
|
|
||||||
Arena arena_; |
|
||||||
bool kOwnPrefixes; |
|
||||||
}; |
|
||||||
|
|
||||||
// Create hash index by reading from the metadata blocks.
|
|
||||||
// @params prefixes: a sequence of prefixes.
|
|
||||||
// @params prefix_meta: contains the "metadata" of the prefixes.
|
|
||||||
Status CreateBlockHashIndex(const SliceTransform* hash_key_extractor, |
|
||||||
const Slice& prefixes, const Slice& prefix_meta, |
|
||||||
BlockHashIndex** hash_index); |
|
||||||
|
|
||||||
// Create hash index by scanning the entries in index as well as the whole
|
|
||||||
// dataset.
|
|
||||||
// @params index_iter: an iterator with the pointer to the first entry in a
|
|
||||||
// block.
|
|
||||||
// @params data_iter: an iterator that can scan all the entries that reside in a
|
|
||||||
// table.
|
|
||||||
// @params num_restarts: used for correctness verification.
|
|
||||||
// @params hash_key_extractor: extract the hashable part of a given key.
|
|
||||||
// On error, nullptr will be returned.
|
|
||||||
BlockHashIndex* CreateBlockHashIndexOnTheFly( |
|
||||||
InternalIterator* index_iter, InternalIterator* data_iter, |
|
||||||
const uint32_t num_restarts, const Comparator* comparator, |
|
||||||
const SliceTransform* hash_key_extractor); |
|
||||||
|
|
||||||
} // namespace rocksdb
|
|
@ -1,121 +0,0 @@ |
|||||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
||||||
// This source code is licensed under the BSD-style license found in the
|
|
||||||
// LICENSE file in the root directory of this source tree. An additional grant
|
|
||||||
// of patent rights can be found in the PATENTS file in the same directory.
|
|
||||||
|
|
||||||
#include <map> |
|
||||||
#include <memory> |
|
||||||
#include <vector> |
|
||||||
|
|
||||||
#include "rocksdb/comparator.h" |
|
||||||
#include "rocksdb/iterator.h" |
|
||||||
#include "rocksdb/slice_transform.h" |
|
||||||
#include "table/block_hash_index.h" |
|
||||||
#include "table/internal_iterator.h" |
|
||||||
#include "util/testharness.h" |
|
||||||
#include "util/testutil.h" |
|
||||||
|
|
||||||
namespace rocksdb { |
|
||||||
|
|
||||||
typedef std::map<std::string, std::string> Data; |
|
||||||
|
|
||||||
class MapIterator : public InternalIterator { |
|
||||||
public: |
|
||||||
explicit MapIterator(const Data& data) : data_(data), pos_(data_.end()) {} |
|
||||||
|
|
||||||
virtual bool Valid() const override { return pos_ != data_.end(); } |
|
||||||
|
|
||||||
virtual void SeekToFirst() override { pos_ = data_.begin(); } |
|
||||||
|
|
||||||
virtual void SeekToLast() override { |
|
||||||
pos_ = data_.end(); |
|
||||||
--pos_; |
|
||||||
} |
|
||||||
|
|
||||||
virtual void Seek(const Slice& target) override { |
|
||||||
pos_ = data_.find(target.ToString()); |
|
||||||
} |
|
||||||
|
|
||||||
virtual void Next() override { ++pos_; } |
|
||||||
|
|
||||||
virtual void Prev() override { --pos_; } |
|
||||||
|
|
||||||
virtual Slice key() const override { return pos_->first; } |
|
||||||
|
|
||||||
virtual Slice value() const override { return pos_->second; } |
|
||||||
|
|
||||||
virtual Status status() const override { return Status::OK(); } |
|
||||||
|
|
||||||
private: |
|
||||||
const Data& data_; |
|
||||||
Data::const_iterator pos_; |
|
||||||
}; |
|
||||||
|
|
||||||
class BlockTest : public testing::Test {}; |
|
||||||
|
|
||||||
TEST_F(BlockTest, BasicTest) { |
|
||||||
const size_t keys_per_block = 4; |
|
||||||
const size_t prefix_size = 2; |
|
||||||
std::vector<std::string> keys = {/* block 1 */ |
|
||||||
"0101", "0102", "0103", "0201", |
|
||||||
/* block 2 */ |
|
||||||
"0202", "0203", "0301", "0401", |
|
||||||
/* block 3 */ |
|
||||||
"0501", "0601", "0701", "0801", |
|
||||||
/* block 4 */ |
|
||||||
"0802", "0803", "0804", "0805", |
|
||||||
/* block 5 */ |
|
||||||
"0806", "0807", "0808", "0809", }; |
|
||||||
|
|
||||||
Data data_entries; |
|
||||||
for (const auto key : keys) { |
|
||||||
data_entries.insert({key, key}); |
|
||||||
} |
|
||||||
|
|
||||||
Data index_entries; |
|
||||||
for (size_t i = 3; i < keys.size(); i += keys_per_block) { |
|
||||||
// simply ignore the value part
|
|
||||||
index_entries.insert({keys[i], ""}); |
|
||||||
} |
|
||||||
|
|
||||||
MapIterator data_iter(data_entries); |
|
||||||
MapIterator index_iter(index_entries); |
|
||||||
|
|
||||||
auto prefix_extractor = NewFixedPrefixTransform(prefix_size); |
|
||||||
std::unique_ptr<BlockHashIndex> block_hash_index(CreateBlockHashIndexOnTheFly( |
|
||||||
&index_iter, &data_iter, static_cast<uint32_t>(index_entries.size()), |
|
||||||
BytewiseComparator(), prefix_extractor)); |
|
||||||
|
|
||||||
std::map<std::string, BlockHashIndex::RestartIndex> expected = { |
|
||||||
{"01xx", BlockHashIndex::RestartIndex(0, 1)}, |
|
||||||
{"02yy", BlockHashIndex::RestartIndex(0, 2)}, |
|
||||||
{"03zz", BlockHashIndex::RestartIndex(1, 1)}, |
|
||||||
{"04pp", BlockHashIndex::RestartIndex(1, 1)}, |
|
||||||
{"05ww", BlockHashIndex::RestartIndex(2, 1)}, |
|
||||||
{"06xx", BlockHashIndex::RestartIndex(2, 1)}, |
|
||||||
{"07pp", BlockHashIndex::RestartIndex(2, 1)}, |
|
||||||
{"08xz", BlockHashIndex::RestartIndex(2, 3)}, }; |
|
||||||
|
|
||||||
const BlockHashIndex::RestartIndex* index = nullptr; |
|
||||||
// search existed prefixes
|
|
||||||
for (const auto& item : expected) { |
|
||||||
index = block_hash_index->GetRestartIndex(item.first); |
|
||||||
ASSERT_TRUE(index != nullptr); |
|
||||||
ASSERT_EQ(item.second.first_index, index->first_index); |
|
||||||
ASSERT_EQ(item.second.num_blocks, index->num_blocks); |
|
||||||
} |
|
||||||
|
|
||||||
// search non exist prefixes
|
|
||||||
ASSERT_TRUE(!block_hash_index->GetRestartIndex("00xx")); |
|
||||||
ASSERT_TRUE(!block_hash_index->GetRestartIndex("10yy")); |
|
||||||
ASSERT_TRUE(!block_hash_index->GetRestartIndex("20zz")); |
|
||||||
|
|
||||||
delete prefix_extractor; |
|
||||||
} |
|
||||||
|
|
||||||
} // namespace rocksdb
|
|
||||||
|
|
||||||
int main(int argc, char** argv) { |
|
||||||
::testing::InitGoogleTest(&argc, argv); |
|
||||||
return RUN_ALL_TESTS(); |
|
||||||
} |
|
Loading…
Reference in new issue