improve memory efficiency of cuckoo reader

Summary:
When creating a new iterator, instead of storing a mapping from key to
bucket id for sorting, store only the bucket id and read the key from the
mmapped file based on that id. This reduces the iterator's memory usage
from 20 bytes per entry to only 4 bytes.

Test Plan: db_bench

Reviewers: igor, yhchiang, sdong

Reviewed By: sdong

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D23757
main
Lei Jin 10 years ago
parent 581442d446
commit c6275956e2
  1. 2
      include/rocksdb/table.h
  2. 7
      table/cuckoo_table_builder.cc
  3. 124
      table/cuckoo_table_reader.cc
  4. 1
      table/cuckoo_table_reader.h

@ -255,6 +255,8 @@ struct CuckooTablePropertyNames {
static const std::string kIdentityAsFirstHash; static const std::string kIdentityAsFirstHash;
// Indicate if using module or bit and to calculate hash value // Indicate if using module or bit and to calculate hash value
static const std::string kUseModuleHash; static const std::string kUseModuleHash;
// Fixed user key length
static const std::string kUserKeyLength;
}; };
struct CuckooTableOptions { struct CuckooTableOptions {

@ -39,6 +39,8 @@ const std::string CuckooTablePropertyNames::kIdentityAsFirstHash =
"rocksdb.cuckoo.hash.identityfirst"; "rocksdb.cuckoo.hash.identityfirst";
const std::string CuckooTablePropertyNames::kUseModuleHash = const std::string CuckooTablePropertyNames::kUseModuleHash =
"rocksdb.cuckoo.hash.usemodule"; "rocksdb.cuckoo.hash.usemodule";
const std::string CuckooTablePropertyNames::kUserKeyLength =
"rocksdb.cuckoo.hash.userkeylength";
// Obtained by running echo rocksdb.table.cuckoo | sha1sum // Obtained by running echo rocksdb.table.cuckoo | sha1sum
extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull; extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
@ -280,6 +282,11 @@ Status CuckooTableBuilder::Finish() {
CuckooTablePropertyNames::kUseModuleHash].assign( CuckooTablePropertyNames::kUseModuleHash].assign(
reinterpret_cast<const char*>(&use_module_hash_), reinterpret_cast<const char*>(&use_module_hash_),
sizeof(use_module_hash_)); sizeof(use_module_hash_));
uint32_t user_key_len = static_cast<uint32_t>(smallest_user_key_.size());
properties_.user_collected_properties[
CuckooTablePropertyNames::kUserKeyLength].assign(
reinterpret_cast<const char*>(&user_key_len),
sizeof(user_key_len));
// Write meta blocks. // Write meta blocks.
MetaIndexBuilder meta_index_builder; MetaIndexBuilder meta_index_builder;

@ -16,6 +16,7 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "rocksdb/iterator.h" #include "rocksdb/iterator.h"
#include "rocksdb/table.h"
#include "table/meta_blocks.h" #include "table/meta_blocks.h"
#include "table/cuckoo_table_factory.h" #include "table/cuckoo_table_factory.h"
#include "util/arena.h" #include "util/arena.h"
@ -23,7 +24,8 @@
namespace rocksdb { namespace rocksdb {
namespace { namespace {
static const uint64_t CACHE_LINE_MASK = ~((uint64_t)CACHE_LINE_SIZE - 1); const uint64_t CACHE_LINE_MASK = ~((uint64_t)CACHE_LINE_SIZE - 1);
const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
} }
extern const uint64_t kCuckooTableMagicNumber; extern const uint64_t kCuckooTableMagicNumber;
@ -62,6 +64,14 @@ CuckooTableReader::CuckooTableReader(
unused_key_ = unused_key->second; unused_key_ = unused_key->second;
key_length_ = props->fixed_key_len; key_length_ = props->fixed_key_len;
auto user_key_len = user_props.find(CuckooTablePropertyNames::kUserKeyLength);
if (user_key_len == user_props.end()) {
status_ = Status::Corruption("User key length not found");
return;
}
user_key_length_ = *reinterpret_cast<const uint32_t*>(
user_key_len->second.data());
auto value_length = user_props.find(CuckooTablePropertyNames::kValueLength); auto value_length = user_props.find(CuckooTablePropertyNames::kValueLength);
if (value_length == user_props.end()) { if (value_length == user_props.end()) {
status_ = Status::Corruption("Value length not found"); status_ = Status::Corruption("Value length not found");
@ -104,7 +114,6 @@ CuckooTableReader::CuckooTableReader(
} }
use_module_hash_ = *reinterpret_cast<const bool*>( use_module_hash_ = *reinterpret_cast<const bool*>(
use_module_hash->second.data()); use_module_hash->second.data());
fprintf(stderr, "use_module_hash %d\n", use_module_hash_);
auto cuckoo_block_size = user_props.find( auto cuckoo_block_size = user_props.find(
CuckooTablePropertyNames::kCuckooBlockSize); CuckooTablePropertyNames::kCuckooBlockSize);
if (cuckoo_block_size == user_props.end()) { if (cuckoo_block_size == user_props.end()) {
@ -185,30 +194,39 @@ class CuckooTableIterator : public Iterator {
void LoadKeysFromReader(); void LoadKeysFromReader();
private: private:
struct CompareKeys { struct BucketComparator {
CompareKeys(const Comparator* ucomp, const bool last_level) BucketComparator(const Slice file_data, const Comparator* ucomp,
: ucomp_(ucomp), uint32_t bucket_len, uint32_t user_key_len,
is_last_level_(last_level) {} const Slice target = Slice())
bool operator()(const std::pair<Slice, uint32_t>& first, : file_data_(file_data),
const std::pair<Slice, uint32_t>& second) const { ucomp_(ucomp),
if (is_last_level_) { bucket_len_(bucket_len),
return ucomp_->Compare(first.first, second.first) < 0; user_key_len_(user_key_len),
} else { target_(target) {}
return ucomp_->Compare(ExtractUserKey(first.first), bool operator()(const uint32_t first, const uint32_t second) const {
ExtractUserKey(second.first)) < 0; const char* first_bucket =
} (first == kInvalidIndex) ? target_.data() :
&file_data_.data()[first * bucket_len_];
const char* second_bucket =
(second == kInvalidIndex) ? target_.data() :
&file_data_.data()[second * bucket_len_];
return ucomp_->Compare(Slice(first_bucket, user_key_len_),
Slice(second_bucket, user_key_len_)) < 0;
} }
private: private:
const Slice file_data_;
const Comparator* ucomp_; const Comparator* ucomp_;
const bool is_last_level_; const uint32_t bucket_len_;
const uint32_t user_key_len_;
const Slice target_;
}; };
const CompareKeys comparator_;
const BucketComparator bucket_comparator_;
void PrepareKVAtCurrIdx(); void PrepareKVAtCurrIdx();
CuckooTableReader* reader_; CuckooTableReader* reader_;
Status status_; Status status_;
// Contains a map of keys to bucket_id sorted in key order. // Contains a map of keys to bucket_id sorted in key order.
std::vector<std::pair<Slice, uint32_t>> key_to_bucket_id_; std::vector<uint32_t> sorted_bucket_ids_;
// We assume that the number of items can be stored in uint32 (4 Billion). // We assume that the number of items can be stored in uint32 (4 Billion).
uint32_t curr_key_idx_; uint32_t curr_key_idx_;
Slice curr_value_; Slice curr_value_;
@ -219,29 +237,31 @@ class CuckooTableIterator : public Iterator {
}; };
CuckooTableIterator::CuckooTableIterator(CuckooTableReader* reader) CuckooTableIterator::CuckooTableIterator(CuckooTableReader* reader)
: comparator_(reader->ucomp_, reader->is_last_level_), : bucket_comparator_(reader->file_data_, reader->ucomp_,
reader->bucket_length_, reader->user_key_length_),
reader_(reader), reader_(reader),
curr_key_idx_(std::numeric_limits<int32_t>::max()) { curr_key_idx_(kInvalidIndex) {
key_to_bucket_id_.clear(); sorted_bucket_ids_.clear();
curr_value_.clear(); curr_value_.clear();
curr_key_.Clear(); curr_key_.Clear();
} }
void CuckooTableIterator::LoadKeysFromReader() { void CuckooTableIterator::LoadKeysFromReader() {
key_to_bucket_id_.reserve(reader_->GetTableProperties()->num_entries); sorted_bucket_ids_.reserve(reader_->GetTableProperties()->num_entries);
uint64_t num_buckets = reader_->table_size_ + reader_->cuckoo_block_size_ - 1; uint64_t num_buckets = reader_->table_size_ + reader_->cuckoo_block_size_ - 1;
for (uint32_t bucket_id = 0; bucket_id < num_buckets; bucket_id++) { assert(num_buckets < kInvalidIndex);
Slice read_key; const char* bucket = reader_->file_data_.data();
status_ = reader_->file_->Read(bucket_id * reader_->bucket_length_, for (uint32_t bucket_id = 0; bucket_id < num_buckets; ++bucket_id) {
reader_->key_length_, &read_key, nullptr); if (Slice(bucket, reader_->key_length_) != Slice(reader_->unused_key_)) {
if (read_key != Slice(reader_->unused_key_)) { sorted_bucket_ids_.push_back(bucket_id);
key_to_bucket_id_.push_back(std::make_pair(read_key, bucket_id));
} }
bucket += reader_->bucket_length_;
} }
assert(key_to_bucket_id_.size() == assert(sorted_bucket_ids_.size() ==
reader_->GetTableProperties()->num_entries); reader_->GetTableProperties()->num_entries);
std::sort(key_to_bucket_id_.begin(), key_to_bucket_id_.end(), comparator_); std::sort(sorted_bucket_ids_.begin(), sorted_bucket_ids_.end(),
curr_key_idx_ = key_to_bucket_id_.size(); bucket_comparator_);
curr_key_idx_ = kInvalidIndex;
} }
void CuckooTableIterator::SeekToFirst() { void CuckooTableIterator::SeekToFirst() {
@ -250,25 +270,25 @@ void CuckooTableIterator::SeekToFirst() {
} }
void CuckooTableIterator::SeekToLast() { void CuckooTableIterator::SeekToLast() {
curr_key_idx_ = key_to_bucket_id_.size() - 1; curr_key_idx_ = sorted_bucket_ids_.size() - 1;
PrepareKVAtCurrIdx(); PrepareKVAtCurrIdx();
} }
void CuckooTableIterator::Seek(const Slice& target) { void CuckooTableIterator::Seek(const Slice& target) {
// We assume that the target is an internal key. If this is last level file, const BucketComparator seek_comparator(
// we need to take only the user key part to seek. reader_->file_data_, reader_->ucomp_,
Slice target_to_search = reader_->is_last_level_ ? reader_->bucket_length_, reader_->user_key_length_,
ExtractUserKey(target) : target; ExtractUserKey(target));
auto seek_it = std::lower_bound(key_to_bucket_id_.begin(), auto seek_it = std::lower_bound(sorted_bucket_ids_.begin(),
key_to_bucket_id_.end(), sorted_bucket_ids_.end(),
std::make_pair(target_to_search, 0), kInvalidIndex,
comparator_); seek_comparator);
curr_key_idx_ = std::distance(key_to_bucket_id_.begin(), seek_it); curr_key_idx_ = std::distance(sorted_bucket_ids_.begin(), seek_it);
PrepareKVAtCurrIdx(); PrepareKVAtCurrIdx();
} }
bool CuckooTableIterator::Valid() const { bool CuckooTableIterator::Valid() const {
return curr_key_idx_ < key_to_bucket_id_.size(); return curr_key_idx_ < sorted_bucket_ids_.size();
} }
void CuckooTableIterator::PrepareKVAtCurrIdx() { void CuckooTableIterator::PrepareKVAtCurrIdx() {
@ -277,15 +297,17 @@ void CuckooTableIterator::PrepareKVAtCurrIdx() {
curr_key_.Clear(); curr_key_.Clear();
return; return;
} }
uint64_t offset = ((uint64_t) key_to_bucket_id_[curr_key_idx_].second uint32_t id = sorted_bucket_ids_[curr_key_idx_];
* reader_->bucket_length_) + reader_->key_length_; const char* offset = reader_->file_data_.data() +
status_ = reader_->file_->Read(offset, reader_->value_length_, id * reader_->bucket_length_;
&curr_value_, nullptr);
if (reader_->is_last_level_) { if (reader_->is_last_level_) {
// Always return internal key. // Always return internal key.
curr_key_.SetInternalKey( curr_key_.SetInternalKey(Slice(offset, reader_->user_key_length_),
key_to_bucket_id_[curr_key_idx_].first, 0, kTypeValue); 0, kTypeValue);
} else {
curr_key_.SetKey(Slice(offset, reader_->key_length_));
} }
curr_value_ = Slice(offset + reader_->key_length_, reader_->value_length_);
} }
void CuckooTableIterator::Next() { void CuckooTableIterator::Next() {
@ -300,7 +322,7 @@ void CuckooTableIterator::Next() {
void CuckooTableIterator::Prev() { void CuckooTableIterator::Prev() {
if (curr_key_idx_ == 0) { if (curr_key_idx_ == 0) {
curr_key_idx_ = key_to_bucket_id_.size(); curr_key_idx_ = sorted_bucket_ids_.size();
} }
if (!Valid()) { if (!Valid()) {
curr_value_.clear(); curr_value_.clear();
@ -313,11 +335,7 @@ void CuckooTableIterator::Prev() {
Slice CuckooTableIterator::key() const { Slice CuckooTableIterator::key() const {
assert(Valid()); assert(Valid());
if (reader_->is_last_level_) {
return curr_key_.GetKey(); return curr_key_.GetKey();
} else {
return key_to_bucket_id_[curr_key_idx_].first;
}
} }
Slice CuckooTableIterator::value() const { Slice CuckooTableIterator::value() const {

@ -71,6 +71,7 @@ class CuckooTableReader: public TableReader {
uint32_t num_hash_func_; uint32_t num_hash_func_;
std::string unused_key_; std::string unused_key_;
uint32_t key_length_; uint32_t key_length_;
uint32_t user_key_length_;
uint32_t value_length_; uint32_t value_length_;
uint32_t bucket_length_; uint32_t bucket_length_;
uint32_t cuckoo_block_size_; uint32_t cuckoo_block_size_;

Loading…
Cancel
Save