|
|
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <limits>
|
|
|
|
#include <string>
|
|
|
|
#include <utility>
|
|
|
|
#include <vector>
|
|
|
|
#include "rocksdb/status.h"
|
|
|
|
#include "table/table_builder.h"
|
|
|
|
#include "rocksdb/table.h"
|
|
|
|
#include "rocksdb/table_properties.h"
|
|
|
|
#include "util/autovector.h"
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
class CuckooTableBuilder: public TableBuilder {
|
|
|
|
public:
|
|
|
|
CuckooTableBuilder(
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
10 years ago
|
|
|
WritableFile* file, double max_hash_table_ratio,
|
|
|
|
uint32_t max_num_hash_func, uint32_t max_search_depth,
|
|
|
|
const Comparator* user_comparator, uint32_t cuckoo_block_size,
|
|
|
|
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t));
|
|
|
|
|
|
|
|
// REQUIRES: Either Finish() or Abandon() has been called.
|
|
|
|
~CuckooTableBuilder() {}
|
|
|
|
|
|
|
|
// Add key,value to the table being constructed.
|
|
|
|
// REQUIRES: key is after any previously added key according to comparator.
|
|
|
|
// REQUIRES: Finish(), Abandon() have not been called
|
|
|
|
void Add(const Slice& key, const Slice& value) override;
|
|
|
|
|
|
|
|
// Return non-ok iff some error has been detected.
|
|
|
|
Status status() const override { return status_; }
|
|
|
|
|
|
|
|
// Finish building the table. Stops using the file passed to the
|
|
|
|
// constructor after this function returns.
|
|
|
|
// REQUIRES: Finish(), Abandon() have not been called
|
|
|
|
Status Finish() override;
|
|
|
|
|
|
|
|
// Indicate that the contents of this builder should be abandoned. Stops
|
|
|
|
// using the file passed to the constructor after this function returns.
|
|
|
|
// If the caller is not going to call Finish(), it must call Abandon()
|
|
|
|
// before destroying this builder.
|
|
|
|
// REQUIRES: Finish(), Abandon() have not been called
|
|
|
|
void Abandon() override;
|
|
|
|
|
|
|
|
// Number of calls to Add() so far.
|
|
|
|
uint64_t NumEntries() const override;
|
|
|
|
|
|
|
|
// Size of the file generated so far. If invoked after a successful
|
|
|
|
// Finish() call, returns the size of the final generated file.
|
|
|
|
uint64_t FileSize() const override;
|
|
|
|
|
|
|
|
private:
|
|
|
|
struct CuckooBucket {
|
|
|
|
CuckooBucket()
|
|
|
|
: vector_idx(kMaxVectorIdx), make_space_for_key_call_id(0) {}
|
|
|
|
uint32_t vector_idx;
|
|
|
|
// This number will not exceed kvs_.size() + max_num_hash_func_.
|
|
|
|
// We assume number of items is <= 2^32.
|
|
|
|
uint32_t make_space_for_key_call_id;
|
|
|
|
};
|
|
|
|
static const uint32_t kMaxVectorIdx = std::numeric_limits<int32_t>::max();
|
|
|
|
|
|
|
|
bool MakeSpaceForKey(
|
|
|
|
const autovector<uint64_t>& hash_vals,
|
|
|
|
const uint64_t call_id,
|
|
|
|
std::vector<CuckooBucket>* buckets,
|
|
|
|
uint64_t* bucket_id);
|
|
|
|
Status MakeHashTable(std::vector<CuckooBucket>* buckets);
|
|
|
|
|
|
|
|
uint32_t num_hash_func_;
|
|
|
|
WritableFile* file_;
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
10 years ago
|
|
|
const double max_hash_table_ratio_;
|
|
|
|
const uint32_t max_num_hash_func_;
|
|
|
|
const uint32_t max_search_depth_;
|
|
|
|
const uint32_t cuckoo_block_size_;
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
10 years ago
|
|
|
uint64_t hash_table_size_;
|
|
|
|
bool is_last_level_file_;
|
|
|
|
Status status_;
|
|
|
|
std::vector<std::pair<std::string, std::string>> kvs_;
|
|
|
|
TableProperties properties_;
|
|
|
|
bool has_seen_first_key_;
|
|
|
|
const Comparator* ucomp_;
|
|
|
|
uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index,
|
|
|
|
uint64_t max_num_buckets);
|
|
|
|
std::string largest_user_key_ = "";
|
|
|
|
std::string smallest_user_key_ = "";
|
|
|
|
|
|
|
|
bool closed_; // Either Finish() or Abandon() has been called.
|
|
|
|
|
|
|
|
// No copying allowed
|
|
|
|
CuckooTableBuilder(const CuckooTableBuilder&) = delete;
|
|
|
|
void operator=(const CuckooTableBuilder&) = delete;
|
|
|
|
};
|
|
|
|
|
|
|
|
} // namespace rocksdb
|
|
|
|
|
|
|
|
#endif // ROCKSDB_LITE
|