|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
//
|
|
|
|
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
|
|
|
#include "table/block_based/block_based_filter_block.h"
|
|
|
|
#include <algorithm>
|
|
|
|
|
|
|
|
#include "db/dbformat.h"
|
|
|
|
#include "monitoring/perf_context_imp.h"
|
|
|
|
#include "rocksdb/filter_policy.h"
|
|
|
|
#include "table/block_based/block_based_table_reader.h"
|
|
|
|
#include "util/coding.h"
|
|
|
|
#include "util/string_util.h"
|
|
|
|
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
// Appends "key: value" to *props, right-aligning the key in a fixed-width
// column and wrapping the value into 64-character lines. Continuation lines
// of the value are indented so they line up under the first line.
void AppendItem(std::string* props, const std::string& key,
                const std::string& value) {
  constexpr size_t kDataLength = 64;  // max value characters per output line
  constexpr size_t kTabLength = 2;
  constexpr size_t kOffLength = 16;   // indent of wrapped value lines

  // Break the value into chunks of at most kDataLength characters.
  std::string wrapped;
  wrapped.append(&value[0], std::min(kDataLength, value.size()));
  for (size_t pos = kDataLength; pos < value.size(); pos += kDataLength) {
    wrapped.append("\n");
    wrapped.append(kOffLength, ' ');
    wrapped.append(&value[pos], std::min(kDataLength, value.size() - pos));
  }

  // Right-align the key in a (kOffLength - kTabLength)-wide field; longer
  // keys are emitted unpadded.
  std::string padded_key;
  if (key.size() < kOffLength - kTabLength) {
    padded_key.append(kOffLength - kTabLength - key.size(), ' ');
  }
  padded_key.append(key);

  props->append(padded_key + ": " + wrapped + "\n");
}
|
|
|
|
|
|
|
|
template <class TKey>
|
|
|
|
void AppendItem(std::string* props, const TKey& key, const std::string& value) {
|
|
|
|
std::string key_str = ROCKSDB_NAMESPACE::ToString(key);
|
|
|
|
AppendItem(props, key_str, value);
|
|
|
|
}
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
// See doc/table_format.txt for an explanation of the filter block format.

// Generate new filter every 2KB of data
static const size_t kFilterBaseLg = 11;
static const size_t kFilterBase = 1 << kFilterBaseLg;  // 2^11 = 2048 bytes
|
|
|
|
|
|
|
|
// Builds the legacy block-based filter: one filter is generated per
// kFilterBase bytes of data blocks. Requires a filter policy in the table
// options; prefix_extractor may be null (then only whole keys are added).
BlockBasedFilterBlockBuilder::BlockBasedFilterBlockBuilder(
    const SliceTransform* prefix_extractor,
    const BlockBasedTableOptions& table_opt)
    : policy_(table_opt.filter_policy.get()),
      prefix_extractor_(prefix_extractor),
      whole_key_filtering_(table_opt.whole_key_filtering),
      prev_prefix_start_(0),
      prev_prefix_size_(0),
      num_added_(0) {
  assert(policy_);
}
|
|
|
|
|
|
|
|
void BlockBasedFilterBlockBuilder::StartBlock(uint64_t block_offset) {
|
|
|
|
uint64_t filter_index = (block_offset / kFilterBase);
|
|
|
|
assert(filter_index >= filter_offsets_.size());
|
|
|
|
while (filter_index > filter_offsets_.size()) {
|
|
|
|
GenerateFilter();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void BlockBasedFilterBlockBuilder::Add(const Slice& key) {
|
|
|
|
if (prefix_extractor_ && prefix_extractor_->InDomain(key)) {
|
|
|
|
AddPrefix(key);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (whole_key_filtering_) {
|
|
|
|
AddKey(key);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add key to filter if needed
|
|
|
|
// Records `key` in the flattened entry buffer. start_ remembers where each
// entry begins so GenerateFilter() can slice the keys back out later.
inline void BlockBasedFilterBlockBuilder::AddKey(const Slice& key) {
  ++num_added_;
  start_.push_back(entries_.size());
  entries_.append(key.data(), key.size());
}
|
|
|
|
|
|
|
|
// Add prefix to filter if needed
|
|
|
|
// Add prefix to filter if needed. Deduplicates consecutive identical
// prefixes so each run of keys sharing a prefix adds it only once.
inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) {
  // get slice for most recently added entry
  // NOTE: `prev` points into entries_'s storage. It must not be used after
  // AddKey() below, which appends to entries_ and may reallocate it
  // (historically a use-after-free bug). That is also why the previous
  // prefix is tracked as start/size indices instead of a Slice member.
  Slice prev;
  if (prev_prefix_size_ > 0) {
    prev = Slice(entries_.data() + prev_prefix_start_, prev_prefix_size_);
  }

  Slice prefix = prefix_extractor_->Transform(key);
  // insert prefix only when it's different from the previous prefix.
  if (prev.size() == 0 || prefix != prev) {
    // Record where the new prefix will land before appending it, so the
    // next call can reconstruct it from entries_.
    prev_prefix_start_ = entries_.size();
    prev_prefix_size_ = prefix.size();
    AddKey(prefix);
  }
}
|
|
|
|
|
|
|
|
// Finalizes the filter block: flushes any pending keys into a last filter,
// then appends the per-filter offset array, the position of that array,
// and the encoding parameter. Resulting layout:
//   [filter 0] ... [filter n-1] [offset array] [array offset:4] [base_lg:1]
Slice BlockBasedFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
                                           Status* status) {
  // In this impl we ignore BlockHandle; building the block cannot fail.
  *status = Status::OK();

  if (!start_.empty()) {
    GenerateFilter();
  }

  // Append array of per-filter offsets
  const uint32_t array_offset = static_cast<uint32_t>(result_.size());
  for (const uint32_t filter_offset : filter_offsets_) {
    PutFixed32(&result_, filter_offset);
  }

  PutFixed32(&result_, array_offset);
  result_.push_back(kFilterBaseLg);  // Save encoding parameter in result
  return Slice(result_);
}
|
|
|
|
|
|
|
|
void BlockBasedFilterBlockBuilder::GenerateFilter() {
|
|
|
|
const size_t num_entries = start_.size();
|
|
|
|
if (num_entries == 0) {
|
|
|
|
// Fast path if there are no keys for this filter
|
|
|
|
filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Make list of keys from flattened key structure
|
|
|
|
start_.push_back(entries_.size()); // Simplify length computation
|
|
|
|
tmp_entries_.resize(num_entries);
|
|
|
|
for (size_t i = 0; i < num_entries; i++) {
|
|
|
|
const char* base = entries_.data() + start_[i];
|
|
|
|
size_t length = start_[i + 1] - start_[i];
|
|
|
|
tmp_entries_[i] = Slice(base, length);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Generate filter for current set of keys and append to result_.
|
|
|
|
filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
|
|
|
|
policy_->CreateFilter(&tmp_entries_[0], static_cast<int>(num_entries),
|
|
|
|
&result_);
|
|
|
|
|
|
|
|
tmp_entries_.clear();
|
|
|
|
entries_.clear();
|
|
|
|
start_.clear();
|
|
|
|
prev_prefix_start_ = 0;
|
|
|
|
prev_prefix_size_ = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Wraps a (possibly empty/lazily-loaded) filter block in a reader. The
// table must outlive this reader and must have a filter policy configured.
BlockBasedFilterBlockReader::BlockBasedFilterBlockReader(
    const BlockBasedTable* t, CachableEntry<BlockContents>&& filter_block)
    : FilterBlockReaderCommon(t, std::move(filter_block)) {
  assert(table());
  assert(table()->get_rep());
  assert(table()->get_rep()->filter_policy);
}
|
|
|
|
|
|
|
|
// Factory: reads (or arranges lazy reading of) the filter block and wraps
// it in a BlockBasedFilterBlockReader. Returns an empty pointer if an
// eager read fails.
std::unique_ptr<FilterBlockReader> BlockBasedFilterBlockReader::Create(
    const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
    bool use_cache, bool prefetch, bool pin,
    BlockCacheLookupContext* lookup_context) {
  assert(table);
  assert(table->get_rep());
  assert(!pin || prefetch);  // pinning implies the block was prefetched

  CachableEntry<BlockContents> filter_block;
  if (prefetch || !use_cache) {
    // Read the filter block eagerly. Otherwise the reader starts out empty
    // and the block is fetched on first use via GetOrReadFilterBlock().
    const Status s = ReadFilterBlock(table, prefetch_buffer, ReadOptions(),
                                     use_cache, nullptr /* get_context */,
                                     lookup_context, &filter_block);
    if (!s.ok()) {
      IGNORE_STATUS_IF_ERROR(s);
      return std::unique_ptr<FilterBlockReader>();
    }

    if (use_cache && !pin) {
      // The block is now in the block cache; release our handle so it can
      // be evicted normally rather than staying pinned for the reader's
      // lifetime.
      filter_block.Reset();
    }
  }

  return std::unique_ptr<FilterBlockReader>(
      new BlockBasedFilterBlockReader(table, std::move(filter_block)));
}
|
|
|
|
|
|
|
|
bool BlockBasedFilterBlockReader::KeyMayMatch(
|
|
|
|
const Slice& key, const SliceTransform* /* prefix_extractor */,
|
Move the filter readers out of the block cache (#5504)
Summary:
Currently, when the block cache is used for the filter block, it is not
really the block itself that is stored in the cache but a FilterBlockReader
object. Since this object is not pure data (it has, for instance, pointers that
might dangle, including in one case a back pointer to the TableReader), it's not
really sharable. To avoid the issues around this, the current code erases the
cache entries when the TableReader is closed (which, BTW, is not sufficient
since a concurrent TableReader might have picked up the object in the meantime).
Instead of doing this, the patch moves the FilterBlockReader out of the cache
altogether, and decouples the filter reader object from the filter block.
In particular, instead of the TableReader owning, or caching/pinning the
FilterBlockReader (based on the customer's settings), with the change the
TableReader unconditionally owns the FilterBlockReader, which in turn
owns/caches/pins the filter block. This change also enables us to reuse the code
paths historically used for data blocks for filters as well.
Note:
Eviction statistics for filter blocks are temporarily broken. We plan to fix this in a
separate phase.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5504
Test Plan: make asan_check
Differential Revision: D16036974
Pulled By: ltamasi
fbshipit-source-id: 770f543c5fb4ed126fd1e04bfd3809cf4ff9c091
5 years ago
|
|
|
uint64_t block_offset, const bool no_io,
|
|
|
|
const Slice* const /*const_ikey_ptr*/, GetContext* get_context,
|
|
|
|
BlockCacheLookupContext* lookup_context) {
|
|
|
|
assert(block_offset != kNotValid);
|
Move the filter readers out of the block cache (#5504)
Summary:
Currently, when the block cache is used for the filter block, it is not
really the block itself that is stored in the cache but a FilterBlockReader
object. Since this object is not pure data (it has, for instance, pointers that
might dangle, including in one case a back pointer to the TableReader), it's not
really sharable. To avoid the issues around this, the current code erases the
cache entries when the TableReader is closed (which, BTW, is not sufficient
since a concurrent TableReader might have picked up the object in the meantime).
Instead of doing this, the patch moves the FilterBlockReader out of the cache
altogether, and decouples the filter reader object from the filter block.
In particular, instead of the TableReader owning, or caching/pinning the
FilterBlockReader (based on the customer's settings), with the change the
TableReader unconditionally owns the FilterBlockReader, which in turn
owns/caches/pins the filter block. This change also enables us to reuse the code
paths historically used for data blocks for filters as well.
Note:
Eviction statistics for filter blocks are temporarily broken. We plan to fix this in a
separate phase.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5504
Test Plan: make asan_check
Differential Revision: D16036974
Pulled By: ltamasi
fbshipit-source-id: 770f543c5fb4ed126fd1e04bfd3809cf4ff9c091
5 years ago
|
|
|
if (!whole_key_filtering()) {
|
|
|
|
return true;
|
|
|
|
}
|
Move the filter readers out of the block cache (#5504)
Summary:
Currently, when the block cache is used for the filter block, it is not
really the block itself that is stored in the cache but a FilterBlockReader
object. Since this object is not pure data (it has, for instance, pointers that
might dangle, including in one case a back pointer to the TableReader), it's not
really sharable. To avoid the issues around this, the current code erases the
cache entries when the TableReader is closed (which, BTW, is not sufficient
since a concurrent TableReader might have picked up the object in the meantime).
Instead of doing this, the patch moves the FilterBlockReader out of the cache
altogether, and decouples the filter reader object from the filter block.
In particular, instead of the TableReader owning, or caching/pinning the
FilterBlockReader (based on the customer's settings), with the change the
TableReader unconditionally owns the FilterBlockReader, which in turn
owns/caches/pins the filter block. This change also enables us to reuse the code
paths historically used for data blocks for filters as well.
Note:
Eviction statistics for filter blocks are temporarily broken. We plan to fix this in a
separate phase.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5504
Test Plan: make asan_check
Differential Revision: D16036974
Pulled By: ltamasi
fbshipit-source-id: 770f543c5fb4ed126fd1e04bfd3809cf4ff9c091
5 years ago
|
|
|
return MayMatch(key, block_offset, no_io, get_context, lookup_context);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns whether any key with the given `prefix` may be present in the
// data block starting at `block_offset`. Thin wrapper around MayMatch();
// unlike KeyMayMatch() there is no whole_key_filtering() gate, since
// prefixes are what the builder adds in that mode.
bool BlockBasedFilterBlockReader::PrefixMayMatch(
    const Slice& prefix, const SliceTransform* /* prefix_extractor */,
    uint64_t block_offset, const bool no_io,
    const Slice* const /*const_ikey_ptr*/, GetContext* get_context,
    BlockCacheLookupContext* lookup_context) {
  assert(block_offset != kNotValid);
  return MayMatch(prefix, block_offset, no_io, get_context, lookup_context);
}
|
|
|
|
|
|
|
|
bool BlockBasedFilterBlockReader::ParseFieldsFromBlock(
|
|
|
|
const BlockContents& contents, const char** data, const char** offset,
|
|
|
|
size_t* num, size_t* base_lg) {
|
|
|
|
assert(data);
|
|
|
|
assert(offset);
|
|
|
|
assert(num);
|
|
|
|
assert(base_lg);
|
|
|
|
|
|
|
|
const size_t n = contents.data.size();
|
|
|
|
if (n < 5) { // 1 byte for base_lg and 4 for start of offset array
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
const uint32_t last_word = DecodeFixed32(contents.data.data() + n - 5);
|
|
|
|
if (last_word > n - 5) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
*data = contents.data.data();
|
|
|
|
*offset = (*data) + last_word;
|
|
|
|
*num = (n - 5 - last_word) / 4;
|
|
|
|
*base_lg = contents.data[n - 1];
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Checks `entry` against the filter covering the data block that starts at
// `block_offset`. Any error or inconsistency yields true ("may match"),
// since a filter may only produce false positives, never false negatives.
bool BlockBasedFilterBlockReader::MayMatch(
    const Slice& entry, uint64_t block_offset, bool no_io,
    GetContext* get_context, BlockCacheLookupContext* lookup_context) const {
  CachableEntry<BlockContents> filter_block;

  const Status s =
      GetOrReadFilterBlock(no_io, get_context, lookup_context, &filter_block);
  if (!s.ok()) {
    return true;  // cannot consult the filter -> potential match
  }

  assert(filter_block.GetValue());

  const char* data = nullptr;
  const char* offset = nullptr;
  size_t num = 0;
  size_t base_lg = 0;
  if (!ParseFieldsFromBlock(*filter_block.GetValue(), &data, &offset, &num,
                            &base_lg)) {
    return true;  // Errors are treated as potential matches
  }

  // One filter per 2^base_lg bytes of data; pick the slot for this offset.
  const uint64_t index = block_offset >> base_lg;
  if (index < num) {
    const uint32_t start = DecodeFixed32(offset + index * 4);
    const uint32_t limit = DecodeFixed32(offset + index * 4 + 4);
    // Sanity-check that [start, limit) lies inside the filter data region
    // (everything before the offset array).
    if (start <= limit && limit <= (uint32_t)(offset - data)) {
      const Slice filter = Slice(data + start, limit - start);

      assert(table());
      assert(table()->get_rep());
      const FilterPolicy* const policy = table()->get_rep()->filter_policy;

      const bool may_match = policy->KeyMayMatch(entry, filter);
      if (may_match) {
        PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
        return true;
      } else {
        PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
        return false;
      }
    } else if (start == limit) {
      // Empty filters do not match any entries
      return false;
    }
  }
  return true;  // Errors are treated as potential matches
}
|
|
|
|
|
|
|
|
// Approximate memory usage: whatever the common base class reports for the
// filter block, plus this object's own footprint (using the allocator's
// actual usable size when malloc_usable_size is available).
size_t BlockBasedFilterBlockReader::ApproximateMemoryUsage() const {
  size_t usage = ApproximateFilterBlockMemoryUsage();
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
  usage += malloc_usable_size(const_cast<BlockBasedFilterBlockReader*>(this));
#else
  usage += sizeof(*this);
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
  return usage;
}
|
|
|
|
|
|
|
|
// Produces a human-readable dump of the filter block: the number of filter
// slots followed by, for each non-empty filter, its offset and a hex dump
// of its contents. Returns an error string if the block cannot be read or
// parsed.
std::string BlockBasedFilterBlockReader::ToString() const {
  CachableEntry<BlockContents> filter_block;

  const Status s =
      GetOrReadFilterBlock(false /* no_io */, nullptr /* get_context */,
                           nullptr /* lookup_context */, &filter_block);
  if (!s.ok()) {
    return std::string("Unable to retrieve filter block");
  }

  assert(filter_block.GetValue());

  const char* data = nullptr;
  const char* offset = nullptr;
  size_t num = 0;
  size_t base_lg = 0;
  if (!ParseFieldsFromBlock(*filter_block.GetValue(), &data, &offset, &num,
                            &base_lg)) {
    return std::string("Error parsing filter block");
  }

  std::string result;
  result.reserve(1024);

  std::string s_bo("Block offset"), s_hd("Hex dump"), s_fb("# filter blocks");
  AppendItem(&result, s_fb, ROCKSDB_NAMESPACE::ToString(num));
  AppendItem(&result, s_bo, s_hd);

  // Consecutive offset-array entries delimit one filter. Reading entry
  // `num` is safe: the slot after the last offset holds the offset-array
  // position itself (see Finish()).
  for (size_t index = 0; index < num; index++) {
    uint32_t start = DecodeFixed32(offset + index * 4);
    uint32_t limit = DecodeFixed32(offset + index * 4 + 4);

    if (start != limit) {  // skip empty filters
      result.append(" filter block # " +
                    ROCKSDB_NAMESPACE::ToString(index + 1) + "\n");
      Slice filter = Slice(data + start, limit - start);
      AppendItem(&result, start, filter.ToString(true));
    }
  }
  return result;
}
|
|
|
|
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|