|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <stddef.h>
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <memory>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
#include "db/dbformat.h"
|
|
|
|
#include "rocksdb/options.h"
|
|
|
|
#include "rocksdb/slice.h"
|
|
|
|
#include "rocksdb/slice_transform.h"
|
Move the filter readers out of the block cache (#5504)
Summary:
Currently, when the block cache is used for the filter block, it is not
really the block itself that is stored in the cache but a FilterBlockReader
object. Since this object is not pure data (it has, for instance, pointers that
might dangle, including in one case a back pointer to the TableReader), it's not
really sharable. To avoid the issues around this, the current code erases the
cache entries when the TableReader is closed (which, BTW, is not sufficient
since a concurrent TableReader might have picked up the object in the meantime).
Instead of doing this, the patch moves the FilterBlockReader out of the cache
altogether, and decouples the filter reader object from the filter block.
In particular, instead of the TableReader owning, or caching/pinning the
FilterBlockReader (based on the customer's settings), with the change the
TableReader unconditionally owns the FilterBlockReader, which in turn
owns/caches/pins the filter block. This change also enables us to reuse the code
paths historically used for data blocks for filters as well.
Note:
Eviction statistics for filter blocks are temporarily broken. We plan to fix this in a
separate phase.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5504
Test Plan: make asan_check
Differential Revision: D16036974
Pulled By: ltamasi
fbshipit-source-id: 770f543c5fb4ed126fd1e04bfd3809cf4ff9c091
5 years ago
|
|
|
#include "table/block_based/filter_block_reader_common.h"
|
|
|
|
#include "table/block_based/parsed_full_filter_block.h"
|
|
|
|
#include "util/hash.h"
|
|
|
|
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
|
|
|
|
class FilterPolicy;
|
|
|
|
class FilterBitsBuilder;
|
|
|
|
class FilterBitsReader;
|
|
|
|
|
|
|
|
// A FullFilterBlockBuilder is used to construct a full filter for a
|
|
|
|
// particular Table. It generates a single string which is stored as
|
|
|
|
// a special block in the Table.
|
|
|
|
// The format of full filter block is:
|
|
|
|
// +----------------------------------------------------------------+
|
|
|
|
// | full filter for all keys in sst file |
|
|
|
|
// +----------------------------------------------------------------+
|
|
|
|
// The full filter can be very large. At the end of it, we put
|
|
|
|
// num_probes: how many hash functions are used in bloom filter
|
|
|
|
//
|
|
|
|
class FullFilterBlockBuilder : public FilterBlockBuilder {
|
|
|
|
public:
|
|
|
|
explicit FullFilterBlockBuilder(const SliceTransform* prefix_extractor,
|
|
|
|
bool whole_key_filtering,
|
|
|
|
FilterBitsBuilder* filter_bits_builder);
|
|
|
|
// No copying allowed
|
|
|
|
FullFilterBlockBuilder(const FullFilterBlockBuilder&) = delete;
|
|
|
|
void operator=(const FullFilterBlockBuilder&) = delete;
|
|
|
|
|
|
|
|
// bits_builder is created in filter_policy, it should be passed in here
|
|
|
|
// directly. and be deleted here
|
|
|
|
~FullFilterBlockBuilder() {}
|
|
|
|
|
|
|
|
virtual bool IsBlockBased() override { return false; }
|
|
|
|
virtual void StartBlock(uint64_t /*block_offset*/) override {}
|
|
|
|
virtual void Add(const Slice& key_without_ts) override;
|
|
|
|
virtual size_t NumAdded() const override { return num_added_; }
|
|
|
|
virtual Slice Finish(const BlockHandle& tmp, Status* status) override;
|
|
|
|
using FilterBlockBuilder::Finish;
|
|
|
|
|
|
|
|
protected:
|
|
|
|
virtual void AddKey(const Slice& key);
|
|
|
|
std::unique_ptr<FilterBitsBuilder> filter_bits_builder_;
|
|
|
|
virtual void Reset();
|
|
|
|
void AddPrefix(const Slice& key);
|
|
|
|
const SliceTransform* prefix_extractor() { return prefix_extractor_; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
// important: all of these might point to invalid addresses
|
|
|
|
// at the time of destruction of this filter block. destructor
|
|
|
|
// should NOT dereference them.
|
|
|
|
const SliceTransform* prefix_extractor_;
|
|
|
|
bool whole_key_filtering_;
|
|
|
|
bool last_whole_key_recorded_;
|
|
|
|
std::string last_whole_key_str_;
|
|
|
|
bool last_prefix_recorded_;
|
|
|
|
std::string last_prefix_str_;
|
|
|
|
|
|
|
|
uint32_t num_added_;
|
|
|
|
std::unique_ptr<const char[]> filter_data_;
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
// A FilterBlockReader is used to parse filter from SST table.
|
|
|
|
// KeyMayMatch and PrefixMayMatch would trigger filter checking
|
|
|
|
class FullFilterBlockReader
|
|
|
|
: public FilterBlockReaderCommon<ParsedFullFilterBlock> {
|
|
|
|
public:
|
Move the filter readers out of the block cache (#5504)
Summary:
Currently, when the block cache is used for the filter block, it is not
really the block itself that is stored in the cache but a FilterBlockReader
object. Since this object is not pure data (it has, for instance, pointers that
might dangle, including in one case a back pointer to the TableReader), it's not
really sharable. To avoid the issues around this, the current code erases the
cache entries when the TableReader is closed (which, BTW, is not sufficient
since a concurrent TableReader might have picked up the object in the meantime).
Instead of doing this, the patch moves the FilterBlockReader out of the cache
altogether, and decouples the filter reader object from the filter block.
In particular, instead of the TableReader owning, or caching/pinning the
FilterBlockReader (based on the customer's settings), with the change the
TableReader unconditionally owns the FilterBlockReader, which in turn
owns/caches/pins the filter block. This change also enables us to reuse the code
paths historically used for data blocks for filters as well.
Note:
Eviction statistics for filter blocks are temporarily broken. We plan to fix this in a
separate phase.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5504
Test Plan: make asan_check
Differential Revision: D16036974
Pulled By: ltamasi
fbshipit-source-id: 770f543c5fb4ed126fd1e04bfd3809cf4ff9c091
5 years ago
|
|
|
FullFilterBlockReader(const BlockBasedTable* t,
|
|
|
|
CachableEntry<ParsedFullFilterBlock>&& filter_block);
|
Move the filter readers out of the block cache (#5504)
Summary:
Currently, when the block cache is used for the filter block, it is not
really the block itself that is stored in the cache but a FilterBlockReader
object. Since this object is not pure data (it has, for instance, pointers that
might dangle, including in one case a back pointer to the TableReader), it's not
really sharable. To avoid the issues around this, the current code erases the
cache entries when the TableReader is closed (which, BTW, is not sufficient
since a concurrent TableReader might have picked up the object in the meantime).
Instead of doing this, the patch moves the FilterBlockReader out of the cache
altogether, and decouples the filter reader object from the filter block.
In particular, instead of the TableReader owning, or caching/pinning the
FilterBlockReader (based on the customer's settings), with the change the
TableReader unconditionally owns the FilterBlockReader, which in turn
owns/caches/pins the filter block. This change also enables us to reuse the code
paths historically used for data blocks for filters as well.
Note:
Eviction statistics for filter blocks are temporarily broken. We plan to fix this in a
separate phase.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5504
Test Plan: make asan_check
Differential Revision: D16036974
Pulled By: ltamasi
fbshipit-source-id: 770f543c5fb4ed126fd1e04bfd3809cf4ff9c091
5 years ago
|
|
|
|
|
|
|
static std::unique_ptr<FilterBlockReader> Create(
|
|
|
|
const BlockBasedTable* table, const ReadOptions& ro,
|
|
|
|
FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
|
|
|
|
bool pin, BlockCacheLookupContext* lookup_context);
|
|
|
|
|
|
|
|
bool IsBlockBased() override { return false; }
|
|
|
|
|
|
|
|
bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor,
|
|
|
|
uint64_t block_offset, const bool no_io,
|
Move the filter readers out of the block cache (#5504)
Summary:
Currently, when the block cache is used for the filter block, it is not
really the block itself that is stored in the cache but a FilterBlockReader
object. Since this object is not pure data (it has, for instance, pointers that
might dangle, including in one case a back pointer to the TableReader), it's not
really sharable. To avoid the issues around this, the current code erases the
cache entries when the TableReader is closed (which, BTW, is not sufficient
since a concurrent TableReader might have picked up the object in the meantime).
Instead of doing this, the patch moves the FilterBlockReader out of the cache
altogether, and decouples the filter reader object from the filter block.
In particular, instead of the TableReader owning, or caching/pinning the
FilterBlockReader (based on the customer's settings), with the change the
TableReader unconditionally owns the FilterBlockReader, which in turn
owns/caches/pins the filter block. This change also enables us to reuse the code
paths historically used for data blocks for filters as well.
Note:
Eviction statistics for filter blocks are temporarily broken. We plan to fix this in a
separate phase.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5504
Test Plan: make asan_check
Differential Revision: D16036974
Pulled By: ltamasi
fbshipit-source-id: 770f543c5fb4ed126fd1e04bfd3809cf4ff9c091
5 years ago
|
|
|
const Slice* const const_ikey_ptr, GetContext* get_context,
|
|
|
|
BlockCacheLookupContext* lookup_context) override;
|
|
|
|
|
|
|
|
bool PrefixMayMatch(const Slice& prefix,
|
|
|
|
const SliceTransform* prefix_extractor,
|
|
|
|
uint64_t block_offset, const bool no_io,
|
|
|
|
const Slice* const const_ikey_ptr,
|
Move the filter readers out of the block cache (#5504)
Summary:
Currently, when the block cache is used for the filter block, it is not
really the block itself that is stored in the cache but a FilterBlockReader
object. Since this object is not pure data (it has, for instance, pointers that
might dangle, including in one case a back pointer to the TableReader), it's not
really sharable. To avoid the issues around this, the current code erases the
cache entries when the TableReader is closed (which, BTW, is not sufficient
since a concurrent TableReader might have picked up the object in the meantime).
Instead of doing this, the patch moves the FilterBlockReader out of the cache
altogether, and decouples the filter reader object from the filter block.
In particular, instead of the TableReader owning, or caching/pinning the
FilterBlockReader (based on the customer's settings), with the change the
TableReader unconditionally owns the FilterBlockReader, which in turn
owns/caches/pins the filter block. This change also enables us to reuse the code
paths historically used for data blocks for filters as well.
Note:
Eviction statistics for filter blocks are temporarily broken. We plan to fix this in a
separate phase.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5504
Test Plan: make asan_check
Differential Revision: D16036974
Pulled By: ltamasi
fbshipit-source-id: 770f543c5fb4ed126fd1e04bfd3809cf4ff9c091
5 years ago
|
|
|
GetContext* get_context,
|
|
|
|
BlockCacheLookupContext* lookup_context) override;
|
|
|
|
|
|
|
|
void KeysMayMatch(MultiGetRange* range,
|
|
|
|
const SliceTransform* prefix_extractor,
|
|
|
|
uint64_t block_offset, const bool no_io,
|
Move the filter readers out of the block cache (#5504)
Summary:
Currently, when the block cache is used for the filter block, it is not
really the block itself that is stored in the cache but a FilterBlockReader
object. Since this object is not pure data (it has, for instance, pointers that
might dangle, including in one case a back pointer to the TableReader), it's not
really sharable. To avoid the issues around this, the current code erases the
cache entries when the TableReader is closed (which, BTW, is not sufficient
since a concurrent TableReader might have picked up the object in the meantime).
Instead of doing this, the patch moves the FilterBlockReader out of the cache
altogether, and decouples the filter reader object from the filter block.
In particular, instead of the TableReader owning, or caching/pinning the
FilterBlockReader (based on the customer's settings), with the change the
TableReader unconditionally owns the FilterBlockReader, which in turn
owns/caches/pins the filter block. This change also enables us to reuse the code
paths historically used for data blocks for filters as well.
Note:
Eviction statistics for filter blocks are temporarily broken. We plan to fix this in a
separate phase.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5504
Test Plan: make asan_check
Differential Revision: D16036974
Pulled By: ltamasi
fbshipit-source-id: 770f543c5fb4ed126fd1e04bfd3809cf4ff9c091
5 years ago
|
|
|
BlockCacheLookupContext* lookup_context) override;
|
|
|
|
|
|
|
|
void PrefixesMayMatch(MultiGetRange* range,
|
|
|
|
const SliceTransform* prefix_extractor,
|
|
|
|
uint64_t block_offset, const bool no_io,
|
Move the filter readers out of the block cache (#5504)
Summary:
Currently, when the block cache is used for the filter block, it is not
really the block itself that is stored in the cache but a FilterBlockReader
object. Since this object is not pure data (it has, for instance, pointers that
might dangle, including in one case a back pointer to the TableReader), it's not
really sharable. To avoid the issues around this, the current code erases the
cache entries when the TableReader is closed (which, BTW, is not sufficient
since a concurrent TableReader might have picked up the object in the meantime).
Instead of doing this, the patch moves the FilterBlockReader out of the cache
altogether, and decouples the filter reader object from the filter block.
In particular, instead of the TableReader owning, or caching/pinning the
FilterBlockReader (based on the customer's settings), with the change the
TableReader unconditionally owns the FilterBlockReader, which in turn
owns/caches/pins the filter block. This change also enables us to reuse the code
paths historically used for data blocks for filters as well.
Note:
Eviction statistics for filter blocks are temporarily broken. We plan to fix this in a
separate phase.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5504
Test Plan: make asan_check
Differential Revision: D16036974
Pulled By: ltamasi
fbshipit-source-id: 770f543c5fb4ed126fd1e04bfd3809cf4ff9c091
5 years ago
|
|
|
BlockCacheLookupContext* lookup_context) override;
|
|
|
|
size_t ApproximateMemoryUsage() const override;
|
|
|
|
bool RangeMayExist(const Slice* iterate_upper_bound, const Slice& user_key,
|
|
|
|
const SliceTransform* prefix_extractor,
|
|
|
|
const Comparator* comparator,
|
|
|
|
const Slice* const const_ikey_ptr, bool* filter_checked,
|
Fix iterator reading filter block despite read_tier == kBlockCacheTier (#6562)
Summary:
We're seeing iterators with `ReadOptions::read_tier == kBlockCacheTier` sometimes doing file reads. Stack trace:
```
rocksdb::RandomAccessFileReader::Read(unsigned long, unsigned long, rocksdb::Slice*, char*, bool) const
rocksdb::BlockFetcher::ReadBlockContents()
rocksdb::Status rocksdb::BlockBasedTable::MaybeReadBlockAndLoadToCache<rocksdb::ParsedFullFilterBlock>(rocksdb::FilePrefetchBuffer*, rocksdb::ReadOptions const&, rocksdb::BlockHandle const&, rocksdb::UncompressionDict const&, rocksdb::CachableEntry<rocksdb::ParsedFullFilterBlock>*, rocksdb::BlockType, rocksdb::GetContext*, rocksdb::BlockCacheLookupContext*, rocksdb::BlockContents*) const
rocksdb::Status rocksdb::BlockBasedTable::RetrieveBlock<rocksdb::ParsedFullFilterBlock>(rocksdb::FilePrefetchBuffer*, rocksdb::ReadOptions const&, rocksdb::BlockHandle const&, rocksdb::UncompressionDict const&, rocksdb::CachableEntry<rocksdb::ParsedFullFilterBlock>*, rocksdb::BlockType, rocksdb::GetContext*, rocksdb::BlockCacheLookupContext*, bool, bool) const
rocksdb::FilterBlockReaderCommon<rocksdb::ParsedFullFilterBlock>::ReadFilterBlock(rocksdb::BlockBasedTable const*, rocksdb::FilePrefetchBuffer*, rocksdb::ReadOptions const&, bool, rocksdb::GetContext*, rocksdb::BlockCacheLookupContext*, rocksdb::CachableEntry<rocksdb::ParsedFullFilterBlock>*)
rocksdb::FilterBlockReaderCommon<rocksdb::ParsedFullFilterBlock>::GetOrReadFilterBlock(bool, rocksdb::GetContext*, rocksdb::BlockCacheLookupContext*, rocksdb::CachableEntry<rocksdb::ParsedFullFilterBlock>*) const
rocksdb::FullFilterBlockReader::MayMatch(rocksdb::Slice const&, bool, rocksdb::GetContext*, rocksdb::BlockCacheLookupContext*) const
rocksdb::FullFilterBlockReader::RangeMayExist(rocksdb::Slice const*, rocksdb::Slice const&, rocksdb::SliceTransform const*, rocksdb::Comparator const*, rocksdb::Slice const*, bool*, bool, rocksdb::BlockCacheLookupContext*)
rocksdb::BlockBasedTable::PrefixMayMatch(rocksdb::Slice const&, rocksdb::ReadOptions const&, rocksdb::SliceTransform const*, bool, rocksdb::BlockCacheLookupContext*) const
rocksdb::BlockBasedTableIterator<rocksdb::DataBlockIter, rocksdb::Slice>::SeekImpl(rocksdb::Slice const*)
rocksdb::ForwardIterator::SeekInternal(rocksdb::Slice const&, bool)
rocksdb::DBIter::Seek(rocksdb::Slice const&)
```
`BlockBasedTableIterator::CheckPrefixMayMatch` was missing a check for `kBlockCacheTier`. This PR adds it.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6562
Test Plan: deployed it to a logdevice test cluster and looked at logdevice's IO tracing.
Reviewed By: siying
Differential Revision: D20529368
Pulled By: al13n321
fbshipit-source-id: 65bf33964b1951464415c900336635fb20919611
5 years ago
|
|
|
bool need_upper_bound_check, bool no_io,
|
Move the filter readers out of the block cache (#5504)
Summary:
Currently, when the block cache is used for the filter block, it is not
really the block itself that is stored in the cache but a FilterBlockReader
object. Since this object is not pure data (it has, for instance, pointers that
might dangle, including in one case a back pointer to the TableReader), it's not
really sharable. To avoid the issues around this, the current code erases the
cache entries when the TableReader is closed (which, BTW, is not sufficient
since a concurrent TableReader might have picked up the object in the meantime).
Instead of doing this, the patch moves the FilterBlockReader out of the cache
altogether, and decouples the filter reader object from the filter block.
In particular, instead of the TableReader owning, or caching/pinning the
FilterBlockReader (based on the customer's settings), with the change the
TableReader unconditionally owns the FilterBlockReader, which in turn
owns/caches/pins the filter block. This change also enables us to reuse the code
paths historically used for data blocks for filters as well.
Note:
Eviction statistics for filter blocks are temporarily broken. We plan to fix this in a
separate phase.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5504
Test Plan: make asan_check
Differential Revision: D16036974
Pulled By: ltamasi
fbshipit-source-id: 770f543c5fb4ed126fd1e04bfd3809cf4ff9c091
5 years ago
|
|
|
BlockCacheLookupContext* lookup_context) override;
|
|
|
|
|
|
|
|
private:
|
|
|
|
bool MayMatch(const Slice& entry, bool no_io, GetContext* get_context,
|
|
|
|
BlockCacheLookupContext* lookup_context) const;
|
|
|
|
void MayMatch(MultiGetRange* range, bool no_io,
|
|
|
|
const SliceTransform* prefix_extractor,
|
Move the filter readers out of the block cache (#5504)
Summary:
Currently, when the block cache is used for the filter block, it is not
really the block itself that is stored in the cache but a FilterBlockReader
object. Since this object is not pure data (it has, for instance, pointers that
might dangle, including in one case a back pointer to the TableReader), it's not
really sharable. To avoid the issues around this, the current code erases the
cache entries when the TableReader is closed (which, BTW, is not sufficient
since a concurrent TableReader might have picked up the object in the meantime).
Instead of doing this, the patch moves the FilterBlockReader out of the cache
altogether, and decouples the filter reader object from the filter block.
In particular, instead of the TableReader owning, or caching/pinning the
FilterBlockReader (based on the customer's settings), with the change the
TableReader unconditionally owns the FilterBlockReader, which in turn
owns/caches/pins the filter block. This change also enables us to reuse the code
paths historically used for data blocks for filters as well.
Note:
Eviction statistics for filter blocks are temporarily broken. We plan to fix this in a
separate phase.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5504
Test Plan: make asan_check
Differential Revision: D16036974
Pulled By: ltamasi
fbshipit-source-id: 770f543c5fb4ed126fd1e04bfd3809cf4ff9c091
5 years ago
|
|
|
BlockCacheLookupContext* lookup_context) const;
|
|
|
|
bool IsFilterCompatible(const Slice* iterate_upper_bound, const Slice& prefix,
|
|
|
|
const Comparator* comparator) const;
|
|
|
|
|
|
|
|
private:
|
|
|
|
bool full_length_enabled_;
|
|
|
|
size_t prefix_extractor_full_length_;
|
|
|
|
};
|
|
|
|
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|