Reset blob value as soon as it's not needed in DBIter (#10490)

Summary:
We have recently added caching support to BlobDB, and separately,
implemented an optimization where reading blobs from the cache
results in the cache handle being transferred to the target `PinnableSlice`
(as opposed to the contents getting copied). With these changes,
it makes sense to reset the `PinnableSlice` storing the blob value in
`DBIter` as soon as we move to a different iterator position to prevent
us from holding on to the cache handle any longer than necessary.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10490

Test Plan: `make check`

Reviewed By: akankshamahajan15

Differential Revision: D38473630

Pulled By: ltamasi

fbshipit-source-id: 84c045ffac76436c6152fd0f5775b007f4051386
main
Levi Tamasi 2 years ago committed by Facebook GitHub Bot
parent 24bcab7d5d
commit 06b04127a8
  1. 1
      HISTORY.md
  2. 142
      db/blob/db_blob_basic_test.cc
  3. 26
      db/db_iter.cc
  4. 5
      db/db_iter.h

@ -34,6 +34,7 @@
### Performance Improvements ### Performance Improvements
* Instead of constructing `FragmentedRangeTombstoneList` during every read operation, it is now constructed once and stored in immutable memtables. This improves speed of querying range tombstones from immutable memtables. * Instead of constructing `FragmentedRangeTombstoneList` during every read operation, it is now constructed once and stored in immutable memtables. This improves speed of querying range tombstones from immutable memtables.
* Improve read performance by avoiding dynamic memory allocation. * Improve read performance by avoiding dynamic memory allocation.
* When using iterators with the integrated BlobDB implementation, blob cache handles are now released immediately when the iterator's position changes.
## 7.5.0 (07/15/2022) ## 7.5.0 (07/15/2022)
### New Features ### New Features

@ -227,6 +227,148 @@ TEST_F(DBBlobBasicTest, IterateBlobsFromCache) {
} }
} }
TEST_F(DBBlobBasicTest, IterateBlobsFromCachePinning) {
constexpr size_t min_blob_size = 6;
Options options = GetDefaultOptions();
LRUCacheOptions cache_options;
cache_options.capacity = 2048;
cache_options.num_shard_bits = 0;
cache_options.metadata_charge_policy = kDontChargeCacheMetadata;
options.blob_cache = NewLRUCache(cache_options);
options.enable_blob_files = true;
options.min_blob_size = min_blob_size;
Reopen(options);
// Put then iterate over three key-values. The second value is below the size
// limit and is thus stored inline; the other two are stored separately as
// blobs. We expect to have something pinned in the cache iff we are
// positioned on a blob.
constexpr char first_key[] = "first_key";
constexpr char first_value[] = "long_value";
static_assert(sizeof(first_value) - 1 >= min_blob_size,
"first_value too short to be stored as blob");
ASSERT_OK(Put(first_key, first_value));
constexpr char second_key[] = "second_key";
constexpr char second_value[] = "short";
static_assert(sizeof(second_value) - 1 < min_blob_size,
"second_value too long to be inlined");
ASSERT_OK(Put(second_key, second_value));
constexpr char third_key[] = "third_key";
constexpr char third_value[] = "other_long_value";
static_assert(sizeof(third_value) - 1 >= min_blob_size,
"third_value too short to be stored as blob");
ASSERT_OK(Put(third_key, third_value));
ASSERT_OK(Flush());
{
ReadOptions read_options;
read_options.fill_cache = true;
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
iter->SeekToFirst();
ASSERT_TRUE(iter->Valid());
ASSERT_OK(iter->status());
ASSERT_EQ(iter->key(), first_key);
ASSERT_EQ(iter->value(), first_value);
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_OK(iter->status());
ASSERT_EQ(iter->key(), second_key);
ASSERT_EQ(iter->value(), second_value);
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_OK(iter->status());
ASSERT_EQ(iter->key(), third_key);
ASSERT_EQ(iter->value(), third_value);
iter->Next();
ASSERT_FALSE(iter->Valid());
ASSERT_OK(iter->status());
}
{
ReadOptions read_options;
read_options.fill_cache = false;
read_options.read_tier = kBlockCacheTier;
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
iter->SeekToFirst();
ASSERT_TRUE(iter->Valid());
ASSERT_OK(iter->status());
ASSERT_EQ(iter->key(), first_key);
ASSERT_EQ(iter->value(), first_value);
ASSERT_GT(options.blob_cache->GetPinnedUsage(), 0);
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_OK(iter->status());
ASSERT_EQ(iter->key(), second_key);
ASSERT_EQ(iter->value(), second_value);
ASSERT_EQ(options.blob_cache->GetPinnedUsage(), 0);
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_OK(iter->status());
ASSERT_EQ(iter->key(), third_key);
ASSERT_EQ(iter->value(), third_value);
ASSERT_GT(options.blob_cache->GetPinnedUsage(), 0);
iter->Next();
ASSERT_FALSE(iter->Valid());
ASSERT_OK(iter->status());
ASSERT_EQ(options.blob_cache->GetPinnedUsage(), 0);
}
{
ReadOptions read_options;
read_options.fill_cache = false;
read_options.read_tier = kBlockCacheTier;
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
iter->SeekToLast();
ASSERT_TRUE(iter->Valid());
ASSERT_OK(iter->status());
ASSERT_EQ(iter->key(), third_key);
ASSERT_EQ(iter->value(), third_value);
ASSERT_GT(options.blob_cache->GetPinnedUsage(), 0);
iter->Prev();
ASSERT_TRUE(iter->Valid());
ASSERT_OK(iter->status());
ASSERT_EQ(iter->key(), second_key);
ASSERT_EQ(iter->value(), second_value);
ASSERT_EQ(options.blob_cache->GetPinnedUsage(), 0);
iter->Prev();
ASSERT_TRUE(iter->Valid());
ASSERT_OK(iter->status());
ASSERT_EQ(iter->key(), first_key);
ASSERT_EQ(iter->value(), first_value);
ASSERT_GT(options.blob_cache->GetPinnedUsage(), 0);
iter->Prev();
ASSERT_FALSE(iter->Valid());
ASSERT_OK(iter->status());
ASSERT_EQ(options.blob_cache->GetPinnedUsage(), 0);
}
}
TEST_F(DBBlobBasicTest, MultiGetBlobs) { TEST_F(DBBlobBasicTest, MultiGetBlobs) {
constexpr size_t min_blob_size = 6; constexpr size_t min_blob_size = 6;

@ -134,6 +134,7 @@ void DBIter::Next() {
PERF_CPU_TIMER_GUARD(iter_next_cpu_nanos, clock_); PERF_CPU_TIMER_GUARD(iter_next_cpu_nanos, clock_);
// Release temporarily pinned blocks from last operation // Release temporarily pinned blocks from last operation
ReleaseTempPinnedData(); ReleaseTempPinnedData();
ResetBlobValue();
ResetWideColumnValue(); ResetWideColumnValue();
local_stats_.skip_count_ += num_internal_keys_skipped_; local_stats_.skip_count_ += num_internal_keys_skipped_;
local_stats_.skip_count_--; local_stats_.skip_count_--;
@ -177,6 +178,7 @@ void DBIter::Next() {
bool DBIter::SetBlobValueIfNeeded(const Slice& user_key, bool DBIter::SetBlobValueIfNeeded(const Slice& user_key,
const Slice& blob_index) { const Slice& blob_index) {
assert(!is_blob_); assert(!is_blob_);
assert(blob_value_.empty());
assert(!is_wide_); assert(!is_wide_);
assert(value_of_default_column_.empty()); assert(value_of_default_column_.empty());
@ -216,6 +218,7 @@ bool DBIter::SetBlobValueIfNeeded(const Slice& user_key,
bool DBIter::SetWideColumnValueIfNeeded(const Slice& wide_columns_slice) { bool DBIter::SetWideColumnValueIfNeeded(const Slice& wide_columns_slice) {
assert(!is_blob_); assert(!is_blob_);
assert(blob_value_.empty());
assert(!is_wide_); assert(!is_wide_);
assert(value_of_default_column_.empty()); assert(value_of_default_column_.empty());
@ -280,7 +283,8 @@ bool DBIter::FindNextUserEntryInternal(bool skipping_saved_key,
// to one. // to one.
bool reseek_done = false; bool reseek_done = false;
is_blob_ = false; assert(!is_blob_);
assert(blob_value_.empty());
assert(!is_wide_); assert(!is_wide_);
assert(value_of_default_column_.empty()); assert(value_of_default_column_.empty());
@ -611,7 +615,7 @@ bool DBIter::MergeValuesNewToOld() {
return false; return false;
} }
is_blob_ = false; ResetBlobValue();
assert(!is_wide_); assert(!is_wide_);
assert(value_of_default_column_.empty()); assert(value_of_default_column_.empty());
@ -660,6 +664,7 @@ void DBIter::Prev() {
PERF_CPU_TIMER_GUARD(iter_prev_cpu_nanos, clock_); PERF_CPU_TIMER_GUARD(iter_prev_cpu_nanos, clock_);
ReleaseTempPinnedData(); ReleaseTempPinnedData();
ResetBlobValue();
ResetWideColumnValue(); ResetWideColumnValue();
ResetInternalKeysSkippedCounter(); ResetInternalKeysSkippedCounter();
bool ok = true; bool ok = true;
@ -990,7 +995,9 @@ bool DBIter::FindValueForCurrentKey() {
Status s; Status s;
s.PermitUncheckedError(); s.PermitUncheckedError();
is_blob_ = false;
assert(!is_blob_);
assert(blob_value_.empty());
assert(!is_wide_); assert(!is_wide_);
assert(value_of_default_column_.empty()); assert(value_of_default_column_.empty());
@ -1033,7 +1040,7 @@ bool DBIter::FindValueForCurrentKey() {
return false; return false;
} }
is_blob_ = false; ResetBlobValue();
assert(!is_wide_); assert(!is_wide_);
assert(value_of_default_column_.empty()); assert(value_of_default_column_.empty());
@ -1111,7 +1118,9 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
// In case read_callback presents, the value we seek to may not be visible. // In case read_callback presents, the value we seek to may not be visible.
// Find the next value that's visible. // Find the next value that's visible.
ParsedInternalKey ikey; ParsedInternalKey ikey;
is_blob_ = false;
assert(!is_blob_);
assert(blob_value_.empty());
assert(!is_wide_); assert(!is_wide_);
assert(value_of_default_column_.empty()); assert(value_of_default_column_.empty());
@ -1250,7 +1259,7 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
return false; return false;
} }
is_blob_ = false; ResetBlobValue();
assert(!is_wide_); assert(!is_wide_);
assert(value_of_default_column_.empty()); assert(value_of_default_column_.empty());
@ -1478,6 +1487,7 @@ void DBIter::Seek(const Slice& target) {
status_ = Status::OK(); status_ = Status::OK();
ReleaseTempPinnedData(); ReleaseTempPinnedData();
ResetBlobValue();
ResetWideColumnValue(); ResetWideColumnValue();
ResetInternalKeysSkippedCounter(); ResetInternalKeysSkippedCounter();
@ -1555,6 +1565,7 @@ void DBIter::SeekForPrev(const Slice& target) {
status_ = Status::OK(); status_ = Status::OK();
ReleaseTempPinnedData(); ReleaseTempPinnedData();
ResetBlobValue();
ResetWideColumnValue(); ResetWideColumnValue();
ResetInternalKeysSkippedCounter(); ResetInternalKeysSkippedCounter();
@ -1613,6 +1624,7 @@ void DBIter::SeekToFirst() {
status_ = Status::OK(); status_ = Status::OK();
direction_ = kForward; direction_ = kForward;
ReleaseTempPinnedData(); ReleaseTempPinnedData();
ResetBlobValue();
ResetWideColumnValue(); ResetWideColumnValue();
ResetInternalKeysSkippedCounter(); ResetInternalKeysSkippedCounter();
ClearSavedValue(); ClearSavedValue();
@ -1661,6 +1673,7 @@ void DBIter::SeekToLast() {
*iterate_upper_bound_, /*a_has_ts=*/false, k, *iterate_upper_bound_, /*a_has_ts=*/false, k,
/*b_has_ts=*/false)) { /*b_has_ts=*/false)) {
ReleaseTempPinnedData(); ReleaseTempPinnedData();
ResetBlobValue();
ResetWideColumnValue(); ResetWideColumnValue();
PrevInternal(nullptr); PrevInternal(nullptr);
@ -1681,6 +1694,7 @@ void DBIter::SeekToLast() {
status_ = Status::OK(); status_ = Status::OK();
direction_ = kReverse; direction_ = kReverse;
ReleaseTempPinnedData(); ReleaseTempPinnedData();
ResetBlobValue();
ResetWideColumnValue(); ResetWideColumnValue();
ResetInternalKeysSkippedCounter(); ResetInternalKeysSkippedCounter();
ClearSavedValue(); ClearSavedValue();

@ -303,6 +303,11 @@ class DBIter final : public Iterator {
// index when using the integrated BlobDB implementation. // index when using the integrated BlobDB implementation.
bool SetBlobValueIfNeeded(const Slice& user_key, const Slice& blob_index); bool SetBlobValueIfNeeded(const Slice& user_key, const Slice& blob_index);
void ResetBlobValue() {
is_blob_ = false;
blob_value_.Reset();
}
bool SetWideColumnValueIfNeeded(const Slice& wide_columns_slice); bool SetWideColumnValueIfNeeded(const Slice& wide_columns_slice);
void ResetWideColumnValue() { void ResetWideColumnValue() {

Loading…
Cancel
Save