diff --git a/db/db_iter.cc b/db/db_iter.cc index b12fd756f..825a6b840 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -76,6 +76,7 @@ class DBIter final: public Iterator { prev_count_ = 0; prev_found_count_ = 0; bytes_read_ = 0; + skip_count_ = 0; } void BumpGlobalStatistics(Statistics* global_statistics) { @@ -84,6 +85,7 @@ class DBIter final: public Iterator { RecordTick(global_statistics, NUMBER_DB_PREV, prev_count_); RecordTick(global_statistics, NUMBER_DB_PREV_FOUND, prev_found_count_); RecordTick(global_statistics, ITER_BYTES_READ, bytes_read_); + RecordTick(global_statistics, NUMBER_ITER_SKIP, skip_count_); PERF_COUNTER_ADD(iter_read_bytes, bytes_read_); ResetCounters(); } @@ -98,6 +100,8 @@ class DBIter final: public Iterator { uint64_t prev_found_count_; // Map to Tickers::ITER_BYTES_READ uint64_t bytes_read_; + // Map to Tickers::NUMBER_ITER_SKIP + uint64_t skip_count_; }; DBIter(Env* _env, const ReadOptions& read_options, @@ -147,6 +151,7 @@ class DBIter final: public Iterator { // Compiler warning issue filed: // https://github.com/facebook/rocksdb/issues/3013 RecordTick(statistics_, NO_ITERATORS, uint64_t(-1)); + ResetInternalKeysSkippedCounter(); local_stats_.BumpGlobalStatistics(statistics_); if (!arena_mode_) { delete iter_; @@ -267,6 +272,10 @@ class DBIter final: public Iterator { } inline void ResetInternalKeysSkippedCounter() { + local_stats_.skip_count_ += num_internal_keys_skipped_; + if (valid_) { + local_stats_.skip_count_--; + } num_internal_keys_skipped_ = 0; } @@ -1143,6 +1152,7 @@ void DBIter::Seek(const Slice& target) { } if (statistics_ != nullptr) { if (valid_) { + // Decrement since we don't want to count this key as skipped RecordTick(statistics_, NUMBER_DB_SEEK_FOUND); RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size()); PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size()); @@ -1269,7 +1279,8 @@ void DBIter::SeekToLast() { if (!Valid()) { return; } else if (user_comparator_->Equal(*iterate_upper_bound_, key())) { - Prev(); + ReleaseTempPinnedData(); + PrevInternal(); } } else { PrevInternal(); diff --git a/db/db_iterator_test.cc b/db/db_iterator_test.cc index d62862a95..746946a28 100644 --- a/db/db_iterator_test.cc +++ b/db/db_iterator_test.cc @@ -2076,6 +2076,87 @@ TEST_F(DBIteratorTest, TableFilter) { } } +TEST_F(DBIteratorTest, SkipStatistics) { + Options options = CurrentOptions(); + options.statistics = rocksdb::CreateDBStatistics(); + DestroyAndReopen(options); + + int skip_count = 0; + + // write a bunch of kvs to the database. + ASSERT_OK(Put("a", "1")); + ASSERT_OK(Put("b", "1")); + ASSERT_OK(Put("c", "1")); + ASSERT_OK(Flush()); + ASSERT_OK(Put("d", "1")); + ASSERT_OK(Put("e", "1")); + ASSERT_OK(Put("f", "1")); + ASSERT_OK(Put("a", "2")); + ASSERT_OK(Put("b", "2")); + ASSERT_OK(Flush()); + ASSERT_OK(Delete("d")); + ASSERT_OK(Delete("e")); + ASSERT_OK(Delete("f")); + + Iterator* iter = db_->NewIterator(ReadOptions()); + int count = 0; + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + ASSERT_OK(iter->status()); + count++; + } + ASSERT_EQ(count, 3); + delete iter; + skip_count += 8; // 3 deletes + 3 original keys + 2 lower in sequence + ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP)); + + iter = db_->NewIterator(ReadOptions()); + count = 0; + for (iter->SeekToLast(); iter->Valid(); iter->Prev()) { + ASSERT_OK(iter->status()); + count++; + } + ASSERT_EQ(count, 3); + delete iter; + skip_count += 8; // Same as above, but in reverse order + ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP)); + + ASSERT_OK(Put("aa", "1")); + ASSERT_OK(Put("ab", "1")); + ASSERT_OK(Put("ac", "1")); + ASSERT_OK(Put("ad", "1")); + ASSERT_OK(Flush()); + ASSERT_OK(Delete("ab")); + ASSERT_OK(Delete("ac")); + ASSERT_OK(Delete("ad")); + + ReadOptions ro; + Slice prefix("b"); + ro.iterate_upper_bound = &prefix; + + iter = db_->NewIterator(ro); + count = 0; + for(iter->Seek("aa"); iter->Valid(); iter->Next()) { + ASSERT_OK(iter->status()); + count++; + } + ASSERT_EQ(count, 1); + delete iter; + skip_count += 6; // 3 deletes + 3 original keys + ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP)); + + iter = db_->NewIterator(ro); + count = 0; + for(iter->SeekToLast(); iter->Valid(); iter->Prev()) { + ASSERT_OK(iter->status()); + count++; + } + ASSERT_EQ(count, 2); + delete iter; + // 3 deletes + 3 original keys + 2 keys of "b" + lower sequence of "a" + skip_count += 9; + ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP)); +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index 731ff7809..a41ea7d4b 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -223,6 +223,9 @@ enum Tickers : uint32_t { // Number of refill intervals where rate limiter's bytes are fully consumed. NUMBER_RATE_LIMITER_DRAINS, + // Number of internal keys skipped by Iterator + NUMBER_ITER_SKIP, + TICKER_ENUM_MAX }; @@ -328,6 +331,7 @@ const std::vector> TickersNameMap = { {READ_AMP_ESTIMATE_USEFUL_BYTES, "rocksdb.read.amp.estimate.useful.bytes"}, {READ_AMP_TOTAL_READ_BYTES, "rocksdb.read.amp.total.read.bytes"}, {NUMBER_RATE_LIMITER_DRAINS, "rocksdb.number.rate_limiter.drains"}, + {NUMBER_ITER_SKIP, "rocksdb.number.iter.skip"}, }; /** diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index 811048027..7fd7874bf 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -2523,8 +2523,10 @@ class TickerTypeJni { return 0x5B; case rocksdb::Tickers::NUMBER_RATE_LIMITER_DRAINS: return 0x5C; - case rocksdb::Tickers::TICKER_ENUM_MAX: + case rocksdb::Tickers::NUMBER_ITER_SKIP: return 0x5D; + case rocksdb::Tickers::TICKER_ENUM_MAX: + return 0x5E; default: // undefined/default @@ -2723,6 +2725,8 @@ class TickerTypeJni { case 0x5C: return rocksdb::Tickers::NUMBER_RATE_LIMITER_DRAINS; case 0x5D: + return rocksdb::Tickers::NUMBER_ITER_SKIP; + case 0x5E: return rocksdb::Tickers::TICKER_ENUM_MAX; default: diff --git a/monitoring/statistics.h b/monitoring/statistics.h index 6e915215d..3bf20d4cf 100644 --- a/monitoring/statistics.h +++ b/monitoring/statistics.h @@ -76,8 +76,7 @@ class StatisticsImpl : public Statistics { padding[(CACHE_LINE_SIZE - (INTERNAL_TICKER_ENUM_MAX * sizeof(std::atomic_uint_fast64_t) + INTERNAL_HISTOGRAM_ENUM_MAX * sizeof(HistogramImpl)) % - CACHE_LINE_SIZE) % - CACHE_LINE_SIZE] ROCKSDB_FIELD_UNUSED; + CACHE_LINE_SIZE)] ROCKSDB_FIELD_UNUSED; }; static_assert(sizeof(StatisticsData) % 64 == 0, "Expected 64-byte aligned");