From fdd70d14955e521d1b6bdb42838b44569f40c942 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 1 Feb 2016 14:58:46 -0800 Subject: [PATCH] Skip filters for last L0 file if hit-optimized Summary: Following up on D53493, we can still enable the filter-skipping optimization for last file in L0. It's correct to assume the key will be present in the last L0 file when we're hit-optimized and L0 is deepest. The FilePicker encapsulates the state for traversing each level's files, so I needed to make it expose whether the returned file is last in its level. Test Plan: verified below test fails before this patch and passes afterwards. The change to how the test memtable is populated is needed so file 1 has keys (0, 30, 60), file 2 has keys (10, 40, 70), etc. $ ./db_universal_compaction_test --gtest_filter=UniversalCompactionNumLevels/DBTestUniversalCompaction.OptimizeFiltersForHits/* Reviewers: sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53583 --- db/db_universal_compaction_test.cc | 15 ++++++++++++--- db/version_set.cc | 30 ++++++++++++++++++------------ db/version_set.h | 2 +- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/db/db_universal_compaction_test.cc b/db/db_universal_compaction_test.cc index 9d3cca83c..a4cf6657f 100644 --- a/db/db_universal_compaction_test.cc +++ b/db/db_universal_compaction_test.cc @@ -187,14 +187,16 @@ TEST_P(DBTestUniversalCompaction, OptimizeFiltersForHits) { env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, Env::Priority::LOW); - Put("", ""); for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { Put(Key(num * 10), "val"); + if (num) { + dbfull()->TEST_WaitForFlushMemTable(); + } Put(Key(30 + num * 10), "val"); Put(Key(60 + num * 10), "val"); - - dbfull()->TEST_WaitForFlushMemTable(); } + Put("", ""); + dbfull()->TEST_WaitForFlushMemTable(); // Query set of non existing keys for (int i = 5; i < 90; i += 10) { @@ -205,6 +207,13 @@ TEST_P(DBTestUniversalCompaction, OptimizeFiltersForHits) { ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); auto prev_counter = TestGetTickerCount(options, BLOOM_FILTER_USEFUL); + // Make sure bloom filter is used for all but the last L0 file when looking + // up a non-existent key that's in the range of all L0 files. + ASSERT_EQ(Get(Key(35)), "NOT_FOUND"); + ASSERT_EQ(prev_counter + NumTableFilesAtLevel(0) - 1, + TestGetTickerCount(options, BLOOM_FILTER_USEFUL)); + prev_counter = TestGetTickerCount(options, BLOOM_FILTER_USEFUL); + // Unblock compaction and wait it for happening. sleeping_task_low.WakeUp(); dbfull()->TEST_WaitForCompact(); diff --git a/db/version_set.cc b/db/version_set.cc index 3679bfbb4..6804730d7 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -84,15 +84,11 @@ int FindFileInRange(const InternalKeyComparator& icmp, // are MergeInProgress). class FilePicker { public: - FilePicker( - std::vector* files, - const Slice& user_key, - const Slice& ikey, - autovector* file_levels, - unsigned int num_levels, - FileIndexer* file_indexer, - const Comparator* user_comparator, - const InternalKeyComparator* internal_comparator) + FilePicker(std::vector* files, const Slice& user_key, + const Slice& ikey, autovector* file_levels, + unsigned int num_levels, FileIndexer* file_indexer, + const Comparator* user_comparator, + const InternalKeyComparator* internal_comparator) : num_levels_(num_levels), curr_level_(-1), hit_file_level_(-1), @@ -102,6 +98,7 @@ class FilePicker { files_(files), #endif level_files_brief_(file_levels), + is_hit_file_last_in_level_(false), user_key_(user_key), ikey_(ikey), file_indexer_(file_indexer), @@ -126,6 +123,8 @@ class FilePicker { // Loops over all files in current level. FdWithKeyRange* f = &curr_file_level_->files[curr_index_in_curr_level_]; hit_file_level_ = curr_level_; + is_hit_file_last_in_level_ = + curr_index_in_curr_level_ == curr_file_level_->num_files - 1; int cmp_largest = -1; // Do key range filtering of files or/and fractional cascading if: @@ -209,6 +208,10 @@ class FilePicker { // for GET_HIT_L0, GET_HIT_L1 & GET_HIT_L2_AND_UP counts unsigned int GetHitFileLevel() { return hit_file_level_; } + // Returns true if the most recent "hit file" (i.e., one returned by + // GetNextFile()) is at the last index in its level. + bool IsHitFileLastInLevel() { return is_hit_file_last_in_level_; } + private: unsigned int num_levels_; unsigned int curr_level_; @@ -220,6 +223,7 @@ class FilePicker { #endif autovector* level_files_brief_; bool search_ended_; + bool is_hit_file_last_in_level_; LevelFilesBrief* curr_file_level_; unsigned int curr_index_in_curr_level_; unsigned int start_index_in_curr_level_; @@ -903,7 +907,8 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k, *status = table_cache_->Get( read_options, *internal_comparator(), f->fd, ikey, &get_context, cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()), - IsFilterSkipped(static_cast(fp.GetHitFileLevel()))); + IsFilterSkipped(static_cast(fp.GetHitFileLevel()), + fp.IsHitFileLastInLevel())); // TODO: examine the behavior for corrupted key if (!status->ok()) { return; @@ -960,10 +965,11 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k, } } -bool Version::IsFilterSkipped(int level) { +bool Version::IsFilterSkipped(int level, bool is_file_last_in_level) { // Reaching the bottom level implies misses at all upper levels, so we'll // skip checking the filters when we predict a hit. - return cfd_->ioptions()->optimize_filters_for_hits && level > 0 && + return cfd_->ioptions()->optimize_filters_for_hits && + (level > 0 || is_file_last_in_level) && level == storage_info_.num_non_empty_levels() - 1; } diff --git a/db/version_set.h b/db/version_set.h index 097109fd4..7ce4a6bdf 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -530,7 +530,7 @@ class Version { // checked during read operations. In certain cases (trivial move or preload), // the filter block may already be cached, but we still do not access it such // that it eventually expires from the cache. - bool IsFilterSkipped(int level); + bool IsFilterSkipped(int level, bool is_file_last_in_level = false); // The helper function of UpdateAccumulatedStats, which may fill the missing // fields of file_mata from its associated TableProperties.