From 25403c2265cb700462d59fa3cb9dbec85d25d48f Mon Sep 17 00:00:00 2001
From: Andrew Kryczka
Date: Thu, 28 Jun 2018 13:02:55 -0700
Subject: [PATCH] Prefetch cache lines for filter lookup (#4068)

Summary:
Since the filter data is unaligned, even though we ensure all probes are
within a span of `cache_line_size` bytes, those bytes can span two cache
lines. In that case I doubt hardware prefetching does a great job considering
we don't necessarily access those two cache lines in order. This guess seems
correct since adding explicit prefetch instructions reduced filter lookup
overhead by 19.4%.
Closes https://github.com/facebook/rocksdb/pull/4068

Differential Revision: D8674189

Pulled By: ajkr

fbshipit-source-id: 747427d9a17900151c17820488e3f7efe06b1871
---
 util/bloom.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/util/bloom.cc b/util/bloom.cc
index 542d4fe7d..51de07953 100644
--- a/util/bloom.cc
+++ b/util/bloom.cc
@@ -228,6 +228,8 @@ bool FullFilterBitsReader::HashMayMatch(const uint32_t& hash,
   uint32_t h = hash;
   const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
   uint32_t b = (h % num_lines) * (cache_line_size * 8);
+  PREFETCH(&data[b / 8], 0 /* rw */, 1 /* locality */);
+  PREFETCH(&data[b / 8 + cache_line_size - 1], 0 /* rw */, 1 /* locality */);
   for (uint32_t i = 0; i < num_probes; ++i) {
     // Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
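
Note (not part of the patch): a minimal standalone sketch of the idea, assuming
PREFETCH maps to GCC/Clang's __builtin_prefetch as in RocksDB's port layer.
Because the filter data is not aligned to cache_line_size, a probe span of
cache_line_size bytes may straddle two hardware cache lines; touching the
span's first and last byte prefetches both. PrefetchProbeSpan is a hypothetical
helper used only for illustration.

  #include <cstddef>

  #ifndef PREFETCH
  #define PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality)
  #endif

  // Prefetch the span [data + byte_offset, data + byte_offset + cache_line_size)
  // before probing it. Prefetching both ends covers both cache lines the
  // unaligned span may straddle.
  inline void PrefetchProbeSpan(const char* data, size_t byte_offset,
                                size_t cache_line_size) {
    PREFETCH(&data[byte_offset], 0 /* read */, 1 /* low temporal locality */);
    PREFETCH(&data[byte_offset + cache_line_size - 1], 0 /* read */,
             1 /* low temporal locality */);
  }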