Genericize and clean up FastRange (#7436)

Summary: A generic algorithm in progress depends on a templatized version of fastrange, so this change generalizes it and renames it to fit our style guidelines, FastRange32, FastRange64, and now FastRangeGeneric. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7436 Test Plan: added a few more test cases Reviewed By: jay-zhuang Differential Revision: D23958153 Pulled By: pdillinger fbshipit-source-id: 8c3b76101653417804997e5f076623a25586f3e8
5 years ago · 08552b19d3
parent 8f8264032d
commit 08552b19d3
13 changed files with 228 additions and 130 deletions
--- a/cache/cache_bench.cc
+++ b/cache/cache_bench.cc
@ -136,7 +136,7 @@ struct KeyGen {
    for (uint32_t i = 0; i < FLAGS_skew; ++i) {
      raw = std::min(raw, rnd.Next());
    }
-    uint64_t key = fastrange64(raw, max_key);
+    uint64_t key = FastRange64(raw, max_key);
    // Variable size and alignment
    size_t off = key % 8;
    key_data[0] = char{42};
--- a/db/memtable.cc
+++ b/db/memtable.cc
@ -454,7 +454,7 @@ FragmentedRangeTombstoneIterator* MemTable::NewRangeTombstoneIterator(
 }

 port::RWMutex* MemTable::GetLock(const Slice& key) {
-  return &locks_[fastrange64(GetSliceNPHash64(key), locks_.size())];
+  return &locks_[GetSliceRangedNPHash(key, locks_.size())];
 }

 MemTable::MemTableStats MemTable::ApproximateStats(const Slice& start_ikey,
--- a/memtable/hash_linklist_rep.cc
+++ b/memtable/hash_linklist_rep.cc
@ -218,7 +218,7 @@ class HashLinkListRep : public MemTableRep {
  }

  size_t GetHash(const Slice& slice) const {
-    return fastrange64(GetSliceNPHash64(slice), bucket_size_);
+    return GetSliceRangedNPHash(slice, bucket_size_);
  }

  Pointer* GetBucket(size_t i) const {
--- a/trace_replay/block_cache_tracer.cc
+++ b/trace_replay/block_cache_tracer.cc
@ -26,8 +26,7 @@ bool ShouldTrace(const Slice& block_key, const TraceOptions& trace_options) {
  }
  // We use spatial downsampling so that we have a complete access history for a
  // block.
-  return 0 == fastrange64(GetSliceNPHash64(block_key),
-                          trace_options.sampling_frequency);
+  return 0 == GetSliceRangedNPHash(block_key, trace_options.sampling_frequency);
 }
 }  // namespace

--- a/util/bloom_impl.h
+++ b/util/bloom_impl.h
@ -87,7 +87,7 @@ class BloomMath {

 // A fast, flexible, and accurate cache-local Bloom implementation with
 // SIMD-optimized query performance (currently using AVX2 on Intel). Write
-// performance and non-SIMD read are very good, benefiting from fastrange32
+// performance and non-SIMD read are very good, benefiting from FastRange32
 // used in place of % and single-cycle multiplication on recent processors.
 //
 // Most other SIMD Bloom implementations sacrifice flexibility and/or
@ -193,7 +193,7 @@ class FastLocalBloomImpl {

  static inline void AddHash(uint32_t h1, uint32_t h2, uint32_t len_bytes,
                             int num_probes, char *data) {
-    uint32_t bytes_to_cache_line = fastrange32(len_bytes >> 6, h1) << 6;
+    uint32_t bytes_to_cache_line = FastRange32(len_bytes >> 6, h1) << 6;
    AddHashPrepared(h2, num_probes, data + bytes_to_cache_line);
  }

@ -210,7 +210,7 @@ class FastLocalBloomImpl {
  static inline void PrepareHash(uint32_t h1, uint32_t len_bytes,
                                 const char *data,
                                 uint32_t /*out*/ *byte_offset) {
-    uint32_t bytes_to_cache_line = fastrange32(len_bytes >> 6, h1) << 6;
+    uint32_t bytes_to_cache_line = FastRange32(len_bytes >> 6, h1) << 6;
    PREFETCH(data + bytes_to_cache_line, 0 /* rw */, 1 /* locality */);
    PREFETCH(data + bytes_to_cache_line + 63, 0 /* rw */, 1 /* locality */);
    *byte_offset = bytes_to_cache_line;
@ -218,7 +218,7 @@ class FastLocalBloomImpl {

  static inline bool HashMayMatch(uint32_t h1, uint32_t h2, uint32_t len_bytes,
                                  int num_probes, const char *data) {
-    uint32_t bytes_to_cache_line = fastrange32(len_bytes >> 6, h1) << 6;
+    uint32_t bytes_to_cache_line = FastRange32(len_bytes >> 6, h1) << 6;
    return HashMayMatchPrepared(h2, num_probes, data + bytes_to_cache_line);
  }

--- a/util/dynamic_bloom.h
+++ b/util/dynamic_bloom.h
@ -126,7 +126,7 @@ inline void DynamicBloom::MayContain(int num_keys, Slice** keys,
  std::array<size_t, MultiGetContext::MAX_BATCH_SIZE> byte_offsets;
  for (int i = 0; i < num_keys; ++i) {
    hashes[i] = BloomHash(*keys[i]);
-    size_t a = fastrange32(kLen, hashes[i]);
+    size_t a = FastRange32(kLen, hashes[i]);
    PREFETCH(data_ + a, 0, 3);
    byte_offsets[i] = a;
  }
@ -142,7 +142,7 @@ inline void DynamicBloom::MayContain(int num_keys, Slice** keys,
 #pragma warning(disable : 4189)
 #endif
 inline void DynamicBloom::Prefetch(uint32_t h32) {
-  size_t a = fastrange32(kLen, h32);
+  size_t a = FastRange32(kLen, h32);
  PREFETCH(data_ + a, 0, 3);
 }
 #if defined(_MSC_VER)
@ -171,7 +171,7 @@ inline void DynamicBloom::Prefetch(uint32_t h32) {
 // because of false positives.)

 inline bool DynamicBloom::MayContainHash(uint32_t h32) const {
-  size_t a = fastrange32(kLen, h32);
+  size_t a = FastRange32(kLen, h32);
  PREFETCH(data_ + a, 0, 3);
  return DoubleProbe(h32, a);
 }
@ -195,7 +195,7 @@ inline bool DynamicBloom::DoubleProbe(uint32_t h32, size_t byte_offset) const {

 template <typename OrFunc>
 inline void DynamicBloom::AddHash(uint32_t h32, const OrFunc& or_func) {
-  size_t a = fastrange32(kLen, h32);
+  size_t a = FastRange32(kLen, h32);
  PREFETCH(data_ + a, 0, 3);
  // Expand/remix with 64-bit golden ratio
  uint64_t h = 0x9e3779b97f4a7c13ULL * h32;
--- a/util/fastrange.h
+++ b/util/fastrange.h
@ -0,0 +1,112 @@
+//  Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+// fastrange/FastRange: A faster alternative to % for mapping a hash value
+// to an arbitrary range. See https://github.com/lemire/fastrange
+//
+// Generally recommended are FastRange32 for mapping results of 32-bit
+// hash functions and FastRange64 for mapping results of 64-bit hash
+// functions. FastRange is less forgiving than % if the input hashes are
+// not well distributed over the full range of the type (32 or 64 bits).
+//
+// Also included is a templated implementation FastRangeGeneric for use
+// in generic algorithms, but not otherwise recommended because of
+// potential ambiguity. Unlike with %, it is critical to use the right
+// FastRange variant for the output size of your hash function.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+
+#ifdef TEST_UINT128_COMPAT
+#undef HAVE_UINT128_EXTENSION
+#endif
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace detail {
+
+// Using a class template to support partial specialization
+template <typename Hash, typename Range>
+struct FastRangeGenericImpl {
+  // only reach this on no supported specialization
+};
+
+template <typename Range>
+struct FastRangeGenericImpl<uint32_t, Range> {
+  static inline Range Fn(uint32_t hash, Range range) {
+    static_assert(std::is_unsigned<Range>::value, "must be unsigned");
+    static_assert(sizeof(Range) <= sizeof(uint32_t),
+                  "cannot be larger than hash (32 bits)");
+
+    uint64_t product = uint64_t{range} * hash;
+    return static_cast<Range>(product >> 32);
+  }
+};
+
+template <typename Range>
+struct FastRangeGenericImpl<uint64_t, Range> {
+  static inline Range Fn(uint64_t hash, Range range) {
+    static_assert(std::is_unsigned<Range>::value, "must be unsigned");
+    static_assert(sizeof(Range) <= sizeof(uint64_t),
+                  "cannot be larger than hash (64 bits)");
+
+#ifdef HAVE_UINT128_EXTENSION
+    // Can use compiler's 128-bit type. Trust it to do the right thing.
+    __uint128_t wide = __uint128_t{range} * hash;
+    return static_cast<Range>(wide >> 64);
+#else
+    // Fall back: full decomposition.
+    // NOTE: GCC seems to fully understand this code as 64-bit x 64-bit
+    // -> 128-bit multiplication and optimize it appropriately
+    uint64_t range64 = range;  // ok to shift by 32, even if Range is 32-bit
+    uint64_t tmp = uint64_t{range64 & 0xffffFFFF} * uint64_t{hash & 0xffffFFFF};
+    tmp >>= 32;
+    tmp += uint64_t{range64 & 0xffffFFFF} * uint64_t{hash >> 32};
+    // Avoid overflow: first add lower 32 of tmp2, and later upper 32
+    uint64_t tmp2 = uint64_t{range64 >> 32} * uint64_t{hash & 0xffffFFFF};
+    tmp += static_cast<uint32_t>(tmp2);
+    tmp >>= 32;
+    tmp += (tmp2 >> 32);
+    tmp += uint64_t{range64 >> 32} * uint64_t{hash >> 32};
+    return static_cast<Range>(tmp);
+#endif
+  }
+};
+
+}  // namespace detail
+
+// Now an omnibus templated function (yay parameter inference).
+//
+// NOTICE:
+// This templated version is not recommended for typical use because
+// of the potential to mix a 64-bit FastRange with a 32-bit bit hash,
+// most likely because you put your 32-bit hash in an "unsigned long"
+// which is 64 bits on some platforms. That doesn't really matter for
+// an operation like %, but 64-bit FastRange gives extremely bad results,
+// mostly zero, on 32-bit hash values. And because good hashing is not
+// generally required for correctness, this kind of mistake could go
+// unnoticed with just unit tests. Plus it could vary by platform.
+template <typename Hash, typename Range>
+inline Range FastRangeGeneric(Hash hash, Range range) {
+  return detail::FastRangeGenericImpl<Hash, Range>::Fn(hash, range);
+}
+
+// The most popular / convenient / recommended variants:
+
+// Map a quality 64-bit hash value down to an arbitrary size_t range.
+// (size_t is standard for mapping to things in memory.)
+inline size_t FastRange64(uint64_t hash, size_t range) {
+  return FastRangeGeneric(hash, range);
+}
+
+// Map a quality 32-bit hash value down to an arbitrary uint32_t range.
+inline uint32_t FastRange32(uint32_t hash, uint32_t range) {
+  return FastRangeGeneric(hash, range);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/util/filter_bench.cc
+++ b/util/filter_bench.cc
@ -124,7 +124,7 @@ using ROCKSDB_NAMESPACE::BloomHash;
 using ROCKSDB_NAMESPACE::BuiltinFilterBitsBuilder;
 using ROCKSDB_NAMESPACE::CachableEntry;
 using ROCKSDB_NAMESPACE::EncodeFixed32;
-using ROCKSDB_NAMESPACE::fastrange32;
+using ROCKSDB_NAMESPACE::FastRange32;
 using ROCKSDB_NAMESPACE::FilterBitsReader;
 using ROCKSDB_NAMESPACE::FilterBuildingContext;
 using ROCKSDB_NAMESPACE::FullFilterBlockReader;
@ -161,7 +161,7 @@ struct KeyMaker {
    if (FLAGS_vary_key_size_log2_interval < 30) {
      // To get range [avg_size - 2, avg_size + 2]
      // use range [smallest_size, smallest_size + 4]
-      len += fastrange32(
+      len += FastRange32(
          (val_num >> FLAGS_vary_key_size_log2_interval) * 1234567891, 5);
    }
    char * data = buf_.get() + start;
@ -365,7 +365,7 @@ void FilterBench::Go() {
         total_keys_added < max_total_keys) {
    uint32_t filter_id = random_.Next();
    uint32_t keys_to_add = FLAGS_average_keys_per_filter +
-                           fastrange32(random_.Next(), variance_range) -
+                           FastRange32(random_.Next(), variance_range) -
                           variance_offset;
    if (max_total_keys - total_keys_added < keys_to_add) {
      keys_to_add = static_cast<uint32_t>(max_total_keys - total_keys_added);
--- a/util/hash.h
+++ b/util/hash.h
@ -18,10 +18,12 @@
 // hash inputs.

 #pragma once
-#include <stddef.h>
-#include <stdint.h>
+
+#include <cstddef>
+#include <cstdint>

 #include "rocksdb/slice.h"
+#include "util/fastrange.h"

 namespace ROCKSDB_NAMESPACE {

@ -64,6 +66,10 @@ inline uint64_t GetSliceNPHash64(const Slice& s) {
  return NPHash64(s.data(), s.size());
 }

+inline size_t GetSliceRangedNPHash(const Slice& s, size_t range) {
+  return FastRange64(NPHash64(s.data(), s.size()), range);
+}
+
 // TODO: consider rename to GetSliceHash32
 inline uint32_t GetSliceHash(const Slice& s) {
  return Hash(s.data(), s.size(), 397);
@ -81,44 +87,4 @@ struct SliceHasher {
  uint32_t operator()(const Slice& s) const { return GetSliceHash(s); }
 };

-// An alternative to % for mapping a hash value to an arbitrary range. See
-// https://github.com/lemire/fastrange
-inline uint32_t fastrange32(uint32_t hash, uint32_t range) {
-  uint64_t product = uint64_t{range} * hash;
-  return static_cast<uint32_t>(product >> 32);
-}
-
-#ifdef TEST_UINT128_COMPAT
-#undef HAVE_UINT128_EXTENSION
-#endif
-
-// An alternative to % for mapping a 64-bit hash value to an arbitrary range
-// that fits in size_t. See https://github.com/lemire/fastrange
-// We find size_t more convenient than uint64_t for the range, with side
-// benefit of better optimization on 32-bit platforms.
-inline size_t fastrange64(uint64_t hash, size_t range) {
-#ifdef HAVE_UINT128_EXTENSION
-  // Can use compiler's 128-bit type. Trust it to do the right thing.
-  __uint128_t wide = __uint128_t{range} * hash;
-  return static_cast<size_t>(wide >> 64);
-#else
-  // Fall back: full decomposition.
-  // NOTE: GCC seems to fully understand this code as 64-bit x {32 or 64}-bit
-  // -> {96 or 128}-bit multiplication and optimize it down to a single
-  // wide-result multiplication (64-bit platform) or two wide-result
-  // multiplications (32-bit platforms, where range64 >> 32 is zero).
-  uint64_t range64 = range;  // ok to shift by 32, even if size_t is 32-bit
-  uint64_t tmp = uint64_t{range64 & 0xffffFFFF} * uint64_t{hash & 0xffffFFFF};
-  tmp >>= 32;
-  tmp += uint64_t{range64 & 0xffffFFFF} * uint64_t{hash >> 32};
-  // Avoid overflow: first add lower 32 of tmp2, and later upper 32
-  uint64_t tmp2 = uint64_t{range64 >> 32} * uint64_t{hash & 0xffffFFFF};
-  tmp += static_cast<uint32_t>(tmp2);
-  tmp >>= 32;
-  tmp += (tmp2 >> 32);
-  tmp += uint64_t{range64 >> 32} * uint64_t{hash >> 32};
-  return static_cast<size_t>(tmp);
-#endif
-}
-
 }  // namespace ROCKSDB_NAMESPACE
--- a/util/hash_test.cc
+++ b/util/hash_test.cc
@ -267,109 +267,128 @@ TEST(HashTest, Hash64LargeValueSchema) {
      "eMFlxCIYUpTCsal2qsmnGOWa8WCcefrohMjDj1fjzSvSaQwlpyR1GZHF2uPOoQagiCpHpm");
 }

-TEST(Fastrange32Test, Values) {
-  using ROCKSDB_NAMESPACE::fastrange32;
+TEST(FastRange32Test, Values) {
+  using ROCKSDB_NAMESPACE::FastRange32;
  // Zero range
-  EXPECT_EQ(fastrange32(0, 0), 0U);
-  EXPECT_EQ(fastrange32(123, 0), 0U);
-  EXPECT_EQ(fastrange32(0xffffffff, 0), 0U);
+  EXPECT_EQ(FastRange32(0, 0), 0U);
+  EXPECT_EQ(FastRange32(123, 0), 0U);
+  EXPECT_EQ(FastRange32(0xffffffff, 0), 0U);

  // One range
-  EXPECT_EQ(fastrange32(0, 1), 0U);
-  EXPECT_EQ(fastrange32(123, 1), 0U);
-  EXPECT_EQ(fastrange32(0xffffffff, 1), 0U);
+  EXPECT_EQ(FastRange32(0, 1), 0U);
+  EXPECT_EQ(FastRange32(123, 1), 0U);
+  EXPECT_EQ(FastRange32(0xffffffff, 1), 0U);

  // Two range
-  EXPECT_EQ(fastrange32(0, 2), 0U);
-  EXPECT_EQ(fastrange32(123, 2), 0U);
-  EXPECT_EQ(fastrange32(0x7fffffff, 2), 0U);
-  EXPECT_EQ(fastrange32(0x80000000, 2), 1U);
-  EXPECT_EQ(fastrange32(0xffffffff, 2), 1U);
+  EXPECT_EQ(FastRange32(0, 2), 0U);
+  EXPECT_EQ(FastRange32(123, 2), 0U);
+  EXPECT_EQ(FastRange32(0x7fffffff, 2), 0U);
+  EXPECT_EQ(FastRange32(0x80000000, 2), 1U);
+  EXPECT_EQ(FastRange32(0xffffffff, 2), 1U);

  // Seven range
-  EXPECT_EQ(fastrange32(0, 7), 0U);
-  EXPECT_EQ(fastrange32(123, 7), 0U);
-  EXPECT_EQ(fastrange32(613566756, 7), 0U);
-  EXPECT_EQ(fastrange32(613566757, 7), 1U);
-  EXPECT_EQ(fastrange32(1227133513, 7), 1U);
-  EXPECT_EQ(fastrange32(1227133514, 7), 2U);
+  EXPECT_EQ(FastRange32(0, 7), 0U);
+  EXPECT_EQ(FastRange32(123, 7), 0U);
+  EXPECT_EQ(FastRange32(613566756, 7), 0U);
+  EXPECT_EQ(FastRange32(613566757, 7), 1U);
+  EXPECT_EQ(FastRange32(1227133513, 7), 1U);
+  EXPECT_EQ(FastRange32(1227133514, 7), 2U);
  // etc.
-  EXPECT_EQ(fastrange32(0xffffffff, 7), 6U);
+  EXPECT_EQ(FastRange32(0xffffffff, 7), 6U);

  // Big
-  EXPECT_EQ(fastrange32(1, 0x80000000), 0U);
-  EXPECT_EQ(fastrange32(2, 0x80000000), 1U);
-  EXPECT_EQ(fastrange32(4, 0x7fffffff), 1U);
-  EXPECT_EQ(fastrange32(4, 0x80000000), 2U);
-  EXPECT_EQ(fastrange32(0xffffffff, 0x7fffffff), 0x7ffffffeU);
-  EXPECT_EQ(fastrange32(0xffffffff, 0x80000000), 0x7fffffffU);
+  EXPECT_EQ(FastRange32(1, 0x80000000), 0U);
+  EXPECT_EQ(FastRange32(2, 0x80000000), 1U);
+  EXPECT_EQ(FastRange32(4, 0x7fffffff), 1U);
+  EXPECT_EQ(FastRange32(4, 0x80000000), 2U);
+  EXPECT_EQ(FastRange32(0xffffffff, 0x7fffffff), 0x7ffffffeU);
+  EXPECT_EQ(FastRange32(0xffffffff, 0x80000000), 0x7fffffffU);
 }

-TEST(Fastrange64Test, Values) {
-  using ROCKSDB_NAMESPACE::fastrange64;
+TEST(FastRange64Test, Values) {
+  using ROCKSDB_NAMESPACE::FastRange64;
  // Zero range
-  EXPECT_EQ(fastrange64(0, 0), 0U);
-  EXPECT_EQ(fastrange64(123, 0), 0U);
-  EXPECT_EQ(fastrange64(0xffffFFFF, 0), 0U);
-  EXPECT_EQ(fastrange64(0xffffFFFFffffFFFF, 0), 0U);
+  EXPECT_EQ(FastRange64(0, 0), 0U);
+  EXPECT_EQ(FastRange64(123, 0), 0U);
+  EXPECT_EQ(FastRange64(0xffffFFFF, 0), 0U);
+  EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 0), 0U);

  // One range
-  EXPECT_EQ(fastrange64(0, 1), 0U);
-  EXPECT_EQ(fastrange64(123, 1), 0U);
-  EXPECT_EQ(fastrange64(0xffffFFFF, 1), 0U);
-  EXPECT_EQ(fastrange64(0xffffFFFFffffFFFF, 1), 0U);
+  EXPECT_EQ(FastRange64(0, 1), 0U);
+  EXPECT_EQ(FastRange64(123, 1), 0U);
+  EXPECT_EQ(FastRange64(0xffffFFFF, 1), 0U);
+  EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 1), 0U);

  // Two range
-  EXPECT_EQ(fastrange64(0, 2), 0U);
-  EXPECT_EQ(fastrange64(123, 2), 0U);
-  EXPECT_EQ(fastrange64(0xffffFFFF, 2), 0U);
-  EXPECT_EQ(fastrange64(0x7fffFFFFffffFFFF, 2), 0U);
-  EXPECT_EQ(fastrange64(0x8000000000000000, 2), 1U);
-  EXPECT_EQ(fastrange64(0xffffFFFFffffFFFF, 2), 1U);
+  EXPECT_EQ(FastRange64(0, 2), 0U);
+  EXPECT_EQ(FastRange64(123, 2), 0U);
+  EXPECT_EQ(FastRange64(0xffffFFFF, 2), 0U);
+  EXPECT_EQ(FastRange64(0x7fffFFFFffffFFFF, 2), 0U);
+  EXPECT_EQ(FastRange64(0x8000000000000000, 2), 1U);
+  EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 2), 1U);

  // Seven range
-  EXPECT_EQ(fastrange64(0, 7), 0U);
-  EXPECT_EQ(fastrange64(123, 7), 0U);
-  EXPECT_EQ(fastrange64(0xffffFFFF, 7), 0U);
-  EXPECT_EQ(fastrange64(2635249153387078802, 7), 0U);
-  EXPECT_EQ(fastrange64(2635249153387078803, 7), 1U);
-  EXPECT_EQ(fastrange64(5270498306774157604, 7), 1U);
-  EXPECT_EQ(fastrange64(5270498306774157605, 7), 2U);
-  EXPECT_EQ(fastrange64(0x7fffFFFFffffFFFF, 7), 3U);
-  EXPECT_EQ(fastrange64(0x8000000000000000, 7), 3U);
-  EXPECT_EQ(fastrange64(0xffffFFFFffffFFFF, 7), 6U);
+  EXPECT_EQ(FastRange64(0, 7), 0U);
+  EXPECT_EQ(FastRange64(123, 7), 0U);
+  EXPECT_EQ(FastRange64(0xffffFFFF, 7), 0U);
+  EXPECT_EQ(FastRange64(2635249153387078802, 7), 0U);
+  EXPECT_EQ(FastRange64(2635249153387078803, 7), 1U);
+  EXPECT_EQ(FastRange64(5270498306774157604, 7), 1U);
+  EXPECT_EQ(FastRange64(5270498306774157605, 7), 2U);
+  EXPECT_EQ(FastRange64(0x7fffFFFFffffFFFF, 7), 3U);
+  EXPECT_EQ(FastRange64(0x8000000000000000, 7), 3U);
+  EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 7), 6U);

  // Big but 32-bit range
-  EXPECT_EQ(fastrange64(0x100000000, 0x80000000), 0U);
-  EXPECT_EQ(fastrange64(0x200000000, 0x80000000), 1U);
-  EXPECT_EQ(fastrange64(0x400000000, 0x7fffFFFF), 1U);
-  EXPECT_EQ(fastrange64(0x400000000, 0x80000000), 2U);
-  EXPECT_EQ(fastrange64(0xffffFFFFffffFFFF, 0x7fffFFFF), 0x7fffFFFEU);
-  EXPECT_EQ(fastrange64(0xffffFFFFffffFFFF, 0x80000000), 0x7fffFFFFU);
+  EXPECT_EQ(FastRange64(0x100000000, 0x80000000), 0U);
+  EXPECT_EQ(FastRange64(0x200000000, 0x80000000), 1U);
+  EXPECT_EQ(FastRange64(0x400000000, 0x7fffFFFF), 1U);
+  EXPECT_EQ(FastRange64(0x400000000, 0x80000000), 2U);
+  EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 0x7fffFFFF), 0x7fffFFFEU);
+  EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 0x80000000), 0x7fffFFFFU);

  // Big, > 32-bit range
 #if SIZE_MAX == UINT64_MAX
-  EXPECT_EQ(fastrange64(0x7fffFFFFffffFFFF, 0x4200000002), 0x2100000000U);
-  EXPECT_EQ(fastrange64(0x8000000000000000, 0x4200000002), 0x2100000001U);
+  EXPECT_EQ(FastRange64(0x7fffFFFFffffFFFF, 0x4200000002), 0x2100000000U);
+  EXPECT_EQ(FastRange64(0x8000000000000000, 0x4200000002), 0x2100000001U);

-  EXPECT_EQ(fastrange64(0x0000000000000000, 420000000002), 0U);
-  EXPECT_EQ(fastrange64(0x7fffFFFFffffFFFF, 420000000002), 210000000000U);
-  EXPECT_EQ(fastrange64(0x8000000000000000, 420000000002), 210000000001U);
-  EXPECT_EQ(fastrange64(0xffffFFFFffffFFFF, 420000000002), 420000000001U);
+  EXPECT_EQ(FastRange64(0x0000000000000000, 420000000002), 0U);
+  EXPECT_EQ(FastRange64(0x7fffFFFFffffFFFF, 420000000002), 210000000000U);
+  EXPECT_EQ(FastRange64(0x8000000000000000, 420000000002), 210000000001U);
+  EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 420000000002), 420000000001U);

-  EXPECT_EQ(fastrange64(0xffffFFFFffffFFFF, 0xffffFFFFffffFFFF),
+  EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 0xffffFFFFffffFFFF),
            0xffffFFFFffffFFFEU);
 #endif
 }

+TEST(FastRangeGenericTest, Values) {
+  using ROCKSDB_NAMESPACE::FastRangeGeneric;
+  // Generic (including big and small)
+  // Note that FastRangeGeneric is also tested indirectly above via
+  // FastRange32 and FastRange64.
+  EXPECT_EQ(
+      FastRangeGeneric(uint64_t{0x8000000000000000}, uint64_t{420000000002}),
+      uint64_t{210000000001});
+  EXPECT_EQ(FastRangeGeneric(uint64_t{0x8000000000000000}, uint16_t{12468}),
+            uint16_t{6234});
+  EXPECT_EQ(FastRangeGeneric(uint32_t{0x80000000}, uint16_t{12468}),
+            uint16_t{6234});
+  // Not recommended for typical use because for example this could fail on
+  // some platforms and pass on others:
+  //EXPECT_EQ(FastRangeGeneric(static_cast<unsigned long>(0x80000000),
+  //                           uint16_t{12468}),
+  //          uint16_t{6234});
+}
+
 // for inspection of disassembly
-uint32_t fastrange32(uint32_t hash, uint32_t range) {
-  return ROCKSDB_NAMESPACE::fastrange32(hash, range);
+uint32_t FastRange32(uint32_t hash, uint32_t range) {
+  return ROCKSDB_NAMESPACE::FastRange32(hash, range);
 }

 // for inspection of disassembly
-size_t fastrange64(uint64_t hash, size_t range) {
-  return ROCKSDB_NAMESPACE::fastrange64(hash, range);
+size_t FastRange64(uint64_t hash, size_t range) {
+  return ROCKSDB_NAMESPACE::FastRange64(hash, range);
 }

 // Tests for math.h / math128.h (not worth a separate test binary)
--- a/util/math.h
+++ b/util/math.h
@ -6,11 +6,13 @@
 #pragma once

 #include <assert.h>
-#include <stdint.h>
 #ifdef _MSC_VER
 #include <intrin.h>
 #endif

+#include <cstdint>
+#include <type_traits>
+
 namespace ROCKSDB_NAMESPACE {

 // Fast implementation of floor(log2(v)). Undefined for 0 or negative
--- a/utilities/transactions/optimistic_transaction.cc
+++ b/utilities/transactions/optimistic_transaction.cc
@ -107,7 +107,7 @@ Status OptimisticTransaction::CommitWithParallelValidate() {
    assert(key_it != nullptr);
    while (key_it->HasNext()) {
      const std::string& key = key_it->Next();
-      lk_idxes.insert(fastrange64(GetSliceNPHash64(key), space));
+      lk_idxes.insert(FastRange64(GetSliceNPHash64(key), space));
    }
  }
  // NOTE: in a single txn, all bucket-locks are taken in ascending order.
--- a/utilities/transactions/transaction_lock_mgr.cc
+++ b/utilities/transactions/transaction_lock_mgr.cc
@ -173,7 +173,7 @@ TransactionLockMgr::~TransactionLockMgr() {}

 size_t LockMap::GetStripe(const std::string& key) const {
  assert(num_stripes_ > 0);
-  return fastrange64(GetSliceNPHash64(key), num_stripes_);
+  return FastRange64(GetSliceNPHash64(key), num_stripes_);
 }

 void TransactionLockMgr::AddColumnFamily(uint32_t column_family_id) {