Add some more bit operations to internal APIs (#11660)

Summary:
BottomNBits() - there is a single fast instruction for this on x86 (bzhi, since BMI2), but testing with godbolt indicates you need at least GCC 10 for the compiler to choose that instruction from the obvious C++ code. https://godbolt.org/z/5a7Ysd41h
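For illustration (example of mine, not from the patch): BottomNBits(v, n) computes v mod 2^n, matching the ModTableSize comment in the diff below.

  #include <cstdint>
  #include "util/math.h"
  using ROCKSDB_NAMESPACE::BottomNBits;

  uint32_t BottomNBitsExample() {
    uint32_t x = 0xABCD;
    uint32_t lo8 = BottomNBits(x, 8);    // 0xCD, i.e. x mod 2^8
    uint32_t lo12 = BottomNBits(x, 12);  // 0xBCD, i.e. x mod 2^12
    // nbits must be less than the bit width (32 here); full width or more
    // is undefined, which is what allows the single-instruction mapping.
    return lo8 ^ lo12;
  }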

BitwiseAnd() - this is a convenience function that works around the language flaw that the type of the result of x & y is the larger of the two input types, when it should be the smaller. This can save some ugly static_casts.
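For illustration (example values are mine, not from the patch):

  #include <cstdint>
  #include "util/math.h"
  using ROCKSDB_NAMESPACE::BitwiseAnd;

  uint32_t BitwiseAndExample() {
    uint64_t x = 0x123456789ABCDEF0;
    uint32_t mask = 0xFFFF;
    // The type of (x & mask) is uint64_t (the larger type), so narrowing
    // the result to uint32_t needs a cast:
    uint32_t r1 = static_cast<uint32_t>(x & mask);
    // BitwiseAnd returns the smaller of the two types (or the type of the
    // right parameter if the sizes are equal), so no cast is needed:
    uint32_t r2 = BitwiseAnd(x, mask);  // r1 == r2 == 0xDEF0
    return r1 ^ r2;
  }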

I expect to use both of these in coming HyperClockCache developments, and have applied them in a couple of places in existing code.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11660

Test Plan: unit tests added

Reviewed By: jowlyzhang

Differential Revision: D47935531

Pulled By: pdillinger

fbshipit-source-id: d148c43a1e51df4a1c549b93aaf2725a3f8d3bd6
Author: Peter Dillinger (committed by Facebook GitHub Bot)
Commit: f4e4039f00 (parent: 946d1009bc)
 cache/clock_cache.h    |  3
 cache/sharded_cache.cc |  2
 util/core_local.h      |  3
 util/hash_test.cc      | 44
 util/math.h            | 64
 util/math128.h         | 36
 6 files changed

--- a/cache/clock_cache.h
+++ b/cache/clock_cache.h
@@ -24,6 +24,7 @@
 #include "rocksdb/cache.h"
 #include "rocksdb/secondary_cache.h"
 #include "util/autovector.h"
+#include "util/math.h"
 
 namespace ROCKSDB_NAMESPACE {
@@ -563,7 +564,7 @@ class HyperClockTable : public BaseClockTable {
  private:  // functions
  // Returns x mod 2^{length_bits_}.
  inline size_t ModTableSize(uint64_t x) {
-    return static_cast<size_t>(x) & length_bits_mask_;
+    return BitwiseAnd(x, length_bits_mask_);
  }
 
  // Returns the first slot in the probe sequence with a handle e such that

--- a/cache/sharded_cache.cc
+++ b/cache/sharded_cache.cc
@@ -38,7 +38,7 @@ uint32_t DetermineSeed(int32_t hash_seed_option) {
       return GetSliceHash(hostname) & kSeedMask;
     } else {
       // Fall back on something stable within the process.
-      return static_cast<uint32_t>(gen.GetBaseUpper()) & kSeedMask;
+      return BitwiseAnd(gen.GetBaseUpper(), kSeedMask);
     }
   } else {
     // for kQuasiRandomHashSeed and fallback

--- a/util/core_local.h
+++ b/util/core_local.h
@@ -13,6 +13,7 @@
 #include "port/likely.h"
 #include "port/port.h"
+#include "util/math.h"
 #include "util/random.h"
 
 namespace ROCKSDB_NAMESPACE {
@@ -70,7 +71,7 @@ std::pair<T*, size_t> CoreLocalArray<T>::AccessElementAndIndex() const {
     // cpu id unavailable, just pick randomly
     core_idx = Random::GetTLSInstance()->Uniform(1 << size_shift_);
   } else {
-    core_idx = static_cast<size_t>(cpuid & ((1 << size_shift_) - 1));
+    core_idx = static_cast<size_t>(BottomNBits(cpuid, size_shift_));
   }
   return {AccessAtCore(core_idx), core_idx};
 }

--- a/util/hash_test.cc
+++ b/util/hash_test.cc
@@ -565,6 +565,8 @@ size_t FastRange64(uint64_t hash, size_t range) {
 // Tests for math.h / math128.h (not worth a separate test binary)
 using ROCKSDB_NAMESPACE::BitParity;
 using ROCKSDB_NAMESPACE::BitsSetToOne;
+using ROCKSDB_NAMESPACE::BitwiseAnd;
+using ROCKSDB_NAMESPACE::BottomNBits;
 using ROCKSDB_NAMESPACE::ConstexprFloorLog2;
 using ROCKSDB_NAMESPACE::CountTrailingZeroBits;
 using ROCKSDB_NAMESPACE::DecodeFixed128;
@@ -580,6 +582,19 @@ using ROCKSDB_NAMESPACE::Upper64of128;
 
 int blah(int x) { return DownwardInvolution(x); }
 
+template <typename T1, typename T2>
+static void test_BitwiseAnd(T1 v1, T2 v2) {
+  auto a = BitwiseAnd(v1, v2);
+  // Essentially repeating the implementation :-/
+  if constexpr (sizeof(T1) < sizeof(T2)) {
+    static_assert(std::is_same_v<decltype(a), T1>);
+    EXPECT_EQ(a, static_cast<T1>(v1 & v2));
+  } else {
+    static_assert(std::is_same_v<decltype(a), T2>);
+    EXPECT_EQ(a, static_cast<T2>(v1 & v2));
+  }
+}
+
 template <typename T>
 static void test_BitOps() {
   // This complex code is to generalize to 128-bit values. Otherwise
@@ -598,6 +613,22 @@ static void test_BitOps() {
     // If we could directly use arithmetic:
     // T vm1 = static_cast<T>(v - 1);
 
+    // BottomNBits
+    {
+      // An essentially full length value
+      T x = everyOtherBit;
+      if (i > 2) {
+        // Make it slightly irregular
+        x = x ^ (T{1} << (i / 2));
+      }
+      auto a = BottomNBits(x, i);
+      auto b = BottomNBits(~x, i);
+      EXPECT_EQ(x | a, x);
+      EXPECT_EQ(a | b, vm1);
+      EXPECT_EQ(a & b, T{0});
+      EXPECT_EQ(BottomNBits(x ^ a, i), T{0});
+    }
+
     // FloorLog2
     if (v > 0) {
       EXPECT_EQ(FloorLog2(v), i);
@@ -707,9 +738,22 @@ static void test_BitOps() {
       }
     }
 
+    // BitwiseAnd
+    {
+      test_BitwiseAnd(vm1, static_cast<char>(0x99));
+      test_BitwiseAnd(v, static_cast<char>(0x99));
+      test_BitwiseAnd(char{0x66}, vm1);
+      test_BitwiseAnd(char{0x66}, v);
+      test_BitwiseAnd(v, int16_t{0x6699});
+      test_BitwiseAnd(v, uint16_t{0x9966});
+      test_BitwiseAnd(int64_t{0x1234234534564567}, v);
+      test_BitwiseAnd(uint64_t{0x9876876576545432}, v);
+    }
+
     vm1 = (vm1 << 1) | 1;
   }
 
+  // ConstexprFloorLog2
   EXPECT_EQ(ConstexprFloorLog2(T{1}), 0);
   EXPECT_EQ(ConstexprFloorLog2(T{2}), 1);
   EXPECT_EQ(ConstexprFloorLog2(T{3}), 1);

--- a/util/math.h
+++ b/util/math.h
@@ -9,6 +9,9 @@
 #ifdef _MSC_VER
 #include <intrin.h>
 #endif
+#ifdef __BMI2__
+#include <immintrin.h>
+#endif
 
 #include <cstdint>
 #include <type_traits>
@@ -20,11 +23,33 @@ ASSERT_FEATURE_COMPAT_HEADER();
 
 namespace ROCKSDB_NAMESPACE {
 
+// Fast implementation of extracting the bottom n bits of an integer.
+// To ensure fast implementation, undefined if n bits is full width or more.
+template <typename T>
+inline T BottomNBits(T v, int nbits) {
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
+  assert(nbits >= 0);
+  assert(nbits < int{8 * sizeof(T)});
+#ifdef __BMI2__
+  if constexpr (sizeof(T) <= 4) {
+    return static_cast<T>(_bzhi_u32(static_cast<uint32_t>(v), nbits));
+  }
+  if constexpr (sizeof(T) <= 8) {
+    return static_cast<T>(_bzhi_u64(static_cast<uint64_t>(v), nbits));
+  }
+#endif
+  // Newer compilers compile this down to bzhi on x86, but some older
+  // ones don't, thus the need for the intrinsic above.
+  return static_cast<T>(v & ((T{1} << nbits) - 1));
+}
+
 // Fast implementation of floor(log2(v)). Undefined for 0 or negative
 // numbers (in case of signed type).
 template <typename T>
 inline int FloorLog2(T v) {
-  static_assert(std::is_integral<T>::value, "non-integral type");
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
   assert(v > 0);
 #ifdef _MSC_VER
   static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
@@ -63,6 +88,8 @@ inline int FloorLog2(T v) {
 // Constexpr version of FloorLog2
 template <typename T>
 constexpr int ConstexprFloorLog2(T v) {
+  // NOTE: not checking is_integral so that this works with Unsigned128
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
   int rv = 0;
   while (v > T{1}) {
     ++rv;
@@ -74,7 +101,8 @@ constexpr int ConstexprFloorLog2(T v) {
 // Number of low-order zero bits before the first 1 bit. Undefined for 0.
 template <typename T>
 inline int CountTrailingZeroBits(T v) {
-  static_assert(std::is_integral<T>::value, "non-integral type");
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
   assert(v != 0);
 #ifdef _MSC_VER
   static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
@@ -115,6 +143,9 @@ namespace detail {
 
 template <typename T>
 int BitsSetToOneFallback(T v) {
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
+
   const int kBits = static_cast<int>(sizeof(T)) * 8;
   static_assert((kBits & (kBits - 1)) == 0, "must be power of two bits");
   // we static_cast these bit patterns in order to truncate them to the correct
@@ -140,7 +171,9 @@ int BitsSetToOneFallback(T v) {
 // Number of bits set to 1. Also known as "population count".
 template <typename T>
 inline int BitsSetToOne(T v) {
-  static_assert(std::is_integral<T>::value, "non-integral type");
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
+
 #ifdef _MSC_VER
   static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
   if (sizeof(T) < sizeof(uint32_t)) {
@@ -192,7 +225,9 @@ inline int BitsSetToOne(T v) {
 
 template <typename T>
 inline int BitParity(T v) {
-  static_assert(std::is_integral<T>::value, "non-integral type");
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
+
 #ifdef _MSC_VER
   // bit parity == oddness of popcount
   return BitsSetToOne(v) & 1;
@@ -214,7 +249,8 @@ inline int BitParity(T v) {
 // encode/decode big endian.
 template <typename T>
 inline T EndianSwapValue(T v) {
-  static_assert(std::is_integral<T>::value, "non-integral type");
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
 
 #ifdef _MSC_VER
   if (sizeof(T) == 2) {
@@ -244,6 +280,9 @@ inline T EndianSwapValue(T v) {
 // Reverses the order of bits in an integral value
 template <typename T>
 inline T ReverseBits(T v) {
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
+
   T r = EndianSwapValue(v);
   const T kHighestByte = T{1} << ((sizeof(T) - 1) * 8);
   const T kEveryByte = kHighestByte | (kHighestByte / 255);
@@ -277,7 +316,8 @@ inline T ReverseBits(T v) {
 // is that all square sub-matrices that include the top row are invertible.
 template <typename T>
 inline T DownwardInvolution(T v) {
-  static_assert(std::is_integral<T>::value, "non-integral type");
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
   static_assert(sizeof(T) <= 8, "only supported up to 64 bits");
 
   uint64_t r = static_cast<uint64_t>(v);
@@ -296,4 +336,16 @@ inline T DownwardInvolution(T v) {
   return static_cast<T>(r);
 }
 
+// Bitwise-And with typing that allows you to avoid writing an explicit cast
+// to the smaller type, or the type of the right parameter if same size.
+template <typename A, typename B>
+inline std::conditional_t<sizeof(A) < sizeof(B), A, B> BitwiseAnd(A a, B b) {
+  static_assert(std::is_integral_v<A>, "non-integral type");
+  static_assert(std::is_integral_v<B>, "non-integral type");
+  static_assert(!std::is_reference_v<A>, "use std::remove_reference_t");
+  static_assert(!std::is_reference_v<B>, "use std::remove_reference_t");
+  using Smaller = std::conditional_t<sizeof(A) < sizeof(B), A, B>;
+  return static_cast<Smaller>(a & b);
+}
+
 }  // namespace ROCKSDB_NAMESPACE

--- a/util/math128.h
+++ b/util/math128.h
@@ -41,13 +41,13 @@ struct Unsigned128 {
     hi = upper;
   }
 
-  explicit operator uint64_t() { return lo; }
-
-  explicit operator uint32_t() { return static_cast<uint32_t>(lo); }
-
-  explicit operator uint16_t() { return static_cast<uint16_t>(lo); }
-
-  explicit operator uint8_t() { return static_cast<uint8_t>(lo); }
+  // Convert to any integer 64 bits or less.
+  template <typename T,
+            typename = std::enable_if_t<std::is_integral_v<T> &&
+                                        sizeof(T) <= sizeof(uint64_t)> >
+  explicit operator T() {
+    return static_cast<T>(lo);
+  }
 };
 
 inline Unsigned128 operator<<(const Unsigned128& lhs, unsigned shift) {
@@ -190,6 +190,16 @@ inline Unsigned128 Multiply64to128(uint64_t a, uint64_t b) {
 #endif
 }
 
+template <>
+inline Unsigned128 BottomNBits(Unsigned128 v, int nbits) {
+  if (nbits < 64) {
+    return BottomNBits(Lower64of128(v), nbits);
+  } else {
+    return (Unsigned128{BottomNBits(Upper64of128(v), nbits - 64)} << 64) |
+           Lower64of128(v);
+  }
+}
+
 template <>
 inline int FloorLog2(Unsigned128 v) {
   if (Upper64of128(v) == 0) {
@@ -236,6 +246,18 @@ inline Unsigned128 DownwardInvolution(Unsigned128 v) {
          DownwardInvolution(Upper64of128(v) ^ Lower64of128(v));
 }
 
+template <typename A>
+inline std::remove_reference_t<A> BitwiseAnd(A a, Unsigned128 b) {
+  static_assert(sizeof(A) <= sizeof(Unsigned128));
+  return static_cast<A>(a & b);
+}
+
+template <typename B>
+inline std::remove_reference_t<B> BitwiseAnd(Unsigned128 a, B b) {
+  static_assert(sizeof(B) <= sizeof(Unsigned128));
+  return static_cast<B>(a & b);
+}
+
 template <typename T>
 struct IsUnsignedUpTo128
     : std::integral_constant<bool, std::is_unsigned<T>::value ||
