From c4d8838a2b54cb364ee3f05d86c8f980ac59bbc1 Mon Sep 17 00:00:00 2001
From: Peter Dillinger
Date: Thu, 3 Sep 2020 09:31:28 -0700
Subject: [PATCH] New bit manipulation functions and 128-bit value library (#7338)

Summary:
These new functions and 128-bit value bit operations are expected to be
used in a forthcoming Bloom filter alternative. No functional changes to
production code: just new code only called by unit tests, cosmetic
changes to existing headers, and a fix to an existing function for a
yet-unused template instantiation (BitsSetToOne on something signed and
smaller than 32 bits).

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7338

Test Plan: Unit tests included. Works with and without
TEST_UINT128_COMPAT=1 to check compatibility with and without
__uint128_t. Also added that parameter to the CircleCI build
build-linux-shared_lib-alt_namespace-status_checked.

Reviewed By: jay-zhuang

Differential Revision: D23494945

Pulled By: pdillinger

fbshipit-source-id: 5c0dc419100d9df5d4d9abb153b2855d5aea39e8
---
 .circleci/config.yml              |   2 +-
 Makefile                          |   4 +
 build_tools/build_detect_platform |  35 +++++
 util/hash.h                       |   6 +-
 util/hash_test.cc                 | 157 ++++++++++++++++++++-
 util/math.h                       | 102 +++++++++++++-
 util/math128.h                    | 223 ++++++++++++++++++++++++++++++
 7 files changed, 519 insertions(+), 10 deletions(-)
 create mode 100644 util/math128.h

diff --git a/.circleci/config.yml b/.circleci/config.yml
index da4bb5541..412f6ef70 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -31,7 +31,7 @@ jobs:
       - checkout # check out the code in the project directory
       - run: pyenv global 3.5.2
       - run: sudo apt-get update -y && sudo apt-get install -y libgflags-dev
-      - run: SKIP_FORMAT_BUCK_CHECKS=1 PRINT_PARALLEL_OUTPUTS=1 ASSERT_STATUS_CHECKED=1 LIB_MODE=shared OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" GTEST_THROW_ON_FAILURE=0 GTEST_OUTPUT="xml:/tmp/test-results/" make V=1 -j32 all check_some | .circleci/cat_ignore_eagain
+      - run: SKIP_FORMAT_BUCK_CHECKS=1 PRINT_PARALLEL_OUTPUTS=1 ASSERT_STATUS_CHECKED=1 TEST_UINT128_COMPAT=1 LIB_MODE=shared OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" GTEST_THROW_ON_FAILURE=0 GTEST_OUTPUT="xml:/tmp/test-results/" make V=1 -j32 all check_some | .circleci/cat_ignore_eagain
       - store_test_results:
           path: /tmp/test-results
diff --git a/Makefile b/Makefile
index 7c1cc8f05..a2b1c3f1b 100644
--- a/Makefile
+++ b/Makefile
@@ -410,6 +410,10 @@ ifdef TEST_CACHE_LINE_SIZE
   PLATFORM_CCFLAGS += -DTEST_CACHE_LINE_SIZE=$(TEST_CACHE_LINE_SIZE)
   PLATFORM_CXXFLAGS += -DTEST_CACHE_LINE_SIZE=$(TEST_CACHE_LINE_SIZE)
 endif
+ifdef TEST_UINT128_COMPAT
+  PLATFORM_CCFLAGS += -DTEST_UINT128_COMPAT=1
+  PLATFORM_CXXFLAGS += -DTEST_UINT128_COMPAT=1
+endif
 
 # This (the first rule) must depend on "all".
 default: all
diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform
index 094d35e6b..805b7eab5 100755
--- a/build_tools/build_detect_platform
+++ b/build_tools/build_detect_platform
@@ -665,11 +665,18 @@ if test "$TRY_SSE_ETC"; then
   # It doesn't even really check that your current CPU is compatible.
   #
   # SSE4.2 available since nehalem, ca. 2008-2010
+  # Includes POPCNT for BitsSetToOne, BitParity
   TRY_SSE42="-msse4.2"
   # PCLMUL available since westmere, ca. 2010-2011
   TRY_PCLMUL="-mpclmul"
   # AVX2 available since haswell, ca. 2013-2015
   TRY_AVX2="-mavx2"
+  # BMI available since haswell, ca. 2013-2015
+  # Primarily for TZCNT for CountTrailingZeroBits
+  TRY_BMI="-mbmi"
+  # LZCNT available since haswell, ca. 2013-2015
+  # For FloorLog2
+  TRY_LZCNT="-mlzcnt"
 fi
 
 $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_SSE42 -x c++ - -o /dev/null 2>/dev/null <<EOF
@@ -720,6 +727,34 @@ elif test "$USE_SSE"; then
   echo "warning: USE_SSE specified but compiler could not use AVX2 intrinsics, disabling" >&2
 fi
 
+$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_BMI -x c++ - -o /dev/null 2>/dev/null <<EOF
+  #include <cstdint>
+  #include <immintrin.h>
+  int main(int argc, char *argv[]) {
+    (void)argv;
+    return (int)_tzcnt_u64((uint64_t)argc);
+  }
+EOF
+if [ "$?" = 0 ]; then
+  COMMON_FLAGS="$COMMON_FLAGS $TRY_BMI -DHAVE_BMI"
+elif test "$USE_SSE"; then
+  echo "warning: USE_SSE specified but compiler could not use BMI intrinsics, disabling" >&2
+fi
+
+$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_LZCNT -x c++ - -o /dev/null 2>/dev/null <<EOF
+  #include <cstdint>
+  #include <immintrin.h>
+  int main(int argc, char *argv[]) {
+    (void)argv;
+    return (int)_lzcnt_u64((uint64_t)argc);
+  }
+EOF
+if [ "$?" = 0 ]; then
+  COMMON_FLAGS="$COMMON_FLAGS $TRY_LZCNT -DHAVE_LZCNT"
+elif test "$USE_SSE"; then
+  echo "warning: USE_SSE specified but compiler could not use LZCNT intrinsics, disabling" >&2
+fi
+
 $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
 #include <cstdint>
 int main() {
diff --git a/util/hash.h b/util/hash.h
index 17490a366..5bf5ce85e 100644
--- a/util/hash.h
+++ b/util/hash.h
@@ -88,12 +88,16 @@ inline uint32_t fastrange32(uint32_t hash, uint32_t range) {
   return static_cast<uint32_t>(product >> 32);
 }
 
+#ifdef TEST_UINT128_COMPAT
+#undef HAVE_UINT128_EXTENSION
+#endif
+
 // An alternative to % for mapping a 64-bit hash value to an arbitrary range
 // that fits in size_t. See https://github.com/lemire/fastrange
 // We find size_t more convenient than uint64_t for the range, with side
 // benefit of better optimization on 32-bit platforms.
 inline size_t fastrange64(uint64_t hash, size_t range) {
-#if defined(HAVE_UINT128_EXTENSION)
+#ifdef HAVE_UINT128_EXTENSION
   // Can use compiler's 128-bit type. Trust it to do the right thing.
   __uint128_t wide = __uint128_t{range} * hash;
   return static_cast<size_t>(wide >> 64);
diff --git a/util/hash_test.cc b/util/hash_test.cc
index 9c3c6efe9..802fb27f3 100644
--- a/util/hash_test.cc
+++ b/util/hash_test.cc
@@ -7,12 +7,14 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file. See the AUTHORS file for names of contributors.
 
+#include "util/hash.h"
+
 #include <cstring>
 #include <vector>
 
 #include "test_util/testharness.h"
 #include "util/coding.h"
-#include "util/hash.h"
+#include "util/math128.h"
 
 using ROCKSDB_NAMESPACE::EncodeFixed32;
 using ROCKSDB_NAMESPACE::GetSliceHash64;
@@ -370,6 +372,159 @@ size_t fastrange64(uint64_t hash, size_t range) {
   return ROCKSDB_NAMESPACE::fastrange64(hash, range);
 }
 
+// Tests for math.h / math128.h (not worth a separate test binary)
+using ROCKSDB_NAMESPACE::BitParity;
+using ROCKSDB_NAMESPACE::BitsSetToOne;
+using ROCKSDB_NAMESPACE::CountTrailingZeroBits;
+using ROCKSDB_NAMESPACE::DecodeFixed128;
+using ROCKSDB_NAMESPACE::EncodeFixed128;
+using ROCKSDB_NAMESPACE::FloorLog2;
+using ROCKSDB_NAMESPACE::Lower64of128;
+using ROCKSDB_NAMESPACE::Multiply64to128;
+using ROCKSDB_NAMESPACE::Unsigned128;
+using ROCKSDB_NAMESPACE::Upper64of128;
+
+template <typename T>
+static void test_BitOps() {
+  // This complex code is to generalize to 128-bit values. Otherwise
+  // we could just use = static_cast<T>(0x5555555555555555ULL);
+  T everyOtherBit = 0;
+  for (unsigned i = 0; i < sizeof(T); ++i) {
+    everyOtherBit = (everyOtherBit << 8) | T{0x55};
+  }
+
+  // This one built using bit operations, as our 128-bit layer
+  // might not implement arithmetic such as subtraction.
+  T vm1 = 0;  // "v minus one"
+
+  for (int i = 0; i < int{8 * sizeof(T)}; ++i) {
+    T v = T{1} << i;
+    // If we could directly use arithmetic:
+    // T vm1 = static_cast<T>(v - 1);
+
+    // FloorLog2
+    if (v > 0) {
+      EXPECT_EQ(FloorLog2(v), i);
+    }
+    if (vm1 > 0) {
+      EXPECT_EQ(FloorLog2(vm1), i - 1);
+      EXPECT_EQ(FloorLog2(everyOtherBit & vm1), (i - 1) & ~1);
+    }
+
+    // CountTrailingZeroBits
+    if (v != 0) {
+      EXPECT_EQ(CountTrailingZeroBits(v), i);
+    }
+    if (vm1 != 0) {
+      EXPECT_EQ(CountTrailingZeroBits(vm1), 0);
+    }
+    if (i < int{8 * sizeof(T)} - 1) {
+      EXPECT_EQ(CountTrailingZeroBits(~vm1 & everyOtherBit), (i + 1) & ~1);
+    }
+
+    // BitsSetToOne
+    EXPECT_EQ(BitsSetToOne(v), 1);
+    EXPECT_EQ(BitsSetToOne(vm1), i);
+    EXPECT_EQ(BitsSetToOne(vm1 & everyOtherBit), (i + 1) / 2);
+
+    // BitParity
+    EXPECT_EQ(BitParity(v), 1);
+    EXPECT_EQ(BitParity(vm1), i & 1);
+    EXPECT_EQ(BitParity(vm1 & everyOtherBit), ((i + 1) / 2) & 1);
+
+    vm1 = (vm1 << 1) | 1;
+  }
+}
+
+TEST(MathTest, BitOps) {
+  test_BitOps<uint32_t>();
+  test_BitOps<uint64_t>();
+  test_BitOps<uint16_t>();
+  test_BitOps<uint8_t>();
+  test_BitOps<unsigned char>();
+  test_BitOps<unsigned short>();
+  test_BitOps<unsigned int>();
+  test_BitOps<unsigned long>();
+  test_BitOps<unsigned long long>();
+  test_BitOps<char>();
+  test_BitOps<size_t>();
+  test_BitOps<int32_t>();
+  test_BitOps<int64_t>();
+  test_BitOps<int16_t>();
+  test_BitOps<int8_t>();
+  test_BitOps<signed char>();
+  test_BitOps<short>();
+  test_BitOps<int>();
+  test_BitOps<long>();
+  test_BitOps<long long>();
+  test_BitOps<ptrdiff_t>();
+}
+
+TEST(MathTest, BitOps128) { test_BitOps<Unsigned128>(); }
+
+TEST(MathTest, Math128) {
+  const Unsigned128 sixteenHexOnes = 0x1111111111111111U;
+  const Unsigned128 thirtyHexOnes = (sixteenHexOnes << 56) | sixteenHexOnes;
+  const Unsigned128 sixteenHexTwos = 0x2222222222222222U;
+  const Unsigned128 thirtyHexTwos = (sixteenHexTwos << 56) | sixteenHexTwos;
+
+  // v will slide from all hex ones to all hex twos
+  Unsigned128 v = thirtyHexOnes;
+  for (int i = 0; i <= 30; ++i) {
+    // Test bitwise operations
+    EXPECT_EQ(BitsSetToOne(v), 30);
+    EXPECT_EQ(BitsSetToOne(~v), 128 - 30);
+    EXPECT_EQ(BitsSetToOne(v & thirtyHexOnes), 30 - i);
+    EXPECT_EQ(BitsSetToOne(v | thirtyHexOnes), 30 + i);
+    EXPECT_EQ(BitsSetToOne(v ^ thirtyHexOnes), 2 * i);
+    EXPECT_EQ(BitsSetToOne(v & thirtyHexTwos), i);
+    EXPECT_EQ(BitsSetToOne(v | thirtyHexTwos), 60 - i);
+    EXPECT_EQ(BitsSetToOne(v ^ thirtyHexTwos), 60 - 2 * i);
+
+    // Test comparisons
+    EXPECT_EQ(v == thirtyHexOnes, i == 0);
+    EXPECT_EQ(v == thirtyHexTwos, i == 30);
+    EXPECT_EQ(v > thirtyHexOnes, i > 0);
+    EXPECT_EQ(v > thirtyHexTwos, false);
+    EXPECT_EQ(v >= thirtyHexOnes, true);
+    EXPECT_EQ(v >= thirtyHexTwos, i == 30);
+    EXPECT_EQ(v < thirtyHexOnes, false);
+    EXPECT_EQ(v < thirtyHexTwos, i < 30);
+    EXPECT_EQ(v <= thirtyHexOnes, i == 0);
+    EXPECT_EQ(v <= thirtyHexTwos, true);
+
+    // Update v, clearing upper-most byte
+    v = ((v << 12) >> 8) | 0x2;
+  }
+
+  for (int i = 0; i < 128; ++i) {
+    // Test shifts
+    Unsigned128 sl = thirtyHexOnes << i;
+    Unsigned128 sr = thirtyHexOnes >> i;
+    EXPECT_EQ(BitsSetToOne(sl), std::min(30, 32 - i / 4));
+    EXPECT_EQ(BitsSetToOne(sr), std::max(0, 30 - (i + 3) / 4));
+    EXPECT_EQ(BitsSetToOne(sl & sr), i % 2 ? 0 : std::max(0, 30 - i / 2));
+  }
+
+  // Test 64x64->128 multiply
+  Unsigned128 product =
+      Multiply64to128(0x1111111111111111U, 0x2222222222222222U);
+  EXPECT_EQ(Lower64of128(product), 2295594818061633090U);
+  EXPECT_EQ(Upper64of128(product), 163971058432973792U);
+}
+
+TEST(MathTest, Coding128) {
+  const char *in = "_1234567890123456";
+  Unsigned128 decoded = DecodeFixed128(in + 1);
+  EXPECT_EQ(Lower64of128(decoded), 4050765991979987505U);
+  EXPECT_EQ(Upper64of128(decoded), 3906085646303834169U);
+  char out[18];
+  out[0] = '_';
+  EncodeFixed128(out + 1, decoded);
+  out[17] = '\0';
+  EXPECT_EQ(std::string(in), std::string(out));
+}
 
 int main(int argc, char** argv) {
   ::testing::InitGoogleTest(&argc, argv);
diff --git a/util/math.h b/util/math.h
index 64cdb2f44..2e57c1c08 100644
--- a/util/math.h
+++ b/util/math.h
@@ -13,24 +13,112 @@
 namespace ROCKSDB_NAMESPACE {
 
+// Fast implementation of floor(log2(v)). Undefined for 0 or negative
+// numbers (in case of signed type).
 template <typename T>
-inline int BitsSetToOne(T v) {
+inline int FloorLog2(T v) {
   static_assert(std::is_integral<T>::value, "non-integral type");
+  assert(v > 0);
 #ifdef _MSC_VER
   static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
-  if (sizeof(T) > sizeof(uint32_t)) {
-    return static_cast<int>(__popcnt64(static_cast<uint64_t>(v)));
+  unsigned long lz = 0;
+  if (sizeof(T) <= sizeof(uint32_t)) {
+    _BitScanReverse(&lz, static_cast<uint32_t>(v));
+  } else {
+    _BitScanReverse64(&lz, static_cast<uint64_t>(v));
+  }
+  return static_cast<int>(lz);
+#else
+  static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
+  if (sizeof(T) <= sizeof(unsigned int)) {
+    int lz = __builtin_clz(static_cast<unsigned int>(v));
+    return int{sizeof(unsigned int)} * 8 - 1 - lz;
+  } else if (sizeof(T) <= sizeof(unsigned long)) {
+    int lz = __builtin_clzl(static_cast<unsigned long>(v));
+    return int{sizeof(unsigned long)} * 8 - 1 - lz;
+  } else {
+    int lz = __builtin_clzll(static_cast<unsigned long long>(v));
+    return int{sizeof(unsigned long long)} * 8 - 1 - lz;
+  }
+#endif
+}
+
+// Number of low-order zero bits before the first 1 bit. Undefined for 0.
+template <typename T>
+inline int CountTrailingZeroBits(T v) {
+  static_assert(std::is_integral<T>::value, "non-integral type");
+  assert(v != 0);
+#ifdef _MSC_VER
+  static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
+  unsigned long tz = 0;
+  if (sizeof(T) <= sizeof(uint32_t)) {
+    _BitScanForward(&tz, static_cast<uint32_t>(v));
   } else {
+    _BitScanForward64(&tz, static_cast<uint64_t>(v));
+  }
+  return static_cast<int>(tz);
+#else
+  static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
+  if (sizeof(T) <= sizeof(unsigned int)) {
+    return __builtin_ctz(static_cast<unsigned int>(v));
+  } else if (sizeof(T) <= sizeof(unsigned long)) {
+    return __builtin_ctzl(static_cast<unsigned long>(v));
+  } else {
+    return __builtin_ctzll(static_cast<unsigned long long>(v));
+  }
+#endif
+}
+
+// Number of bits set to 1. Also known as "population count".
+template <typename T>
+inline int BitsSetToOne(T v) {
+  static_assert(std::is_integral<T>::value, "non-integral type");
+#ifdef _MSC_VER
+  static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
+  if (sizeof(T) < sizeof(uint32_t)) {
+    // This bit mask is to avoid a compiler warning on unused path
+    constexpr auto mm = 8 * sizeof(uint32_t) - 1;
+    // The bit mask is to neutralize sign extension on small signed types
+    constexpr uint32_t m = (uint32_t{1} << ((8 * sizeof(T)) & mm)) - 1;
+    return static_cast<int>(__popcnt(static_cast<uint32_t>(v) & m));
+  } else if (sizeof(T) == sizeof(uint32_t)) {
     return static_cast<int>(__popcnt(static_cast<uint32_t>(v)));
+  } else {
+    return static_cast<int>(__popcnt64(static_cast<uint64_t>(v)));
   }
 #else
   static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
-  if (sizeof(T) > sizeof(unsigned long)) {
-    return __builtin_popcountll(static_cast<unsigned long long>(v));
-  } else if (sizeof(T) > sizeof(unsigned int)) {
+  if (sizeof(T) < sizeof(unsigned int)) {
+    // This bit mask is to avoid a compiler warning on unused path
+    constexpr auto mm = 8 * sizeof(unsigned int) - 1;
+    // This bit mask is to neutralize sign extension on small signed types
+    constexpr unsigned int m = (1U << ((8 * sizeof(T)) & mm)) - 1;
+    return __builtin_popcount(static_cast<unsigned int>(v) & m);
+  } else if (sizeof(T) == sizeof(unsigned int)) {
+    return __builtin_popcount(static_cast<unsigned int>(v));
+  } else if (sizeof(T) <= sizeof(unsigned long)) {
     return __builtin_popcountl(static_cast<unsigned long>(v));
   } else {
-    return __builtin_popcount(static_cast<unsigned int>(v));
+    return __builtin_popcountll(static_cast<unsigned long long>(v));
+  }
+#endif
+}
+
+template <typename T>
+inline int BitParity(T v) {
+  static_assert(std::is_integral<T>::value, "non-integral type");
+#ifdef _MSC_VER
+  // bit parity == oddness of popcount
+  return BitsSetToOne(v) & 1;
+#else
+  static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
+  if (sizeof(T) <= sizeof(unsigned int)) {
+    // On any sane system, potential sign extension here won't change parity
+    return __builtin_parity(static_cast<unsigned int>(v));
+  } else if (sizeof(T) <= sizeof(unsigned long)) {
+    return __builtin_parityl(static_cast<unsigned long>(v));
+  } else {
+    return __builtin_parityll(static_cast<unsigned long long>(v));
   }
 #endif
 }
diff --git a/util/math128.h b/util/math128.h
new file mode 100644
index 000000000..10e99714e
--- /dev/null
+++ b/util/math128.h
@@ -0,0 +1,223 @@
+//  Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include "util/coding.h"
+#include "util/math.h"
+
+#ifdef TEST_UINT128_COMPAT
+#undef HAVE_UINT128_EXTENSION
+#endif
+
+namespace ROCKSDB_NAMESPACE {
+
+// Unsigned128 is a 128 bit value supporting (at least) bitwise operators,
+// shifts, and comparisons. __uint128_t is not always available.
+
+#ifdef HAVE_UINT128_EXTENSION
+using Unsigned128 = __uint128_t;
+#else
+struct Unsigned128 {
+  uint64_t lo;
+  uint64_t hi;
+
+  inline Unsigned128() {
+    static_assert(sizeof(Unsigned128) == 2 * sizeof(uint64_t),
+                  "unexpected overhead in representation");
+    lo = 0;
+    hi = 0;
+  }
+
+  inline Unsigned128(uint64_t lower) {
+    lo = lower;
+    hi = 0;
+  }
+
+  inline Unsigned128(uint64_t lower, uint64_t upper) {
+    lo = lower;
+    hi = upper;
+  }
+};
+
+inline Unsigned128 operator<<(const Unsigned128& lhs, unsigned shift) {
+  shift &= 127;
+  Unsigned128 rv;
+  if (shift >= 64) {
+    rv.lo = 0;
+    rv.hi = lhs.lo << (shift & 63);
+  } else {
+    uint64_t tmp = lhs.lo;
+    rv.lo = tmp << shift;
+    // Ensure shift==0 shifts away everything. (This avoids another
+    // conditional branch on shift == 0.)
+    tmp = tmp >> 1 >> (63 - shift);
+    rv.hi = tmp | (lhs.hi << shift);
+  }
+  return rv;
+}
+
+inline Unsigned128& operator<<=(Unsigned128& lhs, unsigned shift) {
+  lhs = lhs << shift;
+  return lhs;
+}
+
+inline Unsigned128 operator>>(const Unsigned128& lhs, unsigned shift) {
+  shift &= 127;
+  Unsigned128 rv;
+  if (shift >= 64) {
+    rv.hi = 0;
+    rv.lo = lhs.hi >> (shift & 63);
+  } else {
+    uint64_t tmp = lhs.hi;
+    rv.hi = tmp >> shift;
+    // Ensure shift==0 shifts away everything
+    tmp = tmp << 1 << (63 - shift);
+    rv.lo = tmp | (lhs.lo >> shift);
+  }
+  return rv;
+}
+
+inline Unsigned128& operator>>=(Unsigned128& lhs, unsigned shift) {
+  lhs = lhs >> shift;
+  return lhs;
+}
+
+inline Unsigned128 operator&(const Unsigned128& lhs, const Unsigned128& rhs) {
+  return Unsigned128(lhs.lo & rhs.lo, lhs.hi & rhs.hi);
+}
+
+inline Unsigned128& operator&=(Unsigned128& lhs, const Unsigned128& rhs) {
+  lhs = lhs & rhs;
+  return lhs;
+}
+
+inline Unsigned128 operator|(const Unsigned128& lhs, const Unsigned128& rhs) {
+  return Unsigned128(lhs.lo | rhs.lo, lhs.hi | rhs.hi);
+}
+
+inline Unsigned128& operator|=(Unsigned128& lhs, const Unsigned128& rhs) {
+  lhs = lhs | rhs;
+  return lhs;
+}
+
+inline Unsigned128 operator^(const Unsigned128& lhs, const Unsigned128& rhs) {
+  return Unsigned128(lhs.lo ^ rhs.lo, lhs.hi ^ rhs.hi);
+}
+
+inline Unsigned128& operator^=(Unsigned128& lhs, const Unsigned128& rhs) {
+  lhs = lhs ^ rhs;
+  return lhs;
+}
+
+inline Unsigned128 operator~(const Unsigned128& v) {
+  return Unsigned128(~v.lo, ~v.hi);
+}
+
+inline bool operator==(const Unsigned128& lhs, const Unsigned128& rhs) {
+  return lhs.lo == rhs.lo && lhs.hi == rhs.hi;
+}
+
+inline bool operator!=(const Unsigned128& lhs, const Unsigned128& rhs) {
+  return lhs.lo != rhs.lo || lhs.hi != rhs.hi;
+}
+
+inline bool operator>(const Unsigned128& lhs, const Unsigned128& rhs) {
+  return lhs.hi > rhs.hi || (lhs.hi == rhs.hi && lhs.lo > rhs.lo);
+}
+
+inline bool operator<(const Unsigned128& lhs, const Unsigned128& rhs) {
+  return lhs.hi < rhs.hi || (lhs.hi == rhs.hi && lhs.lo < rhs.lo);
+}
+
+inline bool operator>=(const Unsigned128& lhs, const Unsigned128& rhs) {
+  return lhs.hi > rhs.hi || (lhs.hi == rhs.hi && lhs.lo >= rhs.lo);
+}
+
+inline bool operator<=(const Unsigned128& lhs, const Unsigned128& rhs) {
+  return lhs.hi < rhs.hi || (lhs.hi == rhs.hi && lhs.lo <= rhs.lo);
+}
+#endif
+
+inline uint64_t Lower64of128(Unsigned128 v) {
+#ifdef HAVE_UINT128_EXTENSION
+  return static_cast<uint64_t>(v);
+#else
+  return v.lo;
+#endif
+}
+
+inline uint64_t Upper64of128(Unsigned128 v) {
+#ifdef HAVE_UINT128_EXTENSION
+  return static_cast<uint64_t>(v >> 64);
+#else
+  return v.hi;
+#endif
+}
+
+// This generally compiles down to a single fast instruction on 64-bit platforms.
+// This doesn't really make sense as operator* because it's not a
+// general 128x128 multiply and provides more output than 64x64 multiply.
+inline Unsigned128 Multiply64to128(uint64_t a, uint64_t b) {
+#ifdef HAVE_UINT128_EXTENSION
+  return Unsigned128{a} * Unsigned128{b};
+#else
+  // Full decomposition
+  // NOTE: GCC seems to fully understand this code as 64-bit x 64-bit
+  // -> 128-bit multiplication and optimize it appropriately.
+  uint64_t tmp = uint64_t{b & 0xffffFFFF} * uint64_t{a & 0xffffFFFF};
+  uint64_t lower = tmp & 0xffffFFFF;
+  tmp >>= 32;
+  tmp += uint64_t{b & 0xffffFFFF} * uint64_t{a >> 32};
+  // Avoid overflow: first add lower 32 of tmp2, and later upper 32
+  uint64_t tmp2 = uint64_t{b >> 32} * uint64_t{a & 0xffffFFFF};
+  tmp += tmp2 & 0xffffFFFF;
+  lower |= tmp << 32;
+  tmp >>= 32;
+  tmp += tmp2 >> 32;
+  tmp += uint64_t{b >> 32} * uint64_t{a >> 32};
+  return Unsigned128(lower, tmp);
+#endif
+}
+
+template <>
+inline int FloorLog2(Unsigned128 v) {
+  if (Upper64of128(v) == 0) {
+    return FloorLog2(Lower64of128(v));
+  } else {
+    return FloorLog2(Upper64of128(v)) + 64;
+  }
+}
+
+template <>
+inline int CountTrailingZeroBits(Unsigned128 v) {
+  if (Lower64of128(v) != 0) {
+    return CountTrailingZeroBits(Lower64of128(v));
+  } else {
+    return CountTrailingZeroBits(Upper64of128(v)) + 64;
+  }
+}
+
+template <>
+inline int BitsSetToOne(Unsigned128 v) {
+  return BitsSetToOne(Lower64of128(v)) + BitsSetToOne(Upper64of128(v));
+}
+
+template <>
+inline int BitParity(Unsigned128 v) {
+  return BitParity(Lower64of128(v)) ^ BitParity(Upper64of128(v));
+}
+
+inline void EncodeFixed128(char* dst, Unsigned128 value) {
+  EncodeFixed64(dst, Lower64of128(value));
+  EncodeFixed64(dst + 8, Upper64of128(value));
+}
+
+inline Unsigned128 DecodeFixed128(const char* ptr) {
+  Unsigned128 rv = DecodeFixed64(ptr + 8);
+  return (rv << 64) | DecodeFixed64(ptr);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
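
The following is an illustrative usage sketch, not part of the patch: it shows how the new utilities might be combined once the patch is applied. It assumes the RocksDB source tree is on the include path and ROCKSDB_NAMESPACE is left at its default; the file name, hash value, and slot count are made up for the example.

  // usage_sketch.cc -- hypothetical example, not included in the commit
  #include <cassert>
  #include <cstdint>
  #include <iostream>

  #include "util/math.h"
  #include "util/math128.h"

  using namespace ROCKSDB_NAMESPACE;

  int main() {
    // fastrange-style mapping of a 64-bit hash into [0, num_slots):
    // take the upper 64 bits of the 64x64->128 widening multiply.
    uint64_t hash = 0x9E3779B97F4A7C15ULL;  // arbitrary example hash
    uint64_t num_slots = 1000;              // arbitrary example range
    uint64_t slot = Upper64of128(Multiply64to128(hash, num_slots));
    assert(slot < num_slots);

    // FloorLog2 rounds down to the exponent of the nearest power of two.
    int lg = FloorLog2(uint64_t{12345});  // 13, since 2^13 <= 12345 < 2^14

    // The bit-op templates also work on Unsigned128 via the specializations.
    Unsigned128 mask = (Unsigned128{1} << 100) | 0xF0;
    std::cout << "slot=" << slot << " lg=" << lg
              << " ones=" << BitsSetToOne(mask)          // 5
              << " tz=" << CountTrailingZeroBits(mask)   // 4
              << " parity=" << BitParity(mask) << "\n";  // 1
    return 0;
  }

The widening-multiply mapping above is the same trick the patched util/hash.h uses in fastrange64 when __uint128_t is available.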