New bit manipulation functions and 128-bit value library (#7338)

Summary: These new functions and 128-bit value bit operations are expected to be used in a forthcoming Bloom filter alternative. No functional changes to production code, just new code only called by unit tests, cosmetic changes to existing headers, and fix an existing function for a yet-unused template instantiation (BitsSetToOne on something signed and smaller than 32 bits). Pull Request resolved: https://github.com/facebook/rocksdb/pull/7338 Test Plan: Unit tests included. Works with and without TEST_UINT128_COMPAT=1 to check compatibility with and without __uint128_t. Also added that parameter to the CircleCI build build-linux-shared_lib-alt_namespace-status_checked. Reviewed By: jay-zhuang Differential Revision: D23494945 Pulled By: pdillinger fbshipit-source-id: 5c0dc419100d9df5d4d9abb153b2855d5aea39e8
5 years ago · c4d8838a2b
parent a09c3cf13e
commit c4d8838a2b
7 changed files with 519 additions and 10 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -31,7 +31,7 @@ jobs:
      - checkout # check out the code in the project directory
      - run: pyenv global 3.5.2
      - run: sudo apt-get update -y && sudo apt-get install -y libgflags-dev
-      - run: SKIP_FORMAT_BUCK_CHECKS=1 PRINT_PARALLEL_OUTPUTS=1 ASSERT_STATUS_CHECKED=1 LIB_MODE=shared OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" GTEST_THROW_ON_FAILURE=0 GTEST_OUTPUT="xml:/tmp/test-results/" make V=1 -j32 all check_some | .circleci/cat_ignore_eagain
+      - run: SKIP_FORMAT_BUCK_CHECKS=1 PRINT_PARALLEL_OUTPUTS=1 ASSERT_STATUS_CHECKED=1 TEST_UINT128_COMPAT=1 LIB_MODE=shared OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" GTEST_THROW_ON_FAILURE=0 GTEST_OUTPUT="xml:/tmp/test-results/" make V=1 -j32 all check_some | .circleci/cat_ignore_eagain
      - store_test_results:
          path: /tmp/test-results
--- a/4
+++ b/4
@ -410,6 +410,10 @@ ifdef TEST_CACHE_LINE_SIZE
  PLATFORM_CCFLAGS += -DTEST_CACHE_LINE_SIZE=$(TEST_CACHE_LINE_SIZE)
  PLATFORM_CXXFLAGS += -DTEST_CACHE_LINE_SIZE=$(TEST_CACHE_LINE_SIZE)
 endif
 ifdef TEST_UINT128_COMPAT
  PLATFORM_CCFLAGS += -DTEST_UINT128_COMPAT=1
  PLATFORM_CXXFLAGS += -DTEST_UINT128_COMPAT=1
 endif
 # This (the first rule) must depend on "all".
 default: all
--- a/build_tools/build_detect_platform
+++ b/build_tools/build_detect_platform
@ -665,11 +665,18 @@ if test "$TRY_SSE_ETC"; then
  # It doesn't even really check that your current CPU is compatible.
  #
  # SSE4.2 available since nehalem, ca. 2008-2010
  # Includes POPCNT for BitsSetToOne, BitParity
  TRY_SSE42="-msse4.2"
  # PCLMUL available since westmere, ca. 2010-2011
  TRY_PCLMUL="-mpclmul"
  # AVX2 available since haswell, ca. 2013-2015
  TRY_AVX2="-mavx2"
  # BMI available since haswell, ca. 2013-2015
  # Primarily for TZCNT for CountTrailingZeroBits
  TRY_BMI="-mbmi"
  # LZCNT available since haswell, ca. 2013-2015
  # For FloorLog2
  TRY_LZCNT="-mlzcnt"
 fi
 $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_SSE42 -x c++ - -o /dev/null 2>/dev/null <<EOF
@ -718,6 +725,34 @@ elif test "$USE_SSE"; then
  echo "warning: USE_SSE specified but compiler could not use AVX2 intrinsics, disabling" >&2
 fi
 $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_BMI -x c++ - -o /dev/null 2>/dev/null <<EOF
  #include <cstdint>
  #include <immintrin.h>
  int main(int argc, char *argv[]) {
    (void)argv;
    return (int)_tzcnt_u64((uint64_t)argc);
  }
 EOF
 if [ "$?" = 0 ]; then
  COMMON_FLAGS="$COMMON_FLAGS $TRY_BMI -DHAVE_BMI"
 elif test "$USE_SSE"; then
  echo "warning: USE_SSE specified but compiler could not use BMI intrinsics, disabling" >&2
 fi
 $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_LZCNT -x c++ - -o /dev/null 2>/dev/null <<EOF
  #include <cstdint>
  #include <immintrin.h>
  int main(int argc, char *argv[]) {
    (void)argv;
    return (int)_lzcnt_u64((uint64_t)argc);
  }
 EOF
 if [ "$?" = 0 ]; then
  COMMON_FLAGS="$COMMON_FLAGS $TRY_LZCNT -DHAVE_LZCNT"
 elif test "$USE_SSE"; then
  echo "warning: USE_SSE specified but compiler could not use LZCNT intrinsics, disabling" >&2
 fi
 $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
  #include <cstdint>
  int main() {
--- a/util/hash.h
+++ b/util/hash.h
@ -88,12 +88,16 @@ inline uint32_t fastrange32(uint32_t hash, uint32_t range) {
  return static_cast<uint32_t>(product >> 32);
 }
 #ifdef TEST_UINT128_COMPAT
 #undef HAVE_UINT128_EXTENSION
 #endif
 // An alternative to % for mapping a 64-bit hash value to an arbitrary range
 // that fits in size_t. See https://github.com/lemire/fastrange
 // We find size_t more convenient than uint64_t for the range, with side
 // benefit of better optimization on 32-bit platforms.
 inline size_t fastrange64(uint64_t hash, size_t range) {
-#if defined(HAVE_UINT128_EXTENSION)
+#ifdef HAVE_UINT128_EXTENSION
  // Can use compiler's 128-bit type. Trust it to do the right thing.
  __uint128_t wide = __uint128_t{range} * hash;
  return static_cast<size_t>(wide >> 64);
--- a/util/hash_test.cc
+++ b/util/hash_test.cc
@ -7,12 +7,14 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file. See the AUTHORS file for names of contributors.
 #include "util/hash.h"
 #include <cstring>
 #include <vector>
 #include "test_util/testharness.h"
 #include "util/coding.h"
-#include "util/hash.h"
+#include "util/math128.h"
 using ROCKSDB_NAMESPACE::EncodeFixed32;
 using ROCKSDB_NAMESPACE::GetSliceHash64;
@ -370,6 +372,159 @@ size_t fastrange64(uint64_t hash, size_t range) {
  return ROCKSDB_NAMESPACE::fastrange64(hash, range);
 }
 // Tests for math.h / math128.h (not worth a separate test binary)
 using ROCKSDB_NAMESPACE::BitParity;
 using ROCKSDB_NAMESPACE::BitsSetToOne;
 using ROCKSDB_NAMESPACE::CountTrailingZeroBits;
 using ROCKSDB_NAMESPACE::DecodeFixed128;
 using ROCKSDB_NAMESPACE::EncodeFixed128;
 using ROCKSDB_NAMESPACE::FloorLog2;
 using ROCKSDB_NAMESPACE::Lower64of128;
 using ROCKSDB_NAMESPACE::Multiply64to128;
 using ROCKSDB_NAMESPACE::Unsigned128;
 using ROCKSDB_NAMESPACE::Upper64of128;
 template <typename T>
 static void test_BitOps() {
  // This complex code is to generalize to 128-bit values. Otherwise
  // we could just use = static_cast<T>(0x5555555555555555ULL);
  T everyOtherBit = 0;
  for (unsigned i = 0; i < sizeof(T); ++i) {
    everyOtherBit = (everyOtherBit << 8) | T{0x55};
  }
  // This one built using bit operations, as our 128-bit layer
  // might not implement arithmetic such as subtraction.
  T vm1 = 0;  // "v minus one"
  for (int i = 0; i < int{8 * sizeof(T)}; ++i) {
    T v = T{1} << i;
    // If we could directly use arithmetic:
    // T vm1 = static_cast<T>(v - 1);
    // FloorLog2
    if (v > 0) {
      EXPECT_EQ(FloorLog2(v), i);
    }
    if (vm1 > 0) {
      EXPECT_EQ(FloorLog2(vm1), i - 1);
      EXPECT_EQ(FloorLog2(everyOtherBit & vm1), (i - 1) & ~1);
    }
    // CountTrailingZeroBits
    if (v != 0) {
      EXPECT_EQ(CountTrailingZeroBits(v), i);
    }
    if (vm1 != 0) {
      EXPECT_EQ(CountTrailingZeroBits(vm1), 0);
    }
    if (i < int{8 * sizeof(T)} - 1) {
      EXPECT_EQ(CountTrailingZeroBits(~vm1 & everyOtherBit), (i + 1) & ~1);
    }
    // BitsSetToOne
    EXPECT_EQ(BitsSetToOne(v), 1);
    EXPECT_EQ(BitsSetToOne(vm1), i);
    EXPECT_EQ(BitsSetToOne(vm1 & everyOtherBit), (i + 1) / 2);
    // BitParity
    EXPECT_EQ(BitParity(v), 1);
    EXPECT_EQ(BitParity(vm1), i & 1);
    EXPECT_EQ(BitParity(vm1 & everyOtherBit), ((i + 1) / 2) & 1);
    vm1 = (vm1 << 1) | 1;
  }
 }
 TEST(MathTest, BitOps) {
  test_BitOps<uint32_t>();
  test_BitOps<uint64_t>();
  test_BitOps<uint16_t>();
  test_BitOps<uint8_t>();
  test_BitOps<unsigned char>();
  test_BitOps<unsigned short>();
  test_BitOps<unsigned int>();
  test_BitOps<unsigned long>();
  test_BitOps<unsigned long long>();
  test_BitOps<char>();
  test_BitOps<size_t>();
  test_BitOps<int32_t>();
  test_BitOps<int64_t>();
  test_BitOps<int16_t>();
  test_BitOps<int8_t>();
  test_BitOps<signed char>();
  test_BitOps<short>();
  test_BitOps<int>();
  test_BitOps<long>();
  test_BitOps<long long>();
  test_BitOps<ptrdiff_t>();
 }
 TEST(MathTest, BitOps128) { test_BitOps<Unsigned128>(); }
 TEST(MathTest, Math128) {
  const Unsigned128 sixteenHexOnes = 0x1111111111111111U;
  const Unsigned128 thirtyHexOnes = (sixteenHexOnes << 56) | sixteenHexOnes;
  const Unsigned128 sixteenHexTwos = 0x2222222222222222U;
  const Unsigned128 thirtyHexTwos = (sixteenHexTwos << 56) | sixteenHexTwos;
  // v will slide from all hex ones to all hex twos
  Unsigned128 v = thirtyHexOnes;
  for (int i = 0; i <= 30; ++i) {
    // Test bitwise operations
    EXPECT_EQ(BitsSetToOne(v), 30);
    EXPECT_EQ(BitsSetToOne(~v), 128 - 30);
    EXPECT_EQ(BitsSetToOne(v & thirtyHexOnes), 30 - i);
    EXPECT_EQ(BitsSetToOne(v | thirtyHexOnes), 30 + i);
    EXPECT_EQ(BitsSetToOne(v ^ thirtyHexOnes), 2 * i);
    EXPECT_EQ(BitsSetToOne(v & thirtyHexTwos), i);
    EXPECT_EQ(BitsSetToOne(v | thirtyHexTwos), 60 - i);
    EXPECT_EQ(BitsSetToOne(v ^ thirtyHexTwos), 60 - 2 * i);
    // Test comparisons
    EXPECT_EQ(v == thirtyHexOnes, i == 0);
    EXPECT_EQ(v == thirtyHexTwos, i == 30);
    EXPECT_EQ(v > thirtyHexOnes, i > 0);
    EXPECT_EQ(v > thirtyHexTwos, false);
    EXPECT_EQ(v >= thirtyHexOnes, true);
    EXPECT_EQ(v >= thirtyHexTwos, i == 30);
    EXPECT_EQ(v < thirtyHexOnes, false);
    EXPECT_EQ(v < thirtyHexTwos, i < 30);
    EXPECT_EQ(v <= thirtyHexOnes, i == 0);
    EXPECT_EQ(v <= thirtyHexTwos, true);
    // Update v, clearing upper-most byte
    v = ((v << 12) >> 8) | 0x2;
  }
  for (int i = 0; i < 128; ++i) {
    // Test shifts
    Unsigned128 sl = thirtyHexOnes << i;
    Unsigned128 sr = thirtyHexOnes >> i;
    EXPECT_EQ(BitsSetToOne(sl), std::min(30, 32 - i / 4));
    EXPECT_EQ(BitsSetToOne(sr), std::max(0, 30 - (i + 3) / 4));
    EXPECT_EQ(BitsSetToOne(sl & sr), i % 2 ? 0 : std::max(0, 30 - i / 2));
  }
  // Test 64x64->128 multiply
  Unsigned128 product =
      Multiply64to128(0x1111111111111111U, 0x2222222222222222U);
  EXPECT_EQ(Lower64of128(product), 2295594818061633090U);
  EXPECT_EQ(Upper64of128(product), 163971058432973792U);
 }
 TEST(MathTest, Coding128) {
  const char *in = "_1234567890123456";
  Unsigned128 decoded = DecodeFixed128(in + 1);
  EXPECT_EQ(Lower64of128(decoded), 4050765991979987505U);
  EXPECT_EQ(Upper64of128(decoded), 3906085646303834169U);
  char out[18];
  out[0] = '_';
  EncodeFixed128(out + 1, decoded);
  out[17] = '\0';
  EXPECT_EQ(std::string(in), std::string(out));
 }
 int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
--- a/util/math.h
+++ b/util/math.h
@ -13,24 +13,112 @@
 namespace ROCKSDB_NAMESPACE {
 // Fast implementation of floor(log2(v)). Undefined for 0 or negative
 // numbers (in case of signed type).
 template <typename T>
-inline int BitsSetToOne(T v) {
+inline int FloorLog2(T v) {
  static_assert(std::is_integral<T>::value, "non-integral type");
  assert(v > 0);
 #ifdef _MSC_VER
  static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
-  if (sizeof(T) > sizeof(uint32_t)) {
+  unsigned long lz = 0;
-    return static_cast<int>(__popcnt64(static_cast<uint64_t>(v)));
+  if (sizeof(T) <= sizeof(uint32_t)) {
    _BitScanReverse(&lz, static_cast<uint32_t>(v));
  } else {
    _BitScanReverse64(&lz, static_cast<uint64_t>(v));
  }
  return 63 - static_cast<int>(lz);
 #else
  static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
  if (sizeof(T) <= sizeof(unsigned int)) {
    int lz = __builtin_clz(static_cast<unsigned int>(v));
    return int{sizeof(unsigned int)} * 8 - 1 - lz;
  } else if (sizeof(T) <= sizeof(unsigned long)) {
    int lz = __builtin_clzl(static_cast<unsigned long>(v));
    return int{sizeof(unsigned long)} * 8 - 1 - lz;
  } else {
    int lz = __builtin_clzll(static_cast<unsigned long long>(v));
    return int{sizeof(unsigned long long)} * 8 - 1 - lz;
  }
 #endif
 }
 // Number of low-order zero bits before the first 1 bit. Undefined for 0.
 template <typename T>
 inline int CountTrailingZeroBits(T v) {
  static_assert(std::is_integral<T>::value, "non-integral type");
  assert(v != 0);
 #ifdef _MSC_VER
  static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
  unsigned long tz = 0;
  if (sizeof(T) <= sizeof(uint32_t)) {
    _BitScanForward(&tz, static_cast<uint32_t>(v));
  } else {
    _BitScanForward64(&tz, static_cast<uint64_t>(v));
  }
  return static_cast<int>(tz);
 #else
  static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
  if (sizeof(T) <= sizeof(unsigned int)) {
    return __builtin_ctz(static_cast<unsigned int>(v));
  } else if (sizeof(T) <= sizeof(unsigned long)) {
    return __builtin_ctzl(static_cast<unsigned long>(v));
  } else {
    return __builtin_ctzll(static_cast<unsigned long long>(v));
  }
 #endif
 }
 // Number of bits set to 1. Also known as "population count".
 template <typename T>
 inline int BitsSetToOne(T v) {
  static_assert(std::is_integral<T>::value, "non-integral type");
 #ifdef _MSC_VER
  static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
  if (sizeof(T) < sizeof(uint32_t)) {
    // This bit mask is to avoid a compiler warning on unused path
    constexpr auto mm = 8 * sizeof(uint32_t) - 1;
    // The bit mask is to neutralize sign extension on small signed types
    constexpr uint32_t m = (uint32_t{1} << ((8 * sizeof(T)) & mm)) - 1;
    return static_cast<int>(__popcnt(static_cast<uint32_t>(v) & m));
  } else if (sizeof(T) == sizeof(uint32_t)) {
    return static_cast<int>(__popcnt(static_cast<uint32_t>(v)));
  } else {
    return static_cast<int>(__popcnt64(static_cast<uint64_t>(v)));
  }
 #else
  static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
-  if (sizeof(T) > sizeof(unsigned long)) {
+  if (sizeof(T) < sizeof(unsigned int)) {
-    return __builtin_popcountll(static_cast<unsigned long long>(v));
+    // This bit mask is to avoid a compiler warning on unused path
-  } else if (sizeof(T) > sizeof(unsigned int)) {
+    constexpr auto mm = 8 * sizeof(unsigned int) - 1;
    // This bit mask is to neutralize sign extension on small signed types
    constexpr unsigned int m = (1U << ((8 * sizeof(T)) & mm)) - 1;
    return __builtin_popcount(static_cast<unsigned int>(v) & m);
  } else if (sizeof(T) == sizeof(unsigned int)) {
    return __builtin_popcount(static_cast<unsigned int>(v));
  } else if (sizeof(T) <= sizeof(unsigned long)) {
    return __builtin_popcountl(static_cast<unsigned long>(v));
  } else {
-    return __builtin_popcount(static_cast<unsigned int>(v));
+    return __builtin_popcountll(static_cast<unsigned long long>(v));
  }
 #endif
 }
 template <typename T>
 inline int BitParity(T v) {
  static_assert(std::is_integral<T>::value, "non-integral type");
 #ifdef _MSC_VER
  // bit parity == oddness of popcount
  return BitsSetToOne(v) & 1;
 #else
  static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
  if (sizeof(T) <= sizeof(unsigned int)) {
    // On any sane systen, potential sign extension here won't change parity
    return __builtin_parity(static_cast<unsigned int>(v));
  } else if (sizeof(T) <= sizeof(unsigned long)) {
    return __builtin_parityl(static_cast<unsigned long>(v));
  } else {
    return __builtin_parityll(static_cast<unsigned long long>(v));
  }
 #endif
 }
--- a/util/math128.h
+++ b/util/math128.h
@ -0,0 +1,223 @@
 //  Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 //  This source code is licensed under both the GPLv2 (found in the
 //  COPYING file in the root directory) and Apache 2.0 License
 //  (found in the LICENSE.Apache file in the root directory).
 #pragma once
 #include "util/coding.h"
 #include "util/math.h"
 #ifdef TEST_UINT128_COMPAT
 #undef HAVE_UINT128_EXTENSION
 #endif
 namespace ROCKSDB_NAMESPACE {
 // Unsigned128 is a 128 bit value supporting (at least) bitwise operators,
 // shifts, and comparisons. __uint128_t is not always available.
 #ifdef HAVE_UINT128_EXTENSION
 using Unsigned128 = __uint128_t;
 #else
 struct Unsigned128 {
  uint64_t lo;
  uint64_t hi;
  inline Unsigned128() {
    static_assert(sizeof(Unsigned128) == 2 * sizeof(uint64_t),
                  "unexpected overhead in representation");
    lo = 0;
    hi = 0;
  }
  inline Unsigned128(uint64_t lower) {
    lo = lower;
    hi = 0;
  }
  inline Unsigned128(uint64_t lower, uint64_t upper) {
    lo = lower;
    hi = upper;
  }
 };
 inline Unsigned128 operator<<(const Unsigned128& lhs, unsigned shift) {
  shift &= 127;
  Unsigned128 rv;
  if (shift >= 64) {
    rv.lo = 0;
    rv.hi = lhs.lo << (shift & 63);
  } else {
    uint64_t tmp = lhs.lo;
    rv.lo = tmp << shift;
    // Ensure shift==0 shifts away everything. (This avoids another
    // conditional branch on shift == 0.)
    tmp = tmp >> 1 >> (63 - shift);
    rv.hi = tmp | (lhs.hi << shift);
  }
  return rv;
 }
 inline Unsigned128& operator<<=(Unsigned128& lhs, unsigned shift) {
  lhs = lhs << shift;
  return lhs;
 }
 inline Unsigned128 operator>>(const Unsigned128& lhs, unsigned shift) {
  shift &= 127;
  Unsigned128 rv;
  if (shift >= 64) {
    rv.hi = 0;
    rv.lo = lhs.hi >> (shift & 63);
  } else {
    uint64_t tmp = lhs.hi;
    rv.hi = tmp >> shift;
    // Ensure shift==0 shifts away everything
    tmp = tmp << 1 << (63 - shift);
    rv.lo = tmp | (lhs.lo >> shift);
  }
  return rv;
 }
 inline Unsigned128& operator>>=(Unsigned128& lhs, unsigned shift) {
  lhs = lhs >> shift;
  return lhs;
 }
 inline Unsigned128 operator&(const Unsigned128& lhs, const Unsigned128& rhs) {
  return Unsigned128(lhs.lo & rhs.lo, lhs.hi & rhs.hi);
 }
 inline Unsigned128& operator&=(Unsigned128& lhs, const Unsigned128& rhs) {
  lhs = lhs & rhs;
  return lhs;
 }
 inline Unsigned128 operator|(const Unsigned128& lhs, const Unsigned128& rhs) {
  return Unsigned128(lhs.lo | rhs.lo, lhs.hi | rhs.hi);
 }
 inline Unsigned128& operator|=(Unsigned128& lhs, const Unsigned128& rhs) {
  lhs = lhs | rhs;
  return lhs;
 }
 inline Unsigned128 operator^(const Unsigned128& lhs, const Unsigned128& rhs) {
  return Unsigned128(lhs.lo ^ rhs.lo, lhs.hi ^ rhs.hi);
 }
 inline Unsigned128& operator^=(Unsigned128& lhs, const Unsigned128& rhs) {
  lhs = lhs ^ rhs;
  return lhs;
 }
 inline Unsigned128 operator~(const Unsigned128& v) {
  return Unsigned128(~v.lo, ~v.hi);
 }
 inline bool operator==(const Unsigned128& lhs, const Unsigned128& rhs) {
  return lhs.lo == rhs.lo && lhs.hi == rhs.hi;
 }
 inline bool operator!=(const Unsigned128& lhs, const Unsigned128& rhs) {
  return lhs.lo != rhs.lo || lhs.hi != rhs.hi;
 }
 inline bool operator>(const Unsigned128& lhs, const Unsigned128& rhs) {
  return lhs.hi > rhs.hi || (lhs.hi == rhs.hi && lhs.lo > rhs.lo);
 }
 inline bool operator<(const Unsigned128& lhs, const Unsigned128& rhs) {
  return lhs.hi < rhs.hi || (lhs.hi == rhs.hi && lhs.lo < rhs.lo);
 }
 inline bool operator>=(const Unsigned128& lhs, const Unsigned128& rhs) {
  return lhs.hi > rhs.hi || (lhs.hi == rhs.hi && lhs.lo >= rhs.lo);
 }
 inline bool operator<=(const Unsigned128& lhs, const Unsigned128& rhs) {
  return lhs.hi < rhs.hi || (lhs.hi == rhs.hi && lhs.lo <= rhs.lo);
 }
 #endif
 inline uint64_t Lower64of128(Unsigned128 v) {
 #ifdef HAVE_UINT128_EXTENSION
  return static_cast<uint64_t>(v);
 #else
  return v.lo;
 #endif
 }
 inline uint64_t Upper64of128(Unsigned128 v) {
 #ifdef HAVE_UINT128_EXTENSION
  return static_cast<uint64_t>(v >> 64);
 #else
  return v.hi;
 #endif
 }
 // This generally compiles down to a single fast instruction on 64-bit.
 // This doesn't really make sense as operator* because it's not a
 // general 128x128 multiply and provides more output than 64x64 multiply.
 inline Unsigned128 Multiply64to128(uint64_t a, uint64_t b) {
 #ifdef HAVE_UINT128_EXTENSION
  return Unsigned128{a} * Unsigned128{b};
 #else
  // Full decomposition
  // NOTE: GCC seems to fully understand this code as 64-bit x 64-bit
  // -> 128-bit multiplication and optimize it appropriately.
  uint64_t tmp = uint64_t{b & 0xffffFFFF} * uint64_t{a & 0xffffFFFF};
  uint64_t lower = tmp & 0xffffFFFF;
  tmp >>= 32;
  tmp += uint64_t{b & 0xffffFFFF} * uint64_t{a >> 32};
  // Avoid overflow: first add lower 32 of tmp2, and later upper 32
  uint64_t tmp2 = uint64_t{b >> 32} * uint64_t{a & 0xffffFFFF};
  tmp += tmp2 & 0xffffFFFF;
  lower |= tmp << 32;
  tmp >>= 32;
  tmp += tmp2 >> 32;
  tmp += uint64_t{b >> 32} * uint64_t{a >> 32};
  return Unsigned128(lower, tmp);
 #endif
 }
 template <>
 inline int FloorLog2(Unsigned128 v) {
  if (Upper64of128(v) == 0) {
    return FloorLog2(Lower64of128(v));
  } else {
    return FloorLog2(Upper64of128(v)) + 64;
  }
 }
 template <>
 inline int CountTrailingZeroBits(Unsigned128 v) {
  if (Lower64of128(v) != 0) {
    return CountTrailingZeroBits(Lower64of128(v));
  } else {
    return CountTrailingZeroBits(Upper64of128(v)) + 64;
  }
 }
 template <>
 inline int BitsSetToOne(Unsigned128 v) {
  return BitsSetToOne(Lower64of128(v)) + BitsSetToOne(Upper64of128(v));
 }
 template <>
 inline int BitParity(Unsigned128 v) {
  return BitParity(Lower64of128(v)) ^ BitParity(Upper64of128(v));
 }
 inline void EncodeFixed128(char* dst, Unsigned128 value) {
  EncodeFixed64(dst, Lower64of128(value));
  EncodeFixed64(dst + 8, Upper64of128(value));
 }
 inline Unsigned128 DecodeFixed128(const char* ptr) {
  Unsigned128 rv = DecodeFixed64(ptr + 8);
  return (rv << 64) | DecodeFixed64(ptr);
 }
 }  // namespace ROCKSDB_NAMESPACE