From a16d1b2fd36a2fe036b0aed1008f60570c981773 Mon Sep 17 00:00:00 2001
From: Peter Dillinger <peterd@fb.com>
Date: Fri, 23 Oct 2020 14:08:52 -0700
Subject: [PATCH] Add Encode/DecodeFixedGeneric, coding_lean.h (#7587)

Summary:
To minimize dependencies for Ribbon filter code in progress,
core part of coding.h for fixed sizes has been moved to coding_lean.h.
Also, generic versions of these functions have been added to math128.h
(since the generic versions are likely only to be used along with
Unsigned128).

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7587

Test Plan: Unit tests added for new functions

Reviewed By: jay-zhuang

Differential Revision: D24486718

Pulled By: pdillinger

fbshipit-source-id: a69768f742379689442135fa52237c01dfe2647e
---
 util/coding.h      |  91 ++--------------------------------------
 util/coding_lean.h | 101 +++++++++++++++++++++++++++++++++++++++++++++
 util/hash_test.cc  |  44 +++++++++++++++++++-
 util/math128.h     |  64 +++++++++++++++++++++++++++-
 4 files changed, 210 insertions(+), 90 deletions(-)
 create mode 100644 util/coding_lean.h
diff --git a/util/coding.h b/util/coding.h
index 0dd10ee5f..5215b3e9c 100644
--- a/util/coding.h
+++ b/util/coding.h
@@ -12,15 +12,16 @@
 //   (little endian, native order on Intel and others)
 // * In addition we support variable length "varint" encoding
 // * Strings are encoded prefixed by their length in varint format
+//
+// Some related functions are provided in coding_lean.h
 
 #pragma once
 #include <algorithm>
-#include <stdint.h>
-#include <string.h>
 #include <string>
 
-#include "rocksdb/write_batch.h"
 #include "port/port.h"
+#include "rocksdb/slice.h"
+#include "util/coding_lean.h"
 
 // Some processors does not allow unaligned access to memory
 #if defined(__sparc)
@@ -94,60 +95,12 @@ inline const char* GetVarsignedint64Ptr(const char* p, const char* limit,
 // Returns the length of the varint32 or varint64 encoding of "v"
 extern int VarintLength(uint64_t v);
 
-// Lower-level versions of Put... that write directly into a character buffer
-// REQUIRES: dst has enough space for the value being written
-extern void EncodeFixed16(char* dst, uint16_t value);
-extern void EncodeFixed32(char* dst, uint32_t value);
-extern void EncodeFixed64(char* dst, uint64_t value);
-
 // Lower-level versions of Put... that write directly into a character buffer
 // and return a pointer just past the last byte written.
 // REQUIRES: dst has enough space for the value being written
 extern char* EncodeVarint32(char* dst, uint32_t value);
 extern char* EncodeVarint64(char* dst, uint64_t value);
 
-// Lower-level versions of Get... that read directly from a character buffer
-// without any bounds checking.
-
-inline uint16_t DecodeFixed16(const char* ptr) {
-  if (port::kLittleEndian) {
-    // Load the raw bytes
-    uint16_t result;
-    memcpy(&result, ptr, sizeof(result));  // gcc optimizes this to a plain load
-    return result;
-  } else {
-    return ((static_cast<uint16_t>(static_cast<unsigned char>(ptr[0]))) |
-            (static_cast<uint16_t>(static_cast<unsigned char>(ptr[1])) << 8));
-  }
-}
-
-inline uint32_t DecodeFixed32(const char* ptr) {
-  if (port::kLittleEndian) {
-    // Load the raw bytes
-    uint32_t result;
-    memcpy(&result, ptr, sizeof(result));  // gcc optimizes this to a plain load
-    return result;
-  } else {
-    return ((static_cast<uint32_t>(static_cast<unsigned char>(ptr[0])))
-        | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[1])) << 8)
-        | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[2])) << 16)
-        | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[3])) << 24));
-  }
-}
-
-inline uint64_t DecodeFixed64(const char* ptr) {
-  if (port::kLittleEndian) {
-    // Load the raw bytes
-    uint64_t result;
-    memcpy(&result, ptr, sizeof(result));  // gcc optimizes this to a plain load
-    return result;
-  } else {
-    uint64_t lo = DecodeFixed32(ptr);
-    uint64_t hi = DecodeFixed32(ptr + 4);
-    return (hi << 32) | lo;
-  }
-}
-
 // Internal routine for use by fallback path of GetVarint32Ptr
 extern const char* GetVarint32PtrFallback(const char* p,
                                           const char* limit,
@@ -165,42 +118,6 @@ inline const char* GetVarint32Ptr(const char* p,
   return GetVarint32PtrFallback(p, limit, value);
 }
 
-// -- Implementation of the functions declared above
-inline void EncodeFixed16(char* buf, uint16_t value) {
-  if (port::kLittleEndian) {
-    memcpy(buf, &value, sizeof(value));
-  } else {
-    buf[0] = value & 0xff;
-    buf[1] = (value >> 8) & 0xff;
-  }
-}
-
-inline void EncodeFixed32(char* buf, uint32_t value) {
-  if (port::kLittleEndian) {
-    memcpy(buf, &value, sizeof(value));
-  } else {
-    buf[0] = value & 0xff;
-    buf[1] = (value >> 8) & 0xff;
-    buf[2] = (value >> 16) & 0xff;
-    buf[3] = (value >> 24) & 0xff;
-  }
-}
-
-inline void EncodeFixed64(char* buf, uint64_t value) {
-  if (port::kLittleEndian) {
-    memcpy(buf, &value, sizeof(value));
-  } else {
-    buf[0] = value & 0xff;
-    buf[1] = (value >> 8) & 0xff;
-    buf[2] = (value >> 16) & 0xff;
-    buf[3] = (value >> 24) & 0xff;
-    buf[4] = (value >> 32) & 0xff;
-    buf[5] = (value >> 40) & 0xff;
-    buf[6] = (value >> 48) & 0xff;
-    buf[7] = (value >> 56) & 0xff;
-  }
-}
-
 // Pull the last 8 bits and cast it to a character
 inline void PutFixed16(std::string* dst, uint16_t value) {
   if (port::kLittleEndian) {
diff --git a/util/coding_lean.h b/util/coding_lean.h
new file mode 100644
index 000000000..6966f7a66
--- /dev/null
+++ b/util/coding_lean.h
@@ -0,0 +1,101 @@
+//  Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+// Encoding independent of machine byte order:
+// * Fixed-length numbers are encoded with least-significant byte first
+//   (little endian, native order on Intel and others)
+//
+// More functions in coding.h
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+
+#include "port/port.h"  // for port::kLittleEndian
+
+namespace ROCKSDB_NAMESPACE {
+
+// Lower-level versions of Put... that write directly into a character buffer
+// REQUIRES: buf has enough space for the value being written
+// The value is always stored least-significant byte first.
+inline void EncodeFixed16(char* buf, uint16_t value) {
+  if (port::kLittleEndian) {  // host byte order already matches the encoding
+    memcpy(buf, &value, sizeof(value));
+  } else {  // store one byte at a time, least-significant byte first
+    buf[0] = value & 0xff;
+    buf[1] = (value >> 8) & 0xff;
+  }
+}
+
+inline void EncodeFixed32(char* buf, uint32_t value) {  // writes buf[0..3]
+  if (port::kLittleEndian) {  // host byte order already matches the encoding
+    memcpy(buf, &value, sizeof(value));
+  } else {  // store one byte at a time, least-significant byte first
+    buf[0] = value & 0xff;
+    buf[1] = (value >> 8) & 0xff;
+    buf[2] = (value >> 16) & 0xff;
+    buf[3] = (value >> 24) & 0xff;
+  }
+}
+
+inline void EncodeFixed64(char* buf, uint64_t value) {  // writes buf[0..7]
+  if (port::kLittleEndian) {  // host byte order already matches the encoding
+    memcpy(buf, &value, sizeof(value));
+  } else {  // store one byte at a time, least-significant byte first
+    buf[0] = value & 0xff;
+    buf[1] = (value >> 8) & 0xff;
+    buf[2] = (value >> 16) & 0xff;
+    buf[3] = (value >> 24) & 0xff;
+    buf[4] = (value >> 32) & 0xff;
+    buf[5] = (value >> 40) & 0xff;
+    buf[6] = (value >> 48) & 0xff;
+    buf[7] = (value >> 56) & 0xff;
+  }
+}
+
+// Lower-level versions of Get... that read directly from a character buffer
+// without any bounds checking. The least-significant byte is read first.
+
+inline uint16_t DecodeFixed16(const char* ptr) {  // reads ptr[0..1]
+  if (port::kLittleEndian) {
+    // Host byte order matches the encoding: load the raw bytes
+    uint16_t result;
+    memcpy(&result, ptr, sizeof(result));  // gcc optimizes this to a plain load
+    return result;
+  } else {  // assemble byte by byte; unsigned char avoids sign extension
+    return ((static_cast<uint16_t>(static_cast<unsigned char>(ptr[0]))) |
+            (static_cast<uint16_t>(static_cast<unsigned char>(ptr[1])) << 8));
+  }
+}
+
+inline uint32_t DecodeFixed32(const char* ptr) {  // reads ptr[0..3]
+  if (port::kLittleEndian) {
+    // Host byte order matches the encoding: load the raw bytes
+    uint32_t result;
+    memcpy(&result, ptr, sizeof(result));  // gcc optimizes this to a plain load
+    return result;
+  } else {  // assemble byte by byte; unsigned char avoids sign extension
+    return ((static_cast<uint32_t>(static_cast<unsigned char>(ptr[0]))) |
+            (static_cast<uint32_t>(static_cast<unsigned char>(ptr[1])) << 8) |
+            (static_cast<uint32_t>(static_cast<unsigned char>(ptr[2])) << 16) |
+            (static_cast<uint32_t>(static_cast<unsigned char>(ptr[3])) << 24));
+  }
+}
+
+inline uint64_t DecodeFixed64(const char* ptr) {  // reads ptr[0..7]
+  if (port::kLittleEndian) {
+    // Host byte order matches the encoding: load the raw bytes
+    uint64_t result;
+    memcpy(&result, ptr, sizeof(result));  // gcc optimizes this to a plain load
+    return result;
+  } else {  // combine two 32-bit halves; the low half is stored first
+    uint64_t lo = DecodeFixed32(ptr);
+    uint64_t hi = DecodeFixed32(ptr + 4);
+    return (hi << 32) | lo;
+  }
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/util/hash_test.cc b/util/hash_test.cc
index da253bdd4..a6530b686 100644
--- a/util/hash_test.cc
+++ b/util/hash_test.cc
@@ -396,7 +396,9 @@ using ROCKSDB_NAMESPACE::BitParity;
 using ROCKSDB_NAMESPACE::BitsSetToOne;
 using ROCKSDB_NAMESPACE::CountTrailingZeroBits;
 using ROCKSDB_NAMESPACE::DecodeFixed128;
+using ROCKSDB_NAMESPACE::DecodeFixedGeneric;
 using ROCKSDB_NAMESPACE::EncodeFixed128;
+using ROCKSDB_NAMESPACE::EncodeFixedGeneric;
 using ROCKSDB_NAMESPACE::FloorLog2;
 using ROCKSDB_NAMESPACE::Lower64of128;
 using ROCKSDB_NAMESPACE::Multiply64to128;
@@ -534,9 +536,10 @@ TEST(MathTest, Math128) {
 
 TEST(MathTest, Coding128) {
   const char *in = "_1234567890123456";
+  // Note: in + 1 is likely unaligned
   Unsigned128 decoded = DecodeFixed128(in + 1);
-  EXPECT_EQ(Lower64of128(decoded), 4050765991979987505U);
-  EXPECT_EQ(Upper64of128(decoded), 3906085646303834169U);
+  EXPECT_EQ(Lower64of128(decoded), 0x3837363534333231U);
+  EXPECT_EQ(Upper64of128(decoded), 0x3635343332313039U);
   char out[18];
   out[0] = '_';
   EncodeFixed128(out + 1, decoded);
@@ -544,6 +547,43 @@ TEST(MathTest, Coding128) {
   EXPECT_EQ(std::string(in), std::string(out));
 }
 
+TEST(MathTest, CodingGeneric) {
+  const char *in = "_1234567890123456";
+  // Decode: the ASCII bytes ('1' == 0x31, ...) are read low byte first
+  // Note: in + 1 is likely unaligned
+  Unsigned128 decoded128 = DecodeFixedGeneric<Unsigned128>(in + 1);
+  EXPECT_EQ(Lower64of128(decoded128), 0x3837363534333231U);
+  EXPECT_EQ(Upper64of128(decoded128), 0x3635343332313039U);
+
+  uint64_t decoded64 = DecodeFixedGeneric<uint64_t>(in + 1);
+  EXPECT_EQ(decoded64, 0x3837363534333231U);
+
+  uint32_t decoded32 = DecodeFixedGeneric<uint32_t>(in + 1);
+  EXPECT_EQ(decoded32, 0x34333231U);
+
+  uint16_t decoded16 = DecodeFixedGeneric<uint16_t>(in + 1);
+  EXPECT_EQ(decoded16, 0x3231U);
+
+  // Encode: each width must reproduce the matching prefix of `in`
+  char out[18];
+  out[0] = '_';
+  memset(out + 1, '\0', 17);  // clears the 16 payload bytes and the NUL
+  EncodeFixedGeneric(out + 1, decoded128);
+  EXPECT_EQ(std::string(in), std::string(out));
+
+  memset(out + 1, '\0', 9);  // 8 payload bytes plus a terminating NUL
+  EncodeFixedGeneric(out + 1, decoded64);
+  EXPECT_EQ(std::string("_12345678"), std::string(out));
+
+  memset(out + 1, '\0', 5);  // 4 payload bytes plus a terminating NUL
+  EncodeFixedGeneric(out + 1, decoded32);
+  EXPECT_EQ(std::string("_1234"), std::string(out));
+
+  memset(out + 1, '\0', 3);  // 2 payload bytes plus a terminating NUL
+  EncodeFixedGeneric(out + 1, decoded16);
+  EXPECT_EQ(std::string("_12"), std::string(out));
+}
+
 int main(int argc, char** argv) {
   ::testing::InitGoogleTest(&argc, argv);
 
diff --git a/util/math128.h b/util/math128.h
index 10e99714e..caff7a671 100644
--- a/util/math128.h
+++ b/util/math128.h
@@ -5,7 +5,7 @@
 
 #pragma once
 
-#include "util/coding.h"
+#include "util/coding_lean.h"
 #include "util/math.h"
 
 #ifdef TEST_UINT128_COMPAT
@@ -220,4 +220,66 @@ inline Unsigned128 DecodeFixed128(const char* ptr) {
   return (rv << 64) | DecodeFixed64(ptr);
 }
 
+// A version of EncodeFixed* for generic algorithms. Likely to be used
+// with Unsigned128, so lives here for now.
+template <typename T>
+inline void EncodeFixedGeneric(char* /*dst*/, T /*value*/) {
+  // Unfortunately, GCC does not appear to optimize this simple generic code
+  // (written here for the decode direction) down to a trivial load on Intel:
+  //
+  // T ret_val = 0;
+  // for (size_t i = 0; i < sizeof(T); ++i) {
+  //   ret_val |=
+  //       (static_cast<T>(static_cast<unsigned char>(ptr[i])) << (8 * i));
+  // }
+  // return ret_val;
+  //
+  // But does unroll the loop, and does optimize manually unrolled version
+  // for specific sizes down to a trivial load. I have no idea why it doesn't
+  // do both on this code.
+
+  // So instead, we rely on specializations; other types fail to compile
+  static_assert(sizeof(T) == 0, "No specialization provided for this type");
+}
+
+template <>  // Specializations forward to the matching fixed-width encoder
+inline void EncodeFixedGeneric(char* dst, uint16_t value) {
+  return EncodeFixed16(dst, value);  // returning a void call is valid C++
+}
+template <>
+inline void EncodeFixedGeneric(char* dst, uint32_t value) {
+  return EncodeFixed32(dst, value);
+}
+template <>
+inline void EncodeFixedGeneric(char* dst, uint64_t value) {
+  return EncodeFixed64(dst, value);
+}
+template <>
+inline void EncodeFixedGeneric(char* dst, Unsigned128 value) {
+  return EncodeFixed128(dst, value);
+}
+
+// A version of DecodeFixed* for generic algorithms.
+template <typename T>  // only the explicit specializations are usable
+inline T DecodeFixedGeneric(const char* /*dst*/) {
+  static_assert(sizeof(T) == 0, "No specialization provided for this type");
+}
+
+template <>  // Specializations forward to the matching fixed-width decoder
+inline uint16_t DecodeFixedGeneric(const char* dst) {
+  return DecodeFixed16(dst);  // despite its name, dst is the buffer read from
+}
+template <>
+inline uint32_t DecodeFixedGeneric(const char* dst) {
+  return DecodeFixed32(dst);
+}
+template <>
+inline uint64_t DecodeFixedGeneric(const char* dst) {
+  return DecodeFixed64(dst);
+}
+template <>
+inline Unsigned128 DecodeFixedGeneric(const char* dst) {
+  return DecodeFixed128(dst);
+}
+
 }  // namespace ROCKSDB_NAMESPACE