Add Encode/DecodeFixedGeneric, coding_lean.h (#7587)

Summary:
To minimize dependencies for Ribbon filter code in progress,
core part of coding.h for fixed sizes has been moved to coding_lean.h.
Also, generic versions of these functions have been added to math128.h
(since the generic versions are likely only to be used along with
Unsigned128).

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7587

Test Plan: Unit tests added for new functions

Reviewed By: jay-zhuang

Differential Revision: D24486718

Pulled By: pdillinger

fbshipit-source-id: a69768f742379689442135fa52237c01dfe2647e
main
Peter Dillinger 4 years ago committed by Facebook GitHub Bot
parent b1cdb8cc86
commit a16d1b2fd3
  1. 91
      util/coding.h
  2. 101
      util/coding_lean.h
  3. 44
      util/hash_test.cc
  4. 64
      util/math128.h

@ -12,15 +12,16 @@
// (little endian, native order on Intel and others) // (little endian, native order on Intel and others)
// * In addition we support variable length "varint" encoding // * In addition we support variable length "varint" encoding
// * Strings are encoded prefixed by their length in varint format // * Strings are encoded prefixed by their length in varint format
//
// Some related functions are provided in coding_lean.h
#pragma once #pragma once
#include <algorithm> #include <algorithm>
#include <stdint.h>
#include <string.h>
#include <string> #include <string>
#include "rocksdb/write_batch.h"
#include "port/port.h" #include "port/port.h"
#include "rocksdb/slice.h"
#include "util/coding_lean.h"
// Some processors do not allow unaligned access to memory // Some processors do not allow unaligned access to memory
#if defined(__sparc) #if defined(__sparc)
@ -94,60 +95,12 @@ inline const char* GetVarsignedint64Ptr(const char* p, const char* limit,
// Returns the length of the varint32 or varint64 encoding of "v" // Returns the length of the varint32 or varint64 encoding of "v"
extern int VarintLength(uint64_t v); extern int VarintLength(uint64_t v);
// Lower-level versions of Put... that write directly into a character buffer
// REQUIRES: dst has enough space for the value being written
// One declaration per fixed width: 16-, 32- and 64-bit values. Per this
// file's header comment, fixed-length numbers are stored least-significant
// byte first.
extern void EncodeFixed16(char* dst, uint16_t value);
extern void EncodeFixed32(char* dst, uint32_t value);
extern void EncodeFixed64(char* dst, uint64_t value);
// Lower-level versions of Put... that write directly into a character buffer // Lower-level versions of Put... that write directly into a character buffer
// and return a pointer just past the last byte written. // and return a pointer just past the last byte written.
// REQUIRES: dst has enough space for the value being written // REQUIRES: dst has enough space for the value being written
extern char* EncodeVarint32(char* dst, uint32_t value); extern char* EncodeVarint32(char* dst, uint32_t value);
extern char* EncodeVarint64(char* dst, uint64_t value); extern char* EncodeVarint64(char* dst, uint64_t value);
// Lower-level versions of Get... that read directly from a character buffer
// without any bounds checking.
// Reads a 16-bit value stored least-significant byte first at ptr[0..1].
// No bounds checking is performed.
inline uint16_t DecodeFixed16(const char* ptr) {
  if (!port::kLittleEndian) {
    // Host order differs from the encoding: combine the bytes by hand.
    const uint16_t low = static_cast<unsigned char>(ptr[0]);
    const uint16_t high = static_cast<unsigned char>(ptr[1]);
    return low | (high << 8);
  }
  // Host order matches the encoding: copy the raw bytes
  // (gcc optimizes this memcpy to a plain load).
  uint16_t raw;
  memcpy(&raw, ptr, sizeof(raw));
  return raw;
}
// Reads a 32-bit value stored least-significant byte first at ptr[0..3].
// No bounds checking is performed.
inline uint32_t DecodeFixed32(const char* ptr) {
  if (!port::kLittleEndian) {
    // Host order differs from the encoding: byte i supplies bits
    // [8*i, 8*i + 7] of the result.
    uint32_t assembled = 0;
    for (int i = 0; i < 4; ++i) {
      assembled |= static_cast<uint32_t>(static_cast<unsigned char>(ptr[i]))
                   << (8 * i);
    }
    return assembled;
  }
  // Host order matches the encoding: copy the raw bytes
  // (gcc optimizes this memcpy to a plain load).
  uint32_t raw;
  memcpy(&raw, ptr, sizeof(raw));
  return raw;
}
// Reads a 64-bit value stored least-significant byte first at ptr[0..7].
// No bounds checking is performed.
inline uint64_t DecodeFixed64(const char* ptr) {
  if (!port::kLittleEndian) {
    // Host order differs from the encoding: decode the two 32-bit halves
    // and join them, low half first in memory.
    const uint64_t low_half = DecodeFixed32(ptr);
    const uint64_t high_half = DecodeFixed32(ptr + 4);
    return low_half | (high_half << 32);
  }
  // Host order matches the encoding: copy the raw bytes
  // (gcc optimizes this memcpy to a plain load).
  uint64_t raw;
  memcpy(&raw, ptr, sizeof(raw));
  return raw;
}
// Internal routine for use by fallback path of GetVarint32Ptr // Internal routine for use by fallback path of GetVarint32Ptr
extern const char* GetVarint32PtrFallback(const char* p, extern const char* GetVarint32PtrFallback(const char* p,
const char* limit, const char* limit,
@ -165,42 +118,6 @@ inline const char* GetVarint32Ptr(const char* p,
return GetVarint32PtrFallback(p, limit, value); return GetVarint32PtrFallback(p, limit, value);
} }
// -- Implementation of the functions declared above
// Stores the 16-bit `value` at buf[0..1], least-significant byte first.
// REQUIRES: buf has enough space for the value being written.
inline void EncodeFixed16(char* buf, uint16_t value) {
  if (!port::kLittleEndian) {
    // Host order differs from the encoding: emit one byte at a time.
    for (int i = 0; i < 2; ++i) {
      buf[i] = (value >> (8 * i)) & 0xff;
    }
    return;
  }
  // Host order already matches the encoding: copy the raw bytes.
  memcpy(buf, &value, sizeof(value));
}
// Stores the 32-bit `value` at buf[0..3], least-significant byte first.
// REQUIRES: buf has enough space for the value being written.
inline void EncodeFixed32(char* buf, uint32_t value) {
  if (!port::kLittleEndian) {
    // Host order differs from the encoding: emit one byte at a time.
    for (int i = 0; i < 4; ++i) {
      buf[i] = (value >> (8 * i)) & 0xff;
    }
    return;
  }
  // Host order already matches the encoding: copy the raw bytes.
  memcpy(buf, &value, sizeof(value));
}
// Stores the 64-bit `value` at buf[0..7], least-significant byte first.
// REQUIRES: buf has enough space for the value being written.
inline void EncodeFixed64(char* buf, uint64_t value) {
  if (!port::kLittleEndian) {
    // Host order differs from the encoding: emit one byte at a time.
    for (int i = 0; i < 8; ++i) {
      buf[i] = (value >> (8 * i)) & 0xff;
    }
    return;
  }
  // Host order already matches the encoding: copy the raw bytes.
  memcpy(buf, &value, sizeof(value));
}
// Pull the last 8 bits and cast it to a character // Pull the last 8 bits and cast it to a character
inline void PutFixed16(std::string* dst, uint16_t value) { inline void PutFixed16(std::string* dst, uint16_t value) {
if (port::kLittleEndian) { if (port::kLittleEndian) {

@ -0,0 +1,101 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
// Encoding independent of machine byte order:
// * Fixed-length numbers are encoded with least-significant byte first
// (little endian, native order on Intel and others)
//
// More functions in coding.h
#pragma once
#include <cstdint>
#include <cstring>
#include "port/port.h" // for port::kLittleEndian
namespace ROCKSDB_NAMESPACE {
// Lower-level versions of Put... that write directly into a character buffer
// REQUIRES: dst has enough space for the value being written
// (Defined inline here; this header has no separate forward declarations.)
// Stores the 16-bit `value` at buf[0..1], least-significant byte first.
// REQUIRES: buf has enough space for the value being written.
inline void EncodeFixed16(char* buf, uint16_t value) {
  if (!port::kLittleEndian) {
    // Host order differs from the encoding: emit one byte at a time.
    for (int i = 0; i < 2; ++i) {
      buf[i] = (value >> (8 * i)) & 0xff;
    }
    return;
  }
  // Host order already matches the encoding: copy the raw bytes.
  memcpy(buf, &value, sizeof(value));
}
// Stores the 32-bit `value` at buf[0..3], least-significant byte first.
// REQUIRES: buf has enough space for the value being written.
inline void EncodeFixed32(char* buf, uint32_t value) {
  if (!port::kLittleEndian) {
    // Host order differs from the encoding: emit one byte at a time.
    for (int i = 0; i < 4; ++i) {
      buf[i] = (value >> (8 * i)) & 0xff;
    }
    return;
  }
  // Host order already matches the encoding: copy the raw bytes.
  memcpy(buf, &value, sizeof(value));
}
// Stores the 64-bit `value` at buf[0..7], least-significant byte first.
// REQUIRES: buf has enough space for the value being written.
inline void EncodeFixed64(char* buf, uint64_t value) {
  if (!port::kLittleEndian) {
    // Host order differs from the encoding: emit one byte at a time.
    for (int i = 0; i < 8; ++i) {
      buf[i] = (value >> (8 * i)) & 0xff;
    }
    return;
  }
  // Host order already matches the encoding: copy the raw bytes.
  memcpy(buf, &value, sizeof(value));
}
// Lower-level versions of Get... that read directly from a character buffer
// without any bounds checking.
// Reads a 16-bit value stored least-significant byte first at ptr[0..1].
// No bounds checking is performed.
inline uint16_t DecodeFixed16(const char* ptr) {
  if (!port::kLittleEndian) {
    // Host order differs from the encoding: combine the bytes by hand.
    const uint16_t low = static_cast<unsigned char>(ptr[0]);
    const uint16_t high = static_cast<unsigned char>(ptr[1]);
    return low | (high << 8);
  }
  // Host order matches the encoding: copy the raw bytes
  // (gcc optimizes this memcpy to a plain load).
  uint16_t raw;
  memcpy(&raw, ptr, sizeof(raw));
  return raw;
}
// Reads a 32-bit value stored least-significant byte first at ptr[0..3].
// No bounds checking is performed.
inline uint32_t DecodeFixed32(const char* ptr) {
  if (!port::kLittleEndian) {
    // Host order differs from the encoding: byte i supplies bits
    // [8*i, 8*i + 7] of the result.
    uint32_t assembled = 0;
    for (int i = 0; i < 4; ++i) {
      assembled |= static_cast<uint32_t>(static_cast<unsigned char>(ptr[i]))
                   << (8 * i);
    }
    return assembled;
  }
  // Host order matches the encoding: copy the raw bytes
  // (gcc optimizes this memcpy to a plain load).
  uint32_t raw;
  memcpy(&raw, ptr, sizeof(raw));
  return raw;
}
// Reads a 64-bit value stored least-significant byte first at ptr[0..7].
// No bounds checking is performed.
inline uint64_t DecodeFixed64(const char* ptr) {
  if (!port::kLittleEndian) {
    // Host order differs from the encoding: decode the two 32-bit halves
    // and join them, low half first in memory.
    const uint64_t low_half = DecodeFixed32(ptr);
    const uint64_t high_half = DecodeFixed32(ptr + 4);
    return low_half | (high_half << 32);
  }
  // Host order matches the encoding: copy the raw bytes
  // (gcc optimizes this memcpy to a plain load).
  uint64_t raw;
  memcpy(&raw, ptr, sizeof(raw));
  return raw;
}
} // namespace ROCKSDB_NAMESPACE

@ -396,7 +396,9 @@ using ROCKSDB_NAMESPACE::BitParity;
using ROCKSDB_NAMESPACE::BitsSetToOne; using ROCKSDB_NAMESPACE::BitsSetToOne;
using ROCKSDB_NAMESPACE::CountTrailingZeroBits; using ROCKSDB_NAMESPACE::CountTrailingZeroBits;
using ROCKSDB_NAMESPACE::DecodeFixed128; using ROCKSDB_NAMESPACE::DecodeFixed128;
using ROCKSDB_NAMESPACE::DecodeFixedGeneric;
using ROCKSDB_NAMESPACE::EncodeFixed128; using ROCKSDB_NAMESPACE::EncodeFixed128;
using ROCKSDB_NAMESPACE::EncodeFixedGeneric;
using ROCKSDB_NAMESPACE::FloorLog2; using ROCKSDB_NAMESPACE::FloorLog2;
using ROCKSDB_NAMESPACE::Lower64of128; using ROCKSDB_NAMESPACE::Lower64of128;
using ROCKSDB_NAMESPACE::Multiply64to128; using ROCKSDB_NAMESPACE::Multiply64to128;
@ -534,9 +536,10 @@ TEST(MathTest, Math128) {
TEST(MathTest, Coding128) { TEST(MathTest, Coding128) {
const char *in = "_1234567890123456"; const char *in = "_1234567890123456";
// Note: in + 1 is likely unaligned
Unsigned128 decoded = DecodeFixed128(in + 1); Unsigned128 decoded = DecodeFixed128(in + 1);
EXPECT_EQ(Lower64of128(decoded), 4050765991979987505U); EXPECT_EQ(Lower64of128(decoded), 0x3837363534333231U);
EXPECT_EQ(Upper64of128(decoded), 3906085646303834169U); EXPECT_EQ(Upper64of128(decoded), 0x3635343332313039U);
char out[18]; char out[18];
out[0] = '_'; out[0] = '_';
EncodeFixed128(out + 1, decoded); EncodeFixed128(out + 1, decoded);
@ -544,6 +547,43 @@ TEST(MathTest, Coding128) {
EXPECT_EQ(std::string(in), std::string(out)); EXPECT_EQ(std::string(in), std::string(out));
} }
// Round-trips each supported width through DecodeFixedGeneric and
// EncodeFixedGeneric, using a payload that begins one byte into a string.
TEST(MathTest, CodingGeneric) {
  const char* const src = "_1234567890123456";
  // Note: src + 1 is likely unaligned
  const char* const payload = src + 1;

  // Decode: each width reads the leading bytes of the same payload.
  const Unsigned128 v128 = DecodeFixedGeneric<Unsigned128>(payload);
  EXPECT_EQ(Lower64of128(v128), 0x3837363534333231U);
  EXPECT_EQ(Upper64of128(v128), 0x3635343332313039U);

  const uint64_t v64 = DecodeFixedGeneric<uint64_t>(payload);
  EXPECT_EQ(v64, 0x3837363534333231U);

  const uint32_t v32 = DecodeFixedGeneric<uint32_t>(payload);
  EXPECT_EQ(v32, 0x34333231U);

  const uint16_t v16 = DecodeFixedGeneric<uint16_t>(payload);
  EXPECT_EQ(v16, 0x3231U);

  // Encode: before each write, zero one byte more than will be written so
  // the buffer reads back as a NUL-terminated string.
  char buf[18];
  buf[0] = '_';
  char* const dst = buf + 1;

  memset(dst, '\0', 17);
  EncodeFixedGeneric(dst, v128);
  EXPECT_EQ(std::string(src), std::string(buf));

  memset(dst, '\0', 9);
  EncodeFixedGeneric(dst, v64);
  EXPECT_EQ(std::string("_12345678"), std::string(buf));

  memset(dst, '\0', 5);
  EncodeFixedGeneric(dst, v32);
  EXPECT_EQ(std::string("_1234"), std::string(buf));

  memset(dst, '\0', 3);
  EncodeFixedGeneric(dst, v16);
  EXPECT_EQ(std::string("_12"), std::string(buf));
}
int main(int argc, char** argv) { int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv); ::testing::InitGoogleTest(&argc, argv);

@ -5,7 +5,7 @@
#pragma once #pragma once
#include "util/coding.h" #include "util/coding_lean.h"
#include "util/math.h" #include "util/math.h"
#ifdef TEST_UINT128_COMPAT #ifdef TEST_UINT128_COMPAT
@ -220,4 +220,66 @@ inline Unsigned128 DecodeFixed128(const char* ptr) {
return (rv << 64) | DecodeFixed64(ptr); return (rv << 64) | DecodeFixed64(ptr);
} }
// A version of EncodeFixed* for generic algorithms. Likely to be used
// with Unsigned128, so lives here for now.
//
// This primary template is deliberately unusable: instantiating it for a
// type that has no explicit specialization trips the static_assert below.
template <typename T>
inline void EncodeFixedGeneric(char* /*dst*/, T /*value*/) {
// Unfortunately, GCC does not appear to optimize this simple code down
// to a trivial load on Intel. (The sketch is written in its decode form:
// it reads from `ptr` and returns the assembled value.)
//
//   T ret_val = 0;
//   for (size_t i = 0; i < sizeof(T); ++i) {
//     ret_val |= (static_cast<T>(static_cast<unsigned char>(ptr[i]))
//                 << (8 * i));
//   }
//   return ret_val;
//
// But does unroll the loop, and does optimize manually unrolled version
// for specific sizes down to a trivial load. I have no idea why it doesn't
// do both on this code.
// So instead, we rely on specializations
//
// The condition depends on T, so it is only checked on instantiation.
static_assert(sizeof(T) == 0, "No specialization provided for this type");
}
// uint16_t specialization: forwards to EncodeFixed16.
template <>
inline void EncodeFixedGeneric(char* dst, uint16_t value) {
  EncodeFixed16(dst, value);
}
// uint32_t specialization: forwards to EncodeFixed32.
template <>
inline void EncodeFixedGeneric(char* dst, uint32_t value) {
  EncodeFixed32(dst, value);
}
// uint64_t specialization: forwards to EncodeFixed64.
template <>
inline void EncodeFixedGeneric(char* dst, uint64_t value) {
  EncodeFixed64(dst, value);
}
// Unsigned128 specialization: forwards to EncodeFixed128.
template <>
inline void EncodeFixedGeneric(char* dst, Unsigned128 value) {
  EncodeFixed128(dst, value);
}
// A version of DecodeFixed* for generic algorithms.
//
// This primary template is deliberately unusable: instantiating it for a
// type that has no explicit specialization trips the static_assert below.
// NOTE(review): the parameter is labelled `dst` even though a decoder reads
// from it; the name is kept as-is here.
template <typename T>
inline T DecodeFixedGeneric(const char* /*dst*/) {
// The condition depends on T, so it is only checked on instantiation.
static_assert(sizeof(T) == 0, "No specialization provided for this type");
}
// uint16_t specialization: forwards to DecodeFixed16.
template <>
inline uint16_t DecodeFixedGeneric(const char* dst) {
  const uint16_t decoded = DecodeFixed16(dst);
  return decoded;
}
// uint32_t specialization: forwards to DecodeFixed32.
template <>
inline uint32_t DecodeFixedGeneric(const char* dst) {
  const uint32_t decoded = DecodeFixed32(dst);
  return decoded;
}
// uint64_t specialization: forwards to DecodeFixed64.
template <>
inline uint64_t DecodeFixedGeneric(const char* dst) {
  const uint64_t decoded = DecodeFixed64(dst);
  return decoded;
}
// Unsigned128 specialization: forwards to DecodeFixed128.
template <>
inline Unsigned128 DecodeFixedGeneric(const char* dst) {
  Unsigned128 decoded = DecodeFixed128(dst);
  return decoded;
}
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE

Loading…
Cancel
Save