|
|
|
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include "util/coding_lean.h"
|
|
|
|
#include "util/math.h"
|
|
|
|
|
|
|
|
#ifdef TEST_UINT128_COMPAT
|
|
|
|
#undef HAVE_UINT128_EXTENSION
|
|
|
|
#endif
|
|
|
|
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
|
|
|
|
// Unsigned128 is a 128 bit value supporting (at least) bitwise operators,
|
|
|
|
// shifts, and comparisons. __uint128_t is not always available.
|
|
|
|
|
|
|
|
#ifdef HAVE_UINT128_EXTENSION
|
|
|
|
// Use the compiler-provided 128-bit unsigned integer type when the build
// reports the extension as available (HAVE_UINT128_EXTENSION is defined).
using Unsigned128 = __uint128_t;
|
|
|
|
#else
|
|
|
|
// Portable stand-in for a native 128-bit unsigned integer, stored as two
// 64-bit halves.
struct Unsigned128 {
  uint64_t lo;  // least-significant 64 bits
  uint64_t hi;  // most-significant 64 bits

  // Constructs the value zero.
  inline Unsigned128() : lo(0), hi(0) {
    // Checked inside a member function body because the class must be
    // complete before sizeof can be applied to it.
    static_assert(sizeof(Unsigned128) == 2 * sizeof(uint64_t),
                  "unexpected overhead in representation");
  }

  // Implicit widening from a 64-bit value; the upper half is zero.
  inline Unsigned128(uint64_t lower) : lo(lower), hi(0) {}

  // Constructs from explicit lower and upper halves.
  inline Unsigned128(uint64_t lower, uint64_t upper) : lo(lower), hi(upper) {}

  // Convert to any integer 64 bits or less. Only the lower half is
  // consulted; the upper half is discarded. Declared const so that const
  // values (and const references) can be converted too.
  template <typename T,
            typename = std::enable_if_t<std::is_integral_v<T> &&
                                        sizeof(T) <= sizeof(uint64_t)> >
  explicit operator T() const {
    return static_cast<T>(lo);
  }
};
|
|
|
|
|
|
|
|
// Left shift. The shift count is reduced modulo 128; bits moved past the top
// of the upper half are dropped.
inline Unsigned128 operator<<(const Unsigned128& lhs, unsigned shift) {
  const unsigned amount = shift & 127;
  if (amount >= 64) {
    // Only bits from the lower half can survive, and they land in the upper
    // half.
    return Unsigned128(uint64_t{0}, lhs.lo << (amount & 63));
  }
  // Bits of the lower half that cross into the upper half. Splitting the
  // right shift into ">> 1 >> (63 - amount)" keeps every shift count below
  // 64 and makes amount == 0 carry nothing, without an extra branch.
  const uint64_t carry = lhs.lo >> 1 >> (63 - amount);
  return Unsigned128(lhs.lo << amount, carry | (lhs.hi << amount));
}
|
|
|
|
|
|
|
|
// In-place left shift; returns the updated left operand.
inline Unsigned128& operator<<=(Unsigned128& lhs, unsigned shift) {
  return lhs = lhs << shift;
}
|
|
|
|
|
|
|
|
// Logical right shift. The shift count is reduced modulo 128; vacated upper
// bits are filled with zeros.
inline Unsigned128 operator>>(const Unsigned128& lhs, unsigned shift) {
  const unsigned amount = shift & 127;
  if (amount >= 64) {
    // Only bits from the upper half can survive, and they land in the lower
    // half.
    return Unsigned128(lhs.hi >> (amount & 63), uint64_t{0});
  }
  // Bits of the upper half that cross into the lower half. Splitting the
  // left shift into "<< 1 << (63 - amount)" keeps every shift count below
  // 64 and makes amount == 0 carry nothing.
  const uint64_t carry = lhs.hi << 1 << (63 - amount);
  return Unsigned128(carry | (lhs.lo >> amount), lhs.hi >> amount);
}
|
|
|
|
|
|
|
|
// In-place right shift; returns the updated left operand.
inline Unsigned128& operator>>=(Unsigned128& lhs, unsigned shift) {
  return lhs = lhs >> shift;
}
|
|
|
|
|
|
|
|
// Bitwise AND, applied independently to each 64-bit half.
inline Unsigned128 operator&(const Unsigned128& lhs, const Unsigned128& rhs) {
  const uint64_t low = lhs.lo & rhs.lo;
  const uint64_t high = lhs.hi & rhs.hi;
  return Unsigned128(low, high);
}
|
|
|
|
|
|
|
|
// In-place bitwise AND; returns the updated left operand.
inline Unsigned128& operator&=(Unsigned128& lhs, const Unsigned128& rhs) {
  lhs.lo &= rhs.lo;
  lhs.hi &= rhs.hi;
  return lhs;
}
|
|
|
|
|
|
|
|
// Bitwise OR, applied independently to each 64-bit half.
inline Unsigned128 operator|(const Unsigned128& lhs, const Unsigned128& rhs) {
  const uint64_t low = lhs.lo | rhs.lo;
  const uint64_t high = lhs.hi | rhs.hi;
  return Unsigned128(low, high);
}
|
|
|
|
|
|
|
|
// In-place bitwise OR; returns the updated left operand.
inline Unsigned128& operator|=(Unsigned128& lhs, const Unsigned128& rhs) {
  lhs.lo |= rhs.lo;
  lhs.hi |= rhs.hi;
  return lhs;
}
|
|
|
|
|
|
|
|
// Bitwise XOR, applied independently to each 64-bit half.
inline Unsigned128 operator^(const Unsigned128& lhs, const Unsigned128& rhs) {
  const uint64_t low = lhs.lo ^ rhs.lo;
  const uint64_t high = lhs.hi ^ rhs.hi;
  return Unsigned128(low, high);
}
|
|
|
|
|
|
|
|
// In-place bitwise XOR; returns the updated left operand.
inline Unsigned128& operator^=(Unsigned128& lhs, const Unsigned128& rhs) {
  lhs.lo ^= rhs.lo;
  lhs.hi ^= rhs.hi;
  return lhs;
}
|
|
|
|
|
|
|
|
// Bitwise complement of all 128 bits.
inline Unsigned128 operator~(const Unsigned128& v) {
  const uint64_t flipped_low = ~v.lo;
  const uint64_t flipped_high = ~v.hi;
  return Unsigned128(flipped_low, flipped_high);
}
|
|
|
|
|
|
|
|
// Two values are equal when neither half differs.
inline bool operator==(const Unsigned128& lhs, const Unsigned128& rhs) {
  return !(lhs.lo != rhs.lo || lhs.hi != rhs.hi);
}
|
|
|
|
|
|
|
|
// Two values differ when at least one half differs.
inline bool operator!=(const Unsigned128& lhs, const Unsigned128& rhs) {
  return !(lhs.lo == rhs.lo && lhs.hi == rhs.hi);
}
|
|
|
|
|
|
|
|
// Greater-than: the upper halves decide unless they tie, in which case the
// lower halves decide.
inline bool operator>(const Unsigned128& lhs, const Unsigned128& rhs) {
  if (lhs.hi != rhs.hi) {
    return lhs.hi > rhs.hi;
  }
  return lhs.lo > rhs.lo;
}
|
|
|
|
|
|
|
|
// Less-than: the upper halves decide unless they tie, in which case the
// lower halves decide.
inline bool operator<(const Unsigned128& lhs, const Unsigned128& rhs) {
  if (lhs.hi != rhs.hi) {
    return lhs.hi < rhs.hi;
  }
  return lhs.lo < rhs.lo;
}
|
|
|
|
|
|
|
|
// Greater-or-equal: the upper halves decide unless they tie, in which case
// the lower halves decide (a tie there counts as true).
inline bool operator>=(const Unsigned128& lhs, const Unsigned128& rhs) {
  if (lhs.hi != rhs.hi) {
    return lhs.hi > rhs.hi;
  }
  return lhs.lo >= rhs.lo;
}
|
|
|
|
|
|
|
|
// Less-or-equal: the upper halves decide unless they tie, in which case the
// lower halves decide (a tie there counts as true).
inline bool operator<=(const Unsigned128& lhs, const Unsigned128& rhs) {
  if (lhs.hi != rhs.hi) {
    return lhs.hi < rhs.hi;
  }
  return lhs.lo <= rhs.lo;
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
// Returns the least-significant 64 bits of v.
inline uint64_t Lower64of128(Unsigned128 v) {
#ifdef HAVE_UINT128_EXTENSION
  // The narrowing cast keeps only the low 64 bits.
  const uint64_t low_half = static_cast<uint64_t>(v);
#else
  const uint64_t low_half = v.lo;
#endif
  return low_half;
}
|
|
|
|
|
|
|
|
// Returns the most-significant 64 bits of v.
inline uint64_t Upper64of128(Unsigned128 v) {
#ifdef HAVE_UINT128_EXTENSION
  // Move the upper half down, then narrow.
  const uint64_t high_half = static_cast<uint64_t>(v >> 64);
#else
  const uint64_t high_half = v.hi;
#endif
  return high_half;
}
|
|
|
|
|
|
|
|
// This generally compiles down to a single fast instruction on 64-bit.
|
|
|
|
// This doesn't really make sense as operator* because it's not a
|
|
|
|
// general 128x128 multiply and provides more output than 64x64 multiply.
|
|
|
|
inline Unsigned128 Multiply64to128(uint64_t a, uint64_t b) {
|
|
|
|
#ifdef HAVE_UINT128_EXTENSION
|
|
|
|
return Unsigned128{a} * Unsigned128{b};
|
|
|
|
#else
|
|
|
|
// Full decomposition
|
|
|
|
// NOTE: GCC seems to fully understand this code as 64-bit x 64-bit
|
|
|
|
// -> 128-bit multiplication and optimize it appropriately.
|
|
|
|
uint64_t tmp = uint64_t{b & 0xffffFFFF} * uint64_t{a & 0xffffFFFF};
|
|
|
|
uint64_t lower = tmp & 0xffffFFFF;
|
|
|
|
tmp >>= 32;
|
|
|
|
tmp += uint64_t{b & 0xffffFFFF} * uint64_t{a >> 32};
|
|
|
|
// Avoid overflow: first add lower 32 of tmp2, and later upper 32
|
|
|
|
uint64_t tmp2 = uint64_t{b >> 32} * uint64_t{a & 0xffffFFFF};
|
|
|
|
tmp += tmp2 & 0xffffFFFF;
|
|
|
|
lower |= tmp << 32;
|
|
|
|
tmp >>= 32;
|
|
|
|
tmp += tmp2 >> 32;
|
|
|
|
tmp += uint64_t{b >> 32} * uint64_t{a >> 32};
|
|
|
|
return Unsigned128(lower, tmp);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
template <>
|
|
|
|
inline Unsigned128 BottomNBits(Unsigned128 v, int nbits) {
|
|
|
|
if (nbits < 64) {
|
|
|
|
return BottomNBits(Lower64of128(v), nbits);
|
|
|
|
} else {
|
|
|
|
return (Unsigned128{BottomNBits(Upper64of128(v), nbits - 64)} << 64) |
|
|
|
|
Lower64of128(v);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
template <>
|
|
|
|
inline int FloorLog2(Unsigned128 v) {
|
|
|
|
if (Upper64of128(v) == 0) {
|
|
|
|
return FloorLog2(Lower64of128(v));
|
|
|
|
} else {
|
|
|
|
return FloorLog2(Upper64of128(v)) + 64;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
template <>
|
|
|
|
inline int CountTrailingZeroBits(Unsigned128 v) {
|
|
|
|
if (Lower64of128(v) != 0) {
|
|
|
|
return CountTrailingZeroBits(Lower64of128(v));
|
|
|
|
} else {
|
|
|
|
return CountTrailingZeroBits(Upper64of128(v)) + 64;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
template <>
|
|
|
|
inline int BitsSetToOne(Unsigned128 v) {
|
|
|
|
return BitsSetToOne(Lower64of128(v)) + BitsSetToOne(Upper64of128(v));
|
|
|
|
}
|
|
|
|
|
|
|
|
template <>
|
|
|
|
inline int BitParity(Unsigned128 v) {
|
Refine Ribbon configuration, improve testing, add Homogeneous (#7879)
Summary:
This change only affects non-schema-critical aspects of the production candidate Ribbon filter. Specifically, it refines choice of internal configuration parameters based on inputs. The changes are minor enough that the schema tests in bloom_test, some of which depend on this, are unaffected. There are also some minor optimizations and refactorings.
This would be a schema change for "smash" Ribbon, to fix some known issues with small filters, but "smash" Ribbon is not accessible in public APIs. Unit test CompactnessAndBacktrackAndFpRate updated to test small and medium-large filters. Run with --thoroughness=100 or so for much better detection power (not appropriate for continuous regression testing).
Homogenous Ribbon:
This change adds internally a Ribbon filter variant we call Homogeneous Ribbon, in collaboration with Stefan Walzer. The expected "result" value for every key is zero, instead of computed from a hash. Entropy for queries not to be false positives comes from free variables ("overhead") in the solution structure, which are populated pseudorandomly. Construction is slightly faster for not tracking result values, and never fails. Instead, FP rate can jump up whenever and whereever entries are packed too tightly. For small structures, we can choose overhead to make this FP rate jump unlikely, as seen in updated unit test CompactnessAndBacktrackAndFpRate.
Unlike standard Ribbon, Homogeneous Ribbon seems to scale to arbitrary number of keys when accepting an FP rate penalty for small pockets of high FP rate in the structure. For example, 64-bit ribbon with 8 solution columns and 10% allocated space overhead for slots seems to achieve about 10.5% space overhead vs. information-theoretic minimum based on its observed FP rate with expected pockets of degradation. (FP rate is close to 1/256.) If targeting a higher FP rate with fewer solution columns, Homogeneous Ribbon can be even more space efficient, because the penalty from degradation is relatively smaller. If targeting a lower FP rate, Homogeneous Ribbon is less space efficient, as more allocated overhead is needed to keep the FP rate impact of degradation relatively under control. The new OptimizeHomogAtScale tool in ribbon_test helps to find these optimal allocation overheads for different numbers of solution columns. And Ribbon widths, with 128-bit Ribbon apparently cutting space overheads in half vs. 64-bit.
Other misc item specifics:
* Ribbon APIs in util/ribbon_config.h now provide configuration data for not just 5% construction failure rate (95% success), but also 50% and 0.1%.
* Note that the Ribbon structure does not exhibit "threshold" behavior as standard Xor filter does, so there is a roughly fixed space penalty to cut construction failure rate in half. Thus, there isn't really an "almost sure" setting.
* Although we can extrapolate settings for large filters, we don't have a good formula for configuring smaller filters (< 2^17 slots or so), and efforts to summarize with a formula have failed. Thus, small data is hard-coded from updated FindOccupancy tool.
* Enhances ApproximateNumEntries for public API Ribbon using more precise data (new API GetNumToAdd), thus a more accurate but not perfect reversal of CalculateSpace. (bloom_test updated to expect the greater precision)
* Move EndianSwapValue from coding.h to coding_lean.h to keep Ribbon code easily transferable from RocksDB
* Add some missing 'const' to member functions
* Small optimization to 128-bit BitParity
* Small refactoring of BandingStorage in ribbon_alg.h to support Homogeneous Ribbon
* CompactnessAndBacktrackAndFpRate now has an "expand" test: on construction failure, a possible alternative to re-seeding hash functions is simply to increase the number of slots (allocated space overhead) and try again with essentially the same hash values. (Start locations will be different roundings of the same scaled hash values--because fastrange not mod.) This seems to be as effective or more effective than re-seeding, as long as we increase the number of slots (m) by roughly m += m/w where w is the Ribbon width. This way, there is effectively an expansion by one slot for each ribbon-width window in the banding. (This approach assumes that getting "bad data" from your hash function is as unlikely as it naturally should be, e.g. no adversary.)
* 32-bit and 16-bit Ribbon configurations are added to ribbon_test for understanding their behavior, e.g. with FindOccupancy. They are not considered useful at this time and not tested with CompactnessAndBacktrackAndFpRate.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7879
Test Plan: unit test updates included
Reviewed By: jay-zhuang
Differential Revision: D26371245
Pulled By: pdillinger
fbshipit-source-id: da6600d90a3785b99ad17a88b2a3027710b4ea3a
4 years ago
|
|
|
return BitParity(Lower64of128(v) ^ Upper64of128(v));
|
|
|
|
}
|
|
|
|
|
New stable, fixed-length cache keys (#9126)
Summary:
This change standardizes on a new 16-byte cache key format for
block cache (incl compressed and secondary) and persistent cache (but
not table cache and row cache).
The goal is a really fast cache key with practically ideal stability and
uniqueness properties without external dependencies (e.g. from FileSystem).
A fixed key size of 16 bytes should enable future optimizations to the
concurrent hash table for block cache, which is a heavy CPU user /
bottleneck, but there appears to be measurable performance improvement
even with no changes to LRUCache.
This change replaces a lot of disjointed and ugly code handling cache
keys with calls to a simple, clean new internal API (cache_key.h).
(Preserving the old cache key logic under an option would be very ugly
and likely negate the performance gain of the new approach. Complete
replacement carries some inherent risk, but I think that's acceptable
with sufficient analysis and testing.)
The scheme for encoding new cache keys is complicated but explained
in cache_key.cc.
Also: EndianSwapValue is moved to math.h to be next to other bit
operations. (Explains some new include "math.h".) ReverseBits operation
added and unit tests added to hash_test for both.
Fixes https://github.com/facebook/rocksdb/issues/7405 (presuming a root cause)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9126
Test Plan:
### Basic correctness
Several tests needed updates to work with the new functionality, mostly
because we are no longer relying on filesystem for stable cache keys
so table builders & readers need more context info to agree on cache
keys. This functionality is so core, a huge number of existing tests
exercise the cache key functionality.
### Performance
Create db with
`TEST_TMPDIR=/dev/shm ./db_bench -bloom_bits=10 -benchmarks=fillrandom -num=3000000 -partition_index_and_filters`
And test performance with
`TEST_TMPDIR=/dev/shm ./db_bench -readonly -use_existing_db -bloom_bits=10 -benchmarks=readrandom -num=3000000 -duration=30 -cache_index_and_filter_blocks -cache_size=250000 -threads=4`
using DEBUG_LEVEL=0 and simultaneous before & after runs.
Before ops/sec, avg over 100 runs: 121924
After ops/sec, avg over 100 runs: 125385 (+2.8%)
### Collision probability
I have built a tool, ./cache_bench -stress_cache_key to broadly simulate host-wide cache activity
over many months, by making some pessimistic simplifying assumptions:
* Every generated file has a cache entry for every byte offset in the file (contiguous range of cache keys)
* All of every file is cached for its entire lifetime
We use a simple table with skewed address assignment and replacement on address collision
to simulate files coming & going, with quite a variance (super-Poisson) in ages. Some output
with `./cache_bench -stress_cache_key -sck_keep_bits=40`:
```
Total cache or DBs size: 32TiB Writing 925.926 MiB/s or 76.2939TiB/day
Multiply by 9.22337e+18 to correct for simulation losses (but still assume whole file cached)
```
These come from default settings of 2.5M files per day of 32 MB each, and
`-sck_keep_bits=40` means that to represent a single file, we are only keeping 40 bits of
the 128-bit cache key. With file size of 2**25 contiguous keys (pessimistic), our simulation
is about 2**(128-40-25) or about 9 billion billion times more prone to collision than reality.
More default assumptions, relatively pessimistic:
* 100 DBs in same process (doesn't matter much)
* Re-open DB in same process (new session ID related to old session ID) on average
every 100 files generated
* Restart process (all new session IDs unrelated to old) 24 times per day
After enough data, we get a result at the end:
```
(keep 40 bits) 17 collisions after 2 x 90 days, est 10.5882 days between (9.76592e+19 corrected)
```
If we believe the (pessimistic) simulation and the mathematical generalization, we would need to run a billion machines all for 97 billion days to expect a cache key collision. To help verify that our generalization ("corrected") is robust, we can make our simulation more precise with `-sck_keep_bits=41` and `42`, which takes more running time to get enough data:
```
(keep 41 bits) 16 collisions after 4 x 90 days, est 22.5 days between (1.03763e+20 corrected)
(keep 42 bits) 19 collisions after 10 x 90 days, est 47.3684 days between (1.09224e+20 corrected)
```
The generalized prediction still holds. With the `-sck_randomize` option, we can see that we are beating "random" cache keys (except offsets still non-randomized) by a modest amount (roughly 20x less collision prone than random), which should make us reasonably comfortable even in "degenerate" cases:
```
197 collisions after 1 x 90 days, est 0.456853 days between (4.21372e+18 corrected)
```
I've run other tests to validate other conditions behave as expected, never behaving "worse than random" unless we start chopping off structured data.
Reviewed By: zhichao-cao
Differential Revision: D33171746
Pulled By: pdillinger
fbshipit-source-id: f16a57e369ed37be5e7e33525ace848d0537c88f
3 years ago
|
|
|
template <>
|
|
|
|
inline Unsigned128 EndianSwapValue(Unsigned128 v) {
|
|
|
|
return (Unsigned128{EndianSwapValue(Lower64of128(v))} << 64) |
|
|
|
|
EndianSwapValue(Upper64of128(v));
|
|
|
|
}
|
|
|
|
|
|
|
|
template <>
|
|
|
|
inline Unsigned128 ReverseBits(Unsigned128 v) {
|
|
|
|
return (Unsigned128{ReverseBits(Lower64of128(v))} << 64) |
|
|
|
|
ReverseBits(Upper64of128(v));
|
|
|
|
}
|
|
|
|
|
Derive cache keys from SST unique IDs (#10394)
Summary:
... so that cache keys can be derived from DB manifest data
before reading the file from storage--so that every part of the file
can potentially go in a persistent cache.
See updated comments in cache_key.cc for technical details. Importantly,
the new cache key encoding uses some fancy but efficient math to pack
data into the cache key without depending on the sizes of the various
pieces. This simplifies some existing code creating cache keys, like
cache warming before the file size is known.
This should provide us an essentially permanent mapping between SST
unique IDs and base cache keys, with the ability to "upgrade" SST
unique IDs (and thus cache keys) with new SST format_versions.
These cache keys are of similar, perhaps indistinguishable quality to
the previous generation. Before this change (see "corrected" days
between collision):
```
./cache_bench -stress_cache_key -sck_keep_bits=43
18 collisions after 2 x 90 days, est 10 days between (1.15292e+19 corrected)
```
After this change (keep 43 bits, up through 50, to validate "trajectory"
is ok on "corrected" days between collision):
```
19 collisions after 3 x 90 days, est 14.2105 days between (1.63836e+19 corrected)
16 collisions after 5 x 90 days, est 28.125 days between (1.6213e+19 corrected)
15 collisions after 7 x 90 days, est 42 days between (1.21057e+19 corrected)
15 collisions after 17 x 90 days, est 102 days between (1.46997e+19 corrected)
15 collisions after 49 x 90 days, est 294 days between (2.11849e+19 corrected)
15 collisions after 62 x 90 days, est 372 days between (1.34027e+19 corrected)
15 collisions after 53 x 90 days, est 318 days between (5.72858e+18 corrected)
15 collisions after 309 x 90 days, est 1854 days between (1.66994e+19 corrected)
```
However, the change does modify (probably weaken) the "guaranteed unique" promise from this
> SST files generated in a single process are guaranteed to have unique cache keys, unless/until number session ids * max file number = 2**86
to this (see https://github.com/facebook/rocksdb/issues/10388)
> With the DB id limitation, we only have nice guaranteed unique cache keys for files generated in a single process until biggest session_id_counter and offset_in_file reach combined 64 bits
I don't think this is a practical concern, though.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10394
Test Plan: unit tests updated, see simulation results above
Reviewed By: jay-zhuang
Differential Revision: D38667529
Pulled By: pdillinger
fbshipit-source-id: 49af3fe7f47e5b61162809a78b76c769fd519fba
2 years ago
|
|
|
template <>
|
|
|
|
inline Unsigned128 DownwardInvolution(Unsigned128 v) {
|
|
|
|
return (Unsigned128{DownwardInvolution(Upper64of128(v))} << 64) |
|
|
|
|
DownwardInvolution(Upper64of128(v) ^ Lower64of128(v));
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename A>
|
|
|
|
inline std::remove_reference_t<A> BitwiseAnd(A a, Unsigned128 b) {
|
|
|
|
static_assert(sizeof(A) <= sizeof(Unsigned128));
|
|
|
|
return static_cast<A>(a & b);
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename B>
|
|
|
|
inline std::remove_reference_t<B> BitwiseAnd(Unsigned128 a, B b) {
|
|
|
|
static_assert(sizeof(B) <= sizeof(Unsigned128));
|
|
|
|
return static_cast<B>(a & b);
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
struct IsUnsignedUpTo128
|
|
|
|
: std::integral_constant<bool, std::is_unsigned<T>::value ||
|
|
|
|
std::is_same<T, Unsigned128>::value> {};
|
|
|
|
|
|
|
|
// Writes value into the 16 bytes at dst as two EncodeFixed64 fields: the
// lower half first, then the upper half at offset 8.
inline void EncodeFixed128(char* dst, Unsigned128 value) {
  char* const low_dst = dst;
  char* const high_dst = dst + sizeof(uint64_t);
  EncodeFixed64(low_dst, Lower64of128(value));
  EncodeFixed64(high_dst, Upper64of128(value));
}
|
|
|
|
|
|
|
|
// Reads the 16 bytes at ptr as two DecodeFixed64 fields: the lower half at
// offset 0 and the upper half at offset 8.
inline Unsigned128 DecodeFixed128(const char* ptr) {
  const uint64_t low_word = DecodeFixed64(ptr);
  const uint64_t high_word = DecodeFixed64(ptr + 8);
  return (Unsigned128{high_word} << 64) | low_word;
}
|
|
|
|
|
|
|
|
// A version of EncodeFixed* for generic algorithms. Likely to be used
// with Unsigned128, so lives here for now.
//
// The primary template cannot be instantiated; each supported type is
// handled by an explicit specialization.
template <typename T>
inline void EncodeFixedGeneric(char* /*dst*/, T /*value*/) {
  // Why specializations rather than one generic loop: unfortunately, GCC
  // does not appear to optimize simple code such as
  //
  //   T ret_val = 0;
  //   for (size_t i = 0; i < sizeof(T); ++i) {
  //     ret_val |= (static_cast<T>(static_cast<unsigned char>(ptr[i]))
  //                 << (8 * i));
  //   }
  //   return ret_val;
  //
  // down to a trivial load on Intel. It does unroll the loop, and it does
  // optimize a manually unrolled version for specific sizes down to a
  // trivial load, but for unknown reasons it does not do both on this code.

  // So instead, we rely on specializations. The condition depends on T so
  // that it is only evaluated if this primary template gets instantiated.
  constexpr bool kAlwaysFalse = sizeof(T) == 0;
  static_assert(kAlwaysFalse, "No specialization provided for this type");
}
|
|
|
|
|
|
|
|
template <>
|
|
|
|
inline void EncodeFixedGeneric(char* dst, uint16_t value) {
|
|
|
|
return EncodeFixed16(dst, value);
|
|
|
|
}
|
|
|
|
template <>
|
|
|
|
inline void EncodeFixedGeneric(char* dst, uint32_t value) {
|
|
|
|
return EncodeFixed32(dst, value);
|
|
|
|
}
|
|
|
|
template <>
|
|
|
|
inline void EncodeFixedGeneric(char* dst, uint64_t value) {
|
|
|
|
return EncodeFixed64(dst, value);
|
|
|
|
}
|
|
|
|
template <>
|
|
|
|
inline void EncodeFixedGeneric(char* dst, Unsigned128 value) {
|
|
|
|
return EncodeFixed128(dst, value);
|
|
|
|
}
|
|
|
|
|
|
|
|
// A version of DecodeFixed* for generic algorithms.
|
|
|
|
template <typename T>
inline T DecodeFixedGeneric(const char* /*dst*/) {
  // The primary template cannot be instantiated; each supported type is
  // handled by an explicit specialization. The condition depends on T so
  // that it is only evaluated if this primary template gets instantiated.
  constexpr bool kAlwaysFalse = sizeof(T) == 0;
  static_assert(kAlwaysFalse, "No specialization provided for this type");
}
|
|
|
|
|
|
|
|
template <>
|
|
|
|
inline uint16_t DecodeFixedGeneric(const char* dst) {
|
|
|
|
return DecodeFixed16(dst);
|
|
|
|
}
|
|
|
|
template <>
|
|
|
|
inline uint32_t DecodeFixedGeneric(const char* dst) {
|
|
|
|
return DecodeFixed32(dst);
|
|
|
|
}
|
|
|
|
template <>
|
|
|
|
inline uint64_t DecodeFixedGeneric(const char* dst) {
|
|
|
|
return DecodeFixed64(dst);
|
|
|
|
}
|
|
|
|
template <>
|
|
|
|
inline Unsigned128 DecodeFixedGeneric(const char* dst) {
|
|
|
|
return DecodeFixed128(dst);
|
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|