Add new persistent 64-bit hash (#5984)

Summary:
For upcoming new SST filter implementations, we will use a new
64-bit hash function (XXH3 preview, slightly modified). This change
updates hash.{h,cc} for that change, adds unit tests, and out-of-lines
the implementations to keep hash.h as clean/small as possible.

In developing the unit tests, I discovered that the XXH3 preview always
returns zero for the empty string. Zero is problematic for some
algorithms (including an upcoming SST filter implementation) if it
occurs more often than at the "natural" rate, so it should not be
returned from trivial values using trivial seeds. I modified our fork
of XXH3 to return a modest hash of the seed for the empty string.

With hash function details out-of-lined from hash.h, it makes sense to
enable XXH_INLINE_ALL, so that direct calls to XXH64/XXH32/XXH3p
are inlined. To fix array-bounds warnings on some inline calls, I
injected some casts to uintptr_t in xxhash.cc. (Issue reported to Yann.)
Revised: Reverted the use of XXH_INLINE_ALL for now. Some Facebook
checks are unhappy about an #include of the xxhash.cc file. I would
fix that by renaming it to xxhash_cc.h, but to best preserve history I
want to do that in a separate commit (PR) from the uintptr_t casts.

Also updated filter_bench for this change, improving the performance
predictability of dry run hashing and adding support for 64-bit hash
(for upcoming new SST filter implementations, minor dead code in the
tool for now).
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5984

Differential Revision: D18246567

Pulled By: pdillinger

fbshipit-source-id: 6162fbf6381d63c8cc611dd7ec70e1ddc883fbb8
main
Peter Dillinger 5 years ago committed by Facebook Github Bot
parent 685e895652
commit 18f57f5ef8
  1. 3
      HISTORY.md
  2. 1
      table/format.cc
  3. 46
      util/filter_bench.cc
  4. 28
      util/hash.cc
  5. 47
      util/hash.h
  6. 202
      util/hash_test.cc
  7. 7
      util/xxh3p.h
  8. 16
      util/xxhash.cc

@ -9,6 +9,9 @@ file_creation_time of the oldest SST file in the DB.
### New Features ### New Features
* Universal compaction to support options.periodic_compaction_seconds. A full compaction will be triggered if any file is over the threshold. * Universal compaction to support options.periodic_compaction_seconds. A full compaction will be triggered if any file is over the threshold.
### Performance Improvements
* For 64-bit hashing, RocksDB is standardizing on a slightly modified preview version of XXH3. This function is now used for many non-persisted hashes, along with fastrange64() in place of the modulus operator, and some benchmarks show a slight improvement.
## 6.5.1 (10/16/2019) ## 6.5.1 (10/16/2019)
### Bug Fixes ### Bug Fixes
* Revert the feature "Merging iterator to avoid child iterator reseek for some cases (#5286)" since it might cause strange results when reseek happens with a different iterator upper bound. * Revert the feature "Merging iterator to avoid child iterator reseek for some cases (#5286)" since it might cause strange results when reseek happens with a different iterator upper bound.

@ -27,7 +27,6 @@
#include "util/crc32c.h" #include "util/crc32c.h"
#include "util/stop_watch.h" #include "util/stop_watch.h"
#include "util/string_util.h" #include "util/string_util.h"
#include "util/xxhash.h"
namespace rocksdb { namespace rocksdb {

@ -101,6 +101,8 @@ using rocksdb::FilterBitsBuilder;
using rocksdb::FilterBitsReader; using rocksdb::FilterBitsReader;
using rocksdb::FullFilterBlockReader; using rocksdb::FullFilterBlockReader;
using rocksdb::GetSliceHash; using rocksdb::GetSliceHash;
using rocksdb::GetSliceHash64;
using rocksdb::Lower32of64;
using rocksdb::ParsedFullFilterBlock; using rocksdb::ParsedFullFilterBlock;
using rocksdb::PlainTableBloomV1; using rocksdb::PlainTableBloomV1;
using rocksdb::Random32; using rocksdb::Random32;
@ -212,7 +214,7 @@ const char *TestModeToString(TestMode tm) {
// Do just enough to keep some data dependence for the // Do just enough to keep some data dependence for the
// compiler / CPU // compiler / CPU
static inline uint32_t NoHash(Slice &s) { static uint32_t DryRunNoHash(Slice &s) {
uint32_t sz = static_cast<uint32_t>(s.size()); uint32_t sz = static_cast<uint32_t>(s.size());
if (sz >= 4) { if (sz >= 4) {
return sz + s.data()[3]; return sz + s.data()[3];
@ -221,6 +223,15 @@ static inline uint32_t NoHash(Slice &s) {
} }
} }
// Dry-run stand-in that pays the cost of a 32-bit hash of the key.
// Same perf characteristics as GetSliceHash(): both it and BloomHash()
// forward to Hash() and differ only in the seed passed.
static uint32_t DryRunHash32(Slice &s) {
  const uint32_t hashed = BloomHash(s);
  return hashed;
}
// Dry-run stand-in that pays the cost of a 64-bit hash of the key. Only
// the low 32 bits are returned so the signature matches the other
// dry-run hash functions.
static uint32_t DryRunHash64(Slice &s) {
  const uint64_t full_hash = GetSliceHash64(s);
  return Lower32of64(full_hash);
}
struct FilterBench : public MockBlockBasedTableTester { struct FilterBench : public MockBlockBasedTableTester {
std::vector<KeyMaker> kms_; std::vector<KeyMaker> kms_;
std::vector<FilterInfo> infos_; std::vector<FilterInfo> infos_;
@ -427,6 +438,15 @@ double FilterBench::RandomQueryTest(uint32_t inside_threshold, bool dry_run,
info.false_positives_ = 0; info.false_positives_ = 0;
} }
auto dry_run_hash_fn = DryRunNoHash;
if (!FLAGS_net_includes_hashing) {
if (FLAGS_impl < 2 || FLAGS_use_plain_table_bloom) {
dry_run_hash_fn = DryRunHash32;
} else {
dry_run_hash_fn = DryRunHash64;
}
}
uint32_t num_infos = static_cast<uint32_t>(infos_.size()); uint32_t num_infos = static_cast<uint32_t>(infos_.size());
uint32_t dry_run_hash = 0; uint32_t dry_run_hash = 0;
uint64_t max_queries = uint64_t max_queries =
@ -504,11 +524,7 @@ double FilterBench::RandomQueryTest(uint32_t inside_threshold, bool dry_run,
if (dry_run) { if (dry_run) {
for (uint32_t i = 0; i < batch_size; ++i) { for (uint32_t i = 0; i < batch_size; ++i) {
batch_results[i] = true; batch_results[i] = true;
if (FLAGS_net_includes_hashing) { dry_run_hash += dry_run_hash_fn(batch_slices[i]);
dry_run_hash += NoHash(batch_slices[i]);
} else {
dry_run_hash ^= BloomHash(batch_slices[i]);
}
} }
} else { } else {
info.reader_->MayMatch(batch_size, batch_slice_ptrs.get(), info.reader_->MayMatch(batch_size, batch_slice_ptrs.get(),
@ -526,11 +542,7 @@ double FilterBench::RandomQueryTest(uint32_t inside_threshold, bool dry_run,
bool may_match; bool may_match;
if (FLAGS_use_plain_table_bloom) { if (FLAGS_use_plain_table_bloom) {
if (dry_run) { if (dry_run) {
if (FLAGS_net_includes_hashing) { dry_run_hash += dry_run_hash_fn(batch_slices[i]);
dry_run_hash += NoHash(batch_slices[i]);
} else {
dry_run_hash ^= GetSliceHash(batch_slices[i]);
}
may_match = true; may_match = true;
} else { } else {
uint32_t hash = GetSliceHash(batch_slices[i]); uint32_t hash = GetSliceHash(batch_slices[i]);
@ -538,11 +550,7 @@ double FilterBench::RandomQueryTest(uint32_t inside_threshold, bool dry_run,
} }
} else if (FLAGS_use_full_block_reader) { } else if (FLAGS_use_full_block_reader) {
if (dry_run) { if (dry_run) {
if (FLAGS_net_includes_hashing) { dry_run_hash += dry_run_hash_fn(batch_slices[i]);
dry_run_hash += NoHash(batch_slices[i]);
} else {
dry_run_hash ^= BloomHash(batch_slices[i]);
}
may_match = true; may_match = true;
} else { } else {
may_match = info.full_block_reader_->KeyMayMatch( may_match = info.full_block_reader_->KeyMayMatch(
@ -555,11 +563,7 @@ double FilterBench::RandomQueryTest(uint32_t inside_threshold, bool dry_run,
} }
} else { } else {
if (dry_run) { if (dry_run) {
if (FLAGS_net_includes_hashing) { dry_run_hash += dry_run_hash_fn(batch_slices[i]);
dry_run_hash += NoHash(batch_slices[i]);
} else {
dry_run_hash ^= BloomHash(batch_slices[i]);
}
may_match = true; may_match = true;
} else { } else {
may_match = info.reader_->MayMatch(batch_slices[i]); may_match = info.reader_->MayMatch(batch_slices[i]);

@ -11,11 +11,14 @@
#include "util/coding.h" #include "util/coding.h"
#include "util/hash.h" #include "util/hash.h"
#include "util/util.h" #include "util/util.h"
#include "util/xxhash.h"
namespace rocksdb { namespace rocksdb {
uint32_t Hash(const char* data, size_t n, uint32_t seed) { uint32_t Hash(const char* data, size_t n, uint32_t seed) {
// Similar to murmur hash // MurmurHash1 - fast but mediocre quality
// https://github.com/aappleby/smhasher/wiki/MurmurHash1
//
const uint32_t m = 0xc6a4a793; const uint32_t m = 0xc6a4a793;
const uint32_t r = 24; const uint32_t r = 24;
const char* limit = data + n; const char* limit = data + n;
@ -54,4 +57,27 @@ uint32_t Hash(const char* data, size_t n, uint32_t seed) {
return h; return h;
} }
// 64-bit hashing is standardized on a preview release of XXH3, which was
// the best option available at the time of standardizing.
//
// In testing (mostly Intel Skylake), this hash function is much more
// thorough than the 32-bit Hash() and is almost universally faster.
// Hash() only seems faster when passing runtime-sized keys of the same
// small size (less than about 24 bytes) thousands of times in a row; this
// seems to allow the branch predictor to work some magic. XXH3's speed is
// much less dependent on branch prediction.
//
// Hashing with a prefix extractor is potentially a common case of
// hashing objects of small, predictable size. We could consider
// bundling hash functions specialized for particular lengths with
// the prefix extractors.
uint64_t Hash64(const char* data, size_t n, uint64_t seed) {
  const uint64_t result = XXH3p_64bits_withSeed(data, n, seed);
  return result;
}
// Seedless overload: calls the unseeded XXH3p entry point, which gives
// the same result as passing seed = 0.
uint64_t Hash64(const char* data, size_t n) {
  const uint64_t result = XXH3p_64bits(data, n);
  return result;
}
} // namespace rocksdb } // namespace rocksdb

@ -7,41 +7,76 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors. // found in the LICENSE file. See the AUTHORS file for names of contributors.
// //
// Common hash functions with convenient interfaces. // Common hash functions with convenient interfaces. If hashing a
// statically-sized input in a performance-critical context, consider
// calling a specific hash implementation directly, such as
// XXH3p_64bits from xxhash.h.
//
// Since this is a very common header, implementation details are kept
// out-of-line. Out-of-lining also aids in tracking the time spent in
// hashing functions. Inlining is of limited benefit for runtime-sized
// hash inputs.
#pragma once #pragma once
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include "rocksdb/slice.h" #include "rocksdb/slice.h"
#include "util/xxhash.h"
namespace rocksdb { namespace rocksdb {
// Stable/persistent 64-bit hash. Higher quality and generally faster than
// Hash(), especially for inputs > 24 bytes.
extern uint64_t Hash64(const char* data, size_t n, uint64_t seed);
// Specific optimization without seed (same as seed = 0)
extern uint64_t Hash64(const char* data, size_t n);
// Non-persistent hash. Must only used for in-memory data structure. // Non-persistent hash. Must only used for in-memory data structure.
// The hash results are thus applicable to change. (Thus, it rarely makes // The hash results are thus applicable to change. (Thus, it rarely makes
// sense to specify a seed for this function.) // sense to specify a seed for this function.)
inline uint64_t NPHash64(const char* data, size_t n, uint32_t seed = 0) { inline uint64_t NPHash64(const char* data, size_t n, uint32_t seed) {
// XXH3 currently experimental, but generally faster than other quality // Currently same as Hash64
// 64-bit hash functions. return Hash64(data, n, seed);
return XXH3p_64bits_withSeed(data, n, seed); }
// Specific optimization without seed (same as seed = 0)
inline uint64_t NPHash64(const char* data, size_t n) {
// Currently same as Hash64
return Hash64(data, n);
} }
// Stable/persistent 32-bit hash. Moderate quality and high speed on
// small inputs.
// TODO: consider rename to Hash32
extern uint32_t Hash(const char* data, size_t n, uint32_t seed); extern uint32_t Hash(const char* data, size_t n, uint32_t seed);
// TODO: consider rename to LegacyBloomHash32
inline uint32_t BloomHash(const Slice& key) { inline uint32_t BloomHash(const Slice& key) {
return Hash(key.data(), key.size(), 0xbc9f1d34); return Hash(key.data(), key.size(), 0xbc9f1d34);
} }
// Convenience wrapper: the seedless Hash64() applied to the bytes of a
// Slice.
inline uint64_t GetSliceHash64(const Slice& key) {
  const size_t length = key.size();
  return Hash64(key.data(), length);
}
inline uint64_t GetSliceNPHash64(const Slice& s) { inline uint64_t GetSliceNPHash64(const Slice& s) {
return NPHash64(s.data(), s.size()); return NPHash64(s.data(), s.size());
} }
// TODO: consider rename to GetSliceHash32
inline uint32_t GetSliceHash(const Slice& s) { inline uint32_t GetSliceHash(const Slice& s) {
return Hash(s.data(), s.size(), 397); return Hash(s.data(), s.size(), 397);
} }
// Helper for splitting up a 64-bit hash: returns the most significant
// 32 bits of v.
inline uint32_t Upper32of64(uint64_t v) {
  const uint64_t high_bits = v >> 32;
  return static_cast<uint32_t>(high_bits);
}
// Helper for splitting up a 64-bit hash: returns the least significant
// 32 bits of v.
inline uint32_t Lower32of64(uint64_t v) {
  const uint64_t low_bits = v & 0xffffffffu;
  return static_cast<uint32_t>(low_bits);
}
// std::hash compatible interface. // std::hash compatible interface.
// TODO: consider rename to SliceHasher32
struct SliceHasher { struct SliceHasher {
uint32_t operator()(const Slice& s) const { return GetSliceHash(s); } uint32_t operator()(const Slice& s) const { return GetSliceHash(s); }
}; };

@ -7,16 +7,25 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors. // found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <cstring>
#include <vector> #include <vector>
#include "test_util/testharness.h" #include "test_util/testharness.h"
#include "util/coding.h"
#include "util/hash.h" #include "util/hash.h"
using rocksdb::EncodeFixed32;
using rocksdb::GetSliceHash64;
using rocksdb::Hash;
using rocksdb::Hash64;
using rocksdb::Lower32of64;
using rocksdb::Upper32of64;
using rocksdb::Slice;
// The hash algorithm is part of the file format, for example for the Bloom // The hash algorithm is part of the file format, for example for the Bloom
// filters. Test that the hash values are stable for a set of random strings of // filters. Test that the hash values are stable for a set of random strings of
// varying lengths. // varying lengths.
TEST(HashTest, Values) { TEST(HashTest, Values) {
using rocksdb::Hash;
constexpr uint32_t kSeed = 0xbc9f1d34; // Same as BloomHash. constexpr uint32_t kSeed = 0xbc9f1d34; // Same as BloomHash.
EXPECT_EQ(Hash("", 0, kSeed), 3164544308u); EXPECT_EQ(Hash("", 0, kSeed), 3164544308u);
@ -70,6 +79,192 @@ TEST(HashTest, Values) {
3382479516u); 3382479516u);
} }
// Sanity checks for Hash64 over runs of a single repeated byte, at every
// length from 0 through 1000: agreement with GetSliceHash64, lossless
// splitting into 32-bit halves, and sensitivity to both the seed and the
// input length.
TEST(HashTest, Hash64Misc) {
  constexpr uint32_t kSeed = 0;  // Same as GetSliceHash64
  constexpr size_t kMaxSize = 1000;
  constexpr size_t kMaxShrink = 30;

  for (char fill : {'\0', 'a', '1', '\xff'}) {
    const std::string buf(kMaxSize, fill);
    const char* const bytes = buf.data();

    for (size_t len = 0; len <= kMaxSize; ++len) {
      const uint64_t h = Hash64(bytes, len, kSeed);

      // The seedless Slice wrapper must agree with an explicit zero seed
      EXPECT_EQ(h, GetSliceHash64(Slice(bytes, len)));

      // The halves occupy disjoint bits, so |, + and ^ must all rebuild
      // the original hash
      const uint64_t upper = uint64_t{Upper32of64(h)} << 32;
      const uint32_t lower = Lower32of64(h);
      EXPECT_EQ(h, upper | lower);
      EXPECT_EQ(h, upper + lower);
      EXPECT_EQ(h, upper ^ lower);

      // Each single-bit seed changes the hash (with high probability)
      for (int bit = 0; bit < 64; ++bit) {
        const uint64_t var_seed = uint64_t{1} << bit;
        EXPECT_NE(h, Hash64(bytes, len, var_seed));
      }

      // Dropping 1..30 trailing bytes (as far as the input allows)
      // changes the hash (with high probability)
      const size_t shrink_limit = len < kMaxShrink ? len : kMaxShrink;
      for (size_t cut = 1; cut <= shrink_limit; ++cut) {
        EXPECT_NE(h, Hash64(bytes, len - cut, kSeed));
      }
    }
  }
}
// "Trivial" inputs must not yield "trivial" hashes: neither 32-bit half
// of Hash64 may be zero for the empty string under a range of small
// seeds, nor for short inputs under the standard seed.
TEST(HashTest, Hash64Trivial) {
  // Set to true for a wider sweep; too slow for regression testing
  constexpr bool kThorough = false;

  // Empty string with seeds 0 .. kSeedCount - 1
  constexpr uint64_t kSeedCount = kThorough ? 0x1000000 : 0x10000;
  for (uint64_t seed = 0; seed != kSeedCount; ++seed) {
    const uint64_t h = Hash64("", 0, seed);
    EXPECT_NE(Lower32of64(h), 0u);
    EXPECT_NE(Upper32of64(h), 0u);
  }

  // Short inputs with the standard seed: for each length, encode every
  // value below 2^(8 * len) with EncodeFixed32 and hash only the first
  // `len` bytes of the buffer
  constexpr uint32_t kSeed = 0;  // Same as GetSliceHash64
  constexpr int kMaxLen = kThorough ? 3 : 2;
  char buf[4];
  for (int len = 1; len <= kMaxLen; ++len) {
    const uint32_t num_inputs = uint32_t{1} << (len * 8);
    for (uint32_t value = 0; value < num_inputs; ++value) {
      EncodeFixed32(buf, value);
      const uint64_t h = Hash64(buf, len, kSeed);
      EXPECT_NE(Lower32of64(h), 0u);
      EXPECT_NE(Upper32of64(h), 0u);
    }
  }
}
// Golden-value test: pins the exact Hash64 output (seed 0) for the empty
// string and for four random strings at each length from 1 through 10.
// Hash64 is declared as a stable/persistent hash, so these expected
// values must never change; do not "fix" this test by updating them.
TEST(HashTest, Hash64SmallValueSchema) {
constexpr uint32_t kSeed = 0; // Same as GetSliceHash64
// Length 0
EXPECT_EQ(Hash64("", 0, kSeed), uint64_t{5999572062939766020u});
// Length 1
EXPECT_EQ(Hash64("\x08", 1, kSeed), uint64_t{583283813901344696u});
EXPECT_EQ(Hash64("\x17", 1, kSeed), uint64_t{16175549975585474943u});
EXPECT_EQ(Hash64("\x9a", 1, kSeed), uint64_t{16322991629225003903u});
EXPECT_EQ(Hash64("\x1c", 1, kSeed), uint64_t{13269285487706833447u});
// Length 2
EXPECT_EQ(Hash64("\x4d\x76", 2, kSeed), uint64_t{6859542833406258115u});
EXPECT_EQ(Hash64("\x52\xd5", 2, kSeed), uint64_t{4919611532550636959u});
EXPECT_EQ(Hash64("\x91\xf7", 2, kSeed), uint64_t{14199427467559720719u});
EXPECT_EQ(Hash64("\xd6\x27", 2, kSeed), uint64_t{12292689282614532691u});
// Length 3
EXPECT_EQ(Hash64("\x30\x46\x0b", 3, kSeed), uint64_t{11404699285340020889u});
EXPECT_EQ(Hash64("\x56\xdc\xd6", 3, kSeed), uint64_t{12404347133785524237u});
EXPECT_EQ(Hash64("\xd4\x52\x33", 3, kSeed), uint64_t{15853805298481534034u});
EXPECT_EQ(Hash64("\x6a\xb5\xf4", 3, kSeed), uint64_t{16863488758399383382u});
// Length 4
EXPECT_EQ(Hash64("\x67\x53\x81\x1c", 4, kSeed),
uint64_t{9010661983527562386u});
EXPECT_EQ(Hash64("\x69\xb8\xc0\x88", 4, kSeed),
uint64_t{6611781377647041447u});
EXPECT_EQ(Hash64("\x1e\x84\xaf\x2d", 4, kSeed),
uint64_t{15290969111616346501u});
EXPECT_EQ(Hash64("\x46\xdc\x54\xbe", 4, kSeed),
uint64_t{7063754590279313623u});
// Length 5
EXPECT_EQ(Hash64("\xd0\x7a\x6e\xea\x56", 5, kSeed),
uint64_t{6384167718754869899u});
EXPECT_EQ(Hash64("\x86\x83\xd5\xa4\xd8", 5, kSeed),
uint64_t{16874407254108011067u});
EXPECT_EQ(Hash64("\xb7\x46\xbb\x77\xce", 5, kSeed),
uint64_t{16809880630149135206u});
EXPECT_EQ(Hash64("\x6c\xa8\xbc\xe5\x99", 5, kSeed),
uint64_t{1249038833153141148u});
// Length 6
EXPECT_EQ(Hash64("\x5c\x5e\xe1\xa0\x73\x81", 6, kSeed),
uint64_t{17358142495308219330u});
EXPECT_EQ(Hash64("\x08\x5d\x73\x1c\xe5\x2e", 6, kSeed),
uint64_t{4237646583134806322u});
EXPECT_EQ(Hash64("\x42\xfb\xf2\x52\xb4\x10", 6, kSeed),
uint64_t{4373664924115234051u});
EXPECT_EQ(Hash64("\x73\xe1\xff\x56\x9c\xce", 6, kSeed),
uint64_t{12012981210634596029u});
// Length 7
EXPECT_EQ(Hash64("\x5c\xbe\x97\x75\x54\x9a\x52", 7, kSeed),
uint64_t{5716522398211028826u});
EXPECT_EQ(Hash64("\x16\x82\x39\x49\x88\x2b\x36", 7, kSeed),
uint64_t{15604531309862565013u});
EXPECT_EQ(Hash64("\x59\x77\xf0\xa7\x24\xf4\x78", 7, kSeed),
uint64_t{8601330687345614172u});
EXPECT_EQ(Hash64("\xd3\xa5\x7c\x0e\xc0\x02\x07", 7, kSeed),
uint64_t{8088079329364056942u});
// Length 8
EXPECT_EQ(Hash64("\x31\x1b\x98\x75\x96\x22\xd3\x9a", 8, kSeed),
uint64_t{9844314944338447628u});
EXPECT_EQ(Hash64("\x38\xd6\xf7\x28\x20\xb4\x8a\xe9", 8, kSeed),
uint64_t{10973293517982163143u});
EXPECT_EQ(Hash64("\xbb\x18\x5d\xf4\x12\x03\xf7\x99", 8, kSeed),
uint64_t{9986007080564743219u});
EXPECT_EQ(Hash64("\x80\xd4\x3b\x3b\xae\x22\xa2\x78", 8, kSeed),
uint64_t{1729303145008254458u});
// Length 9
EXPECT_EQ(Hash64("\x1a\xb5\xd0\xfe\xab\xc3\x61\xb2\x99", 9, kSeed),
uint64_t{13253403748084181481u});
EXPECT_EQ(Hash64("\x8e\x4a\xc3\x18\x20\x2f\x06\xe6\x3c", 9, kSeed),
uint64_t{7768754303876232188u});
EXPECT_EQ(Hash64("\xb6\xc0\xdd\x05\x3f\xc4\x86\x4c\xef", 9, kSeed),
uint64_t{12439346786701492u});
EXPECT_EQ(Hash64("\x9a\x5f\x78\x0d\xaf\x50\xe1\x1f\x55", 9, kSeed),
uint64_t{10841838338450144690u});
// Length 10
EXPECT_EQ(Hash64("\x22\x6f\x39\x1f\xf8\xdd\x4f\x52\x17\x94", 10, kSeed),
uint64_t{12883919702069153152u});
EXPECT_EQ(Hash64("\x32\x89\x2a\x75\x48\x3a\x4a\x02\x69\xdd", 10, kSeed),
uint64_t{12692903507676842188u});
EXPECT_EQ(Hash64("\x06\x92\x5c\xf4\x88\x0e\x7e\x68\x38\x3e", 10, kSeed),
uint64_t{6540985900674032620u});
EXPECT_EQ(Hash64("\xbd\x2c\x63\x38\xbf\xe9\x78\xb7\xbf\x15", 10, kSeed),
uint64_t{10551812464348219044u});
}
// Builds a compact fingerprint of GetSliceHash64 over a repeating pattern.
//
// `repeat` is concatenated with itself until at least `limit` bytes are
// available. Then, for each prefix length 0 .. limit - 1, the hash of that
// prefix is reduced mod 61 and mapped to one character of a 61-symbol
// alphabet. The returned string therefore has exactly `limit` characters.
//
// `repeat` must be non-empty whenever `limit` > 0; otherwise the fill loop
// never makes progress.
std::string Hash64TestDescriptor(const char *repeat, size_t limit) {
  static const char kMod61Alphabet[] =
      "abcdefghijklmnopqrstuvwxyz123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

  std::string pattern;
  for (; pattern.size() < limit; pattern += repeat) {
  }

  std::string descriptor;
  for (size_t prefix_len = 0; prefix_len != limit; ++prefix_len) {
    const uint64_t h = GetSliceHash64(Slice(pattern.data(), prefix_len));
    const size_t symbol = static_cast<size_t>(h % 61);
    descriptor.push_back(kMod61Alphabet[symbol]);
  }
  return descriptor;
}
// XXH3p changes its algorithm for various sizes up through 250 bytes, so
// we need to check the stability of larger sizes also. Rather than pinning
// hundreds of 64-bit values, each check compares a Hash64TestDescriptor
// string, which holds one character per hashed prefix of the repeated
// pattern.
TEST(HashTest, Hash64LargeValueSchema) {
// Each of these derives a "descriptor" from the hash values for all
// prefix lengths 0 through 429 (430 characters in total).
// Note that "c" is common for the zero-length string.
EXPECT_EQ(
Hash64TestDescriptor("foo", 430),
"cRhyWsY67B6klRA1udmOuiYuX7IthyGBKqbeosz2hzVglWCmQx8nEdnpkvPfYX56Up2OWOTV"
"lTzfAoYwvtqKzjD8E9xttR2unelbXbIV67NUe6bOO23BxaSFRcA3njGu5cUWfgwOqNoTsszp"
"uPvKRP6qaUR5VdoBkJUCFIefd7edlNK5mv6JYWaGdwxehg65hTkTmjZoPKxTZo4PLyzbL9U4"
"xt12ITSfeP2MfBHuLI2z2pDlBb44UQKVMx27LEoAHsdLp3WfWfgH3sdRBRCHm33UxCM4QmE2"
"xJ7gqSvNwTeH7v9GlC8zWbGroyD3UVNeShMLx29O7tH1biemLULwAHyIw8zdtLMDpEJ8m2ic"
"l6Lb4fDuuFNAs1GCVUthjK8CV8SWI8Rsz5THSwn5CGhpqUwSZcFknjwWIl5rNCvDxXJqYr");
// Note that "1EeRk" is common for "Rocks"
// (the "Rocks" and "RocksDB" patterns share their first five bytes, so
// their descriptors agree for prefix lengths 0 through 5)
EXPECT_EQ(
Hash64TestDescriptor("Rocks", 430),
"c1EeRkrzgOYWLA8PuhJrwTePJewoB44WdXYDfhbk3ZxTqqg25WlPExDl7IKIQLJvnA6gJxxn"
"9TCSLkFGfJeXehaSS1GBqWSzfhEH4VXiXIUCuxJXxtKXcSC6FrNIQGTZbYDiUOLD6Y5inzrF"
"9etwQhXUBanw55xAUdNMFQAm2GjJ6UDWp2mISLiMMkLjANWMKLaZMqaFLX37qB4MRO1ooVRv"
"zSvaNRSCLxlggQCasQq8icWjzf3HjBlZtU6pd4rkaUxSzHqmo9oM5MghbU5Rtxg8wEfO7lVN"
"5wdMONYecslQTwjZUpO1K3LDf3K3XK6sUXM6ShQQ3RHmMn2acB4YtTZ3QQcHYJSOHn2DuWpa"
"Q8RqzX5lab92YmOLaCdOHq1BPsM7SIBzMdLgePNsJ1vvMALxAaoDUHPxoFLO2wx18IXnyX");
EXPECT_EQ(
Hash64TestDescriptor("RocksDB", 430),
"c1EeRkukbkb28wLTahwD2sfUhZzaBEnF8SVrxnPVB6A7b8CaAl3UKsDZISF92GSq2wDCukOq"
"Jgrsp7A3KZhDiLW8dFXp8UPqPxMCRlMdZeVeJ2dJxrmA6cyt99zkQFj7ELbut6jAeVqARFnw"
"fnWVXOsaLrq7bDCbMcns2DKvTaaqTCLMYxI7nhtLpFN1jR755FRQFcOzrrDbh7QhypjdvlYw"
"cdAMSZgp9JMHxbM23wPSuH6BOFgxejz35PScZfhDPvTOxIy1jc3MZsWrMC3P324zNolO7JdW"
"CX2I5UDKjjaEJfxbgVgJIXxtQGlmj2xkO5sPpjULQV4X2HlY7FQleJ4QRaJIB4buhCA4vUTF"
"eMFlxCIYUpTCsal2qsmnGOWa8WCcefrohMjDj1fjzSvSaQwlpyR1GZHF2uPOoQagiCpHpm");
}
TEST(Fastrange32Test, Values) { TEST(Fastrange32Test, Values) {
using rocksdb::fastrange32; using rocksdb::fastrange32;
// Zero range // Zero range
@ -175,11 +370,6 @@ size_t fastrange64(uint64_t hash, size_t range) {
return rocksdb::fastrange64(hash, range); return rocksdb::fastrange64(hash, range);
} }
// for inspection of disassembly
uint64_t NPHash64(const char* data, size_t n) {
return rocksdb::NPHash64(data, n);
}
int main(int argc, char** argv) { int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv); ::testing::InitGoogleTest(&argc, argv);

@ -487,7 +487,12 @@ XXH3p_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64
{ if (len > 8) return XXH3p_len_9to16_64b(input, len, secret, seed); { if (len > 8) return XXH3p_len_9to16_64b(input, len, secret, seed);
if (len >= 4) return XXH3p_len_4to8_64b(input, len, secret, seed); if (len >= 4) return XXH3p_len_4to8_64b(input, len, secret, seed);
if (len) return XXH3p_len_1to3_64b(input, len, secret, seed); if (len) return XXH3p_len_1to3_64b(input, len, secret, seed);
return 0; /*
* RocksDB modification from XXH3 preview: zero result for empty
* string can be problematic for multiplication-based algorithms.
* Return a hash of the seed instead.
*/
return XXH3p_mul128_fold64(seed + XXH_readLE64(secret), PRIME64_2);
} }
} }

@ -591,8 +591,10 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
state->memsize = 0; state->memsize = 0;
} }
if (p <= bEnd-16) { // uintptr_t casts added to avoid array-bounds error on
const xxh_u8* const limit = bEnd - 16; // some inlined calls
if ((uintptr_t)p <= (uintptr_t)bEnd - 16) {
const uintptr_t limit = (uintptr_t)bEnd - 16;
xxh_u32 v1 = state->v1; xxh_u32 v1 = state->v1;
xxh_u32 v2 = state->v2; xxh_u32 v2 = state->v2;
xxh_u32 v3 = state->v3; xxh_u32 v3 = state->v3;
@ -603,7 +605,7 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
v2 = XXH32_round(v2, XXH_readLE32(p)); p+=4; v2 = XXH32_round(v2, XXH_readLE32(p)); p+=4;
v3 = XXH32_round(v3, XXH_readLE32(p)); p+=4; v3 = XXH32_round(v3, XXH_readLE32(p)); p+=4;
v4 = XXH32_round(v4, XXH_readLE32(p)); p+=4; v4 = XXH32_round(v4, XXH_readLE32(p)); p+=4;
} while (p<=limit); } while ((uintptr_t)p <= limit);
state->v1 = v1; state->v1 = v1;
state->v2 = v2; state->v2 = v2;
@ -1072,8 +1074,10 @@ XXH64_update (XXH64_state_t* state, const void* input, size_t len)
state->memsize = 0; state->memsize = 0;
} }
if (p+32 <= bEnd) { // uintptr_t casts added to avoid array-bounds error on
const xxh_u8* const limit = bEnd - 32; // some inlined calls
if ((uintptr_t)p + 32 <= (uintptr_t)bEnd) {
const uintptr_t limit = (uintptr_t)bEnd - 32;
xxh_u64 v1 = state->v1; xxh_u64 v1 = state->v1;
xxh_u64 v2 = state->v2; xxh_u64 v2 = state->v2;
xxh_u64 v3 = state->v3; xxh_u64 v3 = state->v3;
@ -1084,7 +1088,7 @@ XXH64_update (XXH64_state_t* state, const void* input, size_t len)
v2 = XXH64_round(v2, XXH_readLE64(p)); p+=8; v2 = XXH64_round(v2, XXH_readLE64(p)); p+=8;
v3 = XXH64_round(v3, XXH_readLE64(p)); p+=8; v3 = XXH64_round(v3, XXH_readLE64(p)); p+=8;
v4 = XXH64_round(v4, XXH_readLE64(p)); p+=8; v4 = XXH64_round(v4, XXH_readLE64(p)); p+=8;
} while (p<=limit); } while ((uintptr_t)p <= limit);
state->v1 = v1; state->v1 = v1;
state->v2 = v2; state->v2 = v2;

Loading…
Cancel
Save