diff --git a/CMakeLists.txt b/CMakeLists.txt index cbbde8eda..1d0ad1cb0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -697,6 +697,7 @@ set(TESTS util/event_logger_test.cc util/file_reader_writer_test.cc util/filelock_test.cc + util/hash_test.cc util/heap_test.cc util/rate_limiter_test.cc util/slice_transform_test.cc diff --git a/Makefile b/Makefile index bf0d836f8..1b273224b 100644 --- a/Makefile +++ b/Makefile @@ -336,6 +336,7 @@ TESTS = \ inlineskiplist_test \ env_basic_test \ env_test \ + hash_test \ thread_local_test \ rate_limiter_test \ perf_context_test \ @@ -990,6 +991,9 @@ cache_test: cache/cache_test.o $(LIBOBJECTS) $(TESTHARNESS) coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) +hash_test: util/hash_test.o $(LIBOBJECTS) $(TESTHARNESS) + $(AM_LINK) + option_change_migration_test: utilities/option_change_migration/option_change_migration_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) diff --git a/TARGETS b/TARGETS index 74de1608b..001bf7cb2 100644 --- a/TARGETS +++ b/TARGETS @@ -11,7 +11,7 @@ rocksdb_compiler_flags = [ "-DROCKSDB_MALLOC_USABLE_SIZE", "-DROCKSDB_RANGESYNC_PRESENT", "-DROCKSDB_SCHED_GETCPU_PRESENT", - "-DROCKSDB_SUPPORT_THREAD_LOCAL", + "-DROCKSDB_SUPPORT_THREAD_LOCAL", "-DHAVE_SSE42", "-DOS_LINUX", # Flags to enable libs we include @@ -407,6 +407,7 @@ ROCKS_TESTS = [['arena_test', 'util/arena_test.cc', 'serial'], ['hash_table_test', 'utilities/persistent_cache/hash_table_test.cc', 'serial'], + ['hash_test', 'util/hash_test.cc', 'serial'], ['heap_test', 'util/heap_test.cc', 'serial'], ['histogram_test', 'monitoring/histogram_test.cc', 'serial'], ['inlineskiplist_test', 'memtable/inlineskiplist_test.cc', 'parallel'], diff --git a/util/hash.cc b/util/hash.cc index 5af5c5a1d..aaa3070e9 100644 --- a/util/hash.cc +++ b/util/hash.cc @@ -15,13 +15,6 @@ namespace rocksdb { -#ifdef ROCKSDB_UBSAN_RUN -#if defined(__clang__) -__attribute__((__no_sanitize__("shift"))) -#elif defined(__GNUC__) -__attribute__((__no_sanitize_undefined__)) -#endif -#endif uint32_t Hash(const char* data, size_t n, uint32_t seed) { // Similar to murmur hash const uint32_t m = 0xc6a4a793; @@ -40,26 +33,21 @@ uint32_t Hash(const char* data, size_t n, uint32_t seed) { // Pick up remaining bytes switch (limit - data) { - // Note: It would be better if this was cast to unsigned char, but that - // would be a disk format change since we previously didn't have any cast - // at all (so gcc used signed char). - // To understand the difference between shifting unsigned and signed chars, - // let's use 250 as an example. unsigned char will be 250, while signed char - // will be -6. Bit-wise, they are equivalent: 11111010. However, when - // converting negative number (signed char) to int, it will be converted - // into negative int (of equivalent value, which is -6), while converting - // positive number (unsigned char) will be converted to 250. Bitwise, - // this looks like this: - // signed char 11111010 -> int 11111111111111111111111111111010 - // unsigned char 11111010 -> int 00000000000000000000000011111010 + // Note: The original hash implementation used data[i] << shift, which + // promotes the char to int and then performs the shift. If the char is + // negative, the shift is undefined behavior in C++. The hash algorithm is + // part of the format definition, so we cannot change it; to obtain the same + // behavior in a legal way we just cast to uint32_t, which will do + // sign-extension. To guarantee compatibility with architectures where chars + // are unsigned we first cast the char to int8_t. case 3: - h += static_cast(static_cast(data[2]) << 16); + h += static_cast(static_cast(data[2])) << 16; // fall through case 2: - h += static_cast(static_cast(data[1]) << 8); + h += static_cast(static_cast(data[1])) << 8; // fall through case 1: - h += static_cast(static_cast(data[0])); + h += static_cast(static_cast(data[0])); h *= m; h ^= (h >> r); break; diff --git a/util/hash_test.cc b/util/hash_test.cc new file mode 100644 index 000000000..2f0302631 --- /dev/null +++ b/util/hash_test.cc @@ -0,0 +1,75 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// This source code is also licensed under the GPLv2 license found in the +// COPYING file in the root directory of this source tree. +// +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include + +#include "util/hash.h" +#include "util/testharness.h" + +// The hash algorithm is part of the file format, for example for the Bloom +// filters. Test that the hash values are stable for a set of random strings of +// varying lengths. +TEST(HashTest, Values) { + using rocksdb::Hash; + constexpr uint32_t kSeed = 0xbc9f1d34; // Same as BloomHash. + + EXPECT_EQ(Hash("", 0, kSeed), 3164544308); + EXPECT_EQ(Hash("\x08", 1, kSeed), 422599524); + EXPECT_EQ(Hash("\x17", 1, kSeed), 3168152998); + EXPECT_EQ(Hash("\x9a", 1, kSeed), 3195034349); + EXPECT_EQ(Hash("\x1c", 1, kSeed), 2651681383); + EXPECT_EQ(Hash("\x4d\x76", 2, kSeed), 2447836956); + EXPECT_EQ(Hash("\x52\xd5", 2, kSeed), 3854228105); + EXPECT_EQ(Hash("\x91\xf7", 2, kSeed), 31066776); + EXPECT_EQ(Hash("\xd6\x27", 2, kSeed), 1806091603); + EXPECT_EQ(Hash("\x30\x46\x0b", 3, kSeed), 3808221797); + EXPECT_EQ(Hash("\x56\xdc\xd6", 3, kSeed), 2157698265); + EXPECT_EQ(Hash("\xd4\x52\x33", 3, kSeed), 1721992661); + EXPECT_EQ(Hash("\x6a\xb5\xf4", 3, kSeed), 2469105222); + EXPECT_EQ(Hash("\x67\x53\x81\x1c", 4, kSeed), 118283265); + EXPECT_EQ(Hash("\x69\xb8\xc0\x88", 4, kSeed), 3416318611); + EXPECT_EQ(Hash("\x1e\x84\xaf\x2d", 4, kSeed), 3315003572); + EXPECT_EQ(Hash("\x46\xdc\x54\xbe", 4, kSeed), 447346355); + EXPECT_EQ(Hash("\xd0\x7a\x6e\xea\x56", 5, kSeed), 4255445370); + EXPECT_EQ(Hash("\x86\x83\xd5\xa4\xd8", 5, kSeed), 2390603402); + EXPECT_EQ(Hash("\xb7\x46\xbb\x77\xce", 5, kSeed), 2048907743); + EXPECT_EQ(Hash("\x6c\xa8\xbc\xe5\x99", 5, kSeed), 2177978500); + EXPECT_EQ(Hash("\x5c\x5e\xe1\xa0\x73\x81", 6, kSeed), 1036846008); + EXPECT_EQ(Hash("\x08\x5d\x73\x1c\xe5\x2e", 6, kSeed), 229980482); + EXPECT_EQ(Hash("\x42\xfb\xf2\x52\xb4\x10", 6, kSeed), 3655585422); + EXPECT_EQ(Hash("\x73\xe1\xff\x56\x9c\xce", 6, kSeed), 3502708029); + EXPECT_EQ(Hash("\x5c\xbe\x97\x75\x54\x9a\x52", 7, kSeed), 815120748); + EXPECT_EQ(Hash("\x16\x82\x39\x49\x88\x2b\x36", 7, kSeed), 3056033698); + EXPECT_EQ(Hash("\x59\x77\xf0\xa7\x24\xf4\x78", 7, kSeed), 587205227); + EXPECT_EQ(Hash("\xd3\xa5\x7c\x0e\xc0\x02\x07", 7, kSeed), 2030937252); + EXPECT_EQ(Hash("\x31\x1b\x98\x75\x96\x22\xd3\x9a", 8, kSeed), 469635402); + EXPECT_EQ(Hash("\x38\xd6\xf7\x28\x20\xb4\x8a\xe9", 8, kSeed), 3530274698); + EXPECT_EQ(Hash("\xbb\x18\x5d\xf4\x12\x03\xf7\x99", 8, kSeed), 1974545809); + EXPECT_EQ(Hash("\x80\xd4\x3b\x3b\xae\x22\xa2\x78", 8, kSeed), 3563570120); + EXPECT_EQ(Hash("\x1a\xb5\xd0\xfe\xab\xc3\x61\xb2\x99", 9, kSeed), 2706087434); + EXPECT_EQ(Hash("\x8e\x4a\xc3\x18\x20\x2f\x06\xe6\x3c", 9, kSeed), 1534654151); + EXPECT_EQ(Hash("\xb6\xc0\xdd\x05\x3f\xc4\x86\x4c\xef", 9, kSeed), 2355554696); + EXPECT_EQ(Hash("\x9a\x5f\x78\x0d\xaf\x50\xe1\x1f\x55", 9, kSeed), 1400800912); + EXPECT_EQ(Hash("\x22\x6f\x39\x1f\xf8\xdd\x4f\x52\x17\x94", 10, kSeed), + 3420325137); + EXPECT_EQ(Hash("\x32\x89\x2a\x75\x48\x3a\x4a\x02\x69\xdd", 10, kSeed), + 3427803584); + EXPECT_EQ(Hash("\x06\x92\x5c\xf4\x88\x0e\x7e\x68\x38\x3e", 10, kSeed), + 1152407945); + EXPECT_EQ(Hash("\xbd\x2c\x63\x38\xbf\xe9\x78\xb7\xbf\x15", 10, kSeed), + 3382479516); +} + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +}