From 88841bd007a66417055c1dcad97f86263262de00 Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Mon, 8 Sep 2014 18:57:40 -0700 Subject: [PATCH] Explicitly cast char to signed char in Hash() Summary: The compilers we use treat char as signed. However, this is not guarantee of C standard and some compilers (for ARM platform for example), treat char as unsigned. Code that assumes that char is either signed or unsigned is wrong. This change explicitly casts the char to signed version. This will not break any of our use cases on x86, which, I believe are all of them. In case somebody out there is using RocksDB on ARM AND using bloom filters, they're going to have a bad time. However, it is very unlikely that this is the case. Test Plan: sanity test with previous commit (with new sanity test) Reviewers: yhchiang, ljin, sdong Reviewed By: ljin Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D22767 --- HISTORY.md | 2 ++ tools/auto_sanity_test.sh | 10 ++++++++++ tools/db_sanity_test.cc | 29 ++++++++++++++--------------- util/hash.cc | 22 +++++++++++++++++----- 4 files changed, 43 insertions(+), 20 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index ca117b273..80cac265b 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,8 @@ # Rocksdb Change Log ## Unreleased (will be released with 3.6) +### Disk format changes +* If you're using RocksDB on ARM platforms and you're using default bloom filter, there is a disk format change you need to be aware of. There are three steps you need to do when you convert to new release: 1. turn off filter policy, 2. compact the whole database, 3. turn on filter policy ### Behavior changes * We have refactored our system of stalling writes. Any stall-related statistics' meanings are changed. Instead of per-write stall counts, we now count stalls per-epoch, where epochs are periods between flushes and compactions. You'll find more information in our Tuning Perf Guide once we release RocksDB 3.6. diff --git a/tools/auto_sanity_test.sh b/tools/auto_sanity_test.sh index 2d63c0a85..138c855c0 100755 --- a/tools/auto_sanity_test.sh +++ b/tools/auto_sanity_test.sh @@ -37,6 +37,11 @@ echo "Running db sanity check with commits $commit_new and $commit_old." echo "=============================================================" echo "Making build $commit_new" +git checkout $commit_new +if [ $? -ne 0 ]; then + echo "[ERROR] Can't checkout $commit_new" + exit 1 +fi makestuff mv db_sanity_test new_db_sanity_test echo "Creating db based on the new commit --- $commit_new" @@ -44,6 +49,11 @@ echo "Creating db based on the new commit --- $commit_new" echo "=============================================================" echo "Making build $commit_old" +git checkout $commit_old +if [ $? -ne 0 ]; then + echo "[ERROR] Can't checkout $commit_old" + exit 1 +fi makestuff mv db_sanity_test old_db_sanity_test echo "Creating db based on the old commit --- $commit_old" diff --git a/tools/db_sanity_test.cc b/tools/db_sanity_test.cc index 7cf7c1cca..237ef07d0 100644 --- a/tools/db_sanity_test.cc +++ b/tools/db_sanity_test.cc @@ -8,15 +8,15 @@ #include #include -#include "include/rocksdb/db.h" -#include "include/rocksdb/options.h" -#include "include/rocksdb/env.h" -#include "include/rocksdb/slice.h" -#include "include/rocksdb/status.h" -#include "include/rocksdb/comparator.h" -#include "include/rocksdb/table.h" -#include "include/rocksdb/slice_transform.h" -#include "include/rocksdb/filter_policy.h" +#include "rocksdb/db.h" +#include "rocksdb/options.h" +#include "rocksdb/env.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "rocksdb/comparator.h" +#include "rocksdb/table.h" +#include "rocksdb/slice_transform.h" +#include "rocksdb/filter_policy.h" namespace rocksdb { @@ -50,7 +50,7 @@ class SanityTest { return s; } } - return Status::OK(); + return db->Flush(FlushOptions()); } Status Verify() { DB* db; @@ -149,10 +149,10 @@ class SanityTestPlainTableFactory : public SanityTest { class SanityTestBloomFilter : public SanityTest { public: - explicit SanityTestBloomFilter(const std::string& path) - : SanityTest(path) { - table_options_.filter_policy.reset(NewBloomFilterPolicy(10)); - options_.table_factory.reset(NewBlockBasedTableFactory(table_options_)); + explicit SanityTestBloomFilter(const std::string& path) : SanityTest(path) { + BlockBasedTableOptions table_options; + table_options.filter_policy.reset(NewBloomFilterPolicy(10)); + options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); } ~SanityTestBloomFilter() {} virtual Options GetOptions() const { return options_; } @@ -160,7 +160,6 @@ class SanityTestBloomFilter : public SanityTest { private: Options options_; - BlockBasedTableOptions table_options_; }; namespace { diff --git a/util/hash.cc b/util/hash.cc index e38c186c3..37eaa4057 100644 --- a/util/hash.cc +++ b/util/hash.cc @@ -31,14 +31,26 @@ uint32_t Hash(const char* data, size_t n, uint32_t seed) { // Pick up remaining bytes switch (limit - data) { + // Note: It would be better if this was cast to unsigned char, but that + // would be a disk format change since we previously didn't have any cast + // at all (so gcc used signed char). + // To understand the difference between shifting unsigned and signed chars, + // let's use 250 as an example. unsigned char will be 250, while signed char + // will be -6. Bit-wise, they are equivalent: 11111010. However, when + // converting negative number (signed char) to int, it will be converted + // into negative int (of equivalent value, which is -6), while converting + // positive number (unsigned char) will be converted to 250. Bitwise, + // this looks like this: + // signed char 11111010 -> int 11111111111111111111111111111010 + // unsigned char 11111010 -> int 00000000000000000000000011111010 case 3: - h += data[2] << 16; - // fall through + h += static_cast(static_cast(data[2]) << 16); + // fall through case 2: - h += data[1] << 8; - // fall through + h += static_cast(static_cast(data[1]) << 8); + // fall through case 1: - h += data[0]; + h += static_cast(static_cast(data[0])); h *= m; h ^= (h >> r); break;