rocksdb/util/murmurhash.cc

//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
/*
  Murmurhash from http://sites.google.com/site/murmurhash/

  All code is released to the public domain. For business purposes, Murmurhash
  is under the MIT license.
*/
#include "murmurhash.h"

#include <string.h>

#if defined(__x86_64__)

// -------------------------------------------------------------------
//
// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
// and endian-ness issues if used across multiple platforms.
//
// 64-bit hash for 64-bit platforms

// Computes the 64-bit MurmurHash64A value of the `len` bytes starting at `key`.
//
//   key  - start of the input buffer; any alignment is accepted.
//   len  - number of bytes to hash. The value is not validated here; callers
//          are presumably expected to pass a non-negative length.
//   seed - folded into the initial state so different seeds yield different
//          hashes for the same input.
//
// Returns the 64-bit hash. Full 8-byte groups are consumed in the host's
// native byte order, so the result is not portable across endianness.
uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed )
{
    // Mixing constants of the algorithm.
    const uint64_t m = 0xc6a4a7935bd1e995ULL;
    const int r = 47;

    uint64_t h = seed ^ (len * m);

    const unsigned char * data = (const unsigned char *)key;
    const unsigned char * end = data + (len / 8) * 8;

    // Body: fold in one 8-byte group per iteration.
    while(data != end)
    {
        // Load through memcpy rather than dereferencing a casted uint64_t
        // pointer: `key` carries no alignment guarantee, and a misaligned
        // (or aliasing-violating) load is undefined behaviour. Compilers
        // lower this to a single load where the target permits it.
        uint64_t k;
        memcpy(&k, data, sizeof(k));
        data += sizeof(k);

        k *= m;
        k ^= k >> r;
        k *= m;

        h ^= k;
        h *= m;
    }

    // Tail: the remaining 0..7 bytes, byte 0 being the least significant.
    switch(len & 7)
    {
    case 7: h ^= ((uint64_t)data[6]) << 48; // fallthrough
    case 6: h ^= ((uint64_t)data[5]) << 40; // fallthrough
    case 5: h ^= ((uint64_t)data[4]) << 32; // fallthrough
    case 4: h ^= ((uint64_t)data[3]) << 24; // fallthrough
    case 3: h ^= ((uint64_t)data[2]) << 16; // fallthrough
    case 2: h ^= ((uint64_t)data[1]) << 8; // fallthrough
    case 1: h ^= ((uint64_t)data[0]);
        h *= m;
    };

    // Final avalanche so the last bytes affect every output bit.
    h ^= h >> r;
    h *= m;
    h ^= h >> r;

    return h;
}

#elif defined(__i386__)

// -------------------------------------------------------------------
//
// Note - This code makes a few assumptions about how your machine behaves -
//
// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4
//
// And it has a few limitations -
//
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
//    machines.

// Computes the 32-bit MurmurHash2 value of the `len` bytes starting at `key`.
//
//   key  - start of the input buffer; any alignment is accepted.
//   len  - number of bytes to hash. The value is not validated here.
//   seed - XOR-ed with `len` to form the initial hash state.
//
// Returns the hash as an unsigned int (assumed to be 32 bits wide). Each
// 4-byte group is consumed in the host's native byte order, so the result
// differs between little-endian and big-endian machines.
unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )
{
    // 'm' and 'r' are mixing constants generated offline.
    // They're not really 'magic', they just happen to work well.

    const unsigned int m = 0x5bd1e995;
    const int r = 24;

    // Initialize the hash to a 'random' value

    unsigned int h = seed ^ len;

    // Mix 4 bytes at a time into the hash

    const unsigned char * data = (const unsigned char *)key;

    while(len >= 4)
    {
        // Load through memcpy instead of `*(unsigned int *)data`: the cast
        // dropped the const qualifier and performed a misaligned,
        // aliasing-violating read, which is undefined behaviour.
        uint32_t word;
        memcpy(&word, data, sizeof(word));

        unsigned int k = word;

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        len -= 4;
    }

    // Handle the last few bytes of the input array

    switch(len)
    {
    case 3: h ^= (unsigned int)data[2] << 16; // fallthrough
    case 2: h ^= (unsigned int)data[1] << 8; // fallthrough
    case 1: h ^= data[0];
        h *= m;
    };

    // Do a few final mixes of the hash to ensure the last few
    // bytes are well-incorporated.

    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}

#else

// -------------------------------------------------------------------
//
// Same as MurmurHash2, but endian- and alignment-neutral.
// Half the speed though, alas.

// Computes the 32-bit MurmurHash2 value of the `len` bytes starting at `key`,
// reading the input one byte at a time so the result depends neither on the
// host's byte order nor on the alignment of `key`.
//
//   key  - start of the input buffer.
//   len  - number of bytes to hash. The value is not validated here.
//   seed - XOR-ed with `len` to form the initial hash state.
//
// Returns the hash as an unsigned int (assumed to be at least 32 bits wide).
unsigned int MurmurHashNeutral2 ( const void * key, int len, unsigned int seed )
{
    // Mixing constants of the algorithm.
    const unsigned int m = 0x5bd1e995;
    const int r = 24;

    unsigned int h = seed ^ len;

    const unsigned char * data = (const unsigned char *)key;

    // Body: assemble each 4-byte group as a little-endian word and mix it in.
    while(len >= 4)
    {
        // Widen each byte to unsigned before shifting. An unsigned char
        // promotes to signed int, and `data[3] << 24` with the top bit set
        // overflows int, which is undefined behaviour.
        unsigned int k;

        k  = (unsigned int)data[0];
        k |= (unsigned int)data[1] << 8;
        k |= (unsigned int)data[2] << 16;
        k |= (unsigned int)data[3] << 24;

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        len -= 4;
    }

    // Tail: the remaining 0..3 bytes, byte 0 being the least significant.
    switch(len)
    {
    case 3: h ^= (unsigned int)data[2] << 16; // fallthrough
    case 2: h ^= (unsigned int)data[1] << 8; // fallthrough
    case 1: h ^= (unsigned int)data[0];
        h *= m;
    };

    // Final mixing so the last bytes are well incorporated.
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}

#endif
Updated all copyright headers to the new format. 9 years ago			`// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.`
Change RocksDB License Summary: Closes https://github.com/facebook/rocksdb/pull/2589 Differential Revision: D5431502 Pulled By: siying fbshipit-source-id: 8ebf8c87883daa9daa54b2303d11ce01ab1f6f75 7 years ago			`// This source code is licensed under both the GPLv2 (found in the`
			`// COPYING file in the root directory) and Apache 2.0 License`
			`// (found in the LICENSE.Apache file in the root directory).`
Add appropriate LICENSE and Copyright message. Summary: Add appropriate LICENSE and Copyright message. Test Plan: make check Reviewers: CC: Task ID: # Blame Rev: 11 years ago			`//`
Implement RowLocks for assoc schema Summary: Each assoc is identified by (id1, assocType). This is the rowkey. Each row has a read/write rowlock. There is statically allocated array of 2000 read/write locks. A rowkey is murmur-hashed to one of the read/write locks. assocPut and assocDelete acquires the rowlock in Write mode. The key-updates are done within the rowlock with a atomic nosync batch write to leveldb. Then the rowlock is released and a write-with-sync is done to sync leveldb transaction log. Test Plan: added unit test Reviewers: heyongqiang Reviewed By: heyongqiang Differential Revision: https://reviews.facebook.net/D5859 12 years ago			`/*`
			`Murmurhash from http://sites.google.com/site/murmurhash/`

Optimize for serial commits in 2PC Summary: Throughput: 46k tps in our sysbench settings (filling the details later) The idea is to have the simplest change that gives us a reasonable boost in 2PC throughput. Major design changes: 1. The WAL file internal buffer is not flushed after each write. Instead it is flushed before critical operations (WAL copy via fs) or when FlushWAL is called by MySQL. Flushing the WAL buffer is also protected via mutex_. 2. Use two sequence numbers: last seq, and last seq for write. Last seq is the last visible sequence number for reads. Last seq for write is the next sequence number that should be used to write to WAL/memtable. This allows to have a memtable write be in parallel to WAL writes. 3. BatchGroup is not used for writes. This means that we can have parallel writers which changes a major assumption in the code base. To accommodate for that i) allow only 1 WriteImpl that intends to write to memtable via mem_mutex_--which is fine since in 2PC almost all of the memtable writes come via group commit phase which is serial anyway, ii) make all the parts in the code base that assumed to be the only writer (via EnterUnbatched) to also acquire mem_mutex_, iii) stat updates are protected via a stat_mutex_. Note: the first commit has the approach figured out but is not clean. Submitting the PR anyway to get the early feedback on the approach. If we are ok with the approach I will go ahead with this updates: 0) Rebase with Yi's pipelining changes 1) Currently batching is disabled by default to make sure that it will be consistent with all unit tests. Will make this optional via a config. 2) A couple of unit tests are disabled. They need to be updated with the serial commit of 2PC taken into account. 3) Replacing BatchGroup with mem_mutex_ got a bit ugly as it requires releasing mutex_ beforehand (the same way EnterUnbatched does). This needs to be cleaned up. 
Closes https://github.com/facebook/rocksdb/pull/2345 Differential Revision: D5210732 Pulled By: maysamyabandeh fbshipit-source-id: 78653bd95a35cd1e831e555e0e57bdfd695355a4 8 years ago			`All code is released to the public domain. For business purposes, Murmurhash`
			`is under the MIT license.`
Implement RowLocks for assoc schema Summary: Each assoc is identified by (id1, assocType). This is the rowkey. Each row has a read/write rowlock. There is statically allocated array of 2000 read/write locks. A rowkey is murmur-hashed to one of the read/write locks. assocPut and assocDelete acquires the rowlock in Write mode. The key-updates are done within the rowlock with a atomic nosync batch write to leveldb. Then the rowlock is released and a write-with-sync is done to sync leveldb transaction log. Test Plan: added unit test Reviewers: heyongqiang Reviewed By: heyongqiang Differential Revision: https://reviews.facebook.net/D5859 12 years ago			`*/`
			`#include "murmurhash.h"`

			`#if defined(__x86_64__)`

			`// -------------------------------------------------------------------`
			`//`
			`// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment`
			`// and endian-ness issues if used across multiple platforms.`
			`//`
			`// 64-bit hash for 64-bit platforms`

			`uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed )`
			`{`
			`const uint64_t m = 0xc6a4a7935bd1e995;`
			`const int r = 47;`

			`uint64_t h = seed ^ (len * m);`

			`const uint64_t * data = (const uint64_t *)key;`
			`const uint64_t * end = data + (len/8);`

			`while(data != end)`
			`{`
			`uint64_t k = *data++;`

			`k *= m;`
			`k ^= k >> r;`
			`k *= m;`

			`h ^= k;`
			`h *= m;`
			`}`

			`const unsigned char * data2 = (const unsigned char*)data;`

			`switch(len & 7)`
			`{`
Gcc 7 fallthrough Summary: hopefully the last of the gcc-7 compile errors Closes https://github.com/facebook/rocksdb/pull/1675 Differential Revision: D4332106 Pulled By: IslamAbdelRahman fbshipit-source-id: 139448c 8 years ago			`case 7: h ^= ((uint64_t)data2[6]) << 48; // fallthrough`
			`case 6: h ^= ((uint64_t)data2[5]) << 40; // fallthrough`
			`case 5: h ^= ((uint64_t)data2[4]) << 32; // fallthrough`
			`case 4: h ^= ((uint64_t)data2[3]) << 24; // fallthrough`
			`case 3: h ^= ((uint64_t)data2[2]) << 16; // fallthrough`
			`case 2: h ^= ((uint64_t)data2[1]) << 8; // fallthrough`
Implement RowLocks for assoc schema Summary: Each assoc is identified by (id1, assocType). This is the rowkey. Each row has a read/write rowlock. There is statically allocated array of 2000 read/write locks. A rowkey is murmur-hashed to one of the read/write locks. assocPut and assocDelete acquires the rowlock in Write mode. The key-updates are done within the rowlock with a atomic nosync batch write to leveldb. Then the rowlock is released and a write-with-sync is done to sync leveldb transaction log. Test Plan: added unit test Reviewers: heyongqiang Reviewed By: heyongqiang Differential Revision: https://reviews.facebook.net/D5859 12 years ago			`case 1: h ^= ((uint64_t)data2[0]);`
			`h *= m;`
			`};`

			`h ^= h >> r;`
			`h *= m;`
			`h ^= h >> r;`

			`return h;`
			`}`

			`#elif defined(__i386__)`

			`// -------------------------------------------------------------------`
			`//`
			`// Note - This code makes a few assumptions about how your machine behaves -`
			`//`
			`// 1. We can read a 4-byte value from any address without crashing`
			`// 2. sizeof(int) == 4`
			`//`
			`// And it has a few limitations -`
			`//`
			`// 1. It will not work incrementally.`
			`// 2. It will not produce the same results on little-endian and big-endian`
			`// machines.`

			`unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )`
			`{`
			`// 'm' and 'r' are mixing constants generated offline.`
			`// They're not really 'magic', they just happen to work well.`

			`const unsigned int m = 0x5bd1e995;`
			`const int r = 24;`

			`// Initialize the hash to a 'random' value`

			`unsigned int h = seed ^ len;`

			`// Mix 4 bytes at a time into the hash`

			`const unsigned char * data = (const unsigned char *)key;`

			`while(len >= 4)`
			`{`
			`unsigned int k = *(unsigned int *)data;`

			`k *= m;`
			`k ^= k >> r;`
			`k *= m;`

			`h *= m;`
			`h ^= k;`

			`data += 4;`
			`len -= 4;`
			`}`

			`// Handle the last few bytes of the input array`

			`switch(len)`
			`{`
			`case 3: h ^= data[2] << 16;`
			`case 2: h ^= data[1] << 8;`
			`case 1: h ^= data[0];`
			`h *= m;`
			`};`

			`// Do a few final mixes of the hash to ensure the last few`
			`// bytes are well-incorporated.`

			`h ^= h >> 13;`
			`h *= m;`
			`h ^= h >> 15;`

			`return h;`
			`}`

			`#else`

			`// -------------------------------------------------------------------`
			`//`
			`// Same as MurmurHash2, but endian- and alignment-neutral.`
			`// Half the speed though, alas.`

			`unsigned int MurmurHashNeutral2 ( const void * key, int len, unsigned int seed )`
			`{`
			`const unsigned int m = 0x5bd1e995;`
			`const int r = 24;`

			`unsigned int h = seed ^ len;`

			`const unsigned char * data = (const unsigned char *)key;`

			`while(len >= 4)`
			`{`
			`unsigned int k;`

			`k = data[0];`
			`k |= data[1] << 8;`
			`k |= data[2] << 16;`
			`k |= data[3] << 24;`

			`k *= m;`
			`k ^= k >> r;`
			`k *= m;`

			`h *= m;`
			`h ^= k;`

			`data += 4;`
			`len -= 4;`
			`}`

			`switch(len)`
			`{`
Gcc 7 fallthrough Summary: hopefully the last of the gcc-7 compile errors Closes https://github.com/facebook/rocksdb/pull/1675 Differential Revision: D4332106 Pulled By: IslamAbdelRahman fbshipit-source-id: 139448c 8 years ago			`case 3: h ^= data[2] << 16; // fallthrough`
			`case 2: h ^= data[1] << 8; // fallthrough`
Implement RowLocks for assoc schema Summary: Each assoc is identified by (id1, assocType). This is the rowkey. Each row has a read/write rowlock. There is statically allocated array of 2000 read/write locks. A rowkey is murmur-hashed to one of the read/write locks. assocPut and assocDelete acquires the rowlock in Write mode. The key-updates are done within the rowlock with a atomic nosync batch write to leveldb. Then the rowlock is released and a write-with-sync is done to sync leveldb transaction log. Test Plan: added unit test Reviewers: heyongqiang Reviewed By: heyongqiang Differential Revision: https://reviews.facebook.net/D5859 12 years ago			`case 1: h ^= data[0];`
			`h *= m;`
			`};`

			`h ^= h >> 13;`
			`h *= m;`
			`h ^= h >> 15;`

			`return h;`
			`}`

			`#endif`