fork of https://github.com/oxigraph/rocksdb and https://github.com/facebook/rocksdb for nextgraph and oxigraph
				
			
			
		
			You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							238 lines
						
					
					
						
							6.4 KiB
						
					
					
				
			
		
		
	
	
							238 lines
						
					
					
						
							6.4 KiB
						
					
					
				| //  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
 | |
| //  This source code is licensed under the BSD-style license found in the
 | |
| //  LICENSE file in the root directory of this source tree. An additional grant
 | |
| //  of patent rights can be found in the PATENTS file in the same directory.
 | |
| //
 | |
| #pragma once
 | |
| 
 | |
| #ifndef ROCKSDB_LITE
 | |
| 
 | |
| #include <assert.h>
 | |
| #include <list>
 | |
| #include <vector>
 | |
| 
 | |
| #ifdef OS_LINUX
 | |
| #include <sys/mman.h>
 | |
| #endif
 | |
| 
 | |
| #include "include/rocksdb/env.h"
 | |
| #include "util/mutexlock.h"
 | |
| 
 | |
| namespace rocksdb {
 | |
| 
 | |
| // HashTable<T, Hash, Equal>
 | |
| //
 | |
| // Traditional implementation of hash table with synchronization built on top
 | |
| // doesn't perform very well in multi-core scenarios. This is an implementation
 | |
| // designed for multi-core scenarios with high lock contention.
 | |
| //
 | |
| //                         |<-------- alpha ------------->|
 | |
| //               Buckets   Collision list
 | |
| //          ---- +----+    +---+---+--- ...... ---+---+---+
 | |
| //         /     |    |--->|   |   |              |   |   |
 | |
| //        /      +----+    +---+---+--- ...... ---+---+---+
 | |
| //       /       |    |
 | |
| // Locks/        +----+
 | |
| // +--+/         .    .
 | |
| // |  |          .    .
 | |
| // +--+          .    .
 | |
| // |  |          .    .
 | |
| // +--+          .    .
 | |
| // |  |          .    .
 | |
| // +--+          .    .
 | |
| //     \         +----+
 | |
| //      \        |    |
 | |
| //       \       +----+
 | |
| //        \      |    |
 | |
| //         \---- +----+
 | |
| //
 | |
| // The lock contention is spread over an array of locks. This helps improve
 | |
| // concurrent access. The spine is designed for a certain capacity and load
 | |
| // factor. When the capacity planning is done correctly we can expect
 | |
| // O(load_factor = 1) insert, access and remove time.
 | |
| //
 | |
| // Micro benchmark on debug build gives about .5 Million/sec rate of insert,
 | |
| // erase and lookup in parallel (total of about 1.5 Million ops/sec). If the
 | |
| // blocks were 4K each, the hash table can support a virtual throughput of
 | |
| // 6 GB/s.
 | |
| //
 | |
| // T      Object type (contains both key and value)
 | |
| // Hash   Function that returns a hash from type T
 | |
| // Equal  Returns if two objects are equal
 | |
| //        (We need explicit equal for pointer type)
 | |
| //
 | |
| template <class T, class Hash, class Equal>
 | |
| class HashTable {
 | |
|  public:
 | |
|   explicit HashTable(const size_t capacity = 1024 * 1024,
 | |
|                      const float load_factor = 2.0, const uint32_t nlocks = 256)
 | |
|       : nbuckets_(
 | |
|             static_cast<uint32_t>(load_factor ? capacity / load_factor : 0)),
 | |
|         nlocks_(nlocks) {
 | |
|     // pre-conditions
 | |
|     assert(capacity);
 | |
|     assert(load_factor);
 | |
|     assert(nbuckets_);
 | |
|     assert(nlocks_);
 | |
| 
 | |
|     buckets_.reset(new Bucket[nbuckets_]);
 | |
| #ifdef OS_LINUX
 | |
|     mlock(buckets_.get(), nbuckets_ * sizeof(Bucket));
 | |
| #endif
 | |
| 
 | |
|     // initialize locks
 | |
|     locks_.reset(new port::RWMutex[nlocks_]);
 | |
| #ifdef OS_LINUX
 | |
|     mlock(locks_.get(), nlocks_ * sizeof(port::RWMutex));
 | |
| #endif
 | |
| 
 | |
|     // post-conditions
 | |
|     assert(buckets_);
 | |
|     assert(locks_);
 | |
|   }
 | |
| 
 | |
|   virtual ~HashTable() { AssertEmptyBuckets(); }
 | |
| 
 | |
|   //
 | |
|   // Insert given record to hash table
 | |
|   //
 | |
|   bool Insert(const T& t) {
 | |
|     const uint64_t h = Hash()(t);
 | |
|     const uint32_t bucket_idx = h % nbuckets_;
 | |
|     const uint32_t lock_idx = bucket_idx % nlocks_;
 | |
| 
 | |
|     WriteLock _(&locks_[lock_idx]);
 | |
|     auto& bucket = buckets_[bucket_idx];
 | |
|     return Insert(&bucket, t);
 | |
|   }
 | |
| 
 | |
|   // Lookup hash table
 | |
|   //
 | |
|   // Please note that read lock should be held by the caller. This is because
 | |
|   // the caller owns the data, and should hold the read lock as long as he
 | |
|   // operates on the data.
 | |
|   bool Find(const T& t, T* ret, port::RWMutex** ret_lock) {
 | |
|     const uint64_t h = Hash()(t);
 | |
|     const uint32_t bucket_idx = h % nbuckets_;
 | |
|     const uint32_t lock_idx = bucket_idx % nlocks_;
 | |
| 
 | |
|     port::RWMutex& lock = locks_[lock_idx];
 | |
|     lock.ReadLock();
 | |
| 
 | |
|     auto& bucket = buckets_[bucket_idx];
 | |
|     if (Find(&bucket, t, ret)) {
 | |
|       *ret_lock = &lock;
 | |
|       return true;
 | |
|     }
 | |
| 
 | |
|     lock.ReadUnlock();
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   //
 | |
|   // Erase a given key from the hash table
 | |
|   //
 | |
|   bool Erase(const T& t, T* ret) {
 | |
|     const uint64_t h = Hash()(t);
 | |
|     const uint32_t bucket_idx = h % nbuckets_;
 | |
|     const uint32_t lock_idx = bucket_idx % nlocks_;
 | |
| 
 | |
|     WriteLock _(&locks_[lock_idx]);
 | |
| 
 | |
|     auto& bucket = buckets_[bucket_idx];
 | |
|     return Erase(&bucket, t, ret);
 | |
|   }
 | |
| 
 | |
|   // Fetch the mutex associated with a key
 | |
|   // This call is used to hold the lock for a given data for extended period of
 | |
|   // time.
 | |
|   port::RWMutex* GetMutex(const T& t) {
 | |
|     const uint64_t h = Hash()(t);
 | |
|     const uint32_t bucket_idx = h % nbuckets_;
 | |
|     const uint32_t lock_idx = bucket_idx % nlocks_;
 | |
| 
 | |
|     return &locks_[lock_idx];
 | |
|   }
 | |
| 
 | |
|   void Clear(void (*fn)(T)) {
 | |
|     for (uint32_t i = 0; i < nbuckets_; ++i) {
 | |
|       const uint32_t lock_idx = i % nlocks_;
 | |
|       WriteLock _(&locks_[lock_idx]);
 | |
|       for (auto& t : buckets_[i].list_) {
 | |
|         (*fn)(t);
 | |
|       }
 | |
|       buckets_[i].list_.clear();
 | |
|     }
 | |
|   }
 | |
| 
 | |
|  protected:
 | |
|   // Models bucket of keys that hash to the same bucket number
 | |
|   struct Bucket {
 | |
|     std::list<T> list_;
 | |
|   };
 | |
| 
 | |
|   // Substitute for std::find with custom comparator operator
 | |
|   typename std::list<T>::iterator Find(std::list<T>* list, const T& t) {
 | |
|     for (auto it = list->begin(); it != list->end(); ++it) {
 | |
|       if (Equal()(*it, t)) {
 | |
|         return it;
 | |
|       }
 | |
|     }
 | |
|     return list->end();
 | |
|   }
 | |
| 
 | |
|   bool Insert(Bucket* bucket, const T& t) {
 | |
|     // Check if the key already exists
 | |
|     auto it = Find(&bucket->list_, t);
 | |
|     if (it != bucket->list_.end()) {
 | |
|       return false;
 | |
|     }
 | |
| 
 | |
|     // insert to bucket
 | |
|     bucket->list_.push_back(t);
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   bool Find(Bucket* bucket, const T& t, T* ret) {
 | |
|     auto it = Find(&bucket->list_, t);
 | |
|     if (it != bucket->list_.end()) {
 | |
|       if (ret) {
 | |
|         *ret = *it;
 | |
|       }
 | |
|       return true;
 | |
|     }
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   bool Erase(Bucket* bucket, const T& t, T* ret) {
 | |
|     auto it = Find(&bucket->list_, t);
 | |
|     if (it != bucket->list_.end()) {
 | |
|       if (ret) {
 | |
|         *ret = *it;
 | |
|       }
 | |
| 
 | |
|       bucket->list_.erase(it);
 | |
|       return true;
 | |
|     }
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   // assert that all buckets are empty
 | |
|   void AssertEmptyBuckets() {
 | |
| #ifndef NDEBUG
 | |
|     for (size_t i = 0; i < nbuckets_; ++i) {
 | |
|       WriteLock _(&locks_[i % nlocks_]);
 | |
|       assert(buckets_[i].list_.empty());
 | |
|     }
 | |
| #endif
 | |
|   }
 | |
| 
 | |
|   const uint32_t nbuckets_;                 // No. of buckets in the spine
 | |
|   std::unique_ptr<Bucket[]> buckets_;       // Spine of the hash buckets
 | |
|   const uint32_t nlocks_;                   // No. of locks
 | |
|   std::unique_ptr<port::RWMutex[]> locks_;  // Granular locks
 | |
| };
 | |
| 
 | |
| }  // namespace rocksdb
 | |
| 
 | |
| #endif
 | |
| 
 |