fork of https://github.com/oxigraph/rocksdb and https://github.com/facebook/rocksdb for nextgraph and oxigraph
				
			
			
		
			You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							374 lines
						
					
					
						
							11 KiB
						
					
					
				
			
		
		
	
	
							374 lines
						
					
					
						
							11 KiB
						
					
					
				| //  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
 | |
| //  This source code is licensed under the BSD-style license found in the
 | |
| //  LICENSE file in the root directory of this source tree. An additional grant
 | |
| //  of patent rights can be found in the PATENTS file in the same directory.
 | |
| 
 | |
| #ifndef __STDC_FORMAT_MACROS
 | |
| #define __STDC_FORMAT_MACROS
 | |
| #endif
 | |
| 
 | |
| #if !defined(GFLAGS) || defined(ROCKSDB_LITE)
 | |
| #include <cstdio>
 | |
// Stub entry point compiled when gflags is not available or ROCKSDB_LITE is
// defined: prints a hint to stderr and exits with status 1.
int main() {
  static const char kMissingGflags[] =
      "Please install gflags to run rocksdb tools\n";
  fputs(kMissingGflags, stderr);
  return 1;
}
 | |
| #elif defined(OS_MACOSX) || defined(OS_WIN)
 | |
// forward_iterator_bench is disabled on macOS and Windows: on those platforms
// the program does nothing and exits successfully.
int main() {
  return 0;
}
 | |
| #else
 | |
#include <gflags/gflags.h>
#include <semaphore.h>
#include <atomic>
#include <bitset>
#include <cerrno>
#include <chrono>
#include <cinttypes>
#include <climits>
#include <condition_variable>
#include <limits>
#include <mutex>
#include <queue>
#include <random>
#include <thread>
 | |
| 
 | |
| #include "rocksdb/cache.h"
 | |
| #include "rocksdb/db.h"
 | |
| #include "rocksdb/status.h"
 | |
| #include "rocksdb/table.h"
 | |
| #include "util/testharness.h"
 | |
| 
 | |
| const int MAX_SHARDS = 100000;
 | |
| 
 | |
| DEFINE_int32(writers, 8, "");
 | |
| DEFINE_int32(readers, 8, "");
 | |
| DEFINE_int64(rate, 100000, "");
 | |
| DEFINE_int64(value_size, 300, "");
 | |
| DEFINE_int64(shards, 1000, "");
 | |
| DEFINE_int64(memtable_size, 500000000, "");
 | |
| DEFINE_int64(block_cache_size, 300000000, "");
 | |
| DEFINE_int64(block_size, 65536, "");
 | |
| DEFINE_double(runtime, 300.0, "");
 | |
| DEFINE_bool(cache_only_first, true, "");
 | |
| DEFINE_bool(iterate_upper_bound, true, "");
 | |
| 
 | |
// Process-wide benchmark counters, incremented by the writer and reader
// threads and sampled by the stats thread.
// The 128-byte pad arrays separate the write-side counter from the read-side
// counters (presumably to keep them on different cache lines -- confirm).
struct Stats {
  char pad1[128] __attribute__((__unused__));
  // Number of successful Put() calls.
  std::atomic<uint64_t> written{0};
  char pad2[128] __attribute__((__unused__));
  // Number of keys consumed by readers.
  std::atomic<uint64_t> read{0};
  // Number of times an iterator reported an Incomplete status.
  std::atomic<uint64_t> cache_misses{0};
  char pad3[128] __attribute__((__unused__));
};

// The single global instance, referenced elsewhere as ::stats.
Stats stats;
 | |
| 
 | |
// Fixed-size, 16-byte database key: (shard id, sequence number).
// Both fields are stored big-endian so that the raw bytes compare in the same
// order as the (shard, seqno) pair does numerically.
struct Key {
  // Builds the all-zero key. The fields are zero-initialised so that a
  // default-constructed Key never exposes indeterminate bytes.
  Key() = default;

  // Builds the key for `seqno_in` within `shard_in` (host byte order in).
  Key(uint64_t shard_in, uint64_t seqno_in)
      : shard_be(htobe64(shard_in)), seqno_be(htobe64(seqno_in)) {}

  // Shard id in host byte order.
  uint64_t shard() const { return be64toh(shard_be); }
  // Sequence number in host byte order.
  uint64_t seqno() const { return be64toh(seqno_be); }

 private:
  uint64_t shard_be = 0;
  uint64_t seqno_be = 0;
} __attribute__((__packed__));
 | |
| 
 | |
| struct Reader;
 | |
| struct Writer;
 | |
| 
 | |
| struct ShardState {
 | |
|   char pad1[128] __attribute__((__unused__));
 | |
|   std::atomic<uint64_t> last_written{0};
 | |
|   Writer* writer;
 | |
|   Reader* reader;
 | |
|   char pad2[128] __attribute__((__unused__));
 | |
|   std::atomic<uint64_t> last_read{0};
 | |
|   std::unique_ptr<rocksdb::Iterator> it;
 | |
|   std::unique_ptr<rocksdb::Iterator> it_cacheonly;
 | |
|   Key upper_bound;
 | |
|   rocksdb::Slice upper_bound_slice;
 | |
|   char pad3[128] __attribute__((__unused__));
 | |
| };
 | |
| 
 | |
| struct Reader {
 | |
|  public:
 | |
|   explicit Reader(std::vector<ShardState>* shard_states, rocksdb::DB* db)
 | |
|       : shard_states_(shard_states), db_(db) {
 | |
|     sem_init(&sem_, 0, 0);
 | |
|     thread_ = std::thread(&Reader::run, this);
 | |
|   }
 | |
| 
 | |
|   void run() {
 | |
|     while (1) {
 | |
|       sem_wait(&sem_);
 | |
|       if (done_.load()) {
 | |
|         break;
 | |
|       }
 | |
| 
 | |
|       uint64_t shard;
 | |
|       {
 | |
|         std::lock_guard<std::mutex> guard(queue_mutex_);
 | |
|         assert(!shards_pending_queue_.empty());
 | |
|         shard = shards_pending_queue_.front();
 | |
|         shards_pending_queue_.pop();
 | |
|         shards_pending_set_.reset(shard);
 | |
|       }
 | |
|       readOnceFromShard(shard);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   void readOnceFromShard(uint64_t shard) {
 | |
|     ShardState& state = (*shard_states_)[shard];
 | |
|     if (!state.it) {
 | |
|       // Initialize iterators
 | |
|       rocksdb::ReadOptions options;
 | |
|       options.tailing = true;
 | |
|       if (FLAGS_iterate_upper_bound) {
 | |
|         state.upper_bound = Key(shard, std::numeric_limits<uint64_t>::max());
 | |
|         state.upper_bound_slice = rocksdb::Slice(
 | |
|             (const char*)&state.upper_bound, sizeof(state.upper_bound));
 | |
|         options.iterate_upper_bound = &state.upper_bound_slice;
 | |
|       }
 | |
| 
 | |
|       state.it.reset(db_->NewIterator(options));
 | |
| 
 | |
|       if (FLAGS_cache_only_first) {
 | |
|         options.read_tier = rocksdb::ReadTier::kBlockCacheTier;
 | |
|         state.it_cacheonly.reset(db_->NewIterator(options));
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     const uint64_t upto = state.last_written.load();
 | |
|     for (rocksdb::Iterator* it : {state.it_cacheonly.get(), state.it.get()}) {
 | |
|       if (it == nullptr) {
 | |
|         continue;
 | |
|       }
 | |
|       if (state.last_read.load() >= upto) {
 | |
|         break;
 | |
|       }
 | |
|       bool need_seek = true;
 | |
|       for (uint64_t seq = state.last_read.load() + 1; seq <= upto; ++seq) {
 | |
|         if (need_seek) {
 | |
|           Key from(shard, state.last_read.load() + 1);
 | |
|           it->Seek(rocksdb::Slice((const char*)&from, sizeof(from)));
 | |
|           need_seek = false;
 | |
|         } else {
 | |
|           it->Next();
 | |
|         }
 | |
|         if (it->status().IsIncomplete()) {
 | |
|           ++::stats.cache_misses;
 | |
|           break;
 | |
|         }
 | |
|         assert(it->Valid());
 | |
|         assert(it->key().size() == sizeof(Key));
 | |
|         Key key;
 | |
|         memcpy(&key, it->key().data(), it->key().size());
 | |
|         // fprintf(stderr, "Expecting (%ld, %ld) read (%ld, %ld)\n",
 | |
|         //         shard, seq, key.shard(), key.seqno());
 | |
|         assert(key.shard() == shard);
 | |
|         assert(key.seqno() == seq);
 | |
|         state.last_read.store(seq);
 | |
|         ++::stats.read;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   void onWrite(uint64_t shard) {
 | |
|     {
 | |
|       std::lock_guard<std::mutex> guard(queue_mutex_);
 | |
|       if (!shards_pending_set_.test(shard)) {
 | |
|         shards_pending_queue_.push(shard);
 | |
|         shards_pending_set_.set(shard);
 | |
|         sem_post(&sem_);
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   ~Reader() {
 | |
|     done_.store(true);
 | |
|     sem_post(&sem_);
 | |
|     thread_.join();
 | |
|   }
 | |
| 
 | |
|  private:
 | |
|   char pad1[128] __attribute__((__unused__));
 | |
|   std::vector<ShardState>* shard_states_;
 | |
|   rocksdb::DB* db_;
 | |
|   std::thread thread_;
 | |
|   sem_t sem_;
 | |
|   std::mutex queue_mutex_;
 | |
|   std::bitset<MAX_SHARDS + 1> shards_pending_set_;
 | |
|   std::queue<uint64_t> shards_pending_queue_;
 | |
|   std::atomic<bool> done_{false};
 | |
|   char pad2[128] __attribute__((__unused__));
 | |
| };
 | |
| 
 | |
| struct Writer {
 | |
|   explicit Writer(std::vector<ShardState>* shard_states, rocksdb::DB* db)
 | |
|       : shard_states_(shard_states), db_(db) {}
 | |
| 
 | |
|   void start() { thread_ = std::thread(&Writer::run, this); }
 | |
| 
 | |
|   void run() {
 | |
|     std::queue<std::chrono::steady_clock::time_point> workq;
 | |
|     std::chrono::steady_clock::time_point deadline(
 | |
|         std::chrono::steady_clock::now() +
 | |
|         std::chrono::nanoseconds((uint64_t)(1000000000 * FLAGS_runtime)));
 | |
|     std::vector<uint64_t> my_shards;
 | |
|     for (int i = 1; i <= FLAGS_shards; ++i) {
 | |
|       if ((*shard_states_)[i].writer == this) {
 | |
|         my_shards.push_back(i);
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     std::mt19937 rng{std::random_device()()};
 | |
|     std::uniform_int_distribution<int> shard_dist(
 | |
|         0, static_cast<int>(my_shards.size()) - 1);
 | |
|     std::string value(FLAGS_value_size, '*');
 | |
| 
 | |
|     while (1) {
 | |
|       auto now = std::chrono::steady_clock::now();
 | |
|       if (FLAGS_runtime >= 0 && now >= deadline) {
 | |
|         break;
 | |
|       }
 | |
|       if (workq.empty()) {
 | |
|         for (int i = 0; i < FLAGS_rate; i += FLAGS_writers) {
 | |
|           std::chrono::nanoseconds offset(1000000000LL * i / FLAGS_rate);
 | |
|           workq.push(now + offset);
 | |
|         }
 | |
|       }
 | |
|       while (!workq.empty() && workq.front() < now) {
 | |
|         workq.pop();
 | |
|         uint64_t shard = my_shards[shard_dist(rng)];
 | |
|         ShardState& state = (*shard_states_)[shard];
 | |
|         uint64_t seqno = state.last_written.load() + 1;
 | |
|         Key key(shard, seqno);
 | |
|         // fprintf(stderr, "Writing (%ld, %ld)\n", shard, seqno);
 | |
|         rocksdb::Status status =
 | |
|             db_->Put(rocksdb::WriteOptions(),
 | |
|                      rocksdb::Slice((const char*)&key, sizeof(key)),
 | |
|                      rocksdb::Slice(value));
 | |
|         assert(status.ok());
 | |
|         state.last_written.store(seqno);
 | |
|         state.reader->onWrite(shard);
 | |
|         ++::stats.written;
 | |
|       }
 | |
|       std::this_thread::sleep_for(std::chrono::milliseconds(1));
 | |
|     }
 | |
|     // fprintf(stderr, "Writer done\n");
 | |
|   }
 | |
| 
 | |
|   ~Writer() { thread_.join(); }
 | |
| 
 | |
|  private:
 | |
|   char pad1[128] __attribute__((__unused__));
 | |
|   std::vector<ShardState>* shard_states_;
 | |
|   rocksdb::DB* db_;
 | |
|   std::thread thread_;
 | |
|   char pad2[128] __attribute__((__unused__));
 | |
| };
 | |
| 
 | |
| struct StatsThread {
 | |
|   explicit StatsThread(rocksdb::DB* db)
 | |
|       : db_(db), thread_(&StatsThread::run, this) {}
 | |
| 
 | |
|   void run() {
 | |
|     //    using namespace std::chrono;
 | |
|     auto tstart = std::chrono::steady_clock::now(), tlast = tstart;
 | |
|     uint64_t wlast = 0, rlast = 0;
 | |
|     while (!done_.load()) {
 | |
|       {
 | |
|         std::unique_lock<std::mutex> lock(cvm_);
 | |
|         cv_.wait_for(lock, std::chrono::seconds(1));
 | |
|       }
 | |
|       auto now = std::chrono::steady_clock::now();
 | |
|       double elapsed =
 | |
|           std::chrono::duration_cast<std::chrono::duration<double> >(
 | |
|               now - tlast).count();
 | |
|       uint64_t w = ::stats.written.load();
 | |
|       uint64_t r = ::stats.read.load();
 | |
|       fprintf(stderr,
 | |
|               "%s elapsed %4lds | written %10ld | w/s %10.0f | read %10ld | "
 | |
|               "r/s %10.0f | cache misses %10ld\n",
 | |
|               db_->GetEnv()->TimeToString(time(nullptr)).c_str(),
 | |
|               std::chrono::duration_cast<std::chrono::seconds>(now - tstart)
 | |
|                   .count(),
 | |
|               w, (w - wlast) / elapsed, r, (r - rlast) / elapsed,
 | |
|               ::stats.cache_misses.load());
 | |
|       wlast = w;
 | |
|       rlast = r;
 | |
|       tlast = now;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   ~StatsThread() {
 | |
|     {
 | |
|       std::lock_guard<std::mutex> guard(cvm_);
 | |
|       done_.store(true);
 | |
|     }
 | |
|     cv_.notify_all();
 | |
|     thread_.join();
 | |
|   }
 | |
| 
 | |
|  private:
 | |
|   rocksdb::DB* db_;
 | |
|   std::mutex cvm_;
 | |
|   std::condition_variable cv_;
 | |
|   std::thread thread_;
 | |
|   std::atomic<bool> done_{false};
 | |
| };
 | |
| 
 | |
| int main(int argc, char** argv) {
 | |
|   GFLAGS::ParseCommandLineFlags(&argc, &argv, true);
 | |
| 
 | |
|   std::mt19937 rng{std::random_device()()};
 | |
|   rocksdb::Status status;
 | |
|   std::string path = rocksdb::test::TmpDir() + "/forward_iterator_test";
 | |
|   fprintf(stderr, "db path is %s\n", path.c_str());
 | |
|   rocksdb::Options options;
 | |
|   options.create_if_missing = true;
 | |
|   options.compression = rocksdb::CompressionType::kNoCompression;
 | |
|   options.compaction_style = rocksdb::CompactionStyle::kCompactionStyleNone;
 | |
|   options.level0_slowdown_writes_trigger = 99999;
 | |
|   options.level0_stop_writes_trigger = 99999;
 | |
|   options.allow_os_buffer = false;
 | |
|   options.write_buffer_size = FLAGS_memtable_size;
 | |
|   rocksdb::BlockBasedTableOptions table_options;
 | |
|   table_options.block_cache = rocksdb::NewLRUCache(FLAGS_block_cache_size);
 | |
|   table_options.block_size = FLAGS_block_size;
 | |
|   options.table_factory.reset(
 | |
|       rocksdb::NewBlockBasedTableFactory(table_options));
 | |
| 
 | |
|   status = rocksdb::DestroyDB(path, options);
 | |
|   assert(status.ok());
 | |
|   rocksdb::DB* db_raw;
 | |
|   status = rocksdb::DB::Open(options, path, &db_raw);
 | |
|   assert(status.ok());
 | |
|   std::unique_ptr<rocksdb::DB> db(db_raw);
 | |
| 
 | |
|   std::vector<ShardState> shard_states(FLAGS_shards + 1);
 | |
|   std::deque<Reader> readers;
 | |
|   while (static_cast<int>(readers.size()) < FLAGS_readers) {
 | |
|     readers.emplace_back(&shard_states, db_raw);
 | |
|   }
 | |
|   std::deque<Writer> writers;
 | |
|   while (static_cast<int>(writers.size()) < FLAGS_writers) {
 | |
|     writers.emplace_back(&shard_states, db_raw);
 | |
|   }
 | |
| 
 | |
|   // Each shard gets a random reader and random writer assigned to it
 | |
|   for (int i = 1; i <= FLAGS_shards; ++i) {
 | |
|     std::uniform_int_distribution<int> reader_dist(0, FLAGS_readers - 1);
 | |
|     std::uniform_int_distribution<int> writer_dist(0, FLAGS_writers - 1);
 | |
|     shard_states[i].reader = &readers[reader_dist(rng)];
 | |
|     shard_states[i].writer = &writers[writer_dist(rng)];
 | |
|   }
 | |
| 
 | |
|   StatsThread stats_thread(db_raw);
 | |
|   for (Writer& w : writers) {
 | |
|     w.start();
 | |
|   }
 | |
| 
 | |
|   writers.clear();
 | |
|   readers.clear();
 | |
| }
 | |
| #endif  // !defined(GFLAGS) || defined(ROCKSDB_LITE)
 | |
| 
 |