From ef8b8a8ef601fb5cfb2d9d7228e565ced0e5fccf Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Mon, 21 Apr 2014 15:40:46 -0700 Subject: [PATCH] [Java] Add Java bindings for memtables and sst format. Summary: Add Java bindings for memtables and sst format. Specifically, add two abstract Java classes --- MemTableConfig and TableFormatConfig. Each MemTable / SST implementation should have its own config class that extends MemTableConfig / TableFormatConfig respectively and pass it to Options via setMemTableConfig / setTableFormatConfig. Test Plan: make rocksdbjava make jdb_test make jdb_bench java/jdb_bench.sh \ --benchmarks=fillseq,readrandom,readwhilewriting \ --memtablerep=hash_skiplist \ --use_plain_table=1 \ --key_size=20 \ --prefix_size=12 \ --value_size=100 \ --cache_size=17179869184 \ --disable_wal=0 \ --sync=0 \ Reviewers: haobo, ankgup87, sdong Reviewed By: haobo CC: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D17997 --- java/Makefile | 26 +-- java/RocksDBSample.java | 23 +++ java/jdb_bench.sh | 1 + .../rocksdb/HashLinkedListMemTableConfig.java | 52 ++++++ .../rocksdb/HashSkipListMemTableConfig.java | 97 +++++++++++ java/org/rocksdb/MemTableConfig.java | 27 +++ java/org/rocksdb/Options.java | 71 ++++++++ java/org/rocksdb/PlainTableConfig.java | 123 ++++++++++++++ java/org/rocksdb/SkipListMemTableConfig.java | 15 ++ java/org/rocksdb/TableFormatConfig.java | 20 +++ java/org/rocksdb/VectorMemTableConfig.java | 40 +++++ java/org/rocksdb/benchmark/DbBenchmark.java | 154 +++++++++++++++--- java/rocksjni/memtablejni.cc | 58 +++++++ java/rocksjni/options.cc | 70 ++++++++ java/rocksjni/table.cc | 25 +++ 15 files changed, 755 insertions(+), 47 deletions(-) create mode 100755 java/jdb_bench.sh create mode 100644 java/org/rocksdb/HashLinkedListMemTableConfig.java create mode 100644 java/org/rocksdb/HashSkipListMemTableConfig.java create mode 100644 java/org/rocksdb/MemTableConfig.java create mode 100644 java/org/rocksdb/PlainTableConfig.java create mode 
100644 java/org/rocksdb/SkipListMemTableConfig.java create mode 100644 java/org/rocksdb/TableFormatConfig.java create mode 100644 java/org/rocksdb/VectorMemTableConfig.java create mode 100644 java/rocksjni/memtablejni.cc create mode 100644 java/rocksjni/table.cc diff --git a/java/Makefile b/java/Makefile index 81b5afc79..fdee3ba28 100644 --- a/java/Makefile +++ b/java/Makefile @@ -1,4 +1,4 @@ -NATIVE_JAVA_CLASSES = org.rocksdb.RocksDB org.rocksdb.Options org.rocksdb.WriteBatch org.rocksdb.WriteBatchInternal org.rocksdb.WriteBatchTest org.rocksdb.WriteOptions org.rocksdb.BackupableDB org.rocksdb.BackupableDBOptions org.rocksdb.Statistics org.rocksdb.Iterator +NATIVE_JAVA_CLASSES = org.rocksdb.RocksDB org.rocksdb.Options org.rocksdb.WriteBatch org.rocksdb.WriteBatchInternal org.rocksdb.WriteBatchTest org.rocksdb.WriteOptions org.rocksdb.BackupableDB org.rocksdb.BackupableDBOptions org.rocksdb.Statistics org.rocksdb.Iterator org.rocksdb.VectorMemTableConfig org.rocksdb.SkipListMemTableConfig org.rocksdb.HashLinkedListMemTableConfig org.rocksdb.HashSkipListMemTableConfig org.rocksdb.PlainTableConfig NATIVE_INCLUDE = ./include ROCKSDB_JAR = rocksdbjni.jar @@ -28,27 +28,3 @@ test: java db_bench: java javac org/rocksdb/benchmark/*.java - rm -rf /tmp/rocksdbjni-bench - java -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.benchmark.DbBenchmark --threads=1 --benchmarks=fillseq,readrandom,readwhilewriting - rm -rf /tmp/rocksdbjni-bench - java -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.benchmark.DbBenchmark --threads=1 --benchmarks=fillseq,readrandom,readwhilewriting --cache_size=200000000 - rm -rf /tmp/rocksdbjni-bench - java -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.benchmark.DbBenchmark --threads=2 --benchmarks=fillseq,readrandom,readwhilewriting - rm -rf /tmp/rocksdbjni-bench - java -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.benchmark.DbBenchmark --threads=2 
--benchmarks=fillseq,readrandom,readwhilewriting --cache_size=200000000 - rm -rf /tmp/rocksdbjni-bench - java -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.benchmark.DbBenchmark --threads=4 --benchmarks=fillseq,readrandom,readwhilewriting - rm -rf /tmp/rocksdbjni-bench - java -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.benchmark.DbBenchmark --threads=4 --benchmarks=fillseq,readrandom,readwhilewriting --cache_size=200000000 - rm -rf /tmp/rocksdbjni-bench - java -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.benchmark.DbBenchmark --threads=8 --benchmarks=fillseq,readrandom,readwhilewriting - rm -rf /tmp/rocksdbjni-bench - java -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.benchmark.DbBenchmark --threads=8 --benchmarks=fillseq,readrandom,readwhilewriting --cache_size=200000000 - rm -rf /tmp/rocksdbjni-bench - java -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.benchmark.DbBenchmark --threads=16 --benchmarks=fillseq,readrandom,readwhilewriting - rm -rf /tmp/rocksdbjni-bench - java -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.benchmark.DbBenchmark --threads=16 --benchmarks=fillseq,readrandom,readwhilewriting --cache_size=200000000 - rm -rf /tmp/rocksdbjni-bench - java -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.benchmark.DbBenchmark --threads=32 --benchmarks=fillseq,readrandom,readwhilewriting - rm -rf /tmp/rocksdbjni-bench - java -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.benchmark.DbBenchmark --threads=32 --benchmarks=fillseq,readrandom,readwhilewriting --cache_size=200000000 diff --git a/java/RocksDBSample.java b/java/RocksDBSample.java index d33428a50..bdb800ec2 100644 --- a/java/RocksDBSample.java +++ b/java/RocksDBSample.java @@ -48,6 +48,29 @@ public class RocksDBSample { assert(options.blockSize() == 64 * SizeUnit.KB); assert(options.maxBackgroundCompactions() == 10); + 
assert(options.memTableFactoryName().equals("SkipListFactory")); + options.setMemTableConfig( + new HashSkipListMemTableConfig() + .setHeight(4) + .setBranchingFactor(4) + .setBucketCount(2000000)); + assert(options.memTableFactoryName().equals("HashSkipListRepFactory")); + + options.setMemTableConfig( + new HashLinkedListMemTableConfig() + .setBucketCount(100000)); + assert(options.memTableFactoryName().equals("HashLinkedListRepFactory")); + + options.setMemTableConfig( + new VectorMemTableConfig().setReservedSize(10000)); + assert(options.memTableFactoryName().equals("VectorRepFactory")); + + options.setMemTableConfig(new SkipListMemTableConfig()); + assert(options.memTableFactoryName().equals("SkipListFactory")); + + options.setTableFormatConfig(new PlainTableConfig()); + assert(options.tableFactoryName().equals("PlainTable")); + try { db = RocksDB.open(options, db_path_not_found); db.put("hello".getBytes(), "world".getBytes()); diff --git a/java/jdb_bench.sh b/java/jdb_bench.sh new file mode 100755 index 000000000..3fb610c20 --- /dev/null +++ b/java/jdb_bench.sh @@ -0,0 +1 @@ +java -Djava.library.path=.:../ -cp "rocksdbjni.jar:.:./*" org.rocksdb.benchmark.DbBenchmark "$@" diff --git a/java/org/rocksdb/HashLinkedListMemTableConfig.java b/java/org/rocksdb/HashLinkedListMemTableConfig.java new file mode 100644 index 000000000..24fcd8b52 --- /dev/null +++ b/java/org/rocksdb/HashLinkedListMemTableConfig.java @@ -0,0 +1,52 @@ +package org.rocksdb; + +/** + * The config for hash linked list memtable representation + * Such memtable contains a fix-sized array of buckets, where + * each bucket points to a sorted singly-linked + * list (or null if the bucket is empty). + * + * Note that since this mem-table representation relies on the + * key prefix, it is required to invoke one of the usePrefixExtractor + * functions to specify how to extract key prefix given a key. 
+ * If proper prefix-extractor is not set, then RocksDB will + * use the default memtable representation (SkipList) instead + * and post a warning in the LOG. + */ +public class HashLinkedListMemTableConfig extends MemTableConfig { + public static final long DEFAULT_BUCKET_COUNT = 50000; + + public HashLinkedListMemTableConfig() { + bucketCount_ = DEFAULT_BUCKET_COUNT; + } + + /** + * Set the number of buckets in the fixed-size array used + * in the hash linked-list mem-table. + * + * @param count the number of hash buckets. + * @return the reference to the current HashLinkedListMemTableConfig. + */ + public HashLinkedListMemTableConfig setBucketCount(long count) { + bucketCount_ = count; + return this; + } + + /** + * Returns the number of buckets that will be used in the memtable + * created based on this config. + * + * @return the number of buckets + */ + public long bucketCount() { + return bucketCount_; + } + + @Override protected long newMemTableFactoryHandle() { + return newMemTableFactoryHandle(bucketCount_); + } + + private native long newMemTableFactoryHandle(long bucketCount); + + private long bucketCount_; +} diff --git a/java/org/rocksdb/HashSkipListMemTableConfig.java b/java/org/rocksdb/HashSkipListMemTableConfig.java new file mode 100644 index 000000000..74fb0dba2 --- /dev/null +++ b/java/org/rocksdb/HashSkipListMemTableConfig.java @@ -0,0 +1,97 @@ +package org.rocksdb; + +/** + * The config for hash skip-list mem-table representation. + * Such mem-table representation contains a fix-sized array of + * buckets, where each bucket points to a skiplist (or null if the + * bucket is empty). + * + * Note that since this mem-table representation relies on the + * key prefix, it is required to invoke one of the usePrefixExtractor + * functions to specify how to extract key prefix given a key. + * If proper prefix-extractor is not set, then RocksDB will + * use the default memtable representation (SkipList) instead + * and post a warning in the LOG. 
+ */ +public class HashSkipListMemTableConfig extends MemTableConfig { + public static final int DEFAULT_BUCKET_COUNT = 1000000; + public static final int DEFAULT_BRANCHING_FACTOR = 4; + public static final int DEFAULT_HEIGHT = 4; + + public HashSkipListMemTableConfig() { + bucketCount_ = DEFAULT_BUCKET_COUNT; + branchingFactor_ = DEFAULT_BRANCHING_FACTOR; + height_ = DEFAULT_HEIGHT; + } + + /** + * Set the number of hash buckets used in the hash skiplist memtable. + * Default = 1000000. + * + * @param count the number of hash buckets used in the hash + * skiplist memtable. + * @return the reference to the current HashSkipListMemTableConfig. + */ + public HashSkipListMemTableConfig setBucketCount(long count) { + bucketCount_ = count; + return this; + } + + /** + * @return the number of hash buckets + */ + public long bucketCount() { + return bucketCount_; + } + + /** + * Set the height of the skip list. Default = 4. + * + * @return the reference to the current HashSkipListMemTableConfig. + */ + public HashSkipListMemTableConfig setHeight(int height) { + height_ = height; + return this; + } + + /** + * @return the height of the skip list. + */ + public int height() { + return height_; + } + + /** + * Set the branching factor used in the hash skip-list memtable. + * This factor controls the probabilistic size ratio between adjacent + * links in the skip list. + * + * @param bf the probabilistic size ratio between adjacent link + * lists in the skip list. + * @return the reference to the current HashSkipListMemTableConfig. + */ + public HashSkipListMemTableConfig setBranchingFactor(int bf) { + branchingFactor_ = bf; + return this; + } + + /** + * @return branching factor, the probabilistic size ratio between + * adjacent links in the skip list. 
+ */ + public int branchingFactor() { + return branchingFactor_; + } + + @Override protected long newMemTableFactoryHandle() { + return newMemTableFactoryHandle( + bucketCount_, height_, branchingFactor_); + } + + private native long newMemTableFactoryHandle( + long bucketCount, int height, int branchingFactor); + + private long bucketCount_; + private int branchingFactor_; + private int height_; +} diff --git a/java/org/rocksdb/MemTableConfig.java b/java/org/rocksdb/MemTableConfig.java new file mode 100644 index 000000000..a473c2585 --- /dev/null +++ b/java/org/rocksdb/MemTableConfig.java @@ -0,0 +1,27 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +package org.rocksdb; + +/** + * MemTableConfig is used to config the internal mem-table of a RocksDB. + * It is required for each memtable to have one such sub-class to allow + * Java developers to use it. + * + * To make a RocksDB to use a specific MemTable format, its associated + * MemTableConfig should be properly set and passed into Options + * via Options.setMemTableConfig() and open the db using that Options. + * + * @see Options + */ +public abstract class MemTableConfig { + /** + * This function should only be called by Options.setMemTableConfig(), + * which will create a c++ shared-pointer to the c++ MemTableRepFactory + * that is associated with the Java MemTableConfig. 
+ * + * @see Options.setMemTableConfig() + */ + abstract protected long newMemTableFactoryHandle(); +} diff --git a/java/org/rocksdb/Options.java b/java/org/rocksdb/Options.java index 13b35e434..287ef29fa 100644 --- a/java/org/rocksdb/Options.java +++ b/java/org/rocksdb/Options.java @@ -1129,6 +1129,64 @@ public class Options { private native void setAllowThreadLocal( long handle, boolean allowThreadLocal); + /** + * Set the config for mem-table. + * + * @param config the mem-table config. + * @return the instance of the current Options. + */ + public Options setMemTableConfig(MemTableConfig config) { + setMemTableFactory(nativeHandle_, config.newMemTableFactoryHandle()); + return this; + } + + /** + * Returns the name of the current mem table representation. + * Memtable format can be set using setMemTableConfig. + * + * @return the name of the currently-used memtable factory. + * @see setMemTableConfig() + */ + public String memTableFactoryName() { + assert(isInitialized()); + return memTableFactoryName(nativeHandle_); + } + + /** + * Set the config for table format. + * + * @param config the table format config. + * @return the reference of the current Options. + */ + public Options setTableFormatConfig(TableFormatConfig config) { + setTableFactory(nativeHandle_, config.newTableFactoryHandle()); + return this; + } + + /** + * @return the name of the currently used table factory. + */ + public String tableFactoryName() { + assert(isInitialized()); + return tableFactoryName(nativeHandle_); + } + + /** + * This prefix-extractor uses the first n bytes of a key as its prefix. + * + * In some hash-based memtable representation such as HashLinkedList + * and HashSkipList, prefixes are used to partition the keys into + * several buckets. Prefix extractor is used to specify how to + * extract the prefix given a key. + * + * @param n use the first n bytes of a key as its prefix. 
+ */ + public Options useFixedLengthPrefixExtractor(int n) { + assert(isInitialized()); + useFixedLengthPrefixExtractor(nativeHandle_, n); + return this; + } + /** * Release the memory allocated for the current instance * in the c++ side. @@ -1147,6 +1205,10 @@ public class Options { return (nativeHandle_ != 0); } + static final int DEFAULT_PLAIN_TABLE_BLOOM_BITS_PER_KEY = 10; + static final double DEFAULT_PLAIN_TABLE_HASH_TABLE_RATIO = 0.75; + static final int DEFAULT_PLAIN_TABLE_INDEX_SPARSENESS = 16; + private native void newOptions(); private native void dispose0(); private native void setCreateIfMissing(long handle, boolean flag); @@ -1167,6 +1229,15 @@ public class Options { private native void createStatistics(long optHandle); private native long statisticsPtr(long optHandle); + private native void setMemTableFactory(long handle, long factoryHandle); + private native String memTableFactoryName(long handle); + + private native void setTableFactory(long handle, long factoryHandle); + private native String tableFactoryName(long handle); + + private native void useFixedLengthPrefixExtractor( + long handle, int prefixLength); + long nativeHandle_; long cacheSize_; } diff --git a/java/org/rocksdb/PlainTableConfig.java b/java/org/rocksdb/PlainTableConfig.java new file mode 100644 index 000000000..554ce3840 --- /dev/null +++ b/java/org/rocksdb/PlainTableConfig.java @@ -0,0 +1,123 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +package org.rocksdb; + +/** + * The config for plain table sst format. + * + * PlainTable is a RocksDB's SST file format optimized for low query latency + * on pure-memory or really low-latency media. It also support prefix + * hash feature. 
+ */ +public class PlainTableConfig extends TableFormatConfig { + public static final int VARIABLE_LENGTH = 0; + public static final int DEFAULT_BLOOM_BITS_PER_KEY = 10; + public static final double DEFAULT_HASH_TABLE_RATIO = 0.75; + public static final int DEFAULT_INDEX_SPARSENESS = 16; + + public PlainTableConfig() { + keySize_ = VARIABLE_LENGTH; + bloomBitsPerKey_ = DEFAULT_BLOOM_BITS_PER_KEY; + hashTableRatio_ = DEFAULT_HASH_TABLE_RATIO; + indexSparseness_ = DEFAULT_INDEX_SPARSENESS; + } + + /** + * Set the length of the user key. If it is set to be VARIABLE_LENGTH, + * then it indicates the user keys are variable-length. Otherwise, + * all the keys need to have the same length in bytes. + * DEFAULT: VARIABLE_LENGTH + * + * @param keySize the length of the user key. + * @return the reference to the current config. + */ + public PlainTableConfig setKeySize(int keySize) { + keySize_ = keySize; + return this; + } + + /** + * @return the specified size of the user key. If VARIABLE_LENGTH, + * then it indicates variable-length key. + */ + public int keySize() { + return keySize_; + } + + /** + * Set the number of bits per key used by the internal bloom filter + * in the plain table sst format. + * + * @param bitsPerKey the number of bits per key for bloom filter. + * @return the reference to the current config. + */ + public PlainTableConfig setBloomBitsPerKey(int bitsPerKey) { + bloomBitsPerKey_ = bitsPerKey; + return this; + } + + /** + * @return the number of bits per key used for the bloom filter. + */ + public int bloomBitsPerKey() { + return bloomBitsPerKey_; + } + + /** + * hashTableRatio is the desired utilization of the hash table used + * for prefix hashing. The ideal ratio would be the number of + * prefixes / the number of hash buckets. If this value is set to + * zero, then hash table will not be used. + * + * @param ratio the hash table ratio. + * @return the reference to the current config. 
+ */ + public PlainTableConfig setHashTableRatio(double ratio) { + hashTableRatio_ = ratio; + return this; + } + + /** + * @return the hash table ratio. + */ + public double hashTableRatio() { + return hashTableRatio_; + } + + /** + * Index sparseness determines the index interval for keys inside the + * same prefix. This number is equal to the maximum number of linear + * search required after hash and binary search. If it's set to 0, + * then each key will be indexed. + * + * @param sparseness the index sparseness. + * @return the reference to the current config. + */ + public PlainTableConfig setIndexSparseness(int sparseness) { + indexSparseness_ = sparseness; + return this; + } + + /** + * @return the index sparseness. + */ + public int indexSparseness() { + return indexSparseness_; + } + + @Override protected long newTableFactoryHandle() { + return newTableFactoryHandle(keySize_, bloomBitsPerKey_, + hashTableRatio_, indexSparseness_); + } + + private native long newTableFactoryHandle( + int keySize, int bloomBitsPerKey, + double hashTableRatio, int indexSparseness); + + private int keySize_; + private int bloomBitsPerKey_; + private double hashTableRatio_; + private int indexSparseness_; +} diff --git a/java/org/rocksdb/SkipListMemTableConfig.java b/java/org/rocksdb/SkipListMemTableConfig.java new file mode 100644 index 000000000..7f9f5cb5f --- /dev/null +++ b/java/org/rocksdb/SkipListMemTableConfig.java @@ -0,0 +1,15 @@ +package org.rocksdb; + +/** + * The config for skip-list memtable representation. 
+ */ +public class SkipListMemTableConfig extends MemTableConfig { + public SkipListMemTableConfig() { + } + + @Override protected long newMemTableFactoryHandle() { + return newMemTableFactoryHandle0(); + } + + private native long newMemTableFactoryHandle0(); +} diff --git a/java/org/rocksdb/TableFormatConfig.java b/java/org/rocksdb/TableFormatConfig.java new file mode 100644 index 000000000..e5c63411f --- /dev/null +++ b/java/org/rocksdb/TableFormatConfig.java @@ -0,0 +1,20 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +package org.rocksdb; + +/** + * TableFormatConfig is used to config the internal Table format of a RocksDB. + * To make a RocksDB to use a specific Table format, its associated + * TableFormatConfig should be properly set and passed into Options via + * Options.setTableFormatConfig() and open the db using that Options. + */ +public abstract class TableFormatConfig { + /** + * This function should only be called by Options.setTableFormatConfig(), + * which will create a c++ shared-pointer to the c++ TableFactory + * that associated with the Java TableFormatConfig. + */ + abstract protected long newTableFactoryHandle(); +} diff --git a/java/org/rocksdb/VectorMemTableConfig.java b/java/org/rocksdb/VectorMemTableConfig.java new file mode 100644 index 000000000..b7a413f19 --- /dev/null +++ b/java/org/rocksdb/VectorMemTableConfig.java @@ -0,0 +1,40 @@ +package org.rocksdb; + +/** + * The config for vector memtable representation. 
+ */ +public class VectorMemTableConfig extends MemTableConfig { + public static final int DEFAULT_RESERVED_SIZE = 0; + public VectorMemTableConfig() { + reservedSize_ = DEFAULT_RESERVED_SIZE; + } + + /** + * Set the initial size of the vector that will be used + * by the memtable created based on this config. + * + * @param size the initial size of the vector. + * @return the reference to the current config. + */ + public VectorMemTableConfig setReservedSize(int size) { + reservedSize_ = size; + return this; + } + + /** + * Returns the initial size of the vector used by the memtable + * created based on this config. + * + * @return the initial size of the vector. + */ + public int reservedSize() { + return reservedSize_; + } + + @Override protected long newMemTableFactoryHandle() { + return newMemTableFactoryHandle(reservedSize_); + } + + private native long newMemTableFactoryHandle(long reservedSize); + private int reservedSize_; +} diff --git a/java/org/rocksdb/benchmark/DbBenchmark.java b/java/org/rocksdb/benchmark/DbBenchmark.java index 610269ea1..0106413cf 100644 --- a/java/org/rocksdb/benchmark/DbBenchmark.java +++ b/java/org/rocksdb/benchmark/DbBenchmark.java @@ -39,6 +39,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import org.rocksdb.*; +import org.rocksdb.util.SizeUnit; class Stats { int id_; @@ -187,11 +188,11 @@ public class DbBenchmark { protected Stats stats_; protected void getFixedKey(byte[] key, long sn) { - DbBenchmark.formatNumber(key, sn); + generateKeyFromLong(key, sn); } protected void getRandomKey(byte[] key, long range) { - DbBenchmark.formatNumber(key, Math.abs(rand_.nextLong() % range)); + generateKeyFromLong(key, Math.abs(rand_.nextLong() % range)); } } @@ -353,21 +354,27 @@ public class DbBenchmark { public DbBenchmark(Map flags) throws Exception { benchmarks_ = (List) flags.get(Flag.benchmarks); - num_ = (int) flags.get(Flag.num); - threadNum_ = (int) 
flags.get(Flag.threads); - reads_ = (int) (flags.get(Flag.reads) == null ? + num_ = (Integer) flags.get(Flag.num); + threadNum_ = (Integer) flags.get(Flag.threads); + reads_ = (Integer) (flags.get(Flag.reads) == null ? flags.get(Flag.num) : flags.get(Flag.reads)); - keySize_ = (int) flags.get(Flag.key_size); - valueSize_ = (int) flags.get(Flag.value_size); - writeBufferSize_ = (int) flags.get(Flag.write_buffer_size) > 0 ? - (int) flags.get(Flag.write_buffer_size) : 0; - compressionRatio_ = (double) flags.get(Flag.compression_ratio); - useExisting_ = (boolean) flags.get(Flag.use_existing_db); - randSeed_ = (long) flags.get(Flag.seed); + keySize_ = (Integer) flags.get(Flag.key_size); + valueSize_ = (Integer) flags.get(Flag.value_size); + writeBufferSize_ = (Integer) flags.get(Flag.write_buffer_size) > 0 ? + (Integer) flags.get(Flag.write_buffer_size) : 0; + compressionRatio_ = (Double) flags.get(Flag.compression_ratio); + useExisting_ = (Boolean) flags.get(Flag.use_existing_db); + randSeed_ = (Long) flags.get(Flag.seed); databaseDir_ = (String) flags.get(Flag.db); - writesPerSeconds_ = (int) flags.get(Flag.writes_per_second); - cacheSize_ = (long) flags.get(Flag.cache_size); + writesPerSeconds_ = (Integer) flags.get(Flag.writes_per_second); + cacheSize_ = (Long) flags.get(Flag.cache_size); gen_ = new RandomGenerator(compressionRatio_); + memtable_ = (String) flags.get(Flag.memtablerep); + maxWriteBufferNumber_ = (Integer) flags.get(Flag.max_write_buffer_number); + prefixSize_ = (Integer) flags.get(Flag.prefix_size); + keysPerPrefix_ = (Integer) flags.get(Flag.keys_per_prefix); + hashBucketCount_ = (Long) flags.get(Flag.hash_bucket_count); + usePlainTable_ = (Boolean) flags.get(Flag.use_plain_table); finishLock_ = new Object(); } @@ -376,6 +383,31 @@ public class DbBenchmark { if (!useExisting_) { options.setCreateIfMissing(true); } + if (memtable_.equals("skip_list")) { + options.setMemTableConfig(new SkipListMemTableConfig()); + } else if 
(memtable_.equals("vector")) { + options.setMemTableConfig(new VectorMemTableConfig()); + } else if (memtable_.equals("hash_linkedlist")) { + options.setMemTableConfig( + new HashLinkedListMemTableConfig() + .setBucketCount(hashBucketCount_)); + options.useFixedLengthPrefixExtractor(prefixSize_); + } else if (memtable_.equals("hash_skiplist") || + memtable_.equals("prefix_hash")) { + options.setMemTableConfig( + new HashSkipListMemTableConfig() + .setBucketCount(hashBucketCount_)); + options.useFixedLengthPrefixExtractor(prefixSize_); + } else { + System.err.format( + "unable to detect the specified memtable, " + + "use the default memtable factory %s%n", + options.memTableFactoryName()); + } + if (usePlainTable_) { + options.setTableFormatConfig( + new PlainTableConfig().setKeySize(keySize_)); + } } private void run() throws RocksDBException { @@ -386,7 +418,7 @@ public class DbBenchmark { prepareOptions(options); open(options); - printHeader(); + printHeader(options); for (String benchmark : benchmarks_) { List> tasks = new ArrayList>(); @@ -481,7 +513,7 @@ public class DbBenchmark { db_.close(); } - private void printHeader() { + private void printHeader(Options options) { int kKeySize = 16; System.out.printf("Keys: %d bytes each\n", kKeySize); System.out.printf("Values: %d bytes each (%d bytes after compression)\n", @@ -493,6 +525,8 @@ public class DbBenchmark { System.out.printf("FileSize: %.1f MB (estimated)\n", (((kKeySize + valueSize_ * compressionRatio_) * num_) / 1048576.0)); + System.out.format("Memtable Factory: %s%n", options.memTableFactoryName()); + System.out.format("Prefix: %d bytes%n", prefixSize_); printWarnings(); System.out.printf("------------------------------------------------\n"); } @@ -544,10 +578,25 @@ public class DbBenchmark { taskFinishedCount, concurrentThreads); } - public static void formatNumber(byte[] slice, long n) { + public void generateKeyFromLong(byte[] slice, long n) { assert(n >= 0); + int startPos = 0; + + if (keysPerPrefix_ 
> 0) { + long numPrefix = (num_ + keysPerPrefix_ - 1) / keysPerPrefix_; + long prefix = n % numPrefix; + int bytesToFill = Math.min(prefixSize_, 8); + for (int i = 0; i < bytesToFill; ++i) { + slice[i] = (byte) (prefix % 256); + prefix /= 256; + } + for (int i = 8; i < Math.min(prefixSize_, slice.length); ++i) { + slice[i] = '0'; + } + startPos = prefixSize_; + } - for (int i = slice.length - 1; i >= 0; --i) { + for (int i = slice.length - 1; i >= startPos; --i) { slice[i] = (byte) ('0' + (n % 10)); n /= 10; } @@ -654,7 +703,7 @@ public class DbBenchmark { use_existing_db(false, "If true, do not destroy the existing database. If you set this\n" + - "\tflag and also specify a benchmark that wants a fresh database," + + "\tflag and also specify a benchmark that wants a fresh database,\n" + "\tthat benchmark will fail.") { @Override public Object parseValue(String value) { return Boolean.parseBoolean(value); @@ -705,15 +754,65 @@ public class DbBenchmark { } }, + max_write_buffer_number(2, + "The number of in-memory memtables. Each memtable is of size\n" + + "\twrite_buffer_size.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + + prefix_size(0, "Controls the prefix size for HashSkipList, HashLinkedList,\n" + + "\tand plain table.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + + keys_per_prefix(0, "Controls the average number of keys generated\n" + + "\tper prefix, 0 means no special handling of the prefix,\n" + + "\ti.e. use the prefix comes with the generated random number.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + + memtablerep("skip_list", + "The memtable format. 
Available options are\n" + + "\tskip_list,\n" + + "\tvector,\n" + + "\thash_linkedlist,\n" + + "\thash_skiplist (prefix_hash.)") { + @Override public Object parseValue(String value) { + return value; + } + }, + + hash_bucket_count(SizeUnit.MB, + "The number of hash buckets used in the hash-bucket-based\n" + + "\tmemtables. Memtables that currently support this argument are\n" + + "\thash_linkedlist and hash_skiplist.") { + @Override public Object parseValue(String value) { + return Long.parseLong(value); + } + }, + writes_per_second(10000, "The write-rate of the background writer used in the\n" + - "`readwhilewriting` benchmark. Non-positive number indicates\n" + - "using an unbounded write-rate in `readwhilewriting` benchmark.") { + "\t`readwhilewriting` benchmark. Non-positive number indicates\n" + + "\tusing an unbounded write-rate in `readwhilewriting` benchmark.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, + use_plain_table(false, + "Use plain-table sst format.") { + @Override public Object parseValue(String value) { + return Boolean.parseBoolean(value); + } + }, + cache_size(-1L, "Number of bytes to use as a cache of uncompressed data.\n" + "\tNegative means use default settings.") { @@ -801,7 +900,6 @@ public class DbBenchmark { final int reads_; final int keySize_; final int valueSize_; - final int writeBufferSize_; final int threadNum_; final int writesPerSeconds_; final long randSeed_; @@ -811,6 +909,18 @@ public class DbBenchmark { final double compressionRatio_; RandomGenerator gen_; long startTime_; + + // memtable related + final int writeBufferSize_; + final int maxWriteBufferNumber_; + final int prefixSize_; + final int keysPerPrefix_; + final String memtable_; + final long hashBucketCount_; + + // sst format related + boolean usePlainTable_; + Object finishLock_; boolean isFinished_; } diff --git a/java/rocksjni/memtablejni.cc b/java/rocksjni/memtablejni.cc new file mode 100644 index 
000000000..a0d50f5f5 --- /dev/null +++ b/java/rocksjni/memtablejni.cc @@ -0,0 +1,58 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// +// This file implements the "bridge" between Java and C++ for MemTables. + +#include "include/org_rocksdb_HashSkipListMemTableConfig.h" +#include "include/org_rocksdb_HashLinkedListMemTableConfig.h" +#include "include/org_rocksdb_VectorMemTableConfig.h" +#include "include/org_rocksdb_SkipListMemTableConfig.h" +#include "rocksdb/memtablerep.h" + +/* + * Class: org_rocksdb_HashSkipListMemTableConfig + * Method: newMemTableFactoryHandle + * Signature: (JII)J + */ +jlong Java_org_rocksdb_HashSkipListMemTableConfig_newMemTableFactoryHandle( + JNIEnv* env, jobject jobj, jlong jbucket_count, + jint jheight, jint jbranching_factor) { + return reinterpret_cast(rocksdb::NewHashSkipListRepFactory( + static_cast(jbucket_count), + static_cast(jheight), + static_cast(jbranching_factor))); +} + +/* + * Class: org_rocksdb_HashLinkedListMemTableConfig + * Method: newMemTableFactoryHandle + * Signature: (J)J + */ +jlong Java_org_rocksdb_HashLinkedListMemTableConfig_newMemTableFactoryHandle( + JNIEnv* env, jobject jobj, jlong jbucket_count) { + return reinterpret_cast(rocksdb::NewHashLinkListRepFactory( + static_cast(jbucket_count))); +} + +/* + * Class: org_rocksdb_VectorMemTableConfig + * Method: newMemTableFactoryHandle + * Signature: (J)J + */ +jlong Java_org_rocksdb_VectorMemTableConfig_newMemTableFactoryHandle( + JNIEnv* env, jobject jobj, jlong jreserved_size) { + return reinterpret_cast(new rocksdb::VectorRepFactory( + static_cast(jreserved_size))); +} + +/* + * Class: org_rocksdb_SkipListMemTableConfig + * Method: newMemTableFactoryHandle0 + * Signature: ()J + */ +jlong 
Java_org_rocksdb_SkipListMemTableConfig_newMemTableFactoryHandle0( + JNIEnv* env, jobject jobj) { + return reinterpret_cast(new rocksdb::SkipListFactory()); +} diff --git a/java/rocksjni/options.cc b/java/rocksjni/options.cc index 50aad034f..6b847772f 100644 --- a/java/rocksjni/options.cc +++ b/java/rocksjni/options.cc @@ -17,6 +17,9 @@ #include "rocksdb/db.h" #include "rocksdb/options.h" #include "rocksdb/statistics.h" +#include "rocksdb/memtablerep.h" +#include "rocksdb/table.h" +#include "rocksdb/slice_transform.h" /* * Class: org_rocksdb_Options @@ -479,6 +482,27 @@ jlong Java_org_rocksdb_Options_maxManifestFileSize( return reinterpret_cast(jhandle)->max_manifest_file_size; } +/* + * Method: memTableFactoryName + * Signature: (J)Ljava/lang/String; + */ +jstring Java_org_rocksdb_Options_memTableFactoryName( + JNIEnv* env, jobject jobj, jlong jhandle) { + auto opt = reinterpret_cast(jhandle); + rocksdb::MemTableRepFactory* tf = opt->memtable_factory.get(); + + // Should never be nullptr.
+ // Default memtable factory is SkipListFactory + assert(tf); + + // temporary fix for the historical typo + if (strcmp(tf->Name(), "HashLinkListRepFactory") == 0) { + return env->NewStringUTF("HashLinkedListRepFactory"); + } + + return env->NewStringUTF(tf->Name()); +} + /* * Class: org_rocksdb_Options * Method: setMaxManifestFileSize @@ -490,6 +514,16 @@ void Java_org_rocksdb_Options_setMaxManifestFileSize( static_cast(max_manifest_file_size); } +/* + * Method: setMemTableFactory + * Signature: (JJ)V + */ +void Java_org_rocksdb_Options_setMemTableFactory( + JNIEnv* env, jobject jobj, jlong jhandle, jlong jfactory_handle) { + reinterpret_cast(jhandle)->memtable_factory.reset( + reinterpret_cast(jfactory_handle)); +} + /* * Class: org_rocksdb_Options * Method: tableCacheNumshardbits @@ -533,6 +567,16 @@ void Java_org_rocksdb_Options_setTableCacheRemoveScanCountLimit( jhandle)->table_cache_remove_scan_count_limit = static_cast(limit); } +/* + * Method: useFixedLengthPrefixExtractor + * Signature: (JI)V + */ +void Java_org_rocksdb_Options_useFixedLengthPrefixExtractor( + JNIEnv* env, jobject jobj, jlong jhandle, jint jprefix_length) { + reinterpret_cast(jhandle)->prefix_extractor.reset( + rocksdb::NewFixedPrefixTransform(static_cast(jprefix_length))); +} + /* * Class: org_rocksdb_Options * Method: walTtlSeconds @@ -597,6 +641,16 @@ void Java_org_rocksdb_Options_setAllowOsBuffer( static_cast(allow_os_buffer); } +/* + * Method: setTableFactory + * Signature: (JJ)V + */ +void Java_org_rocksdb_Options_setTableFactory( + JNIEnv* env, jobject jobj, jlong jhandle, jlong jfactory_handle) { + reinterpret_cast(jhandle)->table_factory.reset( + reinterpret_cast(jfactory_handle)); +} + /* * Class: org_rocksdb_Options * Method: allowMmapReads @@ -787,6 +841,22 @@ void Java_org_rocksdb_Options_setAllowThreadLocal( static_cast(allow_thread_local); } +/* + * Method: tableFactoryName + * Signature: (J)Ljava/lang/String; + */ +jstring Java_org_rocksdb_Options_tableFactoryName( +
JNIEnv* env, jobject jobj, jlong jhandle) { + auto opt = reinterpret_cast(jhandle); + rocksdb::TableFactory* tf = opt->table_factory.get(); + + // Should never be nullptr. + // Default table factory is the block-based table factory + assert(tf); + + return env->NewStringUTF(tf->Name()); +} + ////////////////////////////////////////////////////////////////////////////// // WriteOptions diff --git a/java/rocksjni/table.cc b/java/rocksjni/table.cc new file mode 100644 index 000000000..c21501bb4 --- /dev/null +++ b/java/rocksjni/table.cc @@ -0,0 +1,25 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// +// This file implements the "bridge" between Java and C++ for rocksdb::TableFactory. + +#include <jni.h> +#include "include/org_rocksdb_PlainTableConfig.h" +#include "rocksdb/table.h" + +/* + * Class: org_rocksdb_PlainTableConfig + * Method: newTableFactoryHandle + * Signature: (IIDI)J + */ +jlong Java_org_rocksdb_PlainTableConfig_newTableFactoryHandle( + JNIEnv* env, jobject jobj, jint jkey_size, jint jbloom_bits_per_key, + jdouble jhash_table_ratio, jint jindex_sparseness) { + return reinterpret_cast(rocksdb::NewPlainTableFactory( + static_cast(jkey_size), + static_cast(jbloom_bits_per_key), + static_cast(jhash_table_ratio), + static_cast(jindex_sparseness))); +}