From 2dd9bfe3a84f7cfe4b9e684f7dd8e8044d5f5de4 Mon Sep 17 00:00:00 2001 From: Lei Jin Date: Fri, 17 Oct 2014 21:18:36 -0700 Subject: [PATCH 01/33] Sanitize block-based table index type and check prefix_extractor Summary: Respond to issue reported https://www.facebook.com/groups/rocksdb.dev/permalink/651090261656158/ Change the Sanitize signature to take both DBOptions and CFOptions Test Plan: unit test Reviewers: sdong, yhchiang, igor Reviewed By: igor Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D25041 --- db/db_impl.cc | 8 ++++---- db/db_test.cc | 11 +++++++++++ include/rocksdb/table.h | 6 ++++-- table/adaptive_table_factory.h | 5 +++-- table/block_based_table_factory.cc | 11 +++++++++++ table/block_based_table_factory.h | 5 ++--- table/cuckoo_table_factory.h | 3 ++- table/plain_table_factory.h | 5 +++-- 8 files changed, 40 insertions(+), 14 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index d8df10c5a..c5bc7680a 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -282,12 +282,12 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) { namespace { -Status SanitizeDBOptionsByCFOptions( - const DBOptions* db_opts, +Status SanitizeOptionsByTable( + const DBOptions& db_opts, const std::vector& column_families) { Status s; for (auto cf : column_families) { - s = cf.options.table_factory->SanitizeDBOptions(db_opts); + s = cf.options.table_factory->SanitizeOptions(db_opts, cf.options); if (!s.ok()) { return s; } @@ -4703,7 +4703,7 @@ Status DB::Open(const Options& options, const std::string& dbname, DB** dbptr) { Status DB::Open(const DBOptions& db_options, const std::string& dbname, const std::vector& column_families, std::vector* handles, DB** dbptr) { - Status s = SanitizeDBOptionsByCFOptions(&db_options, column_families); + Status s = SanitizeOptionsByTable(db_options, column_families); if (!s.ok()) { return s; } diff --git a/db/db_test.cc b/db/db_test.cc index 169718387..ebe946cf4 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -8354,6 +8354,17 @@ TEST(DBTest, TableOptionsSanitizeTest) { options.prefix_extractor.reset(NewNoopTransform()); Destroy(&options); ASSERT_TRUE(TryReopen(&options).IsNotSupported()); + + // Test for check of prefix_extractor when hash index is used for + // block-based table + BlockBasedTableOptions to; + to.index_type = BlockBasedTableOptions::kHashSearch; + options = Options(); + options.create_if_missing = true; + options.table_factory.reset(NewBlockBasedTableFactory(to)); + ASSERT_TRUE(TryReopen(&options).IsInvalidArgument()); + options.prefix_extractor.reset(NewFixedPrefixTransform(1)); + ASSERT_OK(TryReopen(&options)); } TEST(DBTest, DBIteratorBoundTest) { diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index 4c06c23f7..4fddab4b3 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -355,11 +355,13 @@ class TableFactory { WritableFile* file, const CompressionType compression_type, const CompressionOptions& compression_opts) const = 0; - // Sanitizes the specified DB Options. + // Sanitizes the specified DB Options and ColumnFamilyOptions. // // If the function cannot find a way to sanitize the input DB Options, // a non-ok Status will be returned. - virtual Status SanitizeDBOptions(const DBOptions* db_opts) const = 0; + virtual Status SanitizeOptions( + const DBOptions& db_opts, + const ColumnFamilyOptions& cf_opts) const = 0; // Return a string that contains printable format of table configurations. // RocksDB prints configurations at DB Open(). diff --git a/table/adaptive_table_factory.h b/table/adaptive_table_factory.h index f0920db97..3c6455f90 100644 --- a/table/adaptive_table_factory.h +++ b/table/adaptive_table_factory.h @@ -47,8 +47,9 @@ class AdaptiveTableFactory : public TableFactory { const CompressionOptions& compression_opts) const override; // Sanitizes the specified DB Options. - Status SanitizeDBOptions(const DBOptions* db_opts) const override { - if (db_opts->allow_mmap_reads == false) { + Status SanitizeOptions(const DBOptions& db_opts, + const ColumnFamilyOptions& cf_opts) const override { + if (db_opts.allow_mmap_reads == false) { return Status::NotSupported( "AdaptiveTable with allow_mmap_reads == false is not supported."); } diff --git a/table/block_based_table_factory.cc b/table/block_based_table_factory.cc index b4e2e7d1f..3155f3394 100644 --- a/table/block_based_table_factory.cc +++ b/table/block_based_table_factory.cc @@ -63,6 +63,17 @@ TableBuilder* BlockBasedTableFactory::NewTableBuilder( return table_builder; } +Status BlockBasedTableFactory::SanitizeOptions( + const DBOptions& db_opts, + const ColumnFamilyOptions& cf_opts) const { + if (table_options_.index_type == BlockBasedTableOptions::kHashSearch && + cf_opts.prefix_extractor == nullptr) { + return Status::InvalidArgument("Hash index is specified for block-based " + "table, but prefix_extractor is not given"); + } + return Status::OK(); +} + std::string BlockBasedTableFactory::GetPrintableTableOptions() const { std::string ret; ret.reserve(20000); diff --git a/table/block_based_table_factory.h b/table/block_based_table_factory.h index 2dcfda6d4..247fcd691 100644 --- a/table/block_based_table_factory.h +++ b/table/block_based_table_factory.h @@ -46,9 +46,8 @@ class BlockBasedTableFactory : public TableFactory { const CompressionOptions& compression_opts) const override; // Sanitizes the specified DB Options. - Status SanitizeDBOptions(const DBOptions* db_opts) const override { - return Status::OK(); - } + Status SanitizeOptions(const DBOptions& db_opts, + const ColumnFamilyOptions& cf_opts) const override; std::string GetPrintableTableOptions() const override; diff --git a/table/cuckoo_table_factory.h b/table/cuckoo_table_factory.h index 599908678..714fdc2a0 100644 --- a/table/cuckoo_table_factory.h +++ b/table/cuckoo_table_factory.h @@ -64,7 +64,8 @@ class CuckooTableFactory : public TableFactory { const CompressionType, const CompressionOptions&) const override; // Sanitizes the specified DB Options. - Status SanitizeDBOptions(const DBOptions* db_opts) const override { + Status SanitizeOptions(const DBOptions& db_opts, + const ColumnFamilyOptions& cf_opts) const override { return Status::OK(); } diff --git a/table/plain_table_factory.h b/table/plain_table_factory.h index e79475221..23b54f092 100644 --- a/table/plain_table_factory.h +++ b/table/plain_table_factory.h @@ -170,8 +170,9 @@ class PlainTableFactory : public TableFactory { static const char kValueTypeSeqId0 = 0xFF; // Sanitizes the specified DB Options. - Status SanitizeDBOptions(const DBOptions* db_opts) const override { - if (db_opts->allow_mmap_reads == false) { + Status SanitizeOptions(const DBOptions& db_opts, + const ColumnFamilyOptions& cf_opts) const override { + if (db_opts.allow_mmap_reads == false) { return Status::NotSupported( "PlainTable with allow_mmap_reads == false is not supported."); } From ff8f74c204eba8cafc2779eac55475beff559941 Mon Sep 17 00:00:00 2001 From: Lei Jin Date: Fri, 17 Oct 2014 21:15:39 -0700 Subject: [PATCH 02/33] remove checking lower bound of level size Summary: as title Test Plan: make db_test --- db/db_test.cc | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/db/db_test.cc b/db/db_test.cc index ebe946cf4..d13929fc6 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -8763,12 +8763,9 @@ TEST(DBTest, DynamicCompactionOptions) { gen_l0_kb(i, 64, 32); } dbfull()->TEST_WaitForCompact(); - ASSERT_TRUE(SizeAtLevel(1) > k128KB * 0.8 && - SizeAtLevel(1) < k128KB * 1.2); - ASSERT_TRUE(SizeAtLevel(2) > 2 * k128KB * 0.8 && - SizeAtLevel(2) < 2 * k128KB * 1.2); - ASSERT_TRUE(SizeAtLevel(3) > 4 * k128KB * 0.8 && - SizeAtLevel(3) < 4 * k128KB * 1.2); + ASSERT_TRUE(SizeAtLevel(1) < k128KB * 1.2); + ASSERT_TRUE(SizeAtLevel(2) < 2 * k128KB * 1.2); + ASSERT_TRUE(SizeAtLevel(3) < 4 * k128KB * 1.2); // Clean up memtable and L0 dbfull()->CompactRange(nullptr, nullptr); From 5bfb7f5d0b2a09ccc3b6c8eb94fbf7cf1672a83a Mon Sep 17 00:00:00 2001 From: sdong Date: Fri, 17 Oct 2014 10:20:25 -0700 Subject: [PATCH 03/33] db_bench: seekrandom can specify --seek_nexts to read specific keys after seek. Summary: Add a function as tittle. Also use the same parameter to fillseekseq too. Test Plan: Run seekrandom using the new parameter Reviewers: ljin, MarkCallaghan Reviewed By: MarkCallaghan Subscribers: rven, igor, yhchiang, leveldb Differential Revision: https://reviews.facebook.net/D25035 --- db/db_bench.cc | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/db/db_bench.cc b/db/db_bench.cc index 6d611ae1c..d018ce70f 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -168,8 +168,9 @@ DEFINE_int32(duration, 0, "Time in seconds for the random-ops tests to run." DEFINE_int32(value_size, 100, "Size of each value"); -DEFINE_int32(seekseq_next, 0, "How many times to call Next() after Seek() in " - "fillseekseq"); +DEFINE_int32(seek_nexts, 0, + "How many times to call Next() after Seek() in " + "fillseekseq and seekrandom"); DEFINE_bool(use_uint64_comparator, false, "use Uint64 user comparator"); @@ -2265,6 +2266,7 @@ class Benchmark { std::unique_ptr key_guard(key.data()); Duration duration(FLAGS_duration, reads_); + char value_buffer[256]; while (!duration.Done(1)) { if (!FLAGS_use_tailing_iterator && FLAGS_iter_refresh_interval_us >= 0) { uint64_t now = FLAGS_env->NowMicros(); @@ -2296,6 +2298,16 @@ class Benchmark { if (iter_to_use->Valid() && iter_to_use->key().compare(key) == 0) { found++; } + + for (int j = 0; j < FLAGS_seek_nexts && iter_to_use->Valid(); ++j) { + // Copy out iterator's value to make sure we read them. + Slice value = iter_to_use->value(); + memcpy(value_buffer, value.data(), + std::min(value.size(), sizeof(value_buffer))); + iter_to_use->Next(); + assert(iter_to_use->status().ok()); + } + thread->stats.FinishedOps(&db_, db_.db, 1); } delete single_iter; @@ -2820,7 +2832,7 @@ class Benchmark { assert(iter->Valid() && iter->key() == key); thread->stats.FinishedOps(nullptr, db, 1); - for (int j = 0; j < FLAGS_seekseq_next && i+1 < FLAGS_num; ++j) { + for (int j = 0; j < FLAGS_seek_nexts && i + 1 < FLAGS_num; ++j) { iter->Next(); GenerateKeyFromInt(++i, FLAGS_num, &key); assert(iter->Valid() && iter->key() == key); From 700f6ec3ffd4e9c877a848fa7b05268052f9e7b3 Mon Sep 17 00:00:00 2001 From: Adam Retter Date: Sun, 3 Aug 2014 21:11:15 +0100 Subject: [PATCH 04/33] Ignore IntelliJ idea project files and ignore java/out folder --- .gitignore | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.gitignore b/.gitignore index 4ed05c5e3..ccbb46b03 100644 --- a/.gitignore +++ b/.gitignore @@ -31,8 +31,14 @@ coverage/COVERAGE_REPORT package/ .phutil_module_cache tags + +java/out java/*.log java/include/org_rocksdb_*.h + +.idea/ +*.iml + unity.cc java/crossbuild/.vagrant .vagrant/ From d6fe8dacc8c7121ab33e910c1589c1a1a449fc68 Mon Sep 17 00:00:00 2001 From: Adam Retter Date: Sun, 3 Aug 2014 21:11:44 +0100 Subject: [PATCH 05/33] Feature - Implement Java API for Comparator and Slice. Allows use of either byte[] or DirectByteBuffer for accessing underlying data. --- java/Makefile | 2 +- java/org/rocksdb/AbstractComparator.java | 81 ++++++++ java/org/rocksdb/AbstractSlice.java | 156 +++++++++++++++ java/org/rocksdb/Comparator.java | 25 +++ java/org/rocksdb/DirectComparator.java | 25 +++ java/org/rocksdb/DirectSlice.java | 99 ++++++++++ java/org/rocksdb/Options.java | 22 +++ java/org/rocksdb/Slice.java | 61 ++++++ java/rocksjni/comparator.cc | 64 +++++++ java/rocksjni/comparatorjnicallback.cc | 148 +++++++++++++++ java/rocksjni/comparatorjnicallback.h | 52 +++++ java/rocksjni/options.cc | 11 ++ java/rocksjni/portal.h | 147 +++++++++++++++ java/rocksjni/slice.cc | 231 +++++++++++++++++++++++ 14 files changed, 1123 insertions(+), 1 deletion(-) create mode 100644 java/org/rocksdb/AbstractComparator.java create mode 100644 java/org/rocksdb/AbstractSlice.java create mode 100644 java/org/rocksdb/Comparator.java create mode 100644 java/org/rocksdb/DirectComparator.java create mode 100644 java/org/rocksdb/DirectSlice.java create mode 100644 java/org/rocksdb/Slice.java create mode 100644 java/rocksjni/comparator.cc create mode 100644 java/rocksjni/comparatorjnicallback.cc create mode 100644 java/rocksjni/comparatorjnicallback.h create mode 100644 java/rocksjni/slice.cc diff --git a/java/Makefile b/java/Makefile index ef8ccbae4..5c20032d2 100644 --- a/java/Makefile +++ b/java/Makefile @@ -1,4 +1,4 @@ -NATIVE_JAVA_CLASSES = org.rocksdb.RocksDB org.rocksdb.Options org.rocksdb.WriteBatch org.rocksdb.WriteBatchInternal org.rocksdb.WriteBatchTest org.rocksdb.WriteOptions org.rocksdb.BackupableDB org.rocksdb.BackupableDBOptions org.rocksdb.Statistics org.rocksdb.RocksIterator org.rocksdb.VectorMemTableConfig org.rocksdb.SkipListMemTableConfig org.rocksdb.HashLinkedListMemTableConfig org.rocksdb.HashSkipListMemTableConfig org.rocksdb.PlainTableConfig org.rocksdb.BlockBasedTableConfig org.rocksdb.ReadOptions org.rocksdb.Filter org.rocksdb.BloomFilter org.rocksdb.RestoreOptions org.rocksdb.RestoreBackupableDB org.rocksdb.RocksEnv org.rocksdb.GenericRateLimiterConfig org.rocksdb.ColumnFamilyHandle +NATIVE_JAVA_CLASSES = org.rocksdb.RocksDB org.rocksdb.Options org.rocksdb.WriteBatch org.rocksdb.WriteBatchInternal org.rocksdb.WriteBatchTest org.rocksdb.WriteOptions org.rocksdb.BackupableDB org.rocksdb.BackupableDBOptions org.rocksdb.Statistics org.rocksdb.RocksIterator org.rocksdb.VectorMemTableConfig org.rocksdb.SkipListMemTableConfig org.rocksdb.HashLinkedListMemTableConfig org.rocksdb.HashSkipListMemTableConfig org.rocksdb.PlainTableConfig org.rocksdb.BlockBasedTableConfig org.rocksdb.ReadOptions org.rocksdb.Filter org.rocksdb.BloomFilter org.rocksdb.AbstractComparator org.rocksdb.Comparator org.rocksdb.DirectComparator org.rocksdb.AbstractSlice org.rocksdb.Slice org.rocksdb.DirectSlice org.rocksdb.RestoreOptions org.rocksdb.RestoreBackupableDB org.rocksdb.RocksEnv org.rocksdb.GenericRateLimiterConfig org.rocksdb.ColumnFamilyHandle ROCKSDB_MAJOR = $(shell egrep "ROCKSDB_MAJOR.[0-9]" ../include/rocksdb/version.h | cut -d ' ' -f 3) ROCKSDB_MINOR = $(shell egrep "ROCKSDB_MINOR.[0-9]" ../include/rocksdb/version.h | cut -d ' ' -f 3) diff --git a/java/org/rocksdb/AbstractComparator.java b/java/org/rocksdb/AbstractComparator.java new file mode 100644 index 000000000..fa797b273 --- /dev/null +++ b/java/org/rocksdb/AbstractComparator.java @@ -0,0 +1,81 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +package org.rocksdb; + +/** + * Comparators are used by RocksDB to determine + * the ordering of keys. + * + * This class is package private, implementers + * should extend either of the public abstract classes: + * @see org.rocksdb.Comparator + * @see org.rocksdb.DirectComparator + */ +abstract class AbstractComparator extends RocksObject { + + public abstract String name(); + + /** + * Three-way key comparison + * + * @param a Slice access to first key + * @param b Slice access to second key + * + * @return Should return either: + * 1) < 0 if "a" < "b" + * 2) == 0 if "a" == "b" + * 3) > 0 if "a" > "b" + */ + public abstract int compare(final T a, final T b); + + /** + * Used to reduce the space requirements + * for internal data structures like index blocks. + * + * If start < limit, you may return a new start which is a + * shorter string in [start, limit). + * + * Simple comparator implementations may return null if they + * wish to use start unchanged. i.e., an implementation of + * this method that does nothing is correct. + * + * @return a shorter start, or null + */ + public String findShortestSeparator(final String start, final T limit) { + return null; + } + + /** + * Used to reduce the space requirements + * for internal data structures like index blocks. + * + * You may return a new short key (key1) where + * key1 >= key. + * + * Simple comparator implementations may return null if they + * wish to leave the key unchanged. i.e., an implementation of + * this method that does nothing is correct. + * + * @return a shorter key, or null + */ + public String findShortSuccessor(final String key) { + return null; + } + + /** + * Deletes underlying C++ comparator pointer. + * + * Note that this function should be called only after all + * RocksDB instances referencing the comparator are closed. + * Otherwise an undefined behavior will occur. + */ + @Override protected void disposeInternal() { + assert(isInitialized()); + disposeInternal(nativeHandle_); + } + + private native void disposeInternal(long handle); +} diff --git a/java/org/rocksdb/AbstractSlice.java b/java/org/rocksdb/AbstractSlice.java new file mode 100644 index 000000000..963c72a1b --- /dev/null +++ b/java/org/rocksdb/AbstractSlice.java @@ -0,0 +1,156 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +package org.rocksdb; + +/** + * Slices are used by RocksDB to provide + * efficient access to keys and values. + * + * This class is package private, implementers + * should extend either of the public abstract classes: + * @see org.rocksdb.Slice + * @see org.rocksdb.DirectSlice + */ +abstract class AbstractSlice extends RocksObject { + + /** + * Returns the data. + * + * @return The data. Note, the type of access is + * determined by the subclass + * @see org.rocksdb.AbstractSlice#data0(long). + */ + public T data() { + assert (isInitialized()); + return data0(nativeHandle_); + } + + /** + * Access to the data is provided by the + * subtype as it needs to handle the + * generic typing. + * + * @param handle The address of the underlying + * native object. + * + * @return Java typed access to the data. + */ + protected abstract T data0(long handle); + + /** + * Return the length (in bytes) of the data. + * + * @return The length in bytes. + */ + public int size() { + assert (isInitialized()); + return size0(nativeHandle_); + } + + /** + * Return true if the length of the + * data is zero. + * + * @return true if there is no data, false otherwise. + */ + public boolean empty() { + assert (isInitialized()); + return empty0(nativeHandle_); + } + + /** + * Creates a string representation of the data + * + * @param hex When true, the representation + * will be encoded in hexidecimal. + * + * @return The string representation of the data. + */ + public String toString(final boolean hex) { + assert (isInitialized()); + return toString0(nativeHandle_, hex); + } + + @Override + public String toString() { + return toString(false); + } + + /** + * Three-way key comparison + * + * @param other A slice to compare against + * + * @return Should return either: + * 1) < 0 if this < other + * 2) == 0 if this == other + * 3) > 0 if this > other + */ + public int compare(final AbstractSlice other) { + assert (other != null); + assert (isInitialized()); + return compare0(nativeHandle_, other.nativeHandle_); + } + + /** + * If other is a slice, then + * we defer to compare to check equality, + * otherwise we return false. + * + * @param other Object to test for equality + * + * @return true when this.compare(other) == 0, + * false otherwise. + */ + @Override + public boolean equals(final Object other) { + if (other != null && other instanceof AbstractSlice) { + return compare((AbstractSlice)other) == 0; + } else { + return false; + } + } + + /** + * Determines whether this starts with prefix + * + * @param prefix Another slice which may of may not + * be the prefix of this slice. + * + * @return true when slice `prefix` is a prefix + * of this slice + */ + public boolean startsWith(final AbstractSlice prefix) { + if (prefix != null) { + assert (isInitialized()); + return startsWith0(nativeHandle_, prefix.nativeHandle_); + } else { + return false; + } + } + + /** + * Deletes underlying C++ slice pointer. + *

+ * Note that this function should be called only after all + * RocksDB instances referencing the slice are closed. + * Otherwise an undefined behavior will occur. + */ + @Override + protected void disposeInternal() { + assert(isInitialized()); + disposeInternal(nativeHandle_); + } + + protected native void createNewSliceFromString(String str); + private native int size0(long handle); + private native boolean empty0(long handle); + private native String toString0(long handle, boolean hex); + private native int compare0(long handle, long otherHandle); + private native boolean startsWith0(long handle, long otherHandle); + private native void disposeInternal(long handle); + +} diff --git a/java/org/rocksdb/Comparator.java b/java/org/rocksdb/Comparator.java new file mode 100644 index 000000000..8466cfd8e --- /dev/null +++ b/java/org/rocksdb/Comparator.java @@ -0,0 +1,25 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +package org.rocksdb; + +/** + * Base class for comparators which will receive + * byte[] based access via org.rocksdb.Slice in their + * compare method implementation. + * + * byte[] based slices perform better when small keys + * are involved. When using larger keys consider + * using @see org.rocksdb.DirectComparator + */ +public abstract class Comparator extends AbstractComparator { + + public Comparator() { + super(); + createNewComparator0(); + } + + private native void createNewComparator0(); +} diff --git a/java/org/rocksdb/DirectComparator.java b/java/org/rocksdb/DirectComparator.java new file mode 100644 index 000000000..25b4058ae --- /dev/null +++ b/java/org/rocksdb/DirectComparator.java @@ -0,0 +1,25 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +package org.rocksdb; + +/** + * Base class for comparators which will receive + * ByteBuffer based access via org.rocksdb.DirectSlice + * in their compare method implementation. + * + * ByteBuffer based slices perform better when large keys + * are involved. When using smaller keys consider + * using @see org.rocksdb.Comparator + */ +public abstract class DirectComparator extends AbstractComparator { + + public DirectComparator() { + super(); + createNewDirectComparator0(); + } + + private native void createNewDirectComparator0(); +} diff --git a/java/org/rocksdb/DirectSlice.java b/java/org/rocksdb/DirectSlice.java new file mode 100644 index 000000000..8169e3529 --- /dev/null +++ b/java/org/rocksdb/DirectSlice.java @@ -0,0 +1,99 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +package org.rocksdb; + +import java.nio.ByteBuffer; + +/** + * Base class for slices which will receive direct + * ByteBuffer based access to the underlying data. + * + * ByteBuffer backed slices typically perform better with + * larger keys and values. When using smaller keys and + * values consider using @see org.rocksdb.Slice + */ +public class DirectSlice extends AbstractSlice { + + /** + * Called from JNI to construct a new Java DirectSlice + * without an underlying C++ object set + * at creation time. + */ + private DirectSlice() { + super(); + disOwnNativeHandle(); + } + + /** + * Constructs a slice + * where the data is taken from + * a String. + */ + public DirectSlice(final String str) { + super(); + createNewSliceFromString(str); + } + + /** + * Constructs a slice where the data is + * read from the provided + * ByteBuffer up to a certain length + */ + public DirectSlice(final ByteBuffer data, final int length) { + super(); + createNewDirectSlice0(data, length); + } + + /** + * Constructs a slice where the data is + * read from the provided + * ByteBuffer + */ + public DirectSlice(final ByteBuffer data) { + super(); + createNewDirectSlice1(data); + } + + /** + * Retrieves the byte at a specific offset + * from the underlying data + * + * @param offset The (zero-based) offset of the byte to retrieve + * + * @return the requested byte + */ + public byte get(int offset) { + assert (isInitialized()); + return get0(nativeHandle_, offset); + } + + /** + * Clears the backing slice + */ + public void clear() { + assert (isInitialized()); + clear0(nativeHandle_); + } + + /** + * Drops the specified n + * number of bytes from the start + * of the backing slice + * + * @param n The number of bytes to drop + */ + public void removePrefix(final int n) { + assert (isInitialized()); + removePrefix0(nativeHandle_, n); + } + + private native void createNewDirectSlice0(ByteBuffer data, int length); + private native void createNewDirectSlice1(ByteBuffer data); + @Override protected final native ByteBuffer data0(long handle); + private native byte get0(long handle, int offset); + private native void clear0(long handle); + private native void removePrefix0(long handle, int length); +} diff --git a/java/org/rocksdb/Options.java b/java/org/rocksdb/Options.java index 741404e40..b99d0c7ea 100644 --- a/java/org/rocksdb/Options.java +++ b/java/org/rocksdb/Options.java @@ -193,6 +193,27 @@ public class Options extends RocksObject { return maxWriteBufferNumber(nativeHandle_); } + /** + * Use the specified comparator for key ordering. + * + * Comparator should not be disposed before options instances using this comparator is + * disposed. If dispose() function is not called, then comparator object will be + * GC'd automatically. + * + * Comparator instance can be re-used in multiple options instances. + * + * @param comparator java instance. + * @return the instance of the current Options. + * @see RocksDB.open() + */ + public Options setComparator(AbstractComparator comparator) { + assert (isInitialized()); + setComparatorHandle(nativeHandle_, comparator.nativeHandle_); + comparator_ = comparator; + return this; + } + private native void setComparatorHandle(long optHandle, long comparatorHandle); + /** * If true, an error will be thrown during RocksDB.open() if the * database already exists. @@ -2282,6 +2303,7 @@ public class Options extends RocksObject { long cacheSize_; int numShardBits_; + AbstractComparator comparator_; RocksEnv env_; MemTableConfig memTableConfig_; TableFormatConfig tableFormatConfig_; diff --git a/java/org/rocksdb/Slice.java b/java/org/rocksdb/Slice.java new file mode 100644 index 000000000..28c29c43d --- /dev/null +++ b/java/org/rocksdb/Slice.java @@ -0,0 +1,61 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +package org.rocksdb; + +/** + * Base class for slices which will receive + * byte[] based access to the underlying data. + * + * byte[] backed slices typically perform better with + * small keys and values. When using larger keys and + * values consider using @see org.rocksdb.DirectSlice + */ +public class Slice extends AbstractSlice { + + /** + * Called from JNI to construct a new Java Slice + * without an underlying C++ object set + * at creation time. + */ + private Slice() { + super(); + disOwnNativeHandle(); + } + + /** + * Constructs a slice + * where the data is taken from + * a String. + */ + public Slice(final String str) { + super(); + createNewSliceFromString(str); + } + + /** + * Constructs a slice + * where the data is a copy of + * the byte array from a specific offset. + */ + public Slice(final byte[] data, final int offset) { + super(); + createNewSlice0(data, offset); + } + + /** + * Constructs a slice + * where the data is a copy of + * the byte array. + */ + public Slice(final byte[] data) { + super(); + createNewSlice1(data); + } + + @Override protected final native byte[] data0(long handle); + private native void createNewSlice0(byte[] data, int length); + private native void createNewSlice1(byte[] data); +} diff --git a/java/rocksjni/comparator.cc b/java/rocksjni/comparator.cc new file mode 100644 index 000000000..54d6137cd --- /dev/null +++ b/java/rocksjni/comparator.cc @@ -0,0 +1,64 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// +// This file implements the "bridge" between Java and C++ for +// rocksdb::Comparator. + +#include +#include +#include +#include +#include + +#include "include/org_rocksdb_AbstractComparator.h" +#include "include/org_rocksdb_Comparator.h" +#include "include/org_rocksdb_DirectComparator.h" +#include "rocksjni/comparatorjnicallback.h" +#include "rocksjni/portal.h" + +// + +/* + * Class: org_rocksdb_Comparator + * Method: createNewComparator0 + * Signature: ()V + */ +void Java_org_rocksdb_Comparator_createNewComparator0( + JNIEnv* env, jobject jobj) { + const rocksdb::ComparatorJniCallback* c = new rocksdb::ComparatorJniCallback(env, jobj); + rocksdb::AbstractComparatorJni::setHandle(env, jobj, c); +} + +// + +//GetJavaVM(&m_jvm); + assert(rs == JNI_OK); + + // Note: we want to access the Java Comparator instance + // across multiple method calls, so we create a global ref + m_jComparator = env->NewGlobalRef(jComparator); + + // Note: The name of a Comparator will not change during it's lifetime, + // so we cache it in a global var + jmethodID jNameMethodId = AbstractComparatorJni::getNameMethodId(env); + jstring jsName = (jstring)env->CallObjectMethod(m_jComparator, jNameMethodId); + m_name = JniUtil::copyString(env, jsName); //also releases jsName + + m_jCompareMethodId = AbstractComparatorJni::getCompareMethodId(env); + m_jFindShortestSeparatorMethodId = AbstractComparatorJni::getFindShortestSeparatorMethodId(env); + m_jFindShortSuccessorMethodId = AbstractComparatorJni::getFindShortSuccessorMethodId(env); +} + +/** + * Attach/Get a JNIEnv for the current native thread + */ +JNIEnv* BaseComparatorJniCallback::getJniEnv() const { + JNIEnv *env; + jint rs = m_jvm->AttachCurrentThread((void **)&env, NULL); + assert(rs == JNI_OK); + return env; +}; + +const char* BaseComparatorJniCallback::Name() const { + return m_name.c_str(); +} + +int BaseComparatorJniCallback::Compare(const Slice& a, const Slice& b) const { + + JNIEnv* m_env = getJniEnv(); + + AbstractSliceJni::setHandle(m_env, m_jSliceA, &a); + AbstractSliceJni::setHandle(m_env, m_jSliceB, &b); + + jint result = m_env->CallIntMethod(m_jComparator, m_jCompareMethodId, m_jSliceA, m_jSliceB); + + m_jvm->DetachCurrentThread(); + + return result; +} + +void BaseComparatorJniCallback::FindShortestSeparator(std::string* start, const Slice& limit) const { + + if (start == nullptr) { + return; + } + + JNIEnv* m_env = getJniEnv(); + + const char* startUtf = start->c_str(); + jstring jsStart = m_env->NewStringUTF(startUtf); + + AbstractSliceJni::setHandle(m_env, m_jSliceLimit, &limit); + + jstring jsResultStart = (jstring)m_env->CallObjectMethod(m_jComparator, m_jFindShortestSeparatorMethodId, jsStart, m_jSliceLimit); + + m_env->DeleteLocalRef(jsStart); + + if(jsResultStart != nullptr) { + //update start with result + *start = JniUtil::copyString(m_env, jsResultStart); //also releases jsResultStart + } + + m_jvm->DetachCurrentThread(); +} + +void BaseComparatorJniCallback::FindShortSuccessor(std::string* key) const { + + if (key == nullptr) { + return; + } + + JNIEnv* m_env = getJniEnv(); + + const char* keyUtf = key->c_str(); + jstring jsKey = m_env->NewStringUTF(keyUtf); + + jstring jsResultKey = (jstring)m_env->CallObjectMethod(m_jComparator, m_jFindShortSuccessorMethodId, jsKey); + + m_env->DeleteLocalRef(jsKey); + + if(jsResultKey != nullptr) { + //update key with result + *key = JniUtil::copyString(m_env, jsResultKey); //also releases jsResultKey + } + + m_jvm->DetachCurrentThread(); +} + +BaseComparatorJniCallback::~BaseComparatorJniCallback() { + + // NOTE: we do not need to delete m_name here, + // I am not yet sure why, but doing so causes the error: + // java(13051,0x109f54000) malloc: *** error for object 0x109f52fa9: pointer being freed was not allocated + // *** set a breakpoint in malloc_error_break to debug + //delete[] m_name; + + JNIEnv* m_env = getJniEnv(); + + m_env->DeleteGlobalRef(m_jComparator); + m_env->DeleteGlobalRef(m_jSliceA); + m_env->DeleteGlobalRef(m_jSliceB); + m_env->DeleteGlobalRef(m_jSliceLimit); + + // Note: do not need to explicitly detach, as this function is effectively + // called from the Java class's disposeInternal method, and so already + // has an attached thread, getJniEnv above is just a no-op Attach to get the env + //jvm->DetachCurrentThread(); +} + +ComparatorJniCallback::ComparatorJniCallback( + JNIEnv* env, jobject jComparator) : BaseComparatorJniCallback(env, jComparator) { + + m_jSliceA = env->NewGlobalRef(SliceJni::construct0(env)); + m_jSliceB = env->NewGlobalRef(SliceJni::construct0(env)); + m_jSliceLimit = env->NewGlobalRef(SliceJni::construct0(env)); +} + +DirectComparatorJniCallback::DirectComparatorJniCallback( + JNIEnv* env, jobject jComparator) : BaseComparatorJniCallback(env, jComparator) { + + m_jSliceA = env->NewGlobalRef(DirectSliceJni::construct0(env)); + m_jSliceB = env->NewGlobalRef(DirectSliceJni::construct0(env)); + m_jSliceLimit = env->NewGlobalRef(DirectSliceJni::construct0(env)); +} +} // namespace rocksdb diff --git a/java/rocksjni/comparatorjnicallback.h b/java/rocksjni/comparatorjnicallback.h new file mode 100644 index 000000000..f188dce86 --- /dev/null +++ b/java/rocksjni/comparatorjnicallback.h @@ -0,0 +1,52 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// +// This file implements the callback "bridge" between Java and C++ for +// rocksdb::Comparator and rocksdb::DirectComparator. + +#ifndef JAVA_ROCKSJNI_COMPARATORJNICALLBACK_H_ +#define JAVA_ROCKSJNI_COMPARATORJNICALLBACK_H_ + +#include +#include "rocksdb/comparator.h" +#include "rocksdb/slice.h" + +namespace rocksdb { +class BaseComparatorJniCallback : public Comparator { + public: + BaseComparatorJniCallback(JNIEnv* env, jobject jComparator); + virtual ~BaseComparatorJniCallback(); + virtual const char* Name() const; + virtual int Compare(const Slice& a, const Slice& b) const; + virtual void FindShortestSeparator(std::string* start, const Slice& limit) const; + virtual void FindShortSuccessor(std::string* key) const; + + private: + JavaVM* m_jvm; + jobject m_jComparator; + std::string m_name; + jmethodID m_jCompareMethodId; + jmethodID m_jFindShortestSeparatorMethodId; + jmethodID m_jFindShortSuccessorMethodId; + JNIEnv* getJniEnv() const; + + protected: + jobject m_jSliceA; + jobject m_jSliceB; + jobject m_jSliceLimit; +}; + +class ComparatorJniCallback : public BaseComparatorJniCallback { + public: + ComparatorJniCallback(JNIEnv* env, jobject jComparator); +}; + +class DirectComparatorJniCallback : public BaseComparatorJniCallback { + public: + DirectComparatorJniCallback(JNIEnv* env, jobject jComparator); +}; +} // namespace rocksdb + +#endif // JAVA_ROCKSJNI_COMPARATORJNICALLBACK_H_ diff --git a/java/rocksjni/options.cc b/java/rocksjni/options.cc index ef104d92b..8e94f965b 100644 --- a/java/rocksjni/options.cc +++ b/java/rocksjni/options.cc @@ -164,6 +164,17 @@ jlong Java_org_rocksdb_Options_statisticsPtr( return reinterpret_cast(st); } +/* + * Class: org_rocksdb_Options + * Method: setComparatorHandle + * Signature: (JJ)V + */ +void Java_org_rocksdb_Options_setComparatorHandle( + JNIEnv* env, jobject jobj, jlong jopt_handle, jlong jcomparator_handle) { + reinterpret_cast(jopt_handle)->comparator = + reinterpret_cast(jcomparator_handle); +} + /* * Class: org_rocksdb_Options * Method: maxWriteBufferNumber diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index 8300a6e66..68403ebff 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -16,6 +16,7 @@ #include "rocksdb/filter_policy.h" #include "rocksdb/status.h" #include "rocksdb/utilities/backupable_db.h" +#include "rocksjni/comparatorjnicallback.h" namespace rocksdb { @@ -362,6 +363,136 @@ class ColumnFamilyHandleJni { } }; +class AbstractComparatorJni { + public: + // Get the java class id of org.rocksdb.Comparator. + static jclass getJClass(JNIEnv* env) { + jclass jclazz = env->FindClass("org/rocksdb/AbstractComparator"); + assert(jclazz != nullptr); + return jclazz; + } + + // Get the field id of the member variable of org.rocksdb.Comparator + // that stores the pointer to rocksdb::Comparator. + static jfieldID getHandleFieldID(JNIEnv* env) { + static jfieldID fid = env->GetFieldID( + getJClass(env), "nativeHandle_", "J"); + assert(fid != nullptr); + return fid; + } + + // Get the java method `name` of org.rocksdb.Comparator. + static jmethodID getNameMethodId(JNIEnv* env) { + static jmethodID mid = env->GetMethodID( + getJClass(env), "name", "()Ljava/lang/String;"); + assert(mid != nullptr); + return mid; + } + + // Get the java method `compare` of org.rocksdb.Comparator. + static jmethodID getCompareMethodId(JNIEnv* env) { + static jmethodID mid = env->GetMethodID( + getJClass(env), "compare", "(Lorg/rocksdb/AbstractSlice;Lorg/rocksdb/AbstractSlice;)I"); + assert(mid != nullptr); + return mid; + } + + // Get the java method `findShortestSeparator` of org.rocksdb.Comparator. + static jmethodID getFindShortestSeparatorMethodId(JNIEnv* env) { + static jmethodID mid = env->GetMethodID( + getJClass(env), "findShortestSeparator", "(Ljava/lang/String;Lorg/rocksdb/AbstractSlice;)Ljava/lang/String;"); + assert(mid != nullptr); + return mid; + } + + // Get the java method `findShortSuccessor` of org.rocksdb.Comparator. + static jmethodID getFindShortSuccessorMethodId(JNIEnv* env) { + static jmethodID mid = env->GetMethodID( + getJClass(env), "findShortSuccessor", "(Ljava/lang/String;)Ljava/lang/String;"); + assert(mid != nullptr); + return mid; + } + + // Get the pointer to ComparatorJniCallback. + static rocksdb::BaseComparatorJniCallback* getHandle(JNIEnv* env, jobject jobj) { + return reinterpret_cast( + env->GetLongField(jobj, getHandleFieldID(env))); + } + + // Pass the ComparatorJniCallback pointer to the java side. + static void setHandle( + JNIEnv* env, jobject jobj, const rocksdb::BaseComparatorJniCallback* op) { + env->SetLongField( + jobj, getHandleFieldID(env), + reinterpret_cast(op)); + } +}; + +class AbstractSliceJni { + public: + // Get the java class id of org.rocksdb.Slice. + static jclass getJClass(JNIEnv* env) { + jclass jclazz = env->FindClass("org/rocksdb/AbstractSlice"); + assert(jclazz != nullptr); + return jclazz; + } + + // Get the field id of the member variable of org.rocksdb.Slice + // that stores the pointer to rocksdb::Slice. + static jfieldID getHandleFieldID(JNIEnv* env) { + static jfieldID fid = env->GetFieldID( + getJClass(env), "nativeHandle_", "J"); + assert(fid != nullptr); + return fid; + } + + // Get the pointer to Slice. + static rocksdb::Slice* getHandle(JNIEnv* env, jobject jobj) { + return reinterpret_cast( + env->GetLongField(jobj, getHandleFieldID(env))); + } + + // Pass the Slice pointer to the java side. + static void setHandle( + JNIEnv* env, jobject jobj, const rocksdb::Slice* op) { + env->SetLongField( + jobj, getHandleFieldID(env), + reinterpret_cast(op)); + } +}; + +class SliceJni { + public: + // Get the java class id of org.rocksdb.Slice. + static jclass getJClass(JNIEnv* env) { + jclass jclazz = env->FindClass("org/rocksdb/Slice"); + assert(jclazz != nullptr); + return jclazz; + } + + static jobject construct0(JNIEnv* env) { + static jmethodID mid = env->GetMethodID(getJClass(env), "", "()V"); + assert(mid != nullptr); + return env->NewObject(getJClass(env), mid); + } +}; + +class DirectSliceJni { + public: + // Get the java class id of org.rocksdb.DirectSlice. + static jclass getJClass(JNIEnv* env) { + jclass jclazz = env->FindClass("org/rocksdb/DirectSlice"); + assert(jclazz != nullptr); + return jclazz; + } + + static jobject construct0(JNIEnv* env) { + static jmethodID mid = env->GetMethodID(getJClass(env), "", "()V"); + assert(mid != nullptr); + return env->NewObject(getJClass(env), mid); + } +}; + class ListJni { public: // Get the java class id of java.util.List. @@ -425,5 +556,21 @@ class ListJni { return mid; } }; + +class JniUtil { + public: + + /** + * Copies a jstring to a std::string + * and releases the original jstring + */ + static std::string copyString(JNIEnv* env, jstring js) { + const char *utf = env->GetStringUTFChars(js, NULL); + std::string name(utf); + env->ReleaseStringUTFChars(js, utf); + return name; + } +}; + } // namespace rocksdb #endif // JAVA_ROCKSJNI_PORTAL_H_ diff --git a/java/rocksjni/slice.cc b/java/rocksjni/slice.cc new file mode 100644 index 000000000..a0a6f71e6 --- /dev/null +++ b/java/rocksjni/slice.cc @@ -0,0 +1,231 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// +// This file implements the "bridge" between Java and C++ for +// rocksdb::Slice. + +#include +#include +#include +#include + +#include "include/org_rocksdb_AbstractSlice.h" +#include "include/org_rocksdb_Slice.h" +#include "include/org_rocksdb_DirectSlice.h" +#include "rocksdb/slice.h" +#include "rocksjni/portal.h" + +// + +/* + * Class: org_rocksdb_Slice + * Method: createNewSlice0 + * Signature: ([BI)V + */ +void Java_org_rocksdb_Slice_createNewSlice0( + JNIEnv * env, jobject jobj, jbyteArray data, jint offset) { + + const jsize dataSize = env->GetArrayLength(data); + const int len = dataSize - offset; + //jbyte ptrData[len]; + jbyte* ptrData = new jbyte[len]; + env->GetByteArrayRegion(data, offset, len, ptrData); + + const rocksdb::Slice* slice = new rocksdb::Slice((const char*)ptrData, len); + rocksdb::AbstractSliceJni::setHandle(env, jobj, slice); + +} + +/* + * Class: org_rocksdb_Slice + * Method: createNewSlice1 + * Signature: ([B)V + */ +void Java_org_rocksdb_Slice_createNewSlice1( + JNIEnv * env, jobject jobj, jbyteArray data) { + + jboolean isCopy; + jbyte* ptrData = env->GetByteArrayElements(data, &isCopy); + + const rocksdb::Slice* slice = new rocksdb::Slice((const char*)ptrData, env->GetArrayLength(data)); + rocksdb::AbstractSliceJni::setHandle(env, jobj, slice); + + env->ReleaseByteArrayElements(data, ptrData, JNI_COMMIT); + + //TODO where do we free ptrData later? + //do we need to call env->ReleaseByteArrayElements(data, ptrData, JNI_ABORT) in the org.rocksdb.Slice#dispose() method? +} + +/* + * Class: org_rocksdb_Slice + * Method: data0 + * Signature: (J)[B + */ +jbyteArray Java_org_rocksdb_Slice_data0( + JNIEnv* env, jobject jobj, jlong handle) { + const rocksdb::Slice* slice = reinterpret_cast(handle); + const int len = slice->size(); + const jbyteArray data = env->NewByteArray(len); + env->SetByteArrayRegion(data, 0, len, (jbyte*)slice->data()); + return data; +} + +// + +// { - public Comparator() { + public Comparator(final ComparatorOptions copt) { super(); - createNewComparator0(); + createNewComparator0(copt.nativeHandle_); } - private native void createNewComparator0(); + private native void createNewComparator0(final long comparatorOptionsHandle); } diff --git a/java/org/rocksdb/ComparatorOptions.java b/java/org/rocksdb/ComparatorOptions.java new file mode 100644 index 000000000..a55091dfa --- /dev/null +++ b/java/org/rocksdb/ComparatorOptions.java @@ -0,0 +1,49 @@ +package org.rocksdb; + +public class ComparatorOptions extends RocksObject { + + public ComparatorOptions() { + super(); + newComparatorOptions(); + } + + /** + * Use adaptive mutex, which spins in the user space before resorting + * to kernel. This could reduce context switch when the mutex is not + * heavily contended. However, if the mutex is hot, we could end up + * wasting spin time. + * Default: false + * + * @return true if adaptive mutex is used. + */ + public boolean useAdaptiveMutex() { + assert(isInitialized()); + return useAdaptiveMutex(nativeHandle_); + } + + /** + * Use adaptive mutex, which spins in the user space before resorting + * to kernel. This could reduce context switch when the mutex is not + * heavily contended. However, if the mutex is hot, we could end up + * wasting spin time. + * Default: false + * + * @param useAdaptiveMutex true if adaptive mutex is used. + * @return the reference to the current comparator options. + */ + public ComparatorOptions setUseAdaptiveMutex(final boolean useAdaptiveMutex) { + assert (isInitialized()); + setUseAdaptiveMutex(nativeHandle_, useAdaptiveMutex); + return this; + } + + @Override protected void disposeInternal() { + assert(isInitialized()); + disposeInternal(nativeHandle_); + } + + private native void newComparatorOptions(); + private native boolean useAdaptiveMutex(final long handle); + private native void setUseAdaptiveMutex(final long handle, final boolean useAdaptiveMutex); + private native void disposeInternal(long handle); +} diff --git a/java/org/rocksdb/DirectComparator.java b/java/org/rocksdb/DirectComparator.java index 25b4058ae..86476c40e 100644 --- a/java/org/rocksdb/DirectComparator.java +++ b/java/org/rocksdb/DirectComparator.java @@ -16,10 +16,10 @@ package org.rocksdb; */ public abstract class DirectComparator extends AbstractComparator { - public DirectComparator() { + public DirectComparator(final ComparatorOptions copt) { super(); - createNewDirectComparator0(); + createNewDirectComparator0(copt.nativeHandle_); } - private native void createNewDirectComparator0(); + private native void createNewDirectComparator0(final long comparatorOptionsHandle); } diff --git a/java/rocksjni/comparator.cc b/java/rocksjni/comparator.cc index 54d6137cd..8dcda6aa6 100644 --- a/java/rocksjni/comparator.cc +++ b/java/rocksjni/comparator.cc @@ -18,6 +18,28 @@ #include "rocksjni/comparatorjnicallback.h" #include "rocksjni/portal.h" +// + +void Java_org_rocksdb_ComparatorOptions_newComparatorOptions( + JNIEnv* env, jobject jobj, jstring jpath, jboolean jshare_table_files, + jboolean jsync, jboolean jdestroy_old_data, jboolean jbackup_log_files, + jlong jbackup_rate_limit, jlong jrestore_rate_limit) { + jbackup_rate_limit = (jbackup_rate_limit <= 0) ? 0 : jbackup_rate_limit; + jrestore_rate_limit = (jrestore_rate_limit <= 0) ? 0 : jrestore_rate_limit; + + const char* cpath = env->GetStringUTFChars(jpath, 0); + + auto bopt = new rocksdb::BackupableDBOptions(cpath, nullptr, + jshare_table_files, nullptr, jsync, jdestroy_old_data, jbackup_log_files, + jbackup_rate_limit, jrestore_rate_limit); + + env->ReleaseStringUTFChars(jpath, cpath); + + rocksdb::BackupableDBOptionsJni::setHandle(env, jobj, bopt); +} + +// + //(jhandle)->tailing = static_cast(jtailing); } + +///////////////////////////////////////////////////////////////////// +// rocksdb::ComparatorOptions +/* + * Class: org_rocksdb_ComparatorOptions + * Method: newComparatorOptions + * Signature: ()V + */ +void Java_org_rocksdb_ComparatorOptions_newComparatorOptions( + JNIEnv* env, jobject jobj) { + auto comparator_opt = new rocksdb::ComparatorJniCallbackOptions(); + rocksdb::ComparatorOptionsJni::setHandle(env, jobj, comparator_opt); +} + +/* + * Class: org_rocksdb_ComparatorOptions + * Method: useAdaptiveMutex + * Signature: (J)Z + */ +jboolean Java_org_rocksdb_ComparatorOptions_useAdaptiveMutex( + JNIEnv * env, jobject jobj, jlong jhandle) { + return reinterpret_cast(jhandle)->use_adaptive_mutex; +} + +/* + * Class: org_rocksdb_ComparatorOptions + * Method: setUseAdaptiveMutex + * Signature: (JZ)V + */ +void Java_org_rocksdb_ComparatorOptions_setUseAdaptiveMutex( + JNIEnv * env, jobject jobj, jlong jhandle, jboolean juse_adaptive_mutex) { + reinterpret_cast(jhandle)->use_adaptive_mutex = + static_cast(juse_adaptive_mutex); +} + +/* + * Class: org_rocksdb_ComparatorOptions + * Method: disposeInternal + * Signature: (J)V + */ +void Java_org_rocksdb_ComparatorOptions_disposeInternal( + JNIEnv * env, jobject jobj, jlong jhandle) { + delete reinterpret_cast(jhandle); + rocksdb::ComparatorOptionsJni::setHandle(env, jobj, nullptr); +} diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index 68403ebff..bd40d4290 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -363,6 +363,33 @@ class ColumnFamilyHandleJni { } }; +class ComparatorOptionsJni { + public: + // Get the java class id of org.rocksdb.ComparatorOptions. + static jclass getJClass(JNIEnv* env) { + jclass jclazz = env->FindClass("org/rocksdb/ComparatorOptions"); + assert(jclazz != nullptr); + return jclazz; + } + + // Get the field id of the member variable of org.rocksdb.ComparatorOptions + // that stores the pointer to rocksdb::ComparatorJniCallbackOptions. + static jfieldID getHandleFieldID(JNIEnv* env) { + static jfieldID fid = env->GetFieldID( + getJClass(env), "nativeHandle_", "J"); + assert(fid != nullptr); + return fid; + } + + // Pass the ComparatorJniCallbackOptions pointer to the java side. + static void setHandle( + JNIEnv* env, jobject jobj, const rocksdb::ComparatorJniCallbackOptions* op) { + env->SetLongField( + jobj, getHandleFieldID(env), + reinterpret_cast(op)); + } +}; + class AbstractComparatorJni { public: // Get the java class id of org.rocksdb.Comparator. From 25641bfc9c68671fa442b69506f7b638252ae341 Mon Sep 17 00:00:00 2001 From: Adam Retter Date: Fri, 15 Aug 2014 14:42:03 +0100 Subject: [PATCH 07/33] Fix to memory dealocation when creating a slice from a byte buffer --- java/org/rocksdb/Slice.java | 16 ++++++++++++++++ java/rocksjni/slice.cc | 22 ++++++++++++++++++---- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/java/org/rocksdb/Slice.java b/java/org/rocksdb/Slice.java index 28c29c43d..e6932cc76 100644 --- a/java/org/rocksdb/Slice.java +++ b/java/org/rocksdb/Slice.java @@ -55,7 +55,23 @@ public class Slice extends AbstractSlice { createNewSlice1(data); } + /** + * Deletes underlying C++ slice pointer + * and any buffered data. + * + *

+ * Note that this function should be called only after all + * RocksDB instances referencing the slice are closed. + * Otherwise an undefined behavior will occur. + */ + @Override + protected void disposeInternal() { + super.disposeInternal(); + disposeInternalBuf(nativeHandle_); + } + @Override protected final native byte[] data0(long handle); private native void createNewSlice0(byte[] data, int length); private native void createNewSlice1(byte[] data); + private native void disposeInternalBuf(long handle); } diff --git a/java/rocksjni/slice.cc b/java/rocksjni/slice.cc index a0a6f71e6..e54b9a745 100644 --- a/java/rocksjni/slice.cc +++ b/java/rocksjni/slice.cc @@ -130,16 +130,19 @@ void Java_org_rocksdb_Slice_createNewSlice0( void Java_org_rocksdb_Slice_createNewSlice1( JNIEnv * env, jobject jobj, jbyteArray data) { + const int len = env->GetArrayLength(data); + jboolean isCopy; jbyte* ptrData = env->GetByteArrayElements(data, &isCopy); + const char* buf = new char[len]; + memcpy((void*)buf, ptrData, len); - const rocksdb::Slice* slice = new rocksdb::Slice((const char*)ptrData, env->GetArrayLength(data)); + const rocksdb::Slice* slice = new rocksdb::Slice(buf, env->GetArrayLength(data)); rocksdb::AbstractSliceJni::setHandle(env, jobj, slice); - env->ReleaseByteArrayElements(data, ptrData, JNI_COMMIT); + env->ReleaseByteArrayElements(data, ptrData, JNI_ABORT); - //TODO where do we free ptrData later? - //do we need to call env->ReleaseByteArrayElements(data, ptrData, JNI_ABORT) in the org.rocksdb.Slice#dispose() method? + //NOTE: buf will be deleted in the org.rocksdb.Slice#dispose method } /* @@ -156,6 +159,17 @@ jbyteArray Java_org_rocksdb_Slice_data0( return data; } +/* + * Class: org_rocksdb_Slice + * Method: disposeInternalBuf + * Signature: (J)V + */ +void Java_org_rocksdb_Slice_disposeInternalBuf( + JNIEnv * env, jobject jobj, jlong handle) { + const rocksdb::Slice* slice = reinterpret_cast(handle); + delete [] slice->data_; +} + // // +// void Java_org_rocksdb_ComparatorOptions_newComparatorOptions( JNIEnv* env, jobject jobj, jstring jpath, jboolean jshare_table_files, @@ -37,10 +37,9 @@ void Java_org_rocksdb_ComparatorOptions_newComparatorOptions( rocksdb::BackupableDBOptionsJni::setHandle(env, jobj, bopt); } +// -// - -// /* * Class: org_rocksdb_AbstractComparator @@ -51,10 +50,9 @@ void Java_org_rocksdb_AbstractComparator_disposeInternal( JNIEnv* env, jobject jobj, jlong handle) { delete reinterpret_cast(handle); } +// -// - -// /* * Class: org_rocksdb_Comparator @@ -63,14 +61,15 @@ void Java_org_rocksdb_AbstractComparator_disposeInternal( */ void Java_org_rocksdb_Comparator_createNewComparator0( JNIEnv* env, jobject jobj, jlong copt_handle) { - const rocksdb::ComparatorJniCallbackOptions* copt = reinterpret_cast(copt_handle); - const rocksdb::ComparatorJniCallback* c = new rocksdb::ComparatorJniCallback(env, jobj, copt); + const rocksdb::ComparatorJniCallbackOptions* copt = + reinterpret_cast(copt_handle); + const rocksdb::ComparatorJniCallback* c = + new rocksdb::ComparatorJniCallback(env, jobj, copt); rocksdb::AbstractComparatorJni::setHandle(env, jobj, c); } +// -// - -// /* * Class: org_rocksdb_DirectComparator @@ -79,10 +78,10 @@ void Java_org_rocksdb_Comparator_createNewComparator0( */ void Java_org_rocksdb_DirectComparator_createNewDirectComparator0( JNIEnv* env, jobject jobj, jlong copt_handle) { - const rocksdb::ComparatorJniCallbackOptions* copt = reinterpret_cast(copt_handle); - const rocksdb::DirectComparatorJniCallback* c = new rocksdb::DirectComparatorJniCallback(env, jobj, copt); + const rocksdb::ComparatorJniCallbackOptions* copt = + reinterpret_cast(copt_handle); + const rocksdb::DirectComparatorJniCallback* c = + new rocksdb::DirectComparatorJniCallback(env, jobj, copt); rocksdb::AbstractComparatorJni::setHandle(env, jobj, c); } - -// - +// diff --git a/java/rocksjni/comparatorjnicallback.cc b/java/rocksjni/comparatorjnicallback.cc index 22bba334c..1f271fe42 100644 --- a/java/rocksjni/comparatorjnicallback.cc +++ b/java/rocksjni/comparatorjnicallback.cc @@ -7,14 +7,15 @@ // rocksdb::Comparator. #include "rocksjni/comparatorjnicallback.h" -#include "portal.h" +#include "rocksjni/portal.h" namespace rocksdb { BaseComparatorJniCallback::BaseComparatorJniCallback( - JNIEnv* env, jobject jComparator, const ComparatorJniCallbackOptions* copt) { + JNIEnv* env, jobject jComparator, + const ComparatorJniCallbackOptions* copt) { - //mutex is used for synchronisation when we are re-using - //the global java slice objects + // mutex is used for synchronisation when we are re-using + // the global java slice objects mutex_ = new port::Mutex(copt->use_adaptive_mutex); // Note: Comparator methods may be accessed by multiple threads, @@ -30,11 +31,13 @@ BaseComparatorJniCallback::BaseComparatorJniCallback( // so we cache it in a global var jmethodID jNameMethodId = AbstractComparatorJni::getNameMethodId(env); jstring jsName = (jstring)env->CallObjectMethod(m_jComparator, jNameMethodId); - m_name = JniUtil::copyString(env, jsName); //also releases jsName + m_name = JniUtil::copyString(env, jsName); // also releases jsName m_jCompareMethodId = AbstractComparatorJni::getCompareMethodId(env); - m_jFindShortestSeparatorMethodId = AbstractComparatorJni::getFindShortestSeparatorMethodId(env); - m_jFindShortSuccessorMethodId = AbstractComparatorJni::getFindShortSuccessorMethodId(env); + m_jFindShortestSeparatorMethodId = + AbstractComparatorJni::getFindShortestSeparatorMethodId(env); + m_jFindShortSuccessorMethodId = + AbstractComparatorJni::getFindShortSuccessorMethodId(env); } /** @@ -42,24 +45,25 @@ BaseComparatorJniCallback::BaseComparatorJniCallback( */ JNIEnv* BaseComparatorJniCallback::getJniEnv() const { JNIEnv *env; - jint rs = m_jvm->AttachCurrentThread((void **)&env, NULL); + jint rs = m_jvm->AttachCurrentThread(reinterpret_cast(&env), NULL); assert(rs == JNI_OK); return env; -}; +} const char* BaseComparatorJniCallback::Name() const { return m_name.c_str(); } int BaseComparatorJniCallback::Compare(const Slice& a, const Slice& b) const { - JNIEnv* m_env = getJniEnv(); mutex_->Lock(); AbstractSliceJni::setHandle(m_env, m_jSliceA, &a); AbstractSliceJni::setHandle(m_env, m_jSliceB, &b); - jint result = m_env->CallIntMethod(m_jComparator, m_jCompareMethodId, m_jSliceA, m_jSliceB); + jint result = + m_env->CallIntMethod(m_jComparator, m_jCompareMethodId, m_jSliceA, + m_jSliceB); mutex_->Unlock(); @@ -68,8 +72,8 @@ int BaseComparatorJniCallback::Compare(const Slice& a, const Slice& b) const { return result; } -void BaseComparatorJniCallback::FindShortestSeparator(std::string* start, const Slice& limit) const { - +void BaseComparatorJniCallback::FindShortestSeparator( + std::string* start, const Slice& limit) const { if (start == nullptr) { return; } @@ -82,22 +86,24 @@ void BaseComparatorJniCallback::FindShortestSeparator(std::string* start, const mutex_->Lock(); AbstractSliceJni::setHandle(m_env, m_jSliceLimit, &limit); - jstring jsResultStart = (jstring)m_env->CallObjectMethod(m_jComparator, m_jFindShortestSeparatorMethodId, jsStart, m_jSliceLimit); + jstring jsResultStart = + (jstring)m_env->CallObjectMethod(m_jComparator, + m_jFindShortestSeparatorMethodId, jsStart, m_jSliceLimit); mutex_->Unlock(); m_env->DeleteLocalRef(jsStart); - if(jsResultStart != nullptr) { - //update start with result - *start = JniUtil::copyString(m_env, jsResultStart); //also releases jsResultStart + if (jsResultStart != nullptr) { + // update start with result + *start = + JniUtil::copyString(m_env, jsResultStart); // also releases jsResultStart } m_jvm->DetachCurrentThread(); } void BaseComparatorJniCallback::FindShortSuccessor(std::string* key) const { - if (key == nullptr) { return; } @@ -107,13 +113,16 @@ void BaseComparatorJniCallback::FindShortSuccessor(std::string* key) const { const char* keyUtf = key->c_str(); jstring jsKey = m_env->NewStringUTF(keyUtf); - jstring jsResultKey = (jstring)m_env->CallObjectMethod(m_jComparator, m_jFindShortSuccessorMethodId, jsKey); + jstring jsResultKey = + (jstring)m_env->CallObjectMethod(m_jComparator, + m_jFindShortSuccessorMethodId, jsKey); m_env->DeleteLocalRef(jsKey); - if(jsResultKey != nullptr) { - //update key with result - *key = JniUtil::copyString(m_env, jsResultKey); //also releases jsResultKey + if (jsResultKey != nullptr) { + // update key with result + *key = + JniUtil::copyString(m_env, jsResultKey); // also releases jsResultKey } m_jvm->DetachCurrentThread(); @@ -121,7 +130,7 @@ void BaseComparatorJniCallback::FindShortSuccessor(std::string* key) const { BaseComparatorJniCallback::~BaseComparatorJniCallback() { JNIEnv* m_env = getJniEnv(); - + m_env->DeleteGlobalRef(m_jComparator); m_env->DeleteGlobalRef(m_jSliceA); m_env->DeleteGlobalRef(m_jSliceB); @@ -129,21 +138,23 @@ BaseComparatorJniCallback::~BaseComparatorJniCallback() { // Note: do not need to explicitly detach, as this function is effectively // called from the Java class's disposeInternal method, and so already - // has an attached thread, getJniEnv above is just a no-op Attach to get the env - //jvm->DetachCurrentThread(); + // has an attached thread, getJniEnv above is just a no-op Attach to get + // the env jvm->DetachCurrentThread(); } ComparatorJniCallback::ComparatorJniCallback( - JNIEnv* env, jobject jComparator, const ComparatorJniCallbackOptions* copt) : BaseComparatorJniCallback(env, jComparator, copt) { - + JNIEnv* env, jobject jComparator, + const ComparatorJniCallbackOptions* copt) : + BaseComparatorJniCallback(env, jComparator, copt) { m_jSliceA = env->NewGlobalRef(SliceJni::construct0(env)); m_jSliceB = env->NewGlobalRef(SliceJni::construct0(env)); m_jSliceLimit = env->NewGlobalRef(SliceJni::construct0(env)); } DirectComparatorJniCallback::DirectComparatorJniCallback( - JNIEnv* env, jobject jComparator, const ComparatorJniCallbackOptions* copt) : BaseComparatorJniCallback(env, jComparator, copt) { - + JNIEnv* env, jobject jComparator, + const ComparatorJniCallbackOptions* copt) : + BaseComparatorJniCallback(env, jComparator, copt) { m_jSliceA = env->NewGlobalRef(DirectSliceJni::construct0(env)); m_jSliceB = env->NewGlobalRef(DirectSliceJni::construct0(env)); m_jSliceLimit = env->NewGlobalRef(DirectSliceJni::construct0(env)); diff --git a/java/rocksjni/comparatorjnicallback.h b/java/rocksjni/comparatorjnicallback.h index 8ca0ac64f..cda32fce1 100644 --- a/java/rocksjni/comparatorjnicallback.h +++ b/java/rocksjni/comparatorjnicallback.h @@ -10,6 +10,7 @@ #define JAVA_ROCKSJNI_COMPARATORJNICALLBACK_H_ #include +#include #include "rocksdb/comparator.h" #include "rocksdb/slice.h" #include "port/port.h" @@ -43,15 +44,18 @@ struct ComparatorJniCallbackOptions { * introduce locking in regions of those methods via mutex_. */ class BaseComparatorJniCallback : public Comparator { - public: - BaseComparatorJniCallback(JNIEnv* env, jobject jComparator, const ComparatorJniCallbackOptions* copt); + public: + BaseComparatorJniCallback( + JNIEnv* env, jobject jComparator, + const ComparatorJniCallbackOptions* copt); virtual ~BaseComparatorJniCallback(); virtual const char* Name() const; virtual int Compare(const Slice& a, const Slice& b) const; - virtual void FindShortestSeparator(std::string* start, const Slice& limit) const; + virtual void FindShortestSeparator( + std::string* start, const Slice& limit) const; virtual void FindShortSuccessor(std::string* key) const; - private: + private: port::Mutex* mutex_; JavaVM* m_jvm; jobject m_jComparator; @@ -61,20 +65,24 @@ class BaseComparatorJniCallback : public Comparator { jmethodID m_jFindShortSuccessorMethodId; JNIEnv* getJniEnv() const; - protected: + protected: jobject m_jSliceA; jobject m_jSliceB; jobject m_jSliceLimit; }; class ComparatorJniCallback : public BaseComparatorJniCallback { - public: - ComparatorJniCallback(JNIEnv* env, jobject jComparator, const ComparatorJniCallbackOptions* copt); + public: + ComparatorJniCallback( + JNIEnv* env, jobject jComparator, + const ComparatorJniCallbackOptions* copt); }; class DirectComparatorJniCallback : public BaseComparatorJniCallback { - public: - DirectComparatorJniCallback(JNIEnv* env, jobject jComparator, const ComparatorJniCallbackOptions* copt); + public: + DirectComparatorJniCallback( + JNIEnv* env, jobject jComparator, + const ComparatorJniCallbackOptions* copt); }; } // namespace rocksdb diff --git a/java/rocksjni/options.cc b/java/rocksjni/options.cc index 4367fc708..ceb4ce031 100644 --- a/java/rocksjni/options.cc +++ b/java/rocksjni/options.cc @@ -16,6 +16,7 @@ #include "include/org_rocksdb_ReadOptions.h" #include "include/org_rocksdb_ComparatorOptions.h" +#include "rocksjni/comparatorjnicallback.h" #include "rocksjni/portal.h" #include "rocksdb/db.h" #include "rocksdb/options.h" @@ -25,7 +26,6 @@ #include "rocksdb/slice_transform.h" #include "rocksdb/rate_limiter.h" #include "rocksdb/comparator.h" -#include "comparatorjnicallback.h" /* * Class: org_rocksdb_Options @@ -1794,7 +1794,8 @@ void Java_org_rocksdb_ComparatorOptions_newComparatorOptions( */ jboolean Java_org_rocksdb_ComparatorOptions_useAdaptiveMutex( JNIEnv * env, jobject jobj, jlong jhandle) { - return reinterpret_cast(jhandle)->use_adaptive_mutex; + return reinterpret_cast(jhandle) + ->use_adaptive_mutex; } /* @@ -1804,8 +1805,8 @@ jboolean Java_org_rocksdb_ComparatorOptions_useAdaptiveMutex( */ void Java_org_rocksdb_ComparatorOptions_setUseAdaptiveMutex( JNIEnv * env, jobject jobj, jlong jhandle, jboolean juse_adaptive_mutex) { - reinterpret_cast(jhandle)->use_adaptive_mutex = - static_cast(juse_adaptive_mutex); + reinterpret_cast(jhandle) + ->use_adaptive_mutex = static_cast(juse_adaptive_mutex); } /* diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index bd40d4290..32452ae0b 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -12,6 +12,8 @@ #include #include +#include + #include "rocksdb/db.h" #include "rocksdb/filter_policy.h" #include "rocksdb/status.h" @@ -364,7 +366,7 @@ class ColumnFamilyHandleJni { }; class ComparatorOptionsJni { - public: + public: // Get the java class id of org.rocksdb.ComparatorOptions. static jclass getJClass(JNIEnv* env) { jclass jclazz = env->FindClass("org/rocksdb/ComparatorOptions"); @@ -383,7 +385,8 @@ class ComparatorOptionsJni { // Pass the ComparatorJniCallbackOptions pointer to the java side. static void setHandle( - JNIEnv* env, jobject jobj, const rocksdb::ComparatorJniCallbackOptions* op) { + JNIEnv* env, jobject jobj, + const rocksdb::ComparatorJniCallbackOptions* op) { env->SetLongField( jobj, getHandleFieldID(env), reinterpret_cast(op)); @@ -418,37 +421,41 @@ class AbstractComparatorJni { // Get the java method `compare` of org.rocksdb.Comparator. static jmethodID getCompareMethodId(JNIEnv* env) { - static jmethodID mid = env->GetMethodID( - getJClass(env), "compare", "(Lorg/rocksdb/AbstractSlice;Lorg/rocksdb/AbstractSlice;)I"); + static jmethodID mid = env->GetMethodID(getJClass(env), + "compare", + "(Lorg/rocksdb/AbstractSlice;Lorg/rocksdb/AbstractSlice;)I"); assert(mid != nullptr); return mid; } // Get the java method `findShortestSeparator` of org.rocksdb.Comparator. static jmethodID getFindShortestSeparatorMethodId(JNIEnv* env) { - static jmethodID mid = env->GetMethodID( - getJClass(env), "findShortestSeparator", "(Ljava/lang/String;Lorg/rocksdb/AbstractSlice;)Ljava/lang/String;"); + static jmethodID mid = env->GetMethodID(getJClass(env), + "findShortestSeparator", + "(Ljava/lang/String;Lorg/rocksdb/AbstractSlice;)Ljava/lang/String;"); assert(mid != nullptr); return mid; } // Get the java method `findShortSuccessor` of org.rocksdb.Comparator. static jmethodID getFindShortSuccessorMethodId(JNIEnv* env) { - static jmethodID mid = env->GetMethodID( - getJClass(env), "findShortSuccessor", "(Ljava/lang/String;)Ljava/lang/String;"); + static jmethodID mid = env->GetMethodID(getJClass(env), + "findShortSuccessor", + "(Ljava/lang/String;)Ljava/lang/String;"); assert(mid != nullptr); return mid; } // Get the pointer to ComparatorJniCallback. - static rocksdb::BaseComparatorJniCallback* getHandle(JNIEnv* env, jobject jobj) { + static rocksdb::BaseComparatorJniCallback* getHandle( + JNIEnv* env, jobject jobj) { return reinterpret_cast( env->GetLongField(jobj, getHandleFieldID(env))); } // Pass the ComparatorJniCallback pointer to the java side. static void setHandle( - JNIEnv* env, jobject jobj, const rocksdb::BaseComparatorJniCallback* op) { + JNIEnv* env, jobject jobj, const rocksdb::BaseComparatorJniCallback* op) { env->SetLongField( jobj, getHandleFieldID(env), reinterpret_cast(op)); @@ -585,8 +592,7 @@ class ListJni { }; class JniUtil { - public: - + public: /** * Copies a jstring to a std::string * and releases the original jstring diff --git a/java/rocksjni/slice.cc b/java/rocksjni/slice.cc index e54b9a745..0d8b92c9c 100644 --- a/java/rocksjni/slice.cc +++ b/java/rocksjni/slice.cc @@ -17,7 +17,7 @@ #include "rocksdb/slice.h" #include "rocksjni/portal.h" -// /* * Class: org_rocksdb_AbstractSlice @@ -73,7 +73,8 @@ jstring Java_org_rocksdb_AbstractSlice_toString0( jint Java_org_rocksdb_AbstractSlice_compare0( JNIEnv* env, jobject jobj, jlong handle, jlong otherHandle) { const rocksdb::Slice* slice = reinterpret_cast(handle); - const rocksdb::Slice* otherSlice = reinterpret_cast(otherHandle); + const rocksdb::Slice* otherSlice = + reinterpret_cast(otherHandle); return slice->compare(*otherSlice); } @@ -85,7 +86,8 @@ jint Java_org_rocksdb_AbstractSlice_compare0( jboolean Java_org_rocksdb_AbstractSlice_startsWith0( JNIEnv* env, jobject jobj, jlong handle, jlong otherHandle) { const rocksdb::Slice* slice = reinterpret_cast(handle); - const rocksdb::Slice* otherSlice = reinterpret_cast(otherHandle); + const rocksdb::Slice* otherSlice = + reinterpret_cast(otherHandle); return slice->starts_with(*otherSlice); } @@ -99,9 +101,9 @@ void Java_org_rocksdb_AbstractSlice_disposeInternal( delete reinterpret_cast(handle); } -// +// -// /* * Class: org_rocksdb_Slice @@ -113,13 +115,11 @@ void Java_org_rocksdb_Slice_createNewSlice0( const jsize dataSize = env->GetArrayLength(data); const int len = dataSize - offset; - //jbyte ptrData[len]; jbyte* ptrData = new jbyte[len]; env->GetByteArrayRegion(data, offset, len, ptrData); const rocksdb::Slice* slice = new rocksdb::Slice((const char*)ptrData, len); rocksdb::AbstractSliceJni::setHandle(env, jobj, slice); - } /* @@ -135,14 +135,15 @@ void Java_org_rocksdb_Slice_createNewSlice1( jboolean isCopy; jbyte* ptrData = env->GetByteArrayElements(data, &isCopy); const char* buf = new char[len]; - memcpy((void*)buf, ptrData, len); + memcpy(const_cast(buf), ptrData, len); - const rocksdb::Slice* slice = new rocksdb::Slice(buf, env->GetArrayLength(data)); + const rocksdb::Slice* slice = + new rocksdb::Slice(buf, env->GetArrayLength(data)); rocksdb::AbstractSliceJni::setHandle(env, jobj, slice); env->ReleaseByteArrayElements(data, ptrData, JNI_ABORT); - //NOTE: buf will be deleted in the org.rocksdb.Slice#dispose method + // NOTE: buf will be deleted in the org.rocksdb.Slice#dispose method } /* @@ -155,7 +156,8 @@ jbyteArray Java_org_rocksdb_Slice_data0( const rocksdb::Slice* slice = reinterpret_cast(handle); const int len = slice->size(); const jbyteArray data = env->NewByteArray(len); - env->SetByteArrayRegion(data, 0, len, (jbyte*)slice->data()); + env->SetByteArrayRegion(data, 0, len, + reinterpret_cast(const_cast(slice->data()))); return data; } @@ -170,9 +172,9 @@ void Java_org_rocksdb_Slice_disposeInternalBuf( delete [] slice->data_; } -// +// -// /* * Class: org_rocksdb_DirectSlice @@ -181,7 +183,8 @@ void Java_org_rocksdb_Slice_disposeInternalBuf( */ void Java_org_rocksdb_DirectSlice_createNewDirectSlice0( JNIEnv* env, jobject jobj, jobject data, jint length) { - const char* ptrData = (char*)env->GetDirectBufferAddress(data); + const char* ptrData = + reinterpret_cast(env->GetDirectBufferAddress(data)); const rocksdb::Slice* slice = new rocksdb::Slice(ptrData, length); rocksdb::AbstractSliceJni::setHandle(env, jobj, slice); } @@ -193,7 +196,8 @@ void Java_org_rocksdb_DirectSlice_createNewDirectSlice0( */ void Java_org_rocksdb_DirectSlice_createNewDirectSlice1( JNIEnv* env, jobject jobj, jobject data) { - const char* ptrData = (char*)env->GetDirectBufferAddress(data); + const char* ptrData = + reinterpret_cast(env->GetDirectBufferAddress(data)); const rocksdb::Slice* slice = new rocksdb::Slice(ptrData); rocksdb::AbstractSliceJni::setHandle(env, jobj, slice); } @@ -206,7 +210,8 @@ void Java_org_rocksdb_DirectSlice_createNewDirectSlice1( jobject Java_org_rocksdb_DirectSlice_data0( JNIEnv* env, jobject jobj, jlong handle) { const rocksdb::Slice* slice = reinterpret_cast(handle); - return env->NewDirectByteBuffer((void*)slice->data(), slice->size()); + return env->NewDirectByteBuffer(const_cast(slice->data()), + slice->size()); } /* @@ -228,6 +233,7 @@ jbyte Java_org_rocksdb_DirectSlice_get0( void Java_org_rocksdb_DirectSlice_clear0( JNIEnv* env, jobject jobj, jlong handle) { rocksdb::Slice* slice = reinterpret_cast(handle); + delete [] slice->data_; slice->clear(); } @@ -242,4 +248,4 @@ void Java_org_rocksdb_DirectSlice_removePrefix0( slice->remove_prefix(length); } -// +// From c63494fb61488b2c9380f17424fe277a2c094e9d Mon Sep 17 00:00:00 2001 From: Adam Retter Date: Thu, 25 Sep 2014 14:12:02 +0100 Subject: [PATCH 09/33] Tests for ComparatorOptions, Comparator and DirectComparator, and by proxy we also exercise Slice and DirectSlice --- java/Makefile | 4 +- java/org/rocksdb/AbstractComparator.java | 2 +- .../rocksdb/test/AbstractComparatorTest.java | 166 ++++++++++++++++++ .../rocksdb/test/ComparatorOptionsTest.java | 34 ++++ java/org/rocksdb/test/ComparatorTest.java | 45 +++++ .../rocksdb/test/DirectComparatorTest.java | 48 +++++ java/org/rocksdb/test/Types.java | 43 +++++ 7 files changed, 340 insertions(+), 2 deletions(-) create mode 100644 java/org/rocksdb/test/AbstractComparatorTest.java create mode 100644 java/org/rocksdb/test/ComparatorOptionsTest.java create mode 100644 java/org/rocksdb/test/ComparatorTest.java create mode 100644 java/org/rocksdb/test/DirectComparatorTest.java create mode 100644 java/org/rocksdb/test/Types.java diff --git a/java/Makefile b/java/Makefile index 19df0fa69..697df5175 100644 --- a/java/Makefile +++ b/java/Makefile @@ -46,7 +46,9 @@ test: java java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.ReadOnlyTest java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.ReadOptionsTest java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.StatisticsCollectorTest - @rm -rf /tmp/rocksdbjni_* + java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.ComparatorOptionsTest + java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.ComparatorTest + java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.DirectComparatorTest db_bench: java javac org/rocksdb/benchmark/*.java diff --git a/java/org/rocksdb/AbstractComparator.java b/java/org/rocksdb/AbstractComparator.java index fa797b273..e5c503025 100644 --- a/java/org/rocksdb/AbstractComparator.java +++ b/java/org/rocksdb/AbstractComparator.java @@ -14,7 +14,7 @@ package org.rocksdb; * @see org.rocksdb.Comparator * @see org.rocksdb.DirectComparator */ -abstract class AbstractComparator extends RocksObject { +public abstract class AbstractComparator extends RocksObject { public abstract String name(); diff --git a/java/org/rocksdb/test/AbstractComparatorTest.java b/java/org/rocksdb/test/AbstractComparatorTest.java new file mode 100644 index 000000000..e3cc6bb77 --- /dev/null +++ b/java/org/rocksdb/test/AbstractComparatorTest.java @@ -0,0 +1,166 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +package org.rocksdb.test; + +import org.rocksdb.*; + +import java.io.IOException; +import java.nio.file.*; +import java.nio.file.attribute.BasicFileAttributes; +import java.util.Random; + +import static org.rocksdb.test.Types.byteToInt; +import static org.rocksdb.test.Types.intToByte; + +/** + * Abstract tests for both Comparator and DirectComparator + */ +public abstract class AbstractComparatorTest { + + /** + * Get a comparator which will expect Integer keys + * and determine an ascending order + * + * @return An integer ascending order key comparator + */ + public abstract AbstractComparator getAscendingIntKeyComparator(); + + /** + * Test which stores random keys into the database + * using an @see getAscendingIntKeyComparator + * it then checks that these keys are read back in + * ascending order + * + * @param db_path A path where we can store database + * files temporarily + */ + public void testRoundtrip(final Path db_path) throws IOException { + + Options opt = null; + RocksDB db = null; + + try { + opt = new Options(); + opt.setCreateIfMissing(true); + opt.setComparator(getAscendingIntKeyComparator()); + + // store 10,000 random integer keys + final int ITERATIONS = 10000; + + db = RocksDB.open(opt, db_path.toString()); + final Random random = new Random(); + for(int i = 0; i < ITERATIONS; i++) { + final byte key[] = intToByte(random.nextInt()); + if(i > 0 && db.get(key) != null) { // does key already exist (avoid duplicates) + i--; // generate a different key + } else { + db.put(key, "value".getBytes()); + } + } + db.close(); + + + // re-open db and read from start to end + // integer keys should be in ascending + // order as defined by SimpleIntComparator + db = RocksDB.open(opt, db_path.toString()); + final RocksIterator it = db.newIterator(); + it.seekToFirst(); + int lastKey = Integer.MIN_VALUE; + int count = 0; + for(it.seekToFirst(); it.isValid(); it.next()) { + final int thisKey = byteToInt(it.key()); + assert(thisKey > lastKey); + lastKey = thisKey; + count++; + } + db.close(); + + assert(count == ITERATIONS); + + } catch (final RocksDBException e) { + System.err.format("[ERROR]: %s%n", e); + e.printStackTrace(); + } finally { + if(db != null) { + db.close(); + } + + if(opt != null) { + opt.dispose(); + } + + removeDb(db_path); // cleanup after ourselves! + } + } + + /** + * Compares integer keys + * so that they are in ascending order + * + * @param a 4-bytes representing an integer key + * @param b 4-bytes representing an integer key + * + * @return negative if a < b, 0 if a == b, positive otherwise + */ + protected final int compareIntKeys(final byte[] a, final byte[] b) { + + final int iA = byteToInt(a); + final int iB = byteToInt(b); + + // protect against int key calculation overflow + final double diff = (double)iA - iB; + final int result; + if(diff < Integer.MIN_VALUE) { + result = Integer.MIN_VALUE; + } else if(diff > Integer.MAX_VALUE) { + result = Integer.MAX_VALUE; + } else { + result = (int)diff; + } + + return result; + } + + /** + * Utility method for deleting database files + * + * @param db_path The path to the database to remove + * from the filesystem + */ + private static void removeDb(final Path db_path) throws IOException { + Files.walkFileTree(db_path, new SimpleFileVisitor() { + @Override + public FileVisitResult visitFile(final Path file, final BasicFileAttributes attrs) + throws IOException { + Files.delete(file); + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult visitFileFailed(final Path file, IOException exc) + throws IOException { + // try to delete the file anyway, even if its attributes + // could not be read, since delete-only access is + // theoretically possible + Files.delete(file); + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult postVisitDirectory(final Path dir, IOException exc) + throws IOException { + if (exc == null) { + Files.delete(dir); + return FileVisitResult.CONTINUE; + } else { + // directory iteration failed; propagate exception + throw exc; + } + } + }); + } +} diff --git a/java/org/rocksdb/test/ComparatorOptionsTest.java b/java/org/rocksdb/test/ComparatorOptionsTest.java new file mode 100644 index 000000000..e25209392 --- /dev/null +++ b/java/org/rocksdb/test/ComparatorOptionsTest.java @@ -0,0 +1,34 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +package org.rocksdb.test; + +import org.rocksdb.ComparatorOptions; +import org.rocksdb.RocksDB; + +import java.util.Random; + +public class ComparatorOptionsTest { + + static { + RocksDB.loadLibrary(); + } + + public static void main(String[] args) { + final ComparatorOptions copt = new ComparatorOptions(); + Random rand = new Random(); + + { // UseAdaptiveMutex test + copt.setUseAdaptiveMutex(true); + assert(copt.useAdaptiveMutex() == true); + + copt.setUseAdaptiveMutex(false); + assert(copt.useAdaptiveMutex() == false); + } + + copt.dispose(); + System.out.println("Passed ComparatorOptionsTest"); + } +} diff --git a/java/org/rocksdb/test/ComparatorTest.java b/java/org/rocksdb/test/ComparatorTest.java new file mode 100644 index 000000000..34d7c78df --- /dev/null +++ b/java/org/rocksdb/test/ComparatorTest.java @@ -0,0 +1,45 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +package org.rocksdb.test; + +import org.rocksdb.*; + +import java.io.IOException; +import java.nio.file.FileSystems; + +public class ComparatorTest { + private static final String db_path = "/tmp/comparator_db"; + + static { + RocksDB.loadLibrary(); + } + + public static void main(String[] args) throws IOException { + + final AbstractComparatorTest comparatorTest = new AbstractComparatorTest() { + @Override + public AbstractComparator getAscendingIntKeyComparator() { + return new Comparator(new ComparatorOptions()) { + + @Override + public String name() { + return "test.AscendingIntKeyComparator"; + } + + @Override + public int compare(final Slice a, final Slice b) { + return compareIntKeys(a.data(), b.data()); + } + }; + } + }; + + // test the round-tripability of keys written and read with the Comparator + comparatorTest.testRoundtrip(FileSystems.getDefault().getPath(db_path)); + + System.out.println("Passed ComparatorTest"); + } +} diff --git a/java/org/rocksdb/test/DirectComparatorTest.java b/java/org/rocksdb/test/DirectComparatorTest.java new file mode 100644 index 000000000..9df06eb73 --- /dev/null +++ b/java/org/rocksdb/test/DirectComparatorTest.java @@ -0,0 +1,48 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +package org.rocksdb.test; + +import org.rocksdb.*; + +import java.io.IOException; +import java.nio.file.FileSystems; + +public class DirectComparatorTest { + private static final String db_path = "/tmp/direct_comparator_db"; + + static { + RocksDB.loadLibrary(); + } + + public static void main(String[] args) throws IOException { + + final AbstractComparatorTest comparatorTest = new AbstractComparatorTest() { + @Override + public AbstractComparator getAscendingIntKeyComparator() { + return new DirectComparator(new ComparatorOptions()) { + + @Override + public String name() { + return "test.AscendingIntKeyDirectComparator"; + } + + @Override + public int compare(final DirectSlice a, final DirectSlice b) { + final byte ax[] = new byte[4], bx[] = new byte[4]; + a.data().get(ax); + b.data().get(bx); + return compareIntKeys(ax, bx); + } + }; + } + }; + + // test the round-tripability of keys written and read with the DirectComparator + comparatorTest.testRoundtrip(FileSystems.getDefault().getPath(db_path)); + + System.out.println("Passed DirectComparatorTest"); + } +} diff --git a/java/org/rocksdb/test/Types.java b/java/org/rocksdb/test/Types.java new file mode 100644 index 000000000..22fcd3537 --- /dev/null +++ b/java/org/rocksdb/test/Types.java @@ -0,0 +1,43 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +package org.rocksdb.test; + +/** + * Simple type conversion methods + * for use in tests + */ +public class Types { + + /** + * Convert first 4 bytes of a byte array to an int + * + * @param data The byte array + * + * @return An integer + */ + public static int byteToInt(final byte data[]) { + return (data[0] & 0xff) | + ((data[1] & 0xff) << 8) | + ((data[2] & 0xff) << 16) | + ((data[3] & 0xff) << 24); + } + + /** + * Convert an int to 4 bytes + * + * @param v The int + * + * @return A byte array containing 4 bytes + */ + public static byte[] intToByte(final int v) { + return new byte[] { + (byte)((v >>> 0) & 0xff), + (byte)((v >>> 8) & 0xff), + (byte)((v >>> 16) & 0xff), + (byte)((v >>> 24) & 0xff) + }; + } +} From a6fb7f312dcbe4357c2b9d097f3bef033f2c659f Mon Sep 17 00:00:00 2001 From: Adam Retter Date: Mon, 6 Oct 2014 18:35:53 +0100 Subject: [PATCH 10/33] Fix code review comments raised in https://reviews.facebook.net/D22779 --- java/org/rocksdb/AbstractComparator.java | 16 ++++++- java/org/rocksdb/AbstractSlice.java | 33 +++++++++----- java/org/rocksdb/Comparator.java | 1 - java/org/rocksdb/ComparatorOptions.java | 12 ++++- java/org/rocksdb/DirectComparator.java | 1 - java/org/rocksdb/DirectSlice.java | 18 +++++++- java/org/rocksdb/Slice.java | 9 +++- .../rocksdb/test/AbstractComparatorTest.java | 12 ++--- java/rocksjni/comparator.cc | 21 --------- java/rocksjni/comparatorjnicallback.cc | 44 ++++++++++++------- java/rocksjni/comparatorjnicallback.h | 12 +++-- 11 files changed, 115 insertions(+), 64 deletions(-) diff --git a/java/org/rocksdb/AbstractComparator.java b/java/org/rocksdb/AbstractComparator.java index e5c503025..8de50e271 100644 --- a/java/org/rocksdb/AbstractComparator.java +++ b/java/org/rocksdb/AbstractComparator.java @@ -14,8 +14,22 @@ package org.rocksdb; * @see org.rocksdb.Comparator * @see org.rocksdb.DirectComparator */ -public abstract class AbstractComparator extends RocksObject { +public abstract class AbstractComparator + extends RocksObject { + /** + * The name of the comparator. Used to check for comparator + * mismatches (i.e., a DB created with one comparator is + * accessed using a different comparator). + * + * A new name should be used whenever + * the comparator implementation changes in a way that will cause + * the relative ordering of any two keys to change. + * + * Names starting with "rocksdb." are reserved and should not be used. + * + * @return The name of this comparator implementation + */ public abstract String name(); /** diff --git a/java/org/rocksdb/AbstractSlice.java b/java/org/rocksdb/AbstractSlice.java index 963c72a1b..971bd7c1a 100644 --- a/java/org/rocksdb/AbstractSlice.java +++ b/java/org/rocksdb/AbstractSlice.java @@ -13,13 +13,23 @@ package org.rocksdb; * should extend either of the public abstract classes: * @see org.rocksdb.Slice * @see org.rocksdb.DirectSlice + * + * Regards the lifecycle of Java Slices in RocksDB: + * At present when you configure a Comparator from Java, it creates an + * instance of a C++ BaseComparatorJniCallback subclass and + * passes that to RocksDB as the comparator. That subclass of + * BaseComparatorJniCallback creates the Java + * {@see org.rocksdb.AbstractSlice} subclass Objects. When you dispose + * the Java {@see org.rocksdb.AbstractComparator} subclass, it disposes the + * C++ BaseComparatorJniCallback subclass, which in turn destroys the + * Java {@see org.rocksdb.AbstractSlice} subclass Objects. */ abstract class AbstractSlice extends RocksObject { /** - * Returns the data. + * Returns the data of the slice. * - * @return The data. Note, the type of access is + * @return The slice data. Note, the type of access is * determined by the subclass * @see org.rocksdb.AbstractSlice#data0(long). */ @@ -65,7 +75,7 @@ abstract class AbstractSlice extends RocksObject { * Creates a string representation of the data * * @param hex When true, the representation - * will be encoded in hexidecimal. + * will be encoded in hexadecimal. * * @return The string representation of the data. */ @@ -96,13 +106,13 @@ abstract class AbstractSlice extends RocksObject { } /** - * If other is a slice, then - * we defer to compare to check equality, - * otherwise we return false. + * If other is a slice object, then + * we defer to {@link #compare(AbstractSlice) compare} + * to check equality, otherwise we return false. * * @param other Object to test for equality * - * @return true when this.compare(other) == 0, + * @return true when {@code this.compare(other) == 0}, * false otherwise. */ @Override @@ -115,13 +125,14 @@ abstract class AbstractSlice extends RocksObject { } /** - * Determines whether this starts with prefix + * Determines whether this slice starts with + * another slice * * @param prefix Another slice which may of may not - * be the prefix of this slice. + * be a prefix of this slice. * - * @return true when slice `prefix` is a prefix - * of this slice + * @return true when this slice starts with the + * {@code prefix} slice */ public boolean startsWith(final AbstractSlice prefix) { if (prefix != null) { diff --git a/java/org/rocksdb/Comparator.java b/java/org/rocksdb/Comparator.java index 7272a555a..c8e050bca 100644 --- a/java/org/rocksdb/Comparator.java +++ b/java/org/rocksdb/Comparator.java @@ -15,7 +15,6 @@ package org.rocksdb; * using @see org.rocksdb.DirectComparator */ public abstract class Comparator extends AbstractComparator { - public Comparator(final ComparatorOptions copt) { super(); createNewComparator0(copt.nativeHandle_); diff --git a/java/org/rocksdb/ComparatorOptions.java b/java/org/rocksdb/ComparatorOptions.java index a55091dfa..f0ba520a3 100644 --- a/java/org/rocksdb/ComparatorOptions.java +++ b/java/org/rocksdb/ComparatorOptions.java @@ -1,7 +1,14 @@ package org.rocksdb; +/** + * This class controls the behaviour + * of Java implementations of + * AbstractComparator + * + * Note that dispose() must be called before a ComparatorOptions + * instance becomes out-of-scope to release the allocated memory in C++. + */ public class ComparatorOptions extends RocksObject { - public ComparatorOptions() { super(); newComparatorOptions(); @@ -44,6 +51,7 @@ public class ComparatorOptions extends RocksObject { private native void newComparatorOptions(); private native boolean useAdaptiveMutex(final long handle); - private native void setUseAdaptiveMutex(final long handle, final boolean useAdaptiveMutex); + private native void setUseAdaptiveMutex(final long handle, + final boolean useAdaptiveMutex); private native void disposeInternal(long handle); } diff --git a/java/org/rocksdb/DirectComparator.java b/java/org/rocksdb/DirectComparator.java index 86476c40e..47f4d7256 100644 --- a/java/org/rocksdb/DirectComparator.java +++ b/java/org/rocksdb/DirectComparator.java @@ -15,7 +15,6 @@ package org.rocksdb; * using @see org.rocksdb.Comparator */ public abstract class DirectComparator extends AbstractComparator { - public DirectComparator(final ComparatorOptions copt) { super(); createNewDirectComparator0(copt.nativeHandle_); diff --git a/java/org/rocksdb/DirectSlice.java b/java/org/rocksdb/DirectSlice.java index 8169e3529..847bbd9c1 100644 --- a/java/org/rocksdb/DirectSlice.java +++ b/java/org/rocksdb/DirectSlice.java @@ -16,11 +16,18 @@ import java.nio.ByteBuffer; * values consider using @see org.rocksdb.Slice */ public class DirectSlice extends AbstractSlice { - /** * Called from JNI to construct a new Java DirectSlice * without an underlying C++ object set * at creation time. + * + * Note: You should be aware that + * {@see org.rocksdb.RocksObject#disOwnNativeHandle()} is intentionally + * called from the default DirectSlice constructor, and that it is marked as + * private. This is so that developers cannot construct their own default + * DirectSlice objects (at present). As developers cannot construct their own + * DirectSlice objects through this, they are not creating underlying C++ + * DirectSlice objects, and so there is nothing to free (dispose) from Java. */ private DirectSlice() { super(); @@ -31,6 +38,8 @@ public class DirectSlice extends AbstractSlice { * Constructs a slice * where the data is taken from * a String. + * + * @param str The string */ public DirectSlice(final String str) { super(); @@ -41,6 +50,9 @@ public class DirectSlice extends AbstractSlice { * Constructs a slice where the data is * read from the provided * ByteBuffer up to a certain length + * + * @param data The buffer containing the data + * @param length The length of the data to use for the slice */ public DirectSlice(final ByteBuffer data, final int length) { super(); @@ -51,6 +63,8 @@ public class DirectSlice extends AbstractSlice { * Constructs a slice where the data is * read from the provided * ByteBuffer + * + * @param data The bugger containing the data */ public DirectSlice(final ByteBuffer data) { super(); @@ -79,7 +93,7 @@ public class DirectSlice extends AbstractSlice { } /** - * Drops the specified n + * Drops the specified {@code n} * number of bytes from the start * of the backing slice * diff --git a/java/org/rocksdb/Slice.java b/java/org/rocksdb/Slice.java index e6932cc76..4449cb7b8 100644 --- a/java/org/rocksdb/Slice.java +++ b/java/org/rocksdb/Slice.java @@ -14,11 +14,18 @@ package org.rocksdb; * values consider using @see org.rocksdb.DirectSlice */ public class Slice extends AbstractSlice { - /** * Called from JNI to construct a new Java Slice * without an underlying C++ object set * at creation time. + * + * Note: You should be aware that + * {@see org.rocksdb.RocksObject#disOwnNativeHandle()} is intentionally + * called from the default Slice constructor, and that it is marked as + * private. This is so that developers cannot construct their own default + * Slice objects (at present). As developers cannot construct their own + * Slice objects through this, they are not creating underlying C++ Slice + * objects, and so there is nothing to free (dispose) from Java. */ private Slice() { super(); diff --git a/java/org/rocksdb/test/AbstractComparatorTest.java b/java/org/rocksdb/test/AbstractComparatorTest.java index e3cc6bb77..dfdb3cad9 100644 --- a/java/org/rocksdb/test/AbstractComparatorTest.java +++ b/java/org/rocksdb/test/AbstractComparatorTest.java @@ -52,9 +52,9 @@ public abstract class AbstractComparatorTest { db = RocksDB.open(opt, db_path.toString()); final Random random = new Random(); - for(int i = 0; i < ITERATIONS; i++) { + for (int i = 0; i < ITERATIONS; i++) { final byte key[] = intToByte(random.nextInt()); - if(i > 0 && db.get(key) != null) { // does key already exist (avoid duplicates) + if (i > 0 && db.get(key) != null) { // does key already exist (avoid duplicates) i--; // generate a different key } else { db.put(key, "value".getBytes()); @@ -71,7 +71,7 @@ public abstract class AbstractComparatorTest { it.seekToFirst(); int lastKey = Integer.MIN_VALUE; int count = 0; - for(it.seekToFirst(); it.isValid(); it.next()) { + for (it.seekToFirst(); it.isValid(); it.next()) { final int thisKey = byteToInt(it.key()); assert(thisKey > lastKey); lastKey = thisKey; @@ -85,11 +85,11 @@ public abstract class AbstractComparatorTest { System.err.format("[ERROR]: %s%n", e); e.printStackTrace(); } finally { - if(db != null) { + if (db != null) { db.close(); } - if(opt != null) { + if (opt != null) { opt.dispose(); } @@ -114,7 +114,7 @@ public abstract class AbstractComparatorTest { // protect against int key calculation overflow final double diff = (double)iA - iB; final int result; - if(diff < Integer.MIN_VALUE) { + if (diff < Integer.MIN_VALUE) { result = Integer.MIN_VALUE; } else if(diff > Integer.MAX_VALUE) { result = Integer.MAX_VALUE; diff --git a/java/rocksjni/comparator.cc b/java/rocksjni/comparator.cc index 420897939..196376235 100644 --- a/java/rocksjni/comparator.cc +++ b/java/rocksjni/comparator.cc @@ -18,27 +18,6 @@ #include "rocksjni/comparatorjnicallback.h" #include "rocksjni/portal.h" -// - -void Java_org_rocksdb_ComparatorOptions_newComparatorOptions( - JNIEnv* env, jobject jobj, jstring jpath, jboolean jshare_table_files, - jboolean jsync, jboolean jdestroy_old_data, jboolean jbackup_log_files, - jlong jbackup_rate_limit, jlong jrestore_rate_limit) { - jbackup_rate_limit = (jbackup_rate_limit <= 0) ? 0 : jbackup_rate_limit; - jrestore_rate_limit = (jrestore_rate_limit <= 0) ? 0 : jrestore_rate_limit; - - const char* cpath = env->GetStringUTFChars(jpath, 0); - - auto bopt = new rocksdb::BackupableDBOptions(cpath, nullptr, - jshare_table_files, nullptr, jsync, jdestroy_old_data, jbackup_log_files, - jbackup_rate_limit, jrestore_rate_limit); - - env->ReleaseStringUTFChars(jpath, cpath); - - rocksdb::BackupableDBOptionsJni::setHandle(env, jobj, bopt); -} -// - // 0) && (level < cfd->NumberLevels())); + assert(level < cfd->NumberLevels()); // If the file is being compacted no need to delete. if (metadata->being_compacted) { @@ -4561,6 +4561,12 @@ Status DBImpl::DeleteFile(std::string name) { return Status::InvalidArgument("File not in last level"); } } + // if level == 0, it has to be the oldest file + if (level == 0 && + cfd->current()->files_[0].back()->fd.GetNumber() != number) { + return Status::InvalidArgument("File in level 0, but not oldest"); + } + edit.SetColumnFamily(cfd->GetID()); edit.DeleteFile(level, number); status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), &edit, &mutex_, db_directory_.get()); diff --git a/db/deletefile_test.cc b/db/deletefile_test.cc index f1cd4b040..6a6f8e953 100644 --- a/db/deletefile_test.cc +++ b/db/deletefile_test.cc @@ -287,6 +287,75 @@ TEST(DeleteFileTest, DeleteLogFiles) { CloseDB(); } +TEST(DeleteFileTest, DeleteNonDefaultColumnFamily) { + CloseDB(); + DBOptions db_options; + db_options.create_if_missing = true; + db_options.create_missing_column_families = true; + std::vector column_families; + column_families.emplace_back(); + column_families.emplace_back("new_cf", ColumnFamilyOptions()); + + std::vector handles; + rocksdb::DB* db; + ASSERT_OK(DB::Open(db_options, dbname_, column_families, &handles, &db)); + + Random rnd(5); + for (int i = 0; i < 1000; ++i) { + ASSERT_OK(db->Put(WriteOptions(), handles[1], test::RandomKey(&rnd, 10), + test::RandomKey(&rnd, 10))); + } + ASSERT_OK(db->Flush(FlushOptions(), handles[1])); + for (int i = 0; i < 1000; ++i) { + ASSERT_OK(db->Put(WriteOptions(), handles[1], test::RandomKey(&rnd, 10), + test::RandomKey(&rnd, 10))); + } + ASSERT_OK(db->Flush(FlushOptions(), handles[1])); + + std::vector metadata; + db->GetLiveFilesMetaData(&metadata); + ASSERT_EQ(2U, metadata.size()); + ASSERT_EQ("new_cf", metadata[0].column_family_name); + ASSERT_EQ("new_cf", metadata[1].column_family_name); + auto old_file = metadata[0].smallest_seqno < metadata[1].smallest_seqno + ? metadata[0].name + : metadata[1].name; + auto new_file = metadata[0].smallest_seqno > metadata[1].smallest_seqno + ? metadata[0].name + : metadata[1].name; + ASSERT_TRUE(db->DeleteFile(new_file).IsInvalidArgument()); + ASSERT_OK(db->DeleteFile(old_file)); + + { + std::unique_ptr itr(db->NewIterator(ReadOptions(), handles[1])); + int count = 0; + for (itr->SeekToFirst(); itr->Valid(); itr->Next()) { + ASSERT_OK(itr->status()); + ++count; + } + ASSERT_EQ(count, 1000); + } + + delete handles[0]; + delete handles[1]; + delete db; + + ASSERT_OK(DB::Open(db_options, dbname_, column_families, &handles, &db)); + { + std::unique_ptr itr(db->NewIterator(ReadOptions(), handles[1])); + int count = 0; + for (itr->SeekToFirst(); itr->Valid(); itr->Next()) { + ASSERT_OK(itr->status()); + ++count; + } + ASSERT_EQ(count, 1000); + } + + delete handles[0]; + delete handles[1]; + delete db; +} + } //namespace rocksdb int main(int argc, char** argv) { From e11a5e776fa44611f34ca8676480fd6a9c21f4be Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Tue, 21 Oct 2014 17:28:31 -0700 Subject: [PATCH 12/33] Improve the comment of util/thread_local.h Summary: Improve the comment of util/thread_local.h Test Plan: n/a Reviewers: ljin Reviewed By: ljin Differential Revision: https://reviews.facebook.net/D25449 --- util/thread_local.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/util/thread_local.h b/util/thread_local.h index a037a9ceb..6884ed138 100644 --- a/util/thread_local.h +++ b/util/thread_local.h @@ -26,13 +26,14 @@ namespace rocksdb { // (2) a ThreadLocalPtr is destroyed typedef void (*UnrefHandler)(void* ptr); -// Thread local storage that only stores value of pointer type. The storage -// distinguish data coming from different thread and different ThreadLocalPtr -// instances. For example, if a regular thread_local variable A is declared -// in DBImpl, two DBImpl objects would share the same A. ThreadLocalPtr avoids -// the confliction. The total storage size equals to # of threads * # of -// ThreadLocalPtr instances. It is not efficient in terms of space, but it -// should serve most of our use cases well and keep code simple. +// ThreadLocalPtr stores only values of pointer type. Different from +// the usual thread-local-storage, ThreadLocalPtr has the ability to +// distinguish data coming from different threads and different +// ThreadLocalPtr instances. For example, if a regular thread_local +// variable A is declared in DBImpl, two DBImpl objects would share +// the same A. However, a ThreadLocalPtr that is defined under the +// scope of DBImpl can avoid such confliction. As a result, its memory +// usage would be O(# of threads * # of ThreadLocalPtr instances). class ThreadLocalPtr { public: explicit ThreadLocalPtr(UnrefHandler handler = nullptr); From 0fd985f42794c32ae14e25fc2780b38fde489c2e Mon Sep 17 00:00:00 2001 From: Lei Jin Date: Wed, 22 Oct 2014 11:52:35 -0700 Subject: [PATCH 13/33] Avoid reloading filter on Get() if cache_index_and_filter_blocks == false Summary: This fixes the case that filter policy is missing in SST file, but we open the table with filter policy on and cache_index_and_filter_blocks = false. The current behavior is that we will try to load it every time on Get() but fail. Test Plan: unit test Reviewers: yhchiang, igor, rven, sdong Reviewed By: sdong Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D25455 --- table/block_based_table_factory.cc | 5 ++ table/block_based_table_reader.cc | 106 +++++++++++++---------------- table/block_based_table_reader.h | 8 +-- table/table_test.cc | 53 ++++++++------- 4 files changed, 87 insertions(+), 85 deletions(-) diff --git a/table/block_based_table_factory.cc b/table/block_based_table_factory.cc index 3155f3394..3013ade2a 100644 --- a/table/block_based_table_factory.cc +++ b/table/block_based_table_factory.cc @@ -71,6 +71,11 @@ Status BlockBasedTableFactory::SanitizeOptions( return Status::InvalidArgument("Hash index is specified for block-based " "table, but prefix_extractor is not given"); } + if (table_options_.cache_index_and_filter_blocks && + table_options_.no_block_cache) { + return Status::InvalidArgument("Enable cache_index_and_filter_blocks, " + ", but block cache is disabled"); + } return Status::OK(); } diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 4b2050e03..c973b755e 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -483,8 +483,8 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, } // Will use block cache for index/filter blocks access? - if (table_options.block_cache && - table_options.cache_index_and_filter_blocks) { + if (table_options.cache_index_and_filter_blocks) { + assert(table_options.block_cache != nullptr); // Hack: Call NewIndexIterator() to implicitly add index to the block_cache unique_ptr iter(new_table->NewIndexIterator(ReadOptions())); s = iter->status(); @@ -506,19 +506,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, // Set filter block if (rep->filter_policy) { - // First try reading full_filter, then reading block_based_filter - for (auto filter_block_prefix : { kFullFilterBlockPrefix, - kFilterBlockPrefix }) { - std::string key = filter_block_prefix; - key.append(rep->filter_policy->Name()); - - BlockHandle handle; - if (FindMetaBlock(meta_iter.get(), key, &handle).ok()) { - rep->filter.reset(ReadFilter(handle, rep, - filter_block_prefix, nullptr)); - break; - } - } + rep->filter.reset(ReadFilter(rep, meta_iter.get(), nullptr)); } } else { delete index_reader; @@ -726,33 +714,43 @@ Status BlockBasedTable::PutDataBlockToCache( } FilterBlockReader* BlockBasedTable::ReadFilter( - const BlockHandle& filter_handle, BlockBasedTable::Rep* rep, - const std::string& filter_block_prefix, size_t* filter_size) { + Rep* rep, Iterator* meta_index_iter, size_t* filter_size) { // TODO: We might want to unify with ReadBlockFromFile() if we start // requiring checksum verification in Table::Open. - ReadOptions opt; - BlockContents block; - if (!ReadBlockContents(rep->file.get(), rep->footer, opt, filter_handle, - &block, rep->ioptions.env, false).ok()) { - return nullptr; - } + for (auto prefix : {kFullFilterBlockPrefix, kFilterBlockPrefix}) { + std::string filter_block_key = prefix; + filter_block_key.append(rep->filter_policy->Name()); + BlockHandle handle; + if (FindMetaBlock(meta_index_iter, filter_block_key, &handle).ok()) { + BlockContents block; + if (!ReadBlockContents(rep->file.get(), rep->footer, ReadOptions(), + handle, &block, rep->ioptions.env, false).ok()) { + // Error reading the block + return nullptr; + } - if (filter_size) { - *filter_size = block.data.size(); - } + if (filter_size) { + *filter_size = block.data.size(); + } - assert(rep->filter_policy); - if (kFilterBlockPrefix == filter_block_prefix) { - return new BlockBasedFilterBlockReader( - rep->ioptions.prefix_extractor, rep->table_options, std::move(block)); - } else if (kFullFilterBlockPrefix == filter_block_prefix) { - auto filter_bits_reader = rep->filter_policy-> - GetFilterBitsReader(block.data); - - if (filter_bits_reader != nullptr) { - return new FullFilterBlockReader(rep->ioptions.prefix_extractor, - rep->table_options, std::move(block), - filter_bits_reader); + assert(rep->filter_policy); + if (kFilterBlockPrefix == prefix) { + return new BlockBasedFilterBlockReader( + rep->ioptions.prefix_extractor, rep->table_options, + std::move(block)); + } else if (kFullFilterBlockPrefix == prefix) { + auto filter_bits_reader = rep->filter_policy-> + GetFilterBitsReader(block.data); + if (filter_bits_reader != nullptr) { + return new FullFilterBlockReader(rep->ioptions.prefix_extractor, + rep->table_options, + std::move(block), + filter_bits_reader); + } + } else { + assert(false); + return nullptr; + } } } return nullptr; @@ -760,8 +758,11 @@ FilterBlockReader* BlockBasedTable::ReadFilter( BlockBasedTable::CachableEntry BlockBasedTable::GetFilter( bool no_io) const { - // filter pre-populated - if (rep_->filter != nullptr) { + // If cache_index_and_filter_blocks is false, filter should be pre-populated. + // We will return rep_->filter anyway. rep_->filter can be nullptr if filter + // read fails at Open() time. We don't want to reload again since it will + // most probably fail again. + if (!rep_->table_options.cache_index_and_filter_blocks) { return {rep_->filter.get(), nullptr /* cache handle */}; } @@ -775,8 +776,7 @@ BlockBasedTable::CachableEntry BlockBasedTable::GetFilter( char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; auto key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size, rep_->footer.metaindex_handle(), - cache_key - ); + cache_key); Statistics* statistics = rep_->ioptions.statistics; auto cache_handle = @@ -797,22 +797,12 @@ BlockBasedTable::CachableEntry BlockBasedTable::GetFilter( auto s = ReadMetaBlock(rep_, &meta, &iter); if (s.ok()) { - // First try reading full_filter, then reading block_based_filter - for (auto filter_block_prefix : {kFullFilterBlockPrefix, - kFilterBlockPrefix}) { - std::string filter_block_key = filter_block_prefix; - filter_block_key.append(rep_->filter_policy->Name()); - BlockHandle handle; - if (FindMetaBlock(iter.get(), filter_block_key, &handle).ok()) { - filter = ReadFilter(handle, rep_, filter_block_prefix, &filter_size); - - if (filter == nullptr) break; // err happen in ReadFilter - assert(filter_size > 0); - cache_handle = block_cache->Insert( - key, filter, filter_size, &DeleteCachedEntry); - RecordTick(statistics, BLOCK_CACHE_ADD); - break; - } + filter = ReadFilter(rep_, iter.get(), &filter_size); + if (filter != nullptr) { + assert(filter_size > 0); + cache_handle = block_cache->Insert( + key, filter, filter_size, &DeleteCachedEntry); + RecordTick(statistics, BLOCK_CACHE_ADD); } } } diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index b272c4d13..a000c6a9a 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -183,10 +183,10 @@ class BlockBasedTable : public TableReader { std::unique_ptr* iter); // Create the filter from the filter block. - static FilterBlockReader* ReadFilter(const BlockHandle& filter_handle, - Rep* rep, - const std::string& filter_block_prefix, - size_t* filter_size = nullptr); + static FilterBlockReader* ReadFilter( + Rep* rep, + Iterator* meta_index_iter, + size_t* filter_size = nullptr); static void SetupCacheKeyPrefix(Rep* rep); diff --git a/table/table_test.cc b/table/table_test.cc index e4657e8cd..c54fb2ff7 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -1461,8 +1461,6 @@ TEST(BlockBasedTableTest, BlockCacheDisabledTest) { options.create_if_missing = true; options.statistics = CreateDBStatistics(); BlockBasedTableOptions table_options; - // Intentionally commented out: table_options.cache_index_and_filter_blocks = - // true; table_options.block_cache = NewLRUCache(1024); table_options.filter_policy.reset(NewBloomFilterPolicy(10)); options.table_factory.reset(new BlockBasedTableFactory(table_options)); @@ -1521,7 +1519,7 @@ TEST(BlockBasedTableTest, FilterBlockInBlockCache) { c.Finish(options, ioptions, table_options, GetPlainInternalComparator(options.comparator), &keys, &kvmap); // preloading filter/index blocks is prohibited. - auto reader = dynamic_cast(c.GetTableReader()); + auto* reader = dynamic_cast(c.GetTableReader()); ASSERT_TRUE(!reader->TEST_filter_block_preloaded()); ASSERT_TRUE(!reader->TEST_index_reader_preloaded()); @@ -1567,28 +1565,11 @@ TEST(BlockBasedTableTest, FilterBlockInBlockCache) { // release the iterator so that the block cache can reset correctly. iter.reset(); - // -- PART 2: Open without block cache - table_options.no_block_cache = true; - table_options.block_cache.reset(); - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - options.statistics = CreateDBStatistics(); // reset the stats - const ImmutableCFOptions ioptions1(options); - c.Reopen(ioptions1); - table_options.no_block_cache = false; - - { - iter.reset(c.NewIterator()); - iter->SeekToFirst(); - ASSERT_EQ("key", iter->key().ToString()); - BlockCachePropertiesSnapshot props(options.statistics.get()); - // Nothing is affected at all - props.AssertEqual(0, 0, 0, 0); - } - - // -- PART 3: Open with very small block cache + // -- PART 2: Open with very small block cache // In this test, no block will ever get hit since the block cache is // too small to fit even one entry. table_options.block_cache = NewLRUCache(1); + options.statistics = CreateDBStatistics(); options.table_factory.reset(new BlockBasedTableFactory(table_options)); const ImmutableCFOptions ioptions2(options); c.Reopen(ioptions2); @@ -1598,7 +1579,6 @@ TEST(BlockBasedTableTest, FilterBlockInBlockCache) { 0, 0, 0); } - { // Both index and data block get accessed. // It first cache index block then data block. But since the cache size @@ -1618,6 +1598,33 @@ TEST(BlockBasedTableTest, FilterBlockInBlockCache) { props.AssertEqual(2, 0, 0 + 1, // data block miss 0); } + iter.reset(); + + // -- PART 3: Open table with bloom filter enabled but not in SST file + table_options.block_cache = NewLRUCache(4096); + table_options.cache_index_and_filter_blocks = false; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + TableConstructor c3(BytewiseComparator()); + c3.Add("k01", "hello"); + ImmutableCFOptions ioptions3(options); + // Generate table without filter policy + c3.Finish(options, ioptions3, table_options, + GetPlainInternalComparator(options.comparator), &keys, &kvmap); + // Open table with filter policy + table_options.filter_policy.reset(NewBloomFilterPolicy(1)); + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + options.statistics = CreateDBStatistics(); + ImmutableCFOptions ioptions4(options); + ASSERT_OK(c3.Reopen(ioptions4)); + reader = dynamic_cast(c3.GetTableReader()); + ASSERT_TRUE(!reader->TEST_filter_block_preloaded()); + GetContext get_context(options.comparator, nullptr, nullptr, nullptr, + GetContext::kNotFound, Slice(), nullptr, + nullptr, nullptr); + ASSERT_OK(reader->Get(ReadOptions(), "k01", &get_context)); + BlockCachePropertiesSnapshot props(options.statistics.get()); + props.AssertFilterBlockStat(0, 0); } TEST(BlockBasedTableTest, BlockCacheLeak) { From 839c376bd1ab866957081d9a6aa9edf9fa1cdb78 Mon Sep 17 00:00:00 2001 From: Lei Jin Date: Wed, 22 Oct 2014 13:53:35 -0700 Subject: [PATCH 14/33] fix table_test Summary: SaveValue expects an internal key but I previously added to table a user key Test Plan: ran the test --- table/table_test.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/table/table_test.cc b/table/table_test.cc index c54fb2ff7..5e1bbe4cf 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -1606,7 +1606,9 @@ TEST(BlockBasedTableTest, FilterBlockInBlockCache) { options.table_factory.reset(NewBlockBasedTableFactory(table_options)); TableConstructor c3(BytewiseComparator()); - c3.Add("k01", "hello"); + std::string user_key = "k01"; + InternalKey internal_key(user_key, 0, kTypeValue); + c3.Add(internal_key.Encode().ToString(), "hello"); ImmutableCFOptions ioptions3(options); // Generate table without filter policy c3.Finish(options, ioptions3, table_options, @@ -1619,10 +1621,12 @@ TEST(BlockBasedTableTest, FilterBlockInBlockCache) { ASSERT_OK(c3.Reopen(ioptions4)); reader = dynamic_cast(c3.GetTableReader()); ASSERT_TRUE(!reader->TEST_filter_block_preloaded()); + std::string value; GetContext get_context(options.comparator, nullptr, nullptr, nullptr, - GetContext::kNotFound, Slice(), nullptr, + GetContext::kNotFound, user_key, &value, nullptr, nullptr); - ASSERT_OK(reader->Get(ReadOptions(), "k01", &get_context)); + ASSERT_OK(reader->Get(ReadOptions(), user_key, &get_context)); + ASSERT_EQ(value, "hello"); BlockCachePropertiesSnapshot props(options.statistics.get()); props.AssertFilterBlockStat(0, 0); } From d755e53b87a436d761f2f6a4654b34a7df506054 Mon Sep 17 00:00:00 2001 From: sdong Date: Wed, 22 Oct 2014 18:24:14 -0700 Subject: [PATCH 15/33] Printing number of keys in DB Stats Summary: It is useful to print out number of keys in DB Stats Test Plan: ./db_bench --benchmarks fillrandom --num 1000000 -threads 16 -batch_size=16 and watch the outputs in LOG files Reviewers: MarkCallaghan, ljin, yhchiang, igor Reviewed By: igor Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D24513 --- db/db_impl.cc | 4 +++- db/internal_stats.cc | 24 ++++++++++++++++++------ db/internal_stats.h | 6 ++++++ 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 90f50174a..6a2daad7d 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -4129,7 +4129,7 @@ Status DBImpl::Write(const WriteOptions& write_options, WriteBatch* my_batch) { const uint64_t batch_size = WriteBatchInternal::ByteSize(updates); // Record statistics RecordTick(stats_, NUMBER_KEYS_WRITTEN, my_batch_count); - RecordTick(stats_, BYTES_WRITTEN, WriteBatchInternal::ByteSize(updates)); + RecordTick(stats_, BYTES_WRITTEN, batch_size); if (write_options.disableWAL) { flush_on_destroy_ = true; } @@ -4179,6 +4179,8 @@ Status DBImpl::Write(const WriteOptions& write_options, WriteBatch* my_batch) { // internal stats default_cf_internal_stats_->AddDBStats( InternalStats::BYTES_WRITTEN, batch_size); + default_cf_internal_stats_->AddDBStats(InternalStats::NUMBER_KEYS_WRITTEN, + my_batch_count); if (!write_options.disableWAL) { default_cf_internal_stats_->AddDBStats( InternalStats::WAL_FILE_SYNCED, 1); diff --git a/db/internal_stats.cc b/db/internal_stats.cc index aa3b3c850..cfeb9c00d 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -293,16 +293,25 @@ void InternalStats::DumpDBStats(std::string* value) { value->append(buf); // Cumulative uint64_t user_bytes_written = db_stats_[InternalStats::BYTES_WRITTEN]; + uint64_t num_keys_written = db_stats_[InternalStats::NUMBER_KEYS_WRITTEN]; uint64_t write_other = db_stats_[InternalStats::WRITE_DONE_BY_OTHER]; uint64_t write_self = db_stats_[InternalStats::WRITE_DONE_BY_SELF]; uint64_t wal_bytes = db_stats_[InternalStats::WAL_FILE_BYTES]; uint64_t wal_synced = db_stats_[InternalStats::WAL_FILE_SYNCED]; uint64_t write_with_wal = db_stats_[InternalStats::WRITE_WITH_WAL]; // Data + // writes: total number of write requests. + // keys: total number of key updates issued by all the write requests + // batches: number of group commits issued to the DB. Each group can contain + // one or more writes. + // so writes/keys is the average number of put in multi-put or put + // writes/batches is the average group commit size. + // + // The format is the same for interval stats. snprintf(buf, sizeof(buf), - "Cumulative writes: %" PRIu64 " writes, %" PRIu64 " batches, " - "%.1f writes per batch, %.2f GB user ingest\n", - write_other + write_self, write_self, + "Cumulative writes: %" PRIu64 " writes, %" PRIu64 " keys, %" PRIu64 + " batches, %.1f writes per batch, %.2f GB user ingest\n", + write_other + write_self, num_keys_written, write_self, (write_other + write_self) / static_cast(write_self + 1), user_bytes_written / kGB); value->append(buf); @@ -318,11 +327,13 @@ void InternalStats::DumpDBStats(std::string* value) { // Interval uint64_t interval_write_other = write_other - db_stats_snapshot_.write_other; uint64_t interval_write_self = write_self - db_stats_snapshot_.write_self; + uint64_t interval_num_keys_written = + num_keys_written - db_stats_snapshot_.num_keys_written; snprintf(buf, sizeof(buf), - "Interval writes: %" PRIu64 " writes, %" PRIu64 " batches, " - "%.1f writes per batch, %.1f MB user ingest\n", + "Interval writes: %" PRIu64 " writes, %" PRIu64 " keys, %" PRIu64 + " batches, %.1f writes per batch, %.1f MB user ingest\n", interval_write_other + interval_write_self, - interval_write_self, + interval_num_keys_written, interval_write_self, static_cast(interval_write_other + interval_write_self) / (interval_write_self + 1), (user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB); @@ -347,6 +358,7 @@ void InternalStats::DumpDBStats(std::string* value) { db_stats_snapshot_.ingest_bytes = user_bytes_written; db_stats_snapshot_.write_other = write_other; db_stats_snapshot_.write_self = write_self; + db_stats_snapshot_.num_keys_written = num_keys_written; db_stats_snapshot_.wal_bytes = wal_bytes; db_stats_snapshot_.wal_synced = wal_synced; db_stats_snapshot_.write_with_wal = write_with_wal; diff --git a/db/internal_stats.h b/db/internal_stats.h index 4d12a2512..84fd10289 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -67,6 +67,7 @@ class InternalStats { WAL_FILE_BYTES, WAL_FILE_SYNCED, BYTES_WRITTEN, + NUMBER_KEYS_WRITTEN, WRITE_DONE_BY_OTHER, WRITE_DONE_BY_SELF, WRITE_WITH_WAL, @@ -264,6 +265,11 @@ class InternalStats { // another thread. uint64_t write_other; uint64_t write_self; + // Total number of keys written. write_self and write_other measure number + // of write requests written, Each of the write request can contain updates + // to multiple keys. num_keys_written is total number of keys updated by all + // those writes. + uint64_t num_keys_written; double seconds_up; DBStatsSnapshot() From 2a8e5203d8f955e94b074f52a50410dbdbee0a0d Mon Sep 17 00:00:00 2001 From: sdong Date: Wed, 22 Oct 2014 18:43:27 -0700 Subject: [PATCH 16/33] db_bench: --batch_size used for write benchmarks too Summary: Now --bench_size is only used in multireadrandom tests, although the codes allow it to run in all write tests. I don't see a reason why we can't enable it. Test Plan: Run ./db_bench -benchmarks multirandomwrite --threads=5 -batch_size=16 and see the stats printed out in LOG to make sure batching really happened. Reviewers: ljin, yhchiang, rven, igor Reviewed By: igor Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D25509 --- db/db_bench.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/db/db_bench.cc b/db/db_bench.cc index d018ce70f..f0fe5e02e 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -1232,7 +1232,7 @@ class Benchmark { writes_ = (FLAGS_writes < 0 ? FLAGS_num : FLAGS_writes); value_size_ = FLAGS_value_size; key_size_ = FLAGS_key_size; - entries_per_batch_ = 1; + entries_per_batch_ = FLAGS_batch_size; write_options_ = WriteOptions(); if (FLAGS_sync) { write_options_.sync = true; @@ -1287,7 +1287,6 @@ class Benchmark { } else if (name == Slice("readrandomfast")) { method = &Benchmark::ReadRandomFast; } else if (name == Slice("multireadrandom")) { - entries_per_batch_ = FLAGS_batch_size; fprintf(stderr, "entries_per_batch = %" PRIi64 "\n", entries_per_batch_); method = &Benchmark::MultiReadRandom; From c584d2b538c29242c14a56ebcf6f5e396f120fb1 Mon Sep 17 00:00:00 2001 From: Adam Retter Date: Thu, 23 Oct 2014 15:39:48 +0100 Subject: [PATCH 17/33] Fix for building RocksDB Java on Mac OS X Yosemite --- Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2e101bede..d9a8feffa 100644 --- a/Makefile +++ b/Makefile @@ -528,7 +528,11 @@ ROCKSDB_SOURCES_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PAT ifeq ($(PLATFORM), OS_MACOSX) ROCKSDBJNILIB = librocksdbjni-osx.jnilib ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-osx.jar -JAVA_INCLUDE = -I/System/Library/Frameworks/JavaVM.framework/Headers/ +ifneq ("$(wildcard $(JAVA_HOME)/include/darwin)","") + JAVA_INCLUDE = -I$(JAVA_HOME)/include -I $(JAVA_HOME)/include/darwin +else + JAVA_INCLUDE = -I/System/Library/Frameworks/JavaVM.framework/Headers/ +endif endif libz.a: From 3b5fe3a1f36cdd438a2e818e94a4174951c68ba1 Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Thu, 23 Oct 2014 10:41:58 -0700 Subject: [PATCH 18/33] Correct the log message in VersionEdit Summary: When VersionEdit fails in kNewFile3, previously it logs "new-file2 entry". However, it should be "new-file3 entry." Test Plan: make --- db/version_edit.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/version_edit.cc b/db/version_edit.cc index 4e2cf8f5b..271016aaf 100644 --- a/db/version_edit.cc +++ b/db/version_edit.cc @@ -293,7 +293,7 @@ Status VersionEdit::DecodeFrom(const Slice& src) { new_files_.push_back(std::make_pair(level, f)); } else { if (!msg) { - msg = "new-file2 entry"; + msg = "new-file3 entry"; } } break; From 90f156402c4b74f05fb86834401ab96cc018cf03 Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Thu, 23 Oct 2014 11:18:11 -0700 Subject: [PATCH 19/33] Fix CompactBetweenSnapshots --- db/db_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/db_test.cc b/db/db_test.cc index d13929fc6..968ea7521 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -5272,7 +5272,7 @@ TEST(DBTest, CompactBetweenSnapshots) { do { Options options = CurrentOptions(); options.disable_auto_compactions = true; - CreateAndReopenWithCF({"pikachu"}); + CreateAndReopenWithCF({"pikachu"}, &options); Random rnd(301); FillLevels("a", "z", 1); From 9383922cc3bdf8f85caf466cb61cf66090762e37 Mon Sep 17 00:00:00 2001 From: Adam Retter Date: Thu, 23 Oct 2014 19:35:58 +0100 Subject: [PATCH 20/33] Added java tests to travis build --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 8f1bcb0ae..b52e5acf7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,6 +14,6 @@ before_install: - sudo dpkg -i libgflags-dev_2.0-1_amd64.deb # Lousy hack to disable use and testing of fallocate, which doesn't behave quite # as EnvPosixTest::AllocateTest expects within the Travis OpenVZ environment. -script: OPT=-DTRAVIS make unity && OPT=-DTRAVIS make check -j8 +script: OPT=-DTRAVIS make unity && OPT=-DTRAVIS make check -j8 && OPT=-DTRAVIS make rocksdbjava jtest notifications: email: false From 4b1786e9599c5770376e8809bfbce7d74426640e Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Thu, 23 Oct 2014 12:03:19 -0700 Subject: [PATCH 21/33] Fix SIGSEGV when declaring Arena after ScopedArenaIterator --- db/db_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/db_test.cc b/db/db_test.cc index 968ea7521..06d0241ee 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -807,8 +807,8 @@ class DBTest { } std::string AllEntriesFor(const Slice& user_key, int cf = 0) { - ScopedArenaIterator iter; Arena arena; + ScopedArenaIterator iter; if (cf == 0) { iter.set(dbfull()->TEST_NewInternalIterator(&arena)); } else { From 9aa9668a83cfebe865212a5468cc747fc800872c Mon Sep 17 00:00:00 2001 From: fyrz Date: Thu, 16 Oct 2014 20:39:26 +0200 Subject: [PATCH 22/33] [RocksJava] Memtables update to 3.6 - Adjusted HashLinkedList to 3.6.0 - Adjusted SkipList to 3.6.0 - Introduced a memtable test --- java/Makefile | 1 + .../rocksdb/HashLinkedListMemTableConfig.java | 122 +++++++++++++++++- .../rocksdb/HashSkipListMemTableConfig.java | 3 + java/org/rocksdb/SkipListMemTableConfig.java | 38 +++++- java/org/rocksdb/VectorMemTableConfig.java | 4 + java/org/rocksdb/test/MemTableTest.java | 107 +++++++++++++++ java/rocksjni/memtablejni.cc | 34 +++-- 7 files changed, 296 insertions(+), 13 deletions(-) create mode 100644 java/org/rocksdb/test/MemTableTest.java diff --git a/java/Makefile b/java/Makefile index 697df5175..504060bc3 100644 --- a/java/Makefile +++ b/java/Makefile @@ -42,6 +42,7 @@ test: java java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.ColumnFamilyTest java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.FilterTest java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.KeyMayExistTest + java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.MemTableTest java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.OptionsTest java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.ReadOnlyTest java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.ReadOptionsTest diff --git a/java/org/rocksdb/HashLinkedListMemTableConfig.java b/java/org/rocksdb/HashLinkedListMemTableConfig.java index 381a16f49..78a4e8661 100644 --- a/java/org/rocksdb/HashLinkedListMemTableConfig.java +++ b/java/org/rocksdb/HashLinkedListMemTableConfig.java @@ -15,9 +15,21 @@ package org.rocksdb; */ public class HashLinkedListMemTableConfig extends MemTableConfig { public static final long DEFAULT_BUCKET_COUNT = 50000; + public static final long DEFAULT_HUGE_PAGE_TLB_SIZE = 0; + public static final int DEFAULT_BUCKET_ENTRIES_LOG_THRES = 4096; + public static final boolean + DEFAULT_IF_LOG_BUCKET_DIST_WHEN_FLUSH = true; + public static final int DEFAUL_THRESHOLD_USE_SKIPLIST = 256; + /** + * HashLinkedListMemTableConfig constructor + */ public HashLinkedListMemTableConfig() { bucketCount_ = DEFAULT_BUCKET_COUNT; + hugePageTlbSize_ = DEFAULT_HUGE_PAGE_TLB_SIZE; + bucketEntriesLoggingThreshold_ = DEFAULT_BUCKET_ENTRIES_LOG_THRES; + ifLogBucketDistWhenFlush_ = DEFAULT_IF_LOG_BUCKET_DIST_WHEN_FLUSH; + thresholdUseSkiplist_ = DEFAUL_THRESHOLD_USE_SKIPLIST; } /** @@ -42,13 +54,119 @@ public class HashLinkedListMemTableConfig extends MemTableConfig { return bucketCount_; } + /** + *

Set the size of huge tlb or allocate the hashtable bytes from + * malloc if {@code size <= 0}.

+ * + *

The user needs to reserve huge pages for it to be allocated, + * like: {@code sysctl -w vm.nr_hugepages=20}

+ * + *

See linux documentation/vm/hugetlbpage.txt

+ * + * @param size if set to {@code <= 0} hashtable bytes from malloc + * @return the reference to the current HashLinkedListMemTableConfig. + */ + public HashLinkedListMemTableConfig setHugePageTlbSize(long size) { + hugePageTlbSize_ = size; + return this; + } + + /** + * Returns the size value of hugePageTlbSize. + * + * @return the hugePageTlbSize. + */ + public long hugePageTlbSize() { + return hugePageTlbSize_; + } + + /** + * If number of entries in one bucket exceeds that setting, log + * about it. + * + * @param threshold - number of entries in a single bucket before + * logging starts. + * @return the reference to the current HashLinkedListMemTableConfig. + */ + public HashLinkedListMemTableConfig + setBucketEntriesLoggingThreshold(int threshold) { + bucketEntriesLoggingThreshold_ = threshold; + return this; + } + + /** + * Returns the maximum number of entries in one bucket before + * logging starts. + * + * @return maximum number of entries in one bucket before logging + * starts. + */ + public int bucketEntriesLoggingThreshold() { + return bucketEntriesLoggingThreshold_; + } + + /** + * If true the distrubition of number of entries will be logged. + * + * @param logDistribution - boolean parameter indicating if number + * of entry distribution shall be logged. + * @return the reference to the current HashLinkedListMemTableConfig. + */ + public HashLinkedListMemTableConfig + setIfLogBucketDistWhenFlush(boolean logDistribution) { + ifLogBucketDistWhenFlush_ = logDistribution; + return this; + } + + /** + * Returns information about logging the distribution of + * number of entries on flush. + * + * @return if distrubtion of number of entries shall be logged. + */ + public boolean ifLogBucketDistWhenFlush() { + return ifLogBucketDistWhenFlush_; + } + + /** + * Set maximum number of entries in one bucket. Exceeding this val + * leads to a switch from LinkedList to SkipList. + * + * @param threshold maximum number of entries before SkipList is + * used. + * @return the reference to the current HashLinkedListMemTableConfig. + */ + public HashLinkedListMemTableConfig + setThresholdUseSkiplist(int threshold) { + thresholdUseSkiplist_ = threshold; + return this; + } + + /** + * Returns entries per bucket threshold before LinkedList is + * replaced by SkipList usage for that bucket. + * + * @return entries per bucket threshold before SkipList is used. + */ + public int thresholdUseSkiplist() { + return thresholdUseSkiplist_; + } + @Override protected long newMemTableFactoryHandle() throws RocksDBException { - return newMemTableFactoryHandle(bucketCount_); + return newMemTableFactoryHandle(bucketCount_, hugePageTlbSize_, + bucketEntriesLoggingThreshold_, ifLogBucketDistWhenFlush_, + thresholdUseSkiplist_); } - private native long newMemTableFactoryHandle(long bucketCount) + private native long newMemTableFactoryHandle(long bucketCount, + long hugePageTlbSize, int bucketEntriesLoggingThreshold, + boolean ifLogBucketDistWhenFlush, int thresholdUseSkiplist) throws RocksDBException; private long bucketCount_; + private long hugePageTlbSize_; + private int bucketEntriesLoggingThreshold_; + private boolean ifLogBucketDistWhenFlush_; + private int thresholdUseSkiplist_; } diff --git a/java/org/rocksdb/HashSkipListMemTableConfig.java b/java/org/rocksdb/HashSkipListMemTableConfig.java index 100f16c82..ad2120f18 100644 --- a/java/org/rocksdb/HashSkipListMemTableConfig.java +++ b/java/org/rocksdb/HashSkipListMemTableConfig.java @@ -18,6 +18,9 @@ public class HashSkipListMemTableConfig extends MemTableConfig { public static final int DEFAULT_BRANCHING_FACTOR = 4; public static final int DEFAULT_HEIGHT = 4; + /** + * HashSkipListMemTableConfig constructor + */ public HashSkipListMemTableConfig() { bucketCount_ = DEFAULT_BUCKET_COUNT; branchingFactor_ = DEFAULT_BRANCHING_FACTOR; diff --git a/java/org/rocksdb/SkipListMemTableConfig.java b/java/org/rocksdb/SkipListMemTableConfig.java index 7f9f5cb5f..d26fd9d32 100644 --- a/java/org/rocksdb/SkipListMemTableConfig.java +++ b/java/org/rocksdb/SkipListMemTableConfig.java @@ -4,12 +4,46 @@ package org.rocksdb; * The config for skip-list memtable representation. */ public class SkipListMemTableConfig extends MemTableConfig { + + public static final long DEFAULT_LOOKAHEAD = 0; + + /** + * SkipListMemTableConfig constructor + */ public SkipListMemTableConfig() { + lookahead_ = DEFAULT_LOOKAHEAD; + } + + /** + * Sets lookahead for SkipList + * + * @param lookahead If non-zero, each iterator's seek operation + * will start the search from the previously visited record + * (doing at most 'lookahead' steps). This is an + * optimization for the access pattern including many + * seeks with consecutive keys. + * @return the current instance of SkipListMemTableConfig + */ + public SkipListMemTableConfig setLookahead(long lookahead) { + lookahead_ = lookahead; + return this; } + /** + * Returns the currently set lookahead value. + * + * @return lookahead value + */ + public long lookahead() { + return lookahead_; + } + + @Override protected long newMemTableFactoryHandle() { - return newMemTableFactoryHandle0(); + return newMemTableFactoryHandle0(lookahead_); } - private native long newMemTableFactoryHandle0(); + private native long newMemTableFactoryHandle0(long lookahead); + + private long lookahead_; } diff --git a/java/org/rocksdb/VectorMemTableConfig.java b/java/org/rocksdb/VectorMemTableConfig.java index b7a413f19..ba1be3e77 100644 --- a/java/org/rocksdb/VectorMemTableConfig.java +++ b/java/org/rocksdb/VectorMemTableConfig.java @@ -5,6 +5,10 @@ package org.rocksdb; */ public class VectorMemTableConfig extends MemTableConfig { public static final int DEFAULT_RESERVED_SIZE = 0; + + /** + * VectorMemTableConfig constructor + */ public VectorMemTableConfig() { reservedSize_ = DEFAULT_RESERVED_SIZE; } diff --git a/java/org/rocksdb/test/MemTableTest.java b/java/org/rocksdb/test/MemTableTest.java new file mode 100644 index 000000000..0d1e4d54a --- /dev/null +++ b/java/org/rocksdb/test/MemTableTest.java @@ -0,0 +1,107 @@ +// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +package org.rocksdb.test; + +import org.rocksdb.*; + +public class MemTableTest { + static { + RocksDB.loadLibrary(); + } + public static void main(String[] args) { + Options options = new Options(); + // Test HashSkipListMemTableConfig + HashSkipListMemTableConfig memTableConfig = + new HashSkipListMemTableConfig(); + assert(memTableConfig.bucketCount() == 1000000); + memTableConfig.setBucketCount(2000000); + assert(memTableConfig.bucketCount() == 2000000); + assert(memTableConfig.height() == 4); + memTableConfig.setHeight(5); + assert(memTableConfig.height() == 5); + assert(memTableConfig.branchingFactor() == 4); + memTableConfig.setBranchingFactor(6); + assert(memTableConfig.branchingFactor() == 6); + try { + options.setMemTableConfig(memTableConfig); + } catch (RocksDBException e) { + assert(false); + } + memTableConfig = null; + options.dispose(); + System.gc(); + System.runFinalization(); + // Test SkipList + options = new Options(); + SkipListMemTableConfig skipMemTableConfig = + new SkipListMemTableConfig(); + assert(skipMemTableConfig.lookahead() == 0); + skipMemTableConfig.setLookahead(20); + assert(skipMemTableConfig.lookahead() == 20); + try { + options.setMemTableConfig(skipMemTableConfig); + } catch (RocksDBException e) { + assert(false); + } + skipMemTableConfig = null; + options.dispose(); + System.gc(); + System.runFinalization(); + // Test HashLinkedListMemTableConfig + options = new Options(); + HashLinkedListMemTableConfig hashLinkedListMemTableConfig = + new HashLinkedListMemTableConfig(); + assert(hashLinkedListMemTableConfig.bucketCount() == 50000); + hashLinkedListMemTableConfig.setBucketCount(100000); + assert(hashLinkedListMemTableConfig.bucketCount() == 100000); + assert(hashLinkedListMemTableConfig.hugePageTlbSize() == 0); + hashLinkedListMemTableConfig.setHugePageTlbSize(1); + assert(hashLinkedListMemTableConfig.hugePageTlbSize() == 1); + assert(hashLinkedListMemTableConfig. + bucketEntriesLoggingThreshold() == 4096); + hashLinkedListMemTableConfig. + setBucketEntriesLoggingThreshold(200); + assert(hashLinkedListMemTableConfig. + bucketEntriesLoggingThreshold() == 200); + assert(hashLinkedListMemTableConfig. + ifLogBucketDistWhenFlush() == true); + hashLinkedListMemTableConfig. + setIfLogBucketDistWhenFlush(false); + assert(hashLinkedListMemTableConfig. + ifLogBucketDistWhenFlush() == false); + assert(hashLinkedListMemTableConfig. + thresholdUseSkiplist() == 256); + hashLinkedListMemTableConfig.setThresholdUseSkiplist(29); + assert(hashLinkedListMemTableConfig. + thresholdUseSkiplist() == 29); + try { + options.setMemTableConfig(hashLinkedListMemTableConfig); + } catch (RocksDBException e) { + assert(false); + } + hashLinkedListMemTableConfig = null; + options.dispose(); + System.gc(); + System.runFinalization(); + // test VectorMemTableConfig + options = new Options(); + VectorMemTableConfig vectorMemTableConfig = + new VectorMemTableConfig(); + assert(vectorMemTableConfig.reservedSize() == 0); + vectorMemTableConfig.setReservedSize(123); + assert(vectorMemTableConfig.reservedSize() == 123); + try { + options.setMemTableConfig(vectorMemTableConfig); + } catch (RocksDBException e) { + assert(false); + } + vectorMemTableConfig = null; + options.dispose(); + System.gc(); + System.runFinalization(); + System.out.println("Mem-table test passed"); + } +} diff --git a/java/rocksjni/memtablejni.cc b/java/rocksjni/memtablejni.cc index 4be03d491..fe83885c2 100644 --- a/java/rocksjni/memtablejni.cc +++ b/java/rocksjni/memtablejni.cc @@ -34,16 +34,26 @@ jlong Java_org_rocksdb_HashSkipListMemTableConfig_newMemTableFactoryHandle( /* * Class: org_rocksdb_HashLinkedListMemTableConfig * Method: newMemTableFactoryHandle - * Signature: (J)J + * Signature: (JJIZI)J */ jlong Java_org_rocksdb_HashLinkedListMemTableConfig_newMemTableFactoryHandle( - JNIEnv* env, jobject jobj, jlong jbucket_count) { - rocksdb::Status s = rocksdb::check_if_jlong_fits_size_t(jbucket_count); - if (s.ok()) { + JNIEnv* env, jobject jobj, jlong jbucket_count, jlong jhuge_page_tlb_size, + jint jbucket_entries_logging_threshold, + jboolean jif_log_bucket_dist_when_flash, jint jthreshold_use_skiplist) { + rocksdb::Status statusBucketCount = + rocksdb::check_if_jlong_fits_size_t(jbucket_count); + rocksdb::Status statusHugePageTlb = + rocksdb::check_if_jlong_fits_size_t(jhuge_page_tlb_size); + if (statusBucketCount.ok() && statusHugePageTlb.ok()) { return reinterpret_cast(rocksdb::NewHashLinkListRepFactory( - static_cast(jbucket_count))); + static_cast(jbucket_count), + static_cast(jhuge_page_tlb_size), + static_cast(jbucket_entries_logging_threshold), + static_cast(jif_log_bucket_dist_when_flash), + static_cast(jthreshold_use_skiplist))); } - rocksdb::RocksDBExceptionJni::ThrowNew(env, s); + rocksdb::RocksDBExceptionJni::ThrowNew(env, + !statusBucketCount.ok()?statusBucketCount:statusHugePageTlb); return 0; } @@ -66,9 +76,15 @@ jlong Java_org_rocksdb_VectorMemTableConfig_newMemTableFactoryHandle( /* * Class: org_rocksdb_SkipListMemTableConfig * Method: newMemTableFactoryHandle0 - * Signature: ()J + * Signature: (J)J */ jlong Java_org_rocksdb_SkipListMemTableConfig_newMemTableFactoryHandle0( - JNIEnv* env, jobject jobj) { - return reinterpret_cast(new rocksdb::SkipListFactory()); + JNIEnv* env, jobject jobj, jlong jlookahead) { + rocksdb::Status s = rocksdb::check_if_jlong_fits_size_t(jlookahead); + if (s.ok()) { + return reinterpret_cast(new rocksdb::SkipListFactory( + static_cast(jlookahead))); + } + rocksdb::RocksDBExceptionJni::ThrowNew(env, s); + return 0; } From 1eb545721df80a186887eb0b83c7b006401652c5 Mon Sep 17 00:00:00 2001 From: fyrz Date: Tue, 21 Oct 2014 21:17:45 +0200 Subject: [PATCH 23/33] Fix incorrectly merged Java - Makefile --- java/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/java/Makefile b/java/Makefile index 697df5175..fd15dd34e 100644 --- a/java/Makefile +++ b/java/Makefile @@ -49,6 +49,7 @@ test: java java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.ComparatorOptionsTest java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.ComparatorTest java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.DirectComparatorTest + @rm -rf /tmp/rocksdbjni_* db_bench: java javac org/rocksdb/benchmark/*.java From bd4fbaee3753a7405b3f3d4248b6b43a93ea760c Mon Sep 17 00:00:00 2001 From: fyrz Date: Tue, 21 Oct 2014 21:18:33 +0200 Subject: [PATCH 24/33] Fixed cross platform build after introducing Java-7 dependencies --- java/crossbuild/build-linux-centos.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/crossbuild/build-linux-centos.sh b/java/crossbuild/build-linux-centos.sh index 55f179b62..5730b1533 100755 --- a/java/crossbuild/build-linux-centos.sh +++ b/java/crossbuild/build-linux-centos.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash # install all required packages for rocksdb that are available through yum ARCH=$(uname -i) -sudo yum -y install java-1.6.0-openjdk-devel.$ARCH zlib zlib-devel bzip2 bzip2-devel +sudo yum -y install java-1.7.0-openjdk-devel.$ARCH zlib zlib-devel bzip2 bzip2-devel # install gcc/g++ 4.7 via CERN (http://linux.web.cern.ch/linux/devtoolset/) sudo wget -O /etc/yum.repos.d/slc5-devtoolset.repo http://linuxsoft.cern.ch/cern/devtoolset/slc5-devtoolset.repo @@ -12,7 +12,7 @@ tar xvfz gflags-1.6.tar.gz; cd gflags-1.6; scl enable devtoolset-1.1 ./configure export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib # set java home so we can build rocksdb jars -export JAVA_HOME=/usr/lib/jvm/java-1.6.0 +export JAVA_HOME=/usr/lib/jvm/java-1.7.0 # build rocksdb cd /rocksdb From 574028679b9f19d504a3695989c711fa5d73fe80 Mon Sep 17 00:00:00 2001 From: Lei Jin Date: Thu, 23 Oct 2014 15:34:21 -0700 Subject: [PATCH 25/33] dynamic max_sequential_skip_in_iterations Summary: This is not a critical options. Making it dynamic so that we can remove more reference to cfd->options() Test Plan: unit test Reviewers: yhchiang, sdong, igor Reviewed By: igor Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D24957 --- db/column_family.h | 2 +- db/db_impl.cc | 45 ++++++++++++++-------------------- db/db_impl_readonly.cc | 13 +++++----- db/db_test.cc | 50 ++++++++++++++++++++++++++++++++++++++ db/forward_iterator.cc | 51 +++++++++++++++++++++++---------------- db/forward_iterator.h | 6 ++--- util/mutable_cf_options.h | 10 ++++++-- util/options_helper.cc | 15 ++++++++++-- 8 files changed, 130 insertions(+), 62 deletions(-) diff --git a/db/column_family.h b/db/column_family.h index 9c415c2a8..96b08c52e 100644 --- a/db/column_family.h +++ b/db/column_family.h @@ -166,7 +166,7 @@ class ColumnFamilyData { bool IsDropped() const { return dropped_; } // thread-safe - int NumberLevels() const { return options_.num_levels; } + int NumberLevels() const { return ioptions_.num_levels; } void SetLogNumber(uint64_t log_number) { log_number_ = log_number; } uint64_t GetLogNumber() const { return log_number_; } diff --git a/db/db_impl.cc b/db/db_impl.cc index 6a2daad7d..0e47774a7 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -3828,16 +3828,16 @@ Iterator* DBImpl::NewIterator(const ReadOptions& read_options, // not supported in lite version return nullptr; #else - auto iter = new ForwardIterator(this, read_options, cfd); + SuperVersion* sv = cfd->GetReferencedSuperVersion(&mutex_); + auto iter = new ForwardIterator(this, read_options, cfd, sv); return NewDBIterator(env_, *cfd->ioptions(), cfd->user_comparator(), iter, - kMaxSequenceNumber, - cfd->options()->max_sequential_skip_in_iterations, - read_options.iterate_upper_bound); + kMaxSequenceNumber, + sv->mutable_cf_options.max_sequential_skip_in_iterations, + read_options.iterate_upper_bound); #endif } else { SequenceNumber latest_snapshot = versions_->LastSequence(); - SuperVersion* sv = nullptr; - sv = cfd->GetReferencedSuperVersion(&mutex_); + SuperVersion* sv = cfd->GetReferencedSuperVersion(&mutex_); auto snapshot = read_options.snapshot != nullptr @@ -3889,7 +3889,7 @@ Iterator* DBImpl::NewIterator(const ReadOptions& read_options, // that they are likely to be in the same cache line and/or page. ArenaWrappedDBIter* db_iter = NewArenaWrappedDbIterator( env_, *cfd->ioptions(), cfd->user_comparator(), - snapshot, cfd->options()->max_sequential_skip_in_iterations, + snapshot, sv->mutable_cf_options.max_sequential_skip_in_iterations, read_options.iterate_upper_bound); Iterator* internal_iter = @@ -3908,19 +3908,6 @@ Status DBImpl::NewIterators( std::vector* iterators) { iterators->clear(); iterators->reserve(column_families.size()); - SequenceNumber latest_snapshot = 0; - std::vector super_versions; - super_versions.reserve(column_families.size()); - - if (!read_options.tailing) { - mutex_.Lock(); - latest_snapshot = versions_->LastSequence(); - for (auto cfh : column_families) { - auto cfd = reinterpret_cast(cfh)->cfd(); - super_versions.push_back(cfd->GetSuperVersion()->Ref()); - } - mutex_.Unlock(); - } if (read_options.tailing) { #ifdef ROCKSDB_LITE @@ -3929,17 +3916,21 @@ Status DBImpl::NewIterators( #else for (auto cfh : column_families) { auto cfd = reinterpret_cast(cfh)->cfd(); - auto iter = new ForwardIterator(this, read_options, cfd); + SuperVersion* sv = cfd->GetReferencedSuperVersion(&mutex_); + auto iter = new ForwardIterator(this, read_options, cfd, sv); iterators->push_back( NewDBIterator(env_, *cfd->ioptions(), cfd->user_comparator(), iter, - kMaxSequenceNumber, - cfd->options()->max_sequential_skip_in_iterations)); + kMaxSequenceNumber, + sv->mutable_cf_options.max_sequential_skip_in_iterations)); } #endif } else { + SequenceNumber latest_snapshot = versions_->LastSequence(); + for (size_t i = 0; i < column_families.size(); ++i) { - auto cfh = reinterpret_cast(column_families[i]); - auto cfd = cfh->cfd(); + auto* cfd = reinterpret_cast( + column_families[i])->cfd(); + SuperVersion* sv = cfd->GetReferencedSuperVersion(&mutex_); auto snapshot = read_options.snapshot != nullptr @@ -3949,9 +3940,9 @@ Status DBImpl::NewIterators( ArenaWrappedDBIter* db_iter = NewArenaWrappedDbIterator( env_, *cfd->ioptions(), cfd->user_comparator(), snapshot, - cfd->options()->max_sequential_skip_in_iterations); + sv->mutable_cf_options.max_sequential_skip_in_iterations); Iterator* internal_iter = NewInternalIterator( - read_options, cfd, super_versions[i], db_iter->GetArena()); + read_options, cfd, sv, db_iter->GetArena()); db_iter->SetIterUnderDBIter(internal_iter); iterators->push_back(db_iter); } diff --git a/db/db_impl_readonly.cc b/db/db_impl_readonly.cc index 31ebdbedd..c98693d38 100644 --- a/db/db_impl_readonly.cc +++ b/db/db_impl_readonly.cc @@ -53,7 +53,7 @@ Iterator* DBImplReadOnly::NewIterator(const ReadOptions& read_options, ? reinterpret_cast( read_options.snapshot)->number_ : latest_snapshot), - cfd->options()->max_sequential_skip_in_iterations); + super_version->mutable_cf_options.max_sequential_skip_in_iterations); auto internal_iter = NewInternalIterator( read_options, cfd, super_version, db_iter->GetArena()); db_iter->SetIterUnderDBIter(internal_iter); @@ -72,16 +72,17 @@ Status DBImplReadOnly::NewIterators( SequenceNumber latest_snapshot = versions_->LastSequence(); for (auto cfh : column_families) { - auto cfd = reinterpret_cast(cfh)->cfd(); - auto db_iter = NewArenaWrappedDbIterator( + auto* cfd = reinterpret_cast(cfh)->cfd(); + auto* sv = cfd->GetSuperVersion()->Ref(); + auto* db_iter = NewArenaWrappedDbIterator( env_, *cfd->ioptions(), cfd->user_comparator(), (read_options.snapshot != nullptr ? reinterpret_cast( read_options.snapshot)->number_ : latest_snapshot), - cfd->options()->max_sequential_skip_in_iterations); - auto internal_iter = NewInternalIterator( - read_options, cfd, cfd->GetSuperVersion()->Ref(), db_iter->GetArena()); + sv->mutable_cf_options.max_sequential_skip_in_iterations); + auto* internal_iter = NewInternalIterator( + read_options, cfd, sv, db_iter->GetArena()); db_iter->SetIterUnderDBIter(internal_iter); iterators->push_back(db_iter); } diff --git a/db/db_test.cc b/db/db_test.cc index 06d0241ee..1ee8b58af 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -8889,6 +8889,56 @@ TEST(DBTest, DynamicCompactionOptions) { ASSERT_TRUE(Put(Key(count), RandomString(&rnd, 1024), wo).ok()); } +TEST(DBTest, DynamicMiscOptions) { + // Test max_sequential_skip_in_iterations + Options options; + options.env = env_; + options.create_if_missing = true; + options.max_sequential_skip_in_iterations = 16; + options.compression = kNoCompression; + options.statistics = rocksdb::CreateDBStatistics(); + DestroyAndReopen(&options); + + auto assert_reseek_count = [this, &options](int key_start, int num_reseek) { + int key0 = key_start; + int key1 = key_start + 1; + int key2 = key_start + 2; + Random rnd(301); + ASSERT_OK(Put(Key(key0), RandomString(&rnd, 8))); + for (int i = 0; i < 10; ++i) { + ASSERT_OK(Put(Key(key1), RandomString(&rnd, 8))); + } + ASSERT_OK(Put(Key(key2), RandomString(&rnd, 8))); + std::unique_ptr iter(db_->NewIterator(ReadOptions())); + iter->Seek(Key(key1)); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key().compare(Key(key1)), 0); + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key().compare(Key(key2)), 0); + ASSERT_EQ(num_reseek, + TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION)); + }; + // No reseek + assert_reseek_count(100, 0); + + ASSERT_TRUE(dbfull()->SetOptions({ + {"max_sequential_skip_in_iterations", "4"} + })); + // Clear memtable and make new option effective + dbfull()->TEST_FlushMemTable(true); + // Trigger reseek + assert_reseek_count(200, 1); + + ASSERT_TRUE(dbfull()->SetOptions({ + {"max_sequential_skip_in_iterations", "16"} + })); + // Clear memtable and make new option effective + dbfull()->TEST_FlushMemTable(true); + // No reseek + assert_reseek_count(300, 1); +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/db/forward_iterator.cc b/db/forward_iterator.cc index 684045e05..b2e4bd067 100644 --- a/db/forward_iterator.cc +++ b/db/forward_iterator.cc @@ -114,25 +114,29 @@ class LevelIterator : public Iterator { }; ForwardIterator::ForwardIterator(DBImpl* db, const ReadOptions& read_options, - ColumnFamilyData* cfd) + ColumnFamilyData* cfd, SuperVersion* current_sv) : db_(db), read_options_(read_options), cfd_(cfd), prefix_extractor_(cfd->options()->prefix_extractor.get()), user_comparator_(cfd->user_comparator()), immutable_min_heap_(MinIterComparator(&cfd_->internal_comparator())), - sv_(nullptr), + sv_(current_sv), mutable_iter_(nullptr), current_(nullptr), valid_(false), is_prev_set_(false), - is_prev_inclusive_(false) {} + is_prev_inclusive_(false) { + if (sv_) { + RebuildIterators(false); + } +} ForwardIterator::~ForwardIterator() { - Cleanup(); + Cleanup(true); } -void ForwardIterator::Cleanup() { +void ForwardIterator::Cleanup(bool release_sv) { if (mutable_iter_ != nullptr) { mutable_iter_->~Iterator(); } @@ -149,15 +153,17 @@ void ForwardIterator::Cleanup() { } level_iters_.clear(); - if (sv_ != nullptr && sv_->Unref()) { - DBImpl::DeletionState deletion_state; - db_->mutex_.Lock(); - sv_->Cleanup(); - db_->FindObsoleteFiles(deletion_state, false, true); - db_->mutex_.Unlock(); - delete sv_; - if (deletion_state.HaveSomethingToDelete()) { - db_->PurgeObsoleteFiles(deletion_state); + if (release_sv) { + if (sv_ != nullptr && sv_->Unref()) { + DBImpl::DeletionState deletion_state; + db_->mutex_.Lock(); + sv_->Cleanup(); + db_->FindObsoleteFiles(deletion_state, false, true); + db_->mutex_.Unlock(); + delete sv_; + if (deletion_state.HaveSomethingToDelete()) { + db_->PurgeObsoleteFiles(deletion_state); + } } } } @@ -169,7 +175,7 @@ bool ForwardIterator::Valid() const { void ForwardIterator::SeekToFirst() { if (sv_ == nullptr || sv_ ->version_number != cfd_->GetSuperVersionNumber()) { - RebuildIterators(); + RebuildIterators(true); } else if (status_.IsIncomplete()) { ResetIncompleteIterators(); } @@ -179,7 +185,7 @@ void ForwardIterator::SeekToFirst() { void ForwardIterator::Seek(const Slice& internal_key) { if (sv_ == nullptr || sv_ ->version_number != cfd_->GetSuperVersionNumber()) { - RebuildIterators(); + RebuildIterators(true); } else if (status_.IsIncomplete()) { ResetIncompleteIterators(); } @@ -188,6 +194,7 @@ void ForwardIterator::Seek(const Slice& internal_key) { void ForwardIterator::SeekInternal(const Slice& internal_key, bool seek_to_first) { + assert(mutable_iter_); // mutable seek_to_first ? mutable_iter_->SeekToFirst() : mutable_iter_->Seek(internal_key); @@ -338,7 +345,7 @@ void ForwardIterator::Next() { std::string current_key = key().ToString(); Slice old_key(current_key.data(), current_key.size()); - RebuildIterators(); + RebuildIterators(true); SeekInternal(old_key, false); if (!valid_ || key().compare(old_key) != 0) { return; @@ -412,11 +419,13 @@ Status ForwardIterator::status() const { return Status::OK(); } -void ForwardIterator::RebuildIterators() { +void ForwardIterator::RebuildIterators(bool refresh_sv) { // Clean up - Cleanup(); - // New - sv_ = cfd_->GetReferencedSuperVersion(&(db_->mutex_)); + Cleanup(refresh_sv); + if (refresh_sv) { + // New + sv_ = cfd_->GetReferencedSuperVersion(&(db_->mutex_)); + } mutable_iter_ = sv_->mem->NewIterator(read_options_, &arena_); sv_->imm->AddIterators(read_options_, &imm_iters_, &arena_); const auto& l0_files = sv_->current->files_[0]; diff --git a/db/forward_iterator.h b/db/forward_iterator.h index 4d3761ee1..537dc1352 100644 --- a/db/forward_iterator.h +++ b/db/forward_iterator.h @@ -51,7 +51,7 @@ typedef std::priority_queue max_bytes_for_level_multiplier_additional; + // Misc options + uint64_t max_sequential_skip_in_iterations; + // Derived options // Per-level target file size. std::vector max_file_size; diff --git a/util/options_helper.cc b/util/options_helper.cc index 2a56a1ccf..372a7171f 100644 --- a/util/options_helper.cc +++ b/util/options_helper.cc @@ -150,6 +150,17 @@ bool ParseCompactionOptions(const std::string& name, const std::string& value, return true; } +template +bool ParseMiscOptions(const std::string& name, const std::string& value, + OptionsType* new_options) { + if (name == "max_sequential_skip_in_iterations") { + new_options->max_sequential_skip_in_iterations = ParseUint64(value); + } else { + return false; + } + return true; +} + bool GetMutableOptionsFromStrings( const MutableCFOptions& base_options, const std::unordered_map& options_map, @@ -160,6 +171,7 @@ bool GetMutableOptionsFromStrings( for (const auto& o : options_map) { if (ParseMemtableOptions(o.first, o.second, new_options)) { } else if (ParseCompactionOptions(o.first, o.second, new_options)) { + } else if (ParseMiscOptions(o.first, o.second, new_options)) { } else { return false; } @@ -228,6 +240,7 @@ bool GetColumnFamilyOptionsFromMap( try { if (ParseMemtableOptions(o.first, o.second, new_options)) { } else if (ParseCompactionOptions(o.first, o.second, new_options)) { + } else if (ParseMiscOptions(o.first, o.second, new_options)) { } else if (o.first == "min_write_buffer_number_to_merge") { new_options->min_write_buffer_number_to_merge = ParseInt(o.second); } else if (o.first == "compression") { @@ -286,8 +299,6 @@ bool GetColumnFamilyOptionsFromMap( } else if (o.first == "compaction_options_fifo") { new_options->compaction_options_fifo.max_table_files_size = ParseUint64(o.second); - } else if (o.first == "max_sequential_skip_in_iterations") { - new_options->max_sequential_skip_in_iterations = ParseUint64(o.second); } else if (o.first == "inplace_update_support") { new_options->inplace_update_support = ParseBoolean(o.first, o.second); } else if (o.first == "inplace_update_num_locks") { From 1fee591e74222784e6cb91783ab0d69f2804d413 Mon Sep 17 00:00:00 2001 From: Lei Jin Date: Thu, 23 Oct 2014 15:35:10 -0700 Subject: [PATCH 26/33] comments for DynamicCompactionOptions test Summary: as title Test Plan: n/a Reviewers: yhchiang, sdong, igor Reviewed By: igor Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D24963 --- db/db_test.cc | 45 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/db/db_test.cc b/db/db_test.cc index 1ee8b58af..a68e5686c 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -8702,8 +8702,9 @@ TEST(DBTest, DynamicCompactionOptions) { dbfull()->TEST_WaitForFlushMemTable(); }; - // Write 3 files that have the same key range, trigger compaction and - // result in one L1 file + // Write 3 files that have the same key range. + // Since level0_file_num_compaction_trigger is 3, compaction should be + // triggered. The compaction should result in one L1 file gen_l0_kb(0, 64, 1); ASSERT_EQ(NumTableFilesAtLevel(0), 1); gen_l0_kb(0, 64, 1); @@ -8718,6 +8719,10 @@ TEST(DBTest, DynamicCompactionOptions) { ASSERT_GE(metadata[0].size, k64KB - k4KB); // Test compaction trigger and target_file_size_base + // Reduce compaction trigger to 2, and reduce L1 file size to 32KB. + // Writing to 64KB L0 files should trigger a compaction. Since these + // 2 L0 files have the same key range, compaction merge them and should + // result in 2 32KB L1 files. ASSERT_TRUE(dbfull()->SetOptions({ {"level0_file_num_compaction_trigger", "2"}, {"target_file_size_base", std::to_string(k32KB) } @@ -8733,8 +8738,13 @@ TEST(DBTest, DynamicCompactionOptions) { ASSERT_EQ(2U, metadata.size()); ASSERT_LE(metadata[0].size, k32KB + k4KB); ASSERT_GE(metadata[0].size, k32KB - k4KB); + ASSERT_LE(metadata[1].size, k32KB + k4KB); + ASSERT_GE(metadata[1].size, k32KB - k4KB); // Test max_bytes_for_level_base + // Increase level base size to 256KB and write enough data that will + // fill L1 and L2. L1 size should be around 256KB while L2 size should be + // around 256KB x 4. ASSERT_TRUE(dbfull()->SetOptions({ {"max_bytes_for_level_base", std::to_string(k256KB) } })); @@ -8751,7 +8761,9 @@ TEST(DBTest, DynamicCompactionOptions) { SizeAtLevel(2) < 4 * k256KB * 1.2); // Test max_bytes_for_level_multiplier and - // max_bytes_for_level_base (reduce) + // max_bytes_for_level_base. Now, reduce both mulitplier and level base, + // After filling enough data that can fit in L1 - L3, we should see L1 size + // reduces to 128KB from 256KB which was asserted previously. Same for L2. ASSERT_TRUE(dbfull()->SetOptions({ {"max_bytes_for_level_multiplier", "2"}, {"max_bytes_for_level_base", std::to_string(k128KB) } @@ -8767,7 +8779,10 @@ TEST(DBTest, DynamicCompactionOptions) { ASSERT_TRUE(SizeAtLevel(2) < 2 * k128KB * 1.2); ASSERT_TRUE(SizeAtLevel(3) < 4 * k128KB * 1.2); - // Clean up memtable and L0 + // Test level0_stop_writes_trigger. + // Clean up memtable and L0. Block compaction threads. If continue to write + // and flush memtables. We should see put timeout after 8 memtable flushes + // since level0_stop_writes_trigger = 8 dbfull()->CompactRange(nullptr, nullptr); // Block compaction SleepingBackgroundTask sleeping_task_low1; @@ -8788,7 +8803,9 @@ TEST(DBTest, DynamicCompactionOptions) { sleeping_task_low1.WakeUp(); sleeping_task_low1.WaitUntilDone(); - // Test: stop trigger (reduce) + // Now reduce level0_stop_writes_trigger to 6. Clear up memtables and L0. + // Block compaction thread again. Perform the put and memtable flushes + // until we see timeout after 6 memtable flushes. ASSERT_TRUE(dbfull()->SetOptions({ {"level0_stop_writes_trigger", "6"} })); @@ -8810,6 +8827,10 @@ TEST(DBTest, DynamicCompactionOptions) { sleeping_task_low2.WaitUntilDone(); // Test disable_auto_compactions + // Compaction thread is unblocked but auto compaction is disabled. Write + // 4 L0 files and compaction should be triggered. If auto compaction is + // disabled, then TEST_WaitForCompact will be waiting for nothing. Number of + // L0 files do not change after the call. ASSERT_TRUE(dbfull()->SetOptions({ {"disable_auto_compactions", "true"} })); @@ -8824,6 +8845,8 @@ TEST(DBTest, DynamicCompactionOptions) { dbfull()->TEST_WaitForCompact(); ASSERT_EQ(NumTableFilesAtLevel(0), 4); + // Enable auto compaction and perform the same test, # of L0 files should be + // reduced after compaction. ASSERT_TRUE(dbfull()->SetOptions({ {"disable_auto_compactions", "false"} })); @@ -8838,8 +8861,10 @@ TEST(DBTest, DynamicCompactionOptions) { dbfull()->TEST_WaitForCompact(); ASSERT_LT(NumTableFilesAtLevel(0), 4); - // Test for hard_rate_limit, change max_bytes_for_level_base to make level - // size big + // Test for hard_rate_limit. + // First change max_bytes_for_level_base to a big value and populate + // L1 - L3. Then thrink max_bytes_for_level_base and disable auto compaction + // at the same time, we should see some level with score greater than 2. ASSERT_TRUE(dbfull()->SetOptions({ {"max_bytes_for_level_base", std::to_string(k256KB) } })); @@ -8869,7 +8894,9 @@ TEST(DBTest, DynamicCompactionOptions) { SizeAtLevel(2) / k64KB > 4 || SizeAtLevel(3) / k64KB > 8); - // Enfoce hard rate limit, L0 score is not regulated by this limit + // Enfoce hard rate limit. Now set hard_rate_limit to 2, + // we should start to see put delay (1000 us) and timeout as a result + // (L0 score is not regulated by this limit). ASSERT_TRUE(dbfull()->SetOptions({ {"hard_rate_limit", "2"} })); @@ -8881,7 +8908,7 @@ TEST(DBTest, DynamicCompactionOptions) { wo.timeout_hint_us = 500; ASSERT_TRUE(Put(Key(count), RandomString(&rnd, 1024), wo).IsTimedOut()); - // Bump up limit + // Lift the limit and no timeout ASSERT_TRUE(dbfull()->SetOptions({ {"hard_rate_limit", "100"} })); From 122f98e0b9814ee54edb74a7c39eebdf1ead41ec Mon Sep 17 00:00:00 2001 From: Lei Jin Date: Thu, 23 Oct 2014 15:37:14 -0700 Subject: [PATCH 27/33] dynamic max_mem_compact_level Summary: as title Test Plan: unit test Reviewers: sdong, yhchiang, rven, igor Reviewed By: igor Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D25347 --- db/db_impl.cc | 8 ++++++-- db/db_test.cc | 39 ++++++++++++++++++++++++++++++++++++++ db/version_set.cc | 6 +++--- util/mutable_cf_options.cc | 6 ++++++ util/mutable_cf_options.h | 3 +++ util/options_helper.cc | 4 ++-- 6 files changed, 59 insertions(+), 7 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 0e47774a7..334b6df0f 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1863,12 +1863,16 @@ int DBImpl::NumberLevels(ColumnFamilyHandle* column_family) { int DBImpl::MaxMemCompactionLevel(ColumnFamilyHandle* column_family) { auto cfh = reinterpret_cast(column_family); - return cfh->cfd()->options()->max_mem_compaction_level; + MutexLock l(&mutex_); + return cfh->cfd()->GetSuperVersion()-> + mutable_cf_options.max_mem_compaction_level; } int DBImpl::Level0StopWriteTrigger(ColumnFamilyHandle* column_family) { auto cfh = reinterpret_cast(column_family); - return cfh->cfd()->options()->level0_stop_writes_trigger; + MutexLock l(&mutex_); + return cfh->cfd()->GetSuperVersion()-> + mutable_cf_options.level0_stop_writes_trigger; } Status DBImpl::Flush(const FlushOptions& flush_options, diff --git a/db/db_test.cc b/db/db_test.cc index a68e5686c..cfd9dcd9b 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -8914,6 +8914,45 @@ TEST(DBTest, DynamicCompactionOptions) { })); dbfull()->TEST_FlushMemTable(true); ASSERT_TRUE(Put(Key(count), RandomString(&rnd, 1024), wo).ok()); + + // Test max_mem_compaction_level. + // Destory DB and start from scratch + options.max_background_compactions = 1; + options.max_background_flushes = 0; + options.max_mem_compaction_level = 2; + DestroyAndReopen(&options); + ASSERT_EQ(NumTableFilesAtLevel(0), 0); + ASSERT_EQ(NumTableFilesAtLevel(1), 0); + ASSERT_EQ(NumTableFilesAtLevel(2), 0); + + ASSERT_TRUE(Put("max_mem_compaction_level_key", + RandomString(&rnd, 8)).ok()); + dbfull()->TEST_FlushMemTable(true); + ASSERT_EQ(NumTableFilesAtLevel(0), 0); + ASSERT_EQ(NumTableFilesAtLevel(1), 0); + ASSERT_EQ(NumTableFilesAtLevel(2), 1); + + ASSERT_TRUE(Put("max_mem_compaction_level_key", + RandomString(&rnd, 8)).ok()); + // Set new value and it becomes effective in this flush + ASSERT_TRUE(dbfull()->SetOptions({ + {"max_mem_compaction_level", "1"} + })); + dbfull()->TEST_FlushMemTable(true); + ASSERT_EQ(NumTableFilesAtLevel(0), 0); + ASSERT_EQ(NumTableFilesAtLevel(1), 1); + ASSERT_EQ(NumTableFilesAtLevel(2), 1); + + ASSERT_TRUE(Put("max_mem_compaction_level_key", + RandomString(&rnd, 8)).ok()); + // Set new value and it becomes effective in this flush + ASSERT_TRUE(dbfull()->SetOptions({ + {"max_mem_compaction_level", "0"} + })); + dbfull()->TEST_FlushMemTable(true); + ASSERT_EQ(NumTableFilesAtLevel(0), 1); + ASSERT_EQ(NumTableFilesAtLevel(1), 1); + ASSERT_EQ(NumTableFilesAtLevel(2), 1); } TEST(DBTest, DynamicMiscOptions) { diff --git a/db/version_set.cc b/db/version_set.cc index 0819196fb..65c36c715 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -895,7 +895,7 @@ void Version::ComputeCompactionScore( } if (cfd_->ioptions()->compaction_style == kCompactionStyleFIFO) { score = static_cast(total_size) / - cfd_->options()->compaction_options_fifo.max_table_files_size; + cfd_->ioptions()->compaction_options_fifo.max_table_files_size; } else if (numfiles >= mutable_cf_options.level0_stop_writes_trigger) { // If we are slowing down writes, then we better compact that first score = 1000000; @@ -1051,8 +1051,8 @@ int Version::PickLevelForMemTableOutput( InternalKey start(smallest_user_key, kMaxSequenceNumber, kValueTypeForSeek); InternalKey limit(largest_user_key, 0, static_cast(0)); std::vector overlaps; - int max_mem_compact_level = cfd_->options()->max_mem_compaction_level; - while (max_mem_compact_level > 0 && level < max_mem_compact_level) { + while (mutable_cf_options.max_mem_compaction_level > 0 && + level < mutable_cf_options.max_mem_compaction_level) { if (OverlapInLevel(level + 1, &smallest_user_key, &largest_user_key)) { break; } diff --git a/util/mutable_cf_options.cc b/util/mutable_cf_options.cc index 1b3197b18..5ae26ac81 100644 --- a/util/mutable_cf_options.cc +++ b/util/mutable_cf_options.cc @@ -92,6 +92,8 @@ void MutableCFOptions::Dump(Logger* log) const { max_successive_merges); Log(log, " filter_deletes: %d", filter_deletes); + Log(log, " inplace_update_num_locks: %zu", + inplace_update_num_locks); Log(log, " disable_auto_compactions: %d", disable_auto_compactions); Log(log, " soft_rate_limit: %lf", @@ -126,6 +128,10 @@ void MutableCFOptions::Dump(Logger* log) const { } result.resize(result.size() - 2); Log(log, "max_bytes_for_level_multiplier_additional: %s", result.c_str()); + Log(log, " max_mem_compaction_level: %d", + max_mem_compaction_level); + Log(log, " max_sequential_skip_in_iterations: %" PRIu64, + max_sequential_skip_in_iterations); } } // namespace rocksdb diff --git a/util/mutable_cf_options.h b/util/mutable_cf_options.h index c6b312e1f..a738e7978 100644 --- a/util/mutable_cf_options.h +++ b/util/mutable_cf_options.h @@ -38,6 +38,7 @@ struct MutableCFOptions { max_bytes_for_level_multiplier(options.max_bytes_for_level_multiplier), max_bytes_for_level_multiplier_additional( options.max_bytes_for_level_multiplier_additional), + max_mem_compaction_level(options.max_mem_compaction_level), max_sequential_skip_in_iterations( options.max_sequential_skip_in_iterations) { @@ -65,6 +66,7 @@ struct MutableCFOptions { target_file_size_multiplier(0), max_bytes_for_level_base(0), max_bytes_for_level_multiplier(0), + max_mem_compaction_level(0), max_sequential_skip_in_iterations(0) {} @@ -108,6 +110,7 @@ struct MutableCFOptions { uint64_t max_bytes_for_level_base; int max_bytes_for_level_multiplier; std::vector max_bytes_for_level_multiplier_additional; + int max_mem_compaction_level; // Misc options uint64_t max_sequential_skip_in_iterations; diff --git a/util/options_helper.cc b/util/options_helper.cc index 372a7171f..4fef52299 100644 --- a/util/options_helper.cc +++ b/util/options_helper.cc @@ -144,6 +144,8 @@ bool ParseCompactionOptions(const std::string& name, const std::string& value, start = end + 1; } } + } else if (name == "max_mem_compaction_level") { + new_options->max_mem_compaction_level = ParseInt(value); } else { return false; } @@ -283,8 +285,6 @@ bool GetColumnFamilyOptionsFromMap( ParseInt(o.second.substr(start, o.second.size() - start)); } else if (o.first == "num_levels") { new_options->num_levels = ParseInt(o.second); - } else if (o.first == "max_mem_compaction_level") { - new_options->max_mem_compaction_level = ParseInt(o.second); } else if (o.first == "purge_redundant_kvs_while_flush") { new_options->purge_redundant_kvs_while_flush = ParseBoolean(o.first, o.second); From b794194adef63437a9248dc6bcdb5c38a7e335dd Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Thu, 23 Oct 2014 15:37:51 -0700 Subject: [PATCH 28/33] Remove java build from travis --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index b52e5acf7..8f1bcb0ae 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,6 +14,6 @@ before_install: - sudo dpkg -i libgflags-dev_2.0-1_amd64.deb # Lousy hack to disable use and testing of fallocate, which doesn't behave quite # as EnvPosixTest::AllocateTest expects within the Travis OpenVZ environment. -script: OPT=-DTRAVIS make unity && OPT=-DTRAVIS make check -j8 && OPT=-DTRAVIS make rocksdbjava jtest +script: OPT=-DTRAVIS make unity && OPT=-DTRAVIS make check -j8 notifications: email: false From 720c1c056d0b1afde0bb22347fe7965627b3eb11 Mon Sep 17 00:00:00 2001 From: Lei Jin Date: Thu, 23 Oct 2014 15:41:37 -0700 Subject: [PATCH 29/33] fix erro during merge Summary: as title Test Plan: make release --- util/mutable_cf_options.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/util/mutable_cf_options.cc b/util/mutable_cf_options.cc index 5ae26ac81..1bc8a5b7d 100644 --- a/util/mutable_cf_options.cc +++ b/util/mutable_cf_options.cc @@ -92,8 +92,6 @@ void MutableCFOptions::Dump(Logger* log) const { max_successive_merges); Log(log, " filter_deletes: %d", filter_deletes); - Log(log, " inplace_update_num_locks: %zu", - inplace_update_num_locks); Log(log, " disable_auto_compactions: %d", disable_auto_compactions); Log(log, " soft_rate_limit: %lf", From 724fba2b397396978bbe9533c5be81564ffe090e Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Thu, 23 Oct 2014 15:40:20 -0700 Subject: [PATCH 30/33] Improve the log in Universal Compaction to include more debug information. Summary: Previously, the log for Universal Compaction does not include the current number of files in case the compaction is triggered by the number of files. This diff includes the number of files in the log. Test Plan: make --- db/compaction_picker.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/db/compaction_picker.cc b/db/compaction_picker.cc index 84bd95839..63d621c50 100644 --- a/db/compaction_picker.cc +++ b/db/compaction_picker.cc @@ -553,8 +553,8 @@ Compaction* UniversalCompactionPicker::PickCompaction( if ((c = PickCompactionUniversalReadAmp( mutable_cf_options, version, score, UINT_MAX, num_files, log_buffer)) != nullptr) { - LogToBuffer(log_buffer, "[%s] Universal: compacting for file num\n", - version->cfd_->GetName().c_str()); + LogToBuffer(log_buffer, "[%s] Universal: compacting for file num -- %u\n", + version->cfd_->GetName().c_str(), num_files); } } } From 240ed0cd7b68f3dbaf48fb07e9e861b9fd653fe2 Mon Sep 17 00:00:00 2001 From: sdong Date: Thu, 23 Oct 2014 15:44:45 -0700 Subject: [PATCH 31/33] Fix uninitialized parameter caused by D24513 Summary: D24513 introduced a bug that a variable is not initialized. It also causes valgrind issue. Test Plan: Run tests used to fail valgrind and make sure it passes Reviewers: yhchiang, ljin, igor Reviewed By: igor Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D25569 --- db/internal_stats.h | 1 + 1 file changed, 1 insertion(+) diff --git a/db/internal_stats.h b/db/internal_stats.h index 84fd10289..5caa33415 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -279,6 +279,7 @@ class InternalStats { write_with_wal(0), write_other(0), write_self(0), + num_keys_written(0), seconds_up(0) {} } db_stats_snapshot_; From 001ce64dc7659c65569ffb1c440e26cd23db3c94 Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Fri, 24 Oct 2014 10:11:57 -0700 Subject: [PATCH 32/33] Use chrono for timing Summary: Since we depend on C++11, we might as well use it for timing, instead of this platform-depended code. Test Plan: Ran autovector_test, which reports time and confirmed that output is similar to master Reviewers: ljin, sdong, yhchiang, rven, dhruba Reviewed By: dhruba Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D25587 --- util/env_posix.cc | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/util/env_posix.cc b/util/env_posix.cc index cf917e874..177932bcd 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -7,6 +7,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. +#include #include #include #include @@ -1350,25 +1351,13 @@ class PosixEnv : public Env { } virtual uint64_t NowMicros() { - struct timeval tv; - // TODO(kailiu) MAC DON'T HAVE THIS - gettimeofday(&tv, nullptr); - return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; + return std::chrono::duration_cast( + std::chrono::steady_clock::now().time_since_epoch()).count(); } virtual uint64_t NowNanos() { -#ifdef OS_LINUX - struct timespec ts; - clock_gettime(CLOCK_MONOTONIC, &ts); - return static_cast(ts.tv_sec) * 1000000000 + ts.tv_nsec; -#elif __MACH__ - clock_serv_t cclock; - mach_timespec_t ts; - host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock); - clock_get_time(cclock, &ts); - mach_port_deallocate(mach_task_self(), cclock); -#endif - return static_cast(ts.tv_sec) * 1000000000 + ts.tv_nsec; + return std::chrono::duration_cast( + std::chrono::steady_clock::now().time_since_epoch()).count(); } virtual void SleepForMicroseconds(int micros) { From 965d9d50b8cbb413de5e834b5b83ddbb682d0f1d Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Fri, 24 Oct 2014 11:58:15 -0700 Subject: [PATCH 33/33] Fix timing --- util/env_posix.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/env_posix.cc b/util/env_posix.cc index 177932bcd..76ba4a6bd 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -1351,8 +1351,8 @@ class PosixEnv : public Env { } virtual uint64_t NowMicros() { - return std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()).count(); + return std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()).count(); } virtual uint64_t NowNanos() {