fixed conflict in java/Makefile

main
Vlad Balan 10 years ago
commit a04929aa49
  1. 6
      .gitignore
  2. 6
      Makefile
  3. 2
      db/column_family.h
  4. 4
      db/compaction_picker.cc
  5. 21
      db/db_bench.cc
  6. 73
      db/db_impl.cc
  7. 13
      db/db_impl_readonly.cc
  8. 160
      db/db_test.cc
  9. 69
      db/deletefile_test.cc
  10. 51
      db/forward_iterator.cc
  11. 6
      db/forward_iterator.h
  12. 24
      db/internal_stats.cc
  13. 7
      db/internal_stats.h
  14. 2
      db/version_edit.cc
  15. 6
      db/version_set.cc
  16. 6
      include/rocksdb/table.h
  17. 6
      java/Makefile
  18. 4
      java/crossbuild/build-linux-centos.sh
  19. 95
      java/org/rocksdb/AbstractComparator.java
  20. 167
      java/org/rocksdb/AbstractSlice.java
  21. 24
      java/org/rocksdb/Comparator.java
  22. 57
      java/org/rocksdb/ComparatorOptions.java
  23. 24
      java/org/rocksdb/DirectComparator.java
  24. 113
      java/org/rocksdb/DirectSlice.java
  25. 122
      java/org/rocksdb/HashLinkedListMemTableConfig.java
  26. 3
      java/org/rocksdb/HashSkipListMemTableConfig.java
  27. 22
      java/org/rocksdb/Options.java
  28. 38
      java/org/rocksdb/SkipListMemTableConfig.java
  29. 84
      java/org/rocksdb/Slice.java
  30. 4
      java/org/rocksdb/VectorMemTableConfig.java
  31. 166
      java/org/rocksdb/test/AbstractComparatorTest.java
  32. 34
      java/org/rocksdb/test/ComparatorOptionsTest.java
  33. 45
      java/org/rocksdb/test/ComparatorTest.java
  34. 48
      java/org/rocksdb/test/DirectComparatorTest.java
  35. 107
      java/org/rocksdb/test/MemTableTest.java
  36. 43
      java/org/rocksdb/test/Types.java
  37. 66
      java/rocksjni/comparator.cc
  38. 176
      java/rocksjni/comparatorjnicallback.cc
  39. 95
      java/rocksjni/comparatorjnicallback.h
  40. 34
      java/rocksjni/memtablejni.cc
  41. 60
      java/rocksjni/options.cc
  42. 180
      java/rocksjni/portal.h
  43. 251
      java/rocksjni/slice.cc
  44. 5
      table/adaptive_table_factory.h
  45. 16
      table/block_based_table_factory.cc
  46. 5
      table/block_based_table_factory.h
  47. 106
      table/block_based_table_reader.cc
  48. 8
      table/block_based_table_reader.h
  49. 3
      table/cuckoo_table_factory.h
  50. 5
      table/plain_table_factory.h
  51. 57
      table/table_test.cc
  52. 21
      util/env_posix.cc
  53. 4
      util/mutable_cf_options.cc
  54. 13
      util/mutable_cf_options.h
  55. 19
      util/options_helper.cc
  56. 15
      util/thread_local.h

6
.gitignore vendored

@ -31,8 +31,14 @@ coverage/COVERAGE_REPORT
package/
.phutil_module_cache
tags
java/out
java/*.log
java/include/org_rocksdb_*.h
.idea/
*.iml
unity.cc
java/crossbuild/.vagrant
.vagrant/

@ -528,7 +528,11 @@ ROCKSDB_SOURCES_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PAT
ifeq ($(PLATFORM), OS_MACOSX)
ROCKSDBJNILIB = librocksdbjni-osx.jnilib
ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-osx.jar
JAVA_INCLUDE = -I/System/Library/Frameworks/JavaVM.framework/Headers/
ifneq ("$(wildcard $(JAVA_HOME)/include/darwin)","")
JAVA_INCLUDE = -I$(JAVA_HOME)/include -I $(JAVA_HOME)/include/darwin
else
JAVA_INCLUDE = -I/System/Library/Frameworks/JavaVM.framework/Headers/
endif
endif
libz.a:

@ -166,7 +166,7 @@ class ColumnFamilyData {
bool IsDropped() const { return dropped_; }
// thread-safe
int NumberLevels() const { return options_.num_levels; }
int NumberLevels() const { return ioptions_.num_levels; }
void SetLogNumber(uint64_t log_number) { log_number_ = log_number; }
uint64_t GetLogNumber() const { return log_number_; }

@ -553,8 +553,8 @@ Compaction* UniversalCompactionPicker::PickCompaction(
if ((c = PickCompactionUniversalReadAmp(
mutable_cf_options, version, score, UINT_MAX,
num_files, log_buffer)) != nullptr) {
LogToBuffer(log_buffer, "[%s] Universal: compacting for file num\n",
version->cfd_->GetName().c_str());
LogToBuffer(log_buffer, "[%s] Universal: compacting for file num -- %u\n",
version->cfd_->GetName().c_str(), num_files);
}
}
}

@ -168,8 +168,9 @@ DEFINE_int32(duration, 0, "Time in seconds for the random-ops tests to run."
DEFINE_int32(value_size, 100, "Size of each value");
DEFINE_int32(seekseq_next, 0, "How many times to call Next() after Seek() in "
"fillseekseq");
DEFINE_int32(seek_nexts, 0,
"How many times to call Next() after Seek() in "
"fillseekseq and seekrandom");
DEFINE_bool(use_uint64_comparator, false, "use Uint64 user comparator");
@ -1231,7 +1232,7 @@ class Benchmark {
writes_ = (FLAGS_writes < 0 ? FLAGS_num : FLAGS_writes);
value_size_ = FLAGS_value_size;
key_size_ = FLAGS_key_size;
entries_per_batch_ = 1;
entries_per_batch_ = FLAGS_batch_size;
write_options_ = WriteOptions();
if (FLAGS_sync) {
write_options_.sync = true;
@ -1286,7 +1287,6 @@ class Benchmark {
} else if (name == Slice("readrandomfast")) {
method = &Benchmark::ReadRandomFast;
} else if (name == Slice("multireadrandom")) {
entries_per_batch_ = FLAGS_batch_size;
fprintf(stderr, "entries_per_batch = %" PRIi64 "\n",
entries_per_batch_);
method = &Benchmark::MultiReadRandom;
@ -2265,6 +2265,7 @@ class Benchmark {
std::unique_ptr<const char[]> key_guard(key.data());
Duration duration(FLAGS_duration, reads_);
char value_buffer[256];
while (!duration.Done(1)) {
if (!FLAGS_use_tailing_iterator && FLAGS_iter_refresh_interval_us >= 0) {
uint64_t now = FLAGS_env->NowMicros();
@ -2296,6 +2297,16 @@ class Benchmark {
if (iter_to_use->Valid() && iter_to_use->key().compare(key) == 0) {
found++;
}
for (int j = 0; j < FLAGS_seek_nexts && iter_to_use->Valid(); ++j) {
// Copy out iterator's value to make sure we read them.
Slice value = iter_to_use->value();
memcpy(value_buffer, value.data(),
std::min(value.size(), sizeof(value_buffer)));
iter_to_use->Next();
assert(iter_to_use->status().ok());
}
thread->stats.FinishedOps(&db_, db_.db, 1);
}
delete single_iter;
@ -2820,7 +2831,7 @@ class Benchmark {
assert(iter->Valid() && iter->key() == key);
thread->stats.FinishedOps(nullptr, db, 1);
for (int j = 0; j < FLAGS_seekseq_next && i+1 < FLAGS_num; ++j) {
for (int j = 0; j < FLAGS_seek_nexts && i + 1 < FLAGS_num; ++j) {
iter->Next();
GenerateKeyFromInt(++i, FLAGS_num, &key);
assert(iter->Valid() && iter->key() == key);

@ -282,12 +282,12 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) {
namespace {
Status SanitizeDBOptionsByCFOptions(
const DBOptions* db_opts,
Status SanitizeOptionsByTable(
const DBOptions& db_opts,
const std::vector<ColumnFamilyDescriptor>& column_families) {
Status s;
for (auto cf : column_families) {
s = cf.options.table_factory->SanitizeDBOptions(db_opts);
s = cf.options.table_factory->SanitizeOptions(db_opts, cf.options);
if (!s.ok()) {
return s;
}
@ -1863,12 +1863,16 @@ int DBImpl::NumberLevels(ColumnFamilyHandle* column_family) {
int DBImpl::MaxMemCompactionLevel(ColumnFamilyHandle* column_family) {
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
return cfh->cfd()->options()->max_mem_compaction_level;
MutexLock l(&mutex_);
return cfh->cfd()->GetSuperVersion()->
mutable_cf_options.max_mem_compaction_level;
}
int DBImpl::Level0StopWriteTrigger(ColumnFamilyHandle* column_family) {
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
return cfh->cfd()->options()->level0_stop_writes_trigger;
MutexLock l(&mutex_);
return cfh->cfd()->GetSuperVersion()->
mutable_cf_options.level0_stop_writes_trigger;
}
Status DBImpl::Flush(const FlushOptions& flush_options,
@ -3828,16 +3832,16 @@ Iterator* DBImpl::NewIterator(const ReadOptions& read_options,
// not supported in lite version
return nullptr;
#else
auto iter = new ForwardIterator(this, read_options, cfd);
SuperVersion* sv = cfd->GetReferencedSuperVersion(&mutex_);
auto iter = new ForwardIterator(this, read_options, cfd, sv);
return NewDBIterator(env_, *cfd->ioptions(), cfd->user_comparator(), iter,
kMaxSequenceNumber,
cfd->options()->max_sequential_skip_in_iterations,
read_options.iterate_upper_bound);
kMaxSequenceNumber,
sv->mutable_cf_options.max_sequential_skip_in_iterations,
read_options.iterate_upper_bound);
#endif
} else {
SequenceNumber latest_snapshot = versions_->LastSequence();
SuperVersion* sv = nullptr;
sv = cfd->GetReferencedSuperVersion(&mutex_);
SuperVersion* sv = cfd->GetReferencedSuperVersion(&mutex_);
auto snapshot =
read_options.snapshot != nullptr
@ -3889,7 +3893,7 @@ Iterator* DBImpl::NewIterator(const ReadOptions& read_options,
// that they are likely to be in the same cache line and/or page.
ArenaWrappedDBIter* db_iter = NewArenaWrappedDbIterator(
env_, *cfd->ioptions(), cfd->user_comparator(),
snapshot, cfd->options()->max_sequential_skip_in_iterations,
snapshot, sv->mutable_cf_options.max_sequential_skip_in_iterations,
read_options.iterate_upper_bound);
Iterator* internal_iter =
@ -3908,19 +3912,6 @@ Status DBImpl::NewIterators(
std::vector<Iterator*>* iterators) {
iterators->clear();
iterators->reserve(column_families.size());
SequenceNumber latest_snapshot = 0;
std::vector<SuperVersion*> super_versions;
super_versions.reserve(column_families.size());
if (!read_options.tailing) {
mutex_.Lock();
latest_snapshot = versions_->LastSequence();
for (auto cfh : column_families) {
auto cfd = reinterpret_cast<ColumnFamilyHandleImpl*>(cfh)->cfd();
super_versions.push_back(cfd->GetSuperVersion()->Ref());
}
mutex_.Unlock();
}
if (read_options.tailing) {
#ifdef ROCKSDB_LITE
@ -3929,17 +3920,21 @@ Status DBImpl::NewIterators(
#else
for (auto cfh : column_families) {
auto cfd = reinterpret_cast<ColumnFamilyHandleImpl*>(cfh)->cfd();
auto iter = new ForwardIterator(this, read_options, cfd);
SuperVersion* sv = cfd->GetReferencedSuperVersion(&mutex_);
auto iter = new ForwardIterator(this, read_options, cfd, sv);
iterators->push_back(
NewDBIterator(env_, *cfd->ioptions(), cfd->user_comparator(), iter,
kMaxSequenceNumber,
cfd->options()->max_sequential_skip_in_iterations));
kMaxSequenceNumber,
sv->mutable_cf_options.max_sequential_skip_in_iterations));
}
#endif
} else {
SequenceNumber latest_snapshot = versions_->LastSequence();
for (size_t i = 0; i < column_families.size(); ++i) {
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_families[i]);
auto cfd = cfh->cfd();
auto* cfd = reinterpret_cast<ColumnFamilyHandleImpl*>(
column_families[i])->cfd();
SuperVersion* sv = cfd->GetReferencedSuperVersion(&mutex_);
auto snapshot =
read_options.snapshot != nullptr
@ -3949,9 +3944,9 @@ Status DBImpl::NewIterators(
ArenaWrappedDBIter* db_iter = NewArenaWrappedDbIterator(
env_, *cfd->ioptions(), cfd->user_comparator(), snapshot,
cfd->options()->max_sequential_skip_in_iterations);
sv->mutable_cf_options.max_sequential_skip_in_iterations);
Iterator* internal_iter = NewInternalIterator(
read_options, cfd, super_versions[i], db_iter->GetArena());
read_options, cfd, sv, db_iter->GetArena());
db_iter->SetIterUnderDBIter(internal_iter);
iterators->push_back(db_iter);
}
@ -4129,7 +4124,7 @@ Status DBImpl::Write(const WriteOptions& write_options, WriteBatch* my_batch) {
const uint64_t batch_size = WriteBatchInternal::ByteSize(updates);
// Record statistics
RecordTick(stats_, NUMBER_KEYS_WRITTEN, my_batch_count);
RecordTick(stats_, BYTES_WRITTEN, WriteBatchInternal::ByteSize(updates));
RecordTick(stats_, BYTES_WRITTEN, batch_size);
if (write_options.disableWAL) {
flush_on_destroy_ = true;
}
@ -4179,6 +4174,8 @@ Status DBImpl::Write(const WriteOptions& write_options, WriteBatch* my_batch) {
// internal stats
default_cf_internal_stats_->AddDBStats(
InternalStats::BYTES_WRITTEN, batch_size);
default_cf_internal_stats_->AddDBStats(InternalStats::NUMBER_KEYS_WRITTEN,
my_batch_count);
if (!write_options.disableWAL) {
default_cf_internal_stats_->AddDBStats(
InternalStats::WAL_FILE_SYNCED, 1);
@ -4542,7 +4539,7 @@ Status DBImpl::DeleteFile(std::string name) {
name.c_str());
return Status::InvalidArgument("File not found");
}
assert((level > 0) && (level < cfd->NumberLevels()));
assert(level < cfd->NumberLevels());
// If the file is being compacted no need to delete.
if (metadata->being_compacted) {
@ -4561,6 +4558,12 @@ Status DBImpl::DeleteFile(std::string name) {
return Status::InvalidArgument("File not in last level");
}
}
// if level == 0, it has to be the oldest file
if (level == 0 &&
cfd->current()->files_[0].back()->fd.GetNumber() != number) {
return Status::InvalidArgument("File in level 0, but not oldest");
}
edit.SetColumnFamily(cfd->GetID());
edit.DeleteFile(level, number);
status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),
&edit, &mutex_, db_directory_.get());
@ -4703,7 +4706,7 @@ Status DB::Open(const Options& options, const std::string& dbname, DB** dbptr) {
Status DB::Open(const DBOptions& db_options, const std::string& dbname,
const std::vector<ColumnFamilyDescriptor>& column_families,
std::vector<ColumnFamilyHandle*>* handles, DB** dbptr) {
Status s = SanitizeDBOptionsByCFOptions(&db_options, column_families);
Status s = SanitizeOptionsByTable(db_options, column_families);
if (!s.ok()) {
return s;
}

@ -53,7 +53,7 @@ Iterator* DBImplReadOnly::NewIterator(const ReadOptions& read_options,
? reinterpret_cast<const SnapshotImpl*>(
read_options.snapshot)->number_
: latest_snapshot),
cfd->options()->max_sequential_skip_in_iterations);
super_version->mutable_cf_options.max_sequential_skip_in_iterations);
auto internal_iter = NewInternalIterator(
read_options, cfd, super_version, db_iter->GetArena());
db_iter->SetIterUnderDBIter(internal_iter);
@ -72,16 +72,17 @@ Status DBImplReadOnly::NewIterators(
SequenceNumber latest_snapshot = versions_->LastSequence();
for (auto cfh : column_families) {
auto cfd = reinterpret_cast<ColumnFamilyHandleImpl*>(cfh)->cfd();
auto db_iter = NewArenaWrappedDbIterator(
auto* cfd = reinterpret_cast<ColumnFamilyHandleImpl*>(cfh)->cfd();
auto* sv = cfd->GetSuperVersion()->Ref();
auto* db_iter = NewArenaWrappedDbIterator(
env_, *cfd->ioptions(), cfd->user_comparator(),
(read_options.snapshot != nullptr
? reinterpret_cast<const SnapshotImpl*>(
read_options.snapshot)->number_
: latest_snapshot),
cfd->options()->max_sequential_skip_in_iterations);
auto internal_iter = NewInternalIterator(
read_options, cfd, cfd->GetSuperVersion()->Ref(), db_iter->GetArena());
sv->mutable_cf_options.max_sequential_skip_in_iterations);
auto* internal_iter = NewInternalIterator(
read_options, cfd, sv, db_iter->GetArena());
db_iter->SetIterUnderDBIter(internal_iter);
iterators->push_back(db_iter);
}

@ -807,8 +807,8 @@ class DBTest {
}
std::string AllEntriesFor(const Slice& user_key, int cf = 0) {
ScopedArenaIterator iter;
Arena arena;
ScopedArenaIterator iter;
if (cf == 0) {
iter.set(dbfull()->TEST_NewInternalIterator(&arena));
} else {
@ -5272,7 +5272,7 @@ TEST(DBTest, CompactBetweenSnapshots) {
do {
Options options = CurrentOptions();
options.disable_auto_compactions = true;
CreateAndReopenWithCF({"pikachu"});
CreateAndReopenWithCF({"pikachu"}, &options);
Random rnd(301);
FillLevels("a", "z", 1);
@ -8354,6 +8354,17 @@ TEST(DBTest, TableOptionsSanitizeTest) {
options.prefix_extractor.reset(NewNoopTransform());
Destroy(&options);
ASSERT_TRUE(TryReopen(&options).IsNotSupported());
// Test for check of prefix_extractor when hash index is used for
// block-based table
BlockBasedTableOptions to;
to.index_type = BlockBasedTableOptions::kHashSearch;
options = Options();
options.create_if_missing = true;
options.table_factory.reset(NewBlockBasedTableFactory(to));
ASSERT_TRUE(TryReopen(&options).IsInvalidArgument());
options.prefix_extractor.reset(NewFixedPrefixTransform(1));
ASSERT_OK(TryReopen(&options));
}
TEST(DBTest, DBIteratorBoundTest) {
@ -8691,8 +8702,9 @@ TEST(DBTest, DynamicCompactionOptions) {
dbfull()->TEST_WaitForFlushMemTable();
};
// Write 3 files that have the same key range, trigger compaction and
// result in one L1 file
// Write 3 files that have the same key range.
// Since level0_file_num_compaction_trigger is 3, compaction should be
// triggered. The compaction should result in one L1 file
gen_l0_kb(0, 64, 1);
ASSERT_EQ(NumTableFilesAtLevel(0), 1);
gen_l0_kb(0, 64, 1);
@ -8707,6 +8719,10 @@ TEST(DBTest, DynamicCompactionOptions) {
ASSERT_GE(metadata[0].size, k64KB - k4KB);
// Test compaction trigger and target_file_size_base
// Reduce compaction trigger to 2, and reduce L1 file size to 32KB.
// Writing two 64KB L0 files should trigger a compaction. Since these
// 2 L0 files have the same key range, the compaction merges them and
// should result in 2 32KB L1 files.
ASSERT_TRUE(dbfull()->SetOptions({
{"level0_file_num_compaction_trigger", "2"},
{"target_file_size_base", std::to_string(k32KB) }
@ -8722,8 +8738,13 @@ TEST(DBTest, DynamicCompactionOptions) {
ASSERT_EQ(2U, metadata.size());
ASSERT_LE(metadata[0].size, k32KB + k4KB);
ASSERT_GE(metadata[0].size, k32KB - k4KB);
ASSERT_LE(metadata[1].size, k32KB + k4KB);
ASSERT_GE(metadata[1].size, k32KB - k4KB);
// Test max_bytes_for_level_base
// Increase level base size to 256KB and write enough data that will
// fill L1 and L2. L1 size should be around 256KB while L2 size should be
// around 256KB x 4.
ASSERT_TRUE(dbfull()->SetOptions({
{"max_bytes_for_level_base", std::to_string(k256KB) }
}));
@ -8740,7 +8761,9 @@ TEST(DBTest, DynamicCompactionOptions) {
SizeAtLevel(2) < 4 * k256KB * 1.2);
// Test max_bytes_for_level_multiplier and
// max_bytes_for_level_base (reduce)
// max_bytes_for_level_base. Now, reduce both multiplier and level base.
// After filling enough data that can fit in L1 - L3, we should see L1 size
// reduced to 128KB from 256KB which was asserted previously. Same for L2.
ASSERT_TRUE(dbfull()->SetOptions({
{"max_bytes_for_level_multiplier", "2"},
{"max_bytes_for_level_base", std::to_string(k128KB) }
@ -8752,14 +8775,14 @@ TEST(DBTest, DynamicCompactionOptions) {
gen_l0_kb(i, 64, 32);
}
dbfull()->TEST_WaitForCompact();
ASSERT_TRUE(SizeAtLevel(1) > k128KB * 0.8 &&
SizeAtLevel(1) < k128KB * 1.2);
ASSERT_TRUE(SizeAtLevel(2) > 2 * k128KB * 0.8 &&
SizeAtLevel(2) < 2 * k128KB * 1.2);
ASSERT_TRUE(SizeAtLevel(3) > 4 * k128KB * 0.8 &&
SizeAtLevel(3) < 4 * k128KB * 1.2);
// Clean up memtable and L0
ASSERT_TRUE(SizeAtLevel(1) < k128KB * 1.2);
ASSERT_TRUE(SizeAtLevel(2) < 2 * k128KB * 1.2);
ASSERT_TRUE(SizeAtLevel(3) < 4 * k128KB * 1.2);
// Test level0_stop_writes_trigger.
// Clean up memtable and L0. Block compaction threads. If we continue to
// write and flush memtables, we should see put timeout after 8 memtable
// flushes since level0_stop_writes_trigger = 8
dbfull()->CompactRange(nullptr, nullptr);
// Block compaction
SleepingBackgroundTask sleeping_task_low1;
@ -8780,7 +8803,9 @@ TEST(DBTest, DynamicCompactionOptions) {
sleeping_task_low1.WakeUp();
sleeping_task_low1.WaitUntilDone();
// Test: stop trigger (reduce)
// Now reduce level0_stop_writes_trigger to 6. Clear up memtables and L0.
// Block compaction thread again. Perform the put and memtable flushes
// until we see timeout after 6 memtable flushes.
ASSERT_TRUE(dbfull()->SetOptions({
{"level0_stop_writes_trigger", "6"}
}));
@ -8802,6 +8827,10 @@ TEST(DBTest, DynamicCompactionOptions) {
sleeping_task_low2.WaitUntilDone();
// Test disable_auto_compactions
// Compaction thread is unblocked but auto compaction is disabled. Write
// 4 L0 files and compaction should be triggered. If auto compaction is
// disabled, then TEST_WaitForCompact will be waiting for nothing. Number of
// L0 files do not change after the call.
ASSERT_TRUE(dbfull()->SetOptions({
{"disable_auto_compactions", "true"}
}));
@ -8816,6 +8845,8 @@ TEST(DBTest, DynamicCompactionOptions) {
dbfull()->TEST_WaitForCompact();
ASSERT_EQ(NumTableFilesAtLevel(0), 4);
// Enable auto compaction and perform the same test, # of L0 files should be
// reduced after compaction.
ASSERT_TRUE(dbfull()->SetOptions({
{"disable_auto_compactions", "false"}
}));
@ -8830,8 +8861,10 @@ TEST(DBTest, DynamicCompactionOptions) {
dbfull()->TEST_WaitForCompact();
ASSERT_LT(NumTableFilesAtLevel(0), 4);
// Test for hard_rate_limit, change max_bytes_for_level_base to make level
// size big
// Test for hard_rate_limit.
// First change max_bytes_for_level_base to a big value and populate
// L1 - L3. Then shrink max_bytes_for_level_base and disable auto compaction
// at the same time, we should see some level with score greater than 2.
ASSERT_TRUE(dbfull()->SetOptions({
{"max_bytes_for_level_base", std::to_string(k256KB) }
}));
@ -8861,7 +8894,9 @@ TEST(DBTest, DynamicCompactionOptions) {
SizeAtLevel(2) / k64KB > 4 ||
SizeAtLevel(3) / k64KB > 8);
// Enfoce hard rate limit, L0 score is not regulated by this limit
// Enforce hard rate limit. Now set hard_rate_limit to 2,
// we should start to see put delay (1000 us) and timeout as a result
// (L0 score is not regulated by this limit).
ASSERT_TRUE(dbfull()->SetOptions({
{"hard_rate_limit", "2"}
}));
@ -8873,12 +8908,101 @@ TEST(DBTest, DynamicCompactionOptions) {
wo.timeout_hint_us = 500;
ASSERT_TRUE(Put(Key(count), RandomString(&rnd, 1024), wo).IsTimedOut());
// Bump up limit
// Lift the limit and no timeout
ASSERT_TRUE(dbfull()->SetOptions({
{"hard_rate_limit", "100"}
}));
dbfull()->TEST_FlushMemTable(true);
ASSERT_TRUE(Put(Key(count), RandomString(&rnd, 1024), wo).ok());
// Test max_mem_compaction_level.
// Destroy DB and start from scratch
options.max_background_compactions = 1;
options.max_background_flushes = 0;
options.max_mem_compaction_level = 2;
DestroyAndReopen(&options);
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
ASSERT_EQ(NumTableFilesAtLevel(1), 0);
ASSERT_EQ(NumTableFilesAtLevel(2), 0);
ASSERT_TRUE(Put("max_mem_compaction_level_key",
RandomString(&rnd, 8)).ok());
dbfull()->TEST_FlushMemTable(true);
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
ASSERT_EQ(NumTableFilesAtLevel(1), 0);
ASSERT_EQ(NumTableFilesAtLevel(2), 1);
ASSERT_TRUE(Put("max_mem_compaction_level_key",
RandomString(&rnd, 8)).ok());
// Set new value and it becomes effective in this flush
ASSERT_TRUE(dbfull()->SetOptions({
{"max_mem_compaction_level", "1"}
}));
dbfull()->TEST_FlushMemTable(true);
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
ASSERT_EQ(NumTableFilesAtLevel(1), 1);
ASSERT_EQ(NumTableFilesAtLevel(2), 1);
ASSERT_TRUE(Put("max_mem_compaction_level_key",
RandomString(&rnd, 8)).ok());
// Set new value and it becomes effective in this flush
ASSERT_TRUE(dbfull()->SetOptions({
{"max_mem_compaction_level", "0"}
}));
dbfull()->TEST_FlushMemTable(true);
ASSERT_EQ(NumTableFilesAtLevel(0), 1);
ASSERT_EQ(NumTableFilesAtLevel(1), 1);
ASSERT_EQ(NumTableFilesAtLevel(2), 1);
}
// Verifies that max_sequential_skip_in_iterations can be changed at runtime
// through SetOptions() and that the new value is observed by iterators
// created afterwards, using the NUMBER_OF_RESEEKS_IN_ITERATION ticker as the
// observable signal.
TEST(DBTest, DynamicMiscOptions) {
// Test max_sequential_skip_in_iterations
Options options;
options.env = env_;
options.create_if_missing = true;
options.max_sequential_skip_in_iterations = 16;
options.compression = kNoCompression;
options.statistics = rocksdb::CreateDBStatistics();
DestroyAndReopen(&options);
// Helper: writes key_start once, key_start + 1 ten times and key_start + 2
// once, then seeks to key_start + 1 and steps to key_start + 2. Finally
// asserts that the reseek ticker equals num_reseek.
// NOTE(review): the ten overwrites of the middle key are presumably what the
// iterator has to skip on Next(); whether that triggers a reseek depends on
// the current max_sequential_skip_in_iterations -- confirm against DBIter.
auto assert_reseek_count = [this, &options](int key_start, int num_reseek) {
int key0 = key_start;
int key1 = key_start + 1;
int key2 = key_start + 2;
Random rnd(301);
ASSERT_OK(Put(Key(key0), RandomString(&rnd, 8)));
for (int i = 0; i < 10; ++i) {
ASSERT_OK(Put(Key(key1), RandomString(&rnd, 8)));
}
ASSERT_OK(Put(Key(key2), RandomString(&rnd, 8)));
std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
iter->Seek(Key(key1));
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(iter->key().compare(Key(key1)), 0);
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(iter->key().compare(Key(key2)), 0);
ASSERT_EQ(num_reseek,
TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION));
};
// No reseek
assert_reseek_count(100, 0);
// Lower the skip limit to 4, below the ten overwrites written by the helper.
ASSERT_TRUE(dbfull()->SetOptions({
{"max_sequential_skip_in_iterations", "4"}
}));
// Clear memtable and make new option effective
dbfull()->TEST_FlushMemTable(true);
// Trigger reseek
assert_reseek_count(200, 1);
// Restore the skip limit to its original value of 16.
ASSERT_TRUE(dbfull()->SetOptions({
{"max_sequential_skip_in_iterations", "16"}
}));
// Clear memtable and make new option effective
dbfull()->TEST_FlushMemTable(true);
// No additional reseek: the expected count stays at 1, i.e. the ticker
// appears to be cumulative across the calls above.
assert_reseek_count(300, 1);
}
} // namespace rocksdb

@ -287,6 +287,75 @@ TEST(DeleteFileTest, DeleteLogFiles) {
CloseDB();
}
// Exercises DB::DeleteFile() on files that belong to a non-default column
// family: writes two flushed files into "new_cf", checks that deleting the
// newer one is rejected and deleting the older one succeeds, and verifies the
// remaining data both before and after reopening the DB.
TEST(DeleteFileTest, DeleteNonDefaultColumnFamily) {
// This test opens its own DB handle below, so close the fixture's first.
CloseDB();
DBOptions db_options;
db_options.create_if_missing = true;
db_options.create_missing_column_families = true;
std::vector<ColumnFamilyDescriptor> column_families;
// A default-constructed descriptor (presumably the default column family --
// confirm against ColumnFamilyDescriptor) followed by "new_cf".
column_families.emplace_back();
column_families.emplace_back("new_cf", ColumnFamilyOptions());
std::vector<rocksdb::ColumnFamilyHandle*> handles;
rocksdb::DB* db;
ASSERT_OK(DB::Open(db_options, dbname_, column_families, &handles, &db));
Random rnd(5);
// First batch of 1000 random entries into new_cf, flushed to its own file.
for (int i = 0; i < 1000; ++i) {
ASSERT_OK(db->Put(WriteOptions(), handles[1], test::RandomKey(&rnd, 10),
test::RandomKey(&rnd, 10)));
}
ASSERT_OK(db->Flush(FlushOptions(), handles[1]));
// Second batch of 1000 random entries, flushed to a second file.
for (int i = 0; i < 1000; ++i) {
ASSERT_OK(db->Put(WriteOptions(), handles[1], test::RandomKey(&rnd, 10),
test::RandomKey(&rnd, 10)));
}
ASSERT_OK(db->Flush(FlushOptions(), handles[1]));
// Both live files must belong to new_cf; nothing was written to handles[0].
std::vector<LiveFileMetaData> metadata;
db->GetLiveFilesMetaData(&metadata);
ASSERT_EQ(2U, metadata.size());
ASSERT_EQ("new_cf", metadata[0].column_family_name);
ASSERT_EQ("new_cf", metadata[1].column_family_name);
// Order the two files by smallest sequence number: the lower one is the
// older flush.
auto old_file = metadata[0].smallest_seqno < metadata[1].smallest_seqno
? metadata[0].name
: metadata[1].name;
auto new_file = metadata[0].smallest_seqno > metadata[1].smallest_seqno
? metadata[0].name
: metadata[1].name;
// Deleting the newer file must be rejected; only the older one may go.
// NOTE(review): presumably both files are still in level 0 here -- confirm.
ASSERT_TRUE(db->DeleteFile(new_file).IsInvalidArgument());
ASSERT_OK(db->DeleteFile(old_file));
// Only the entries of the remaining file should be visible.
// NOTE(review): expecting exactly 1000 assumes the random keys of the second
// batch are all distinct -- confirm.
{
std::unique_ptr<Iterator> itr(db->NewIterator(ReadOptions(), handles[1]));
int count = 0;
for (itr->SeekToFirst(); itr->Valid(); itr->Next()) {
ASSERT_OK(itr->status());
++count;
}
ASSERT_EQ(count, 1000);
}
// Release handles before the DB, then reopen to check that the deletion is
// still reflected after a restart.
delete handles[0];
delete handles[1];
delete db;
ASSERT_OK(DB::Open(db_options, dbname_, column_families, &handles, &db));
{
std::unique_ptr<Iterator> itr(db->NewIterator(ReadOptions(), handles[1]));
int count = 0;
for (itr->SeekToFirst(); itr->Valid(); itr->Next()) {
ASSERT_OK(itr->status());
++count;
}
ASSERT_EQ(count, 1000);
}
delete handles[0];
delete handles[1];
delete db;
}
} //namespace rocksdb
int main(int argc, char** argv) {

@ -114,25 +114,29 @@ class LevelIterator : public Iterator {
};
ForwardIterator::ForwardIterator(DBImpl* db, const ReadOptions& read_options,
ColumnFamilyData* cfd)
ColumnFamilyData* cfd, SuperVersion* current_sv)
: db_(db),
read_options_(read_options),
cfd_(cfd),
prefix_extractor_(cfd->options()->prefix_extractor.get()),
user_comparator_(cfd->user_comparator()),
immutable_min_heap_(MinIterComparator(&cfd_->internal_comparator())),
sv_(nullptr),
sv_(current_sv),
mutable_iter_(nullptr),
current_(nullptr),
valid_(false),
is_prev_set_(false),
is_prev_inclusive_(false) {}
is_prev_inclusive_(false) {
if (sv_) {
RebuildIterators(false);
}
}
ForwardIterator::~ForwardIterator() {
Cleanup();
Cleanup(true);
}
void ForwardIterator::Cleanup() {
void ForwardIterator::Cleanup(bool release_sv) {
if (mutable_iter_ != nullptr) {
mutable_iter_->~Iterator();
}
@ -149,15 +153,17 @@ void ForwardIterator::Cleanup() {
}
level_iters_.clear();
if (sv_ != nullptr && sv_->Unref()) {
DBImpl::DeletionState deletion_state;
db_->mutex_.Lock();
sv_->Cleanup();
db_->FindObsoleteFiles(deletion_state, false, true);
db_->mutex_.Unlock();
delete sv_;
if (deletion_state.HaveSomethingToDelete()) {
db_->PurgeObsoleteFiles(deletion_state);
if (release_sv) {
if (sv_ != nullptr && sv_->Unref()) {
DBImpl::DeletionState deletion_state;
db_->mutex_.Lock();
sv_->Cleanup();
db_->FindObsoleteFiles(deletion_state, false, true);
db_->mutex_.Unlock();
delete sv_;
if (deletion_state.HaveSomethingToDelete()) {
db_->PurgeObsoleteFiles(deletion_state);
}
}
}
}
@ -169,7 +175,7 @@ bool ForwardIterator::Valid() const {
void ForwardIterator::SeekToFirst() {
if (sv_ == nullptr ||
sv_ ->version_number != cfd_->GetSuperVersionNumber()) {
RebuildIterators();
RebuildIterators(true);
} else if (status_.IsIncomplete()) {
ResetIncompleteIterators();
}
@ -179,7 +185,7 @@ void ForwardIterator::SeekToFirst() {
void ForwardIterator::Seek(const Slice& internal_key) {
if (sv_ == nullptr ||
sv_ ->version_number != cfd_->GetSuperVersionNumber()) {
RebuildIterators();
RebuildIterators(true);
} else if (status_.IsIncomplete()) {
ResetIncompleteIterators();
}
@ -188,6 +194,7 @@ void ForwardIterator::Seek(const Slice& internal_key) {
void ForwardIterator::SeekInternal(const Slice& internal_key,
bool seek_to_first) {
assert(mutable_iter_);
// mutable
seek_to_first ? mutable_iter_->SeekToFirst() :
mutable_iter_->Seek(internal_key);
@ -338,7 +345,7 @@ void ForwardIterator::Next() {
std::string current_key = key().ToString();
Slice old_key(current_key.data(), current_key.size());
RebuildIterators();
RebuildIterators(true);
SeekInternal(old_key, false);
if (!valid_ || key().compare(old_key) != 0) {
return;
@ -412,11 +419,13 @@ Status ForwardIterator::status() const {
return Status::OK();
}
void ForwardIterator::RebuildIterators() {
void ForwardIterator::RebuildIterators(bool refresh_sv) {
// Clean up
Cleanup();
// New
sv_ = cfd_->GetReferencedSuperVersion(&(db_->mutex_));
Cleanup(refresh_sv);
if (refresh_sv) {
// New
sv_ = cfd_->GetReferencedSuperVersion(&(db_->mutex_));
}
mutable_iter_ = sv_->mem->NewIterator(read_options_, &arena_);
sv_->imm->AddIterators(read_options_, &imm_iters_, &arena_);
const auto& l0_files = sv_->current->files_[0];

@ -51,7 +51,7 @@ typedef std::priority_queue<Iterator*,
class ForwardIterator : public Iterator {
public:
ForwardIterator(DBImpl* db, const ReadOptions& read_options,
ColumnFamilyData* cfd);
ColumnFamilyData* cfd, SuperVersion* current_sv = nullptr);
virtual ~ForwardIterator();
void SeekToLast() override {
@ -72,8 +72,8 @@ class ForwardIterator : public Iterator {
virtual Status status() const override;
private:
void Cleanup();
void RebuildIterators();
void Cleanup(bool release_sv);
void RebuildIterators(bool refresh_sv);
void ResetIncompleteIterators();
void SeekInternal(const Slice& internal_key, bool seek_to_first);
void UpdateCurrent();

@ -293,16 +293,25 @@ void InternalStats::DumpDBStats(std::string* value) {
value->append(buf);
// Cumulative
uint64_t user_bytes_written = db_stats_[InternalStats::BYTES_WRITTEN];
uint64_t num_keys_written = db_stats_[InternalStats::NUMBER_KEYS_WRITTEN];
uint64_t write_other = db_stats_[InternalStats::WRITE_DONE_BY_OTHER];
uint64_t write_self = db_stats_[InternalStats::WRITE_DONE_BY_SELF];
uint64_t wal_bytes = db_stats_[InternalStats::WAL_FILE_BYTES];
uint64_t wal_synced = db_stats_[InternalStats::WAL_FILE_SYNCED];
uint64_t write_with_wal = db_stats_[InternalStats::WRITE_WITH_WAL];
// Data
// writes: total number of write requests.
// keys: total number of key updates issued by all the write requests
// batches: number of group commits issued to the DB. Each group can contain
// one or more writes.
// so keys/writes is the average number of puts per write request
// (multi-put or put), and writes/batches is the average group commit size.
//
// The format is the same for interval stats.
snprintf(buf, sizeof(buf),
"Cumulative writes: %" PRIu64 " writes, %" PRIu64 " batches, "
"%.1f writes per batch, %.2f GB user ingest\n",
write_other + write_self, write_self,
"Cumulative writes: %" PRIu64 " writes, %" PRIu64 " keys, %" PRIu64
" batches, %.1f writes per batch, %.2f GB user ingest\n",
write_other + write_self, num_keys_written, write_self,
(write_other + write_self) / static_cast<double>(write_self + 1),
user_bytes_written / kGB);
value->append(buf);
@ -318,11 +327,13 @@ void InternalStats::DumpDBStats(std::string* value) {
// Interval
uint64_t interval_write_other = write_other - db_stats_snapshot_.write_other;
uint64_t interval_write_self = write_self - db_stats_snapshot_.write_self;
uint64_t interval_num_keys_written =
num_keys_written - db_stats_snapshot_.num_keys_written;
snprintf(buf, sizeof(buf),
"Interval writes: %" PRIu64 " writes, %" PRIu64 " batches, "
"%.1f writes per batch, %.1f MB user ingest\n",
"Interval writes: %" PRIu64 " writes, %" PRIu64 " keys, %" PRIu64
" batches, %.1f writes per batch, %.1f MB user ingest\n",
interval_write_other + interval_write_self,
interval_write_self,
interval_num_keys_written, interval_write_self,
static_cast<double>(interval_write_other + interval_write_self) /
(interval_write_self + 1),
(user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB);
@ -347,6 +358,7 @@ void InternalStats::DumpDBStats(std::string* value) {
db_stats_snapshot_.ingest_bytes = user_bytes_written;
db_stats_snapshot_.write_other = write_other;
db_stats_snapshot_.write_self = write_self;
db_stats_snapshot_.num_keys_written = num_keys_written;
db_stats_snapshot_.wal_bytes = wal_bytes;
db_stats_snapshot_.wal_synced = wal_synced;
db_stats_snapshot_.write_with_wal = write_with_wal;

@ -67,6 +67,7 @@ class InternalStats {
WAL_FILE_BYTES,
WAL_FILE_SYNCED,
BYTES_WRITTEN,
NUMBER_KEYS_WRITTEN,
WRITE_DONE_BY_OTHER,
WRITE_DONE_BY_SELF,
WRITE_WITH_WAL,
@ -264,6 +265,11 @@ class InternalStats {
// another thread.
uint64_t write_other;
uint64_t write_self;
// Total number of keys written. write_self and write_other count the
// write requests written; each write request can contain updates to
// multiple keys. num_keys_written is the total number of keys updated by
// all those writes.
uint64_t num_keys_written;
double seconds_up;
DBStatsSnapshot()
@ -273,6 +279,7 @@ class InternalStats {
write_with_wal(0),
write_other(0),
write_self(0),
num_keys_written(0),
seconds_up(0) {}
} db_stats_snapshot_;

@ -293,7 +293,7 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
new_files_.push_back(std::make_pair(level, f));
} else {
if (!msg) {
msg = "new-file2 entry";
msg = "new-file3 entry";
}
}
break;

@ -895,7 +895,7 @@ void Version::ComputeCompactionScore(
}
if (cfd_->ioptions()->compaction_style == kCompactionStyleFIFO) {
score = static_cast<double>(total_size) /
cfd_->options()->compaction_options_fifo.max_table_files_size;
cfd_->ioptions()->compaction_options_fifo.max_table_files_size;
} else if (numfiles >= mutable_cf_options.level0_stop_writes_trigger) {
// If we are slowing down writes, then we better compact that first
score = 1000000;
@ -1051,8 +1051,8 @@ int Version::PickLevelForMemTableOutput(
InternalKey start(smallest_user_key, kMaxSequenceNumber, kValueTypeForSeek);
InternalKey limit(largest_user_key, 0, static_cast<ValueType>(0));
std::vector<FileMetaData*> overlaps;
int max_mem_compact_level = cfd_->options()->max_mem_compaction_level;
while (max_mem_compact_level > 0 && level < max_mem_compact_level) {
while (mutable_cf_options.max_mem_compaction_level > 0 &&
level < mutable_cf_options.max_mem_compaction_level) {
if (OverlapInLevel(level + 1, &smallest_user_key, &largest_user_key)) {
break;
}

@ -355,11 +355,13 @@ class TableFactory {
WritableFile* file, const CompressionType compression_type,
const CompressionOptions& compression_opts) const = 0;
// Sanitizes the specified DB Options.
// Sanitizes the specified DB Options and ColumnFamilyOptions.
//
// If the function cannot find a way to sanitize the input DB Options,
// a non-ok Status will be returned.
virtual Status SanitizeDBOptions(const DBOptions* db_opts) const = 0;
virtual Status SanitizeOptions(
const DBOptions& db_opts,
const ColumnFamilyOptions& cf_opts) const = 0;
// Return a string that contains printable format of table configurations.
// RocksDB prints configurations at DB Open().

@ -1,4 +1,4 @@
NATIVE_JAVA_CLASSES = org.rocksdb.RocksDB org.rocksdb.Options org.rocksdb.WriteBatch org.rocksdb.WriteBatchInternal org.rocksdb.WriteBatchTest org.rocksdb.WriteOptions org.rocksdb.BackupableDB org.rocksdb.BackupableDBOptions org.rocksdb.Statistics org.rocksdb.RocksIterator org.rocksdb.VectorMemTableConfig org.rocksdb.SkipListMemTableConfig org.rocksdb.HashLinkedListMemTableConfig org.rocksdb.HashSkipListMemTableConfig org.rocksdb.PlainTableConfig org.rocksdb.BlockBasedTableConfig org.rocksdb.ReadOptions org.rocksdb.Filter org.rocksdb.BloomFilter org.rocksdb.RestoreOptions org.rocksdb.RestoreBackupableDB org.rocksdb.RocksEnv org.rocksdb.GenericRateLimiterConfig org.rocksdb.ColumnFamilyHandle org.rocksdb.MergeOperator org.rocksdb.StringAppendOperator
NATIVE_JAVA_CLASSES = org.rocksdb.RocksDB org.rocksdb.Options org.rocksdb.WriteBatch org.rocksdb.WriteBatchInternal org.rocksdb.WriteBatchTest org.rocksdb.WriteOptions org.rocksdb.BackupableDB org.rocksdb.BackupableDBOptions org.rocksdb.Statistics org.rocksdb.RocksIterator org.rocksdb.VectorMemTableConfig org.rocksdb.SkipListMemTableConfig org.rocksdb.HashLinkedListMemTableConfig org.rocksdb.HashSkipListMemTableConfig org.rocksdb.PlainTableConfig org.rocksdb.BlockBasedTableConfig org.rocksdb.ReadOptions org.rocksdb.Filter org.rocksdb.BloomFilter org.rocksdb.ComparatorOptions org.rocksdb.AbstractComparator org.rocksdb.Comparator org.rocksdb.DirectComparator org.rocksdb.AbstractSlice org.rocksdb.Slice org.rocksdb.DirectSlice org.rocksdb.RestoreOptions org.rocksdb.RestoreBackupableDB org.rocksdb.RocksEnv org.rocksdb.GenericRateLimiterConfig org.rocksdb.ColumnFamilyHandle org.rocksdb.MergeOperator org.rocksdb.StringAppendOperator
ROCKSDB_MAJOR = $(shell egrep "ROCKSDB_MAJOR.[0-9]" ../include/rocksdb/version.h | cut -d ' ' -f 3)
ROCKSDB_MINOR = $(shell egrep "ROCKSDB_MINOR.[0-9]" ../include/rocksdb/version.h | cut -d ' ' -f 3)
@ -42,11 +42,15 @@ test: java
java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.ColumnFamilyTest
java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.FilterTest
java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.KeyMayExistTest
java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.MemTableTest
java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.OptionsTest
java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.ReadOnlyTest
java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.MergeTest
java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.ReadOptionsTest
java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.StatisticsCollectorTest
java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.ComparatorOptionsTest
java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.ComparatorTest
java -ea -Djava.library.path=.:../ -cp "$(ROCKSDB_JAR):.:./*" org.rocksdb.test.DirectComparatorTest
@rm -rf /tmp/rocksdbjni_*
db_bench: java

@ -1,7 +1,7 @@
#!/usr/bin/env bash
# install all required packages for rocksdb that are available through yum
ARCH=$(uname -i)
sudo yum -y install java-1.6.0-openjdk-devel.$ARCH zlib zlib-devel bzip2 bzip2-devel
sudo yum -y install java-1.7.0-openjdk-devel.$ARCH zlib zlib-devel bzip2 bzip2-devel
# install gcc/g++ 4.7 via CERN (http://linux.web.cern.ch/linux/devtoolset/)
sudo wget -O /etc/yum.repos.d/slc5-devtoolset.repo http://linuxsoft.cern.ch/cern/devtoolset/slc5-devtoolset.repo
@ -12,7 +12,7 @@ tar xvfz gflags-1.6.tar.gz; cd gflags-1.6; scl enable devtoolset-1.1 ./configure
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib
# set java home so we can build rocksdb jars
export JAVA_HOME=/usr/lib/jvm/java-1.6.0
export JAVA_HOME=/usr/lib/jvm/java-1.7.0
# build rocksdb
cd /rocksdb

@ -0,0 +1,95 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
package org.rocksdb;
/**
 * Comparators are used by RocksDB to determine
 * the ordering of keys.
 *
 * <p>Although this class is declared public, implementers are expected
 * to extend one of its public abstract subclasses rather than this
 * class directly.</p>
 *
 * @param <T> the slice type passed to {@link #compare(AbstractSlice, AbstractSlice)}
 *
 * @see org.rocksdb.Comparator
 * @see org.rocksdb.DirectComparator
 */
public abstract class AbstractComparator<T extends AbstractSlice<?>>
    extends RocksObject {

  /**
   * The name of the comparator. Used to check for comparator
   * mismatches (i.e., a DB created with one comparator is
   * accessed using a different comparator).
   *
   * A new name should be used whenever
   * the comparator implementation changes in a way that will cause
   * the relative ordering of any two keys to change.
   *
   * Names starting with "rocksdb." are reserved and should not be used.
   *
   * @return The name of this comparator implementation
   */
  public abstract String name();

  /**
   * Three-way key comparison
   *
   * @param a Slice access to first key
   * @param b Slice access to second key
   *
   * @return Should return either:
   *    1) &lt; 0 if "a" &lt; "b"
   *    2) == 0 if "a" == "b"
   *    3) &gt; 0 if "a" &gt; "b"
   */
  public abstract int compare(final T a, final T b);

  /**
   * Used to reduce the space requirements
   * for internal data structures like index blocks.
   *
   * If start &lt; limit, you may return a new start which is a
   * shorter string in [start, limit).
   *
   * Simple comparator implementations may return null if they
   * wish to use start unchanged. i.e., an implementation of
   * this method that does nothing is correct.
   *
   * @param start the lower bound (inclusive) of the range
   * @param limit the upper bound (exclusive) of the range
   *
   * @return a shorter start, or null
   */
  public String findShortestSeparator(final String start, final T limit) {
    return null;
  }

  /**
   * Used to reduce the space requirements
   * for internal data structures like index blocks.
   *
   * You may return a new short key (key1) where
   * key1 &gt;= key.
   *
   * Simple comparator implementations may return null if they
   * wish to leave the key unchanged. i.e., an implementation of
   * this method that does nothing is correct.
   *
   * @param key the key for which a short successor may be returned
   *
   * @return a shorter key, or null
   */
  public String findShortSuccessor(final String key) {
    return null;
  }

  /**
   * Deletes underlying C++ comparator pointer.
   *
   * Note that this function should be called only after all
   * RocksDB instances referencing the comparator are closed.
   * Otherwise an undefined behavior will occur.
   */
  @Override protected void disposeInternal() {
    assert(isInitialized());
    disposeInternal(nativeHandle_);
  }

  private native void disposeInternal(long handle);
}

@ -0,0 +1,167 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
package org.rocksdb;
/**
 * Slices are used by RocksDB to provide
 * efficient access to keys and values.
 *
 * This class is package private, implementers
 * should extend either of the public abstract classes:
 * @see org.rocksdb.Slice
 * @see org.rocksdb.DirectSlice
 *
 * Regards the lifecycle of Java Slices in RocksDB:
 *   At present when you configure a Comparator from Java, it creates an
 *   instance of a C++ BaseComparatorJniCallback subclass and
 *   passes that to RocksDB as the comparator. That subclass of
 *   BaseComparatorJniCallback creates the Java
 *   {@link org.rocksdb.AbstractSlice} subclass Objects. When you dispose
 *   the Java {@link org.rocksdb.AbstractComparator} subclass, it disposes the
 *   C++ BaseComparatorJniCallback subclass, which in turn destroys the
 *   Java {@link org.rocksdb.AbstractSlice} subclass Objects.
 *
 * @param <T> the Java type through which the slice data is exposed
 */
abstract class AbstractSlice<T> extends RocksObject {

  /**
   * Returns the data of the slice.
   *
   * @return The slice data. Note, the type of access is
   *   determined by the subclass
   *   @see org.rocksdb.AbstractSlice#data0(long)
   */
  public T data() {
    assert (isInitialized());
    return data0(nativeHandle_);
  }

  /**
   * Access to the data is provided by the
   * subtype as it needs to handle the
   * generic typing.
   *
   * @param handle The address of the underlying
   *   native object.
   *
   * @return Java typed access to the data.
   */
  protected abstract T data0(long handle);

  /**
   * Return the length (in bytes) of the data.
   *
   * @return The length in bytes.
   */
  public int size() {
    assert (isInitialized());
    return size0(nativeHandle_);
  }

  /**
   * Return true if the length of the
   * data is zero.
   *
   * @return true if there is no data, false otherwise.
   */
  public boolean empty() {
    assert (isInitialized());
    return empty0(nativeHandle_);
  }

  /**
   * Creates a string representation of the data
   *
   * @param hex When true, the representation
   *   will be encoded in hexadecimal.
   *
   * @return The string representation of the data.
   */
  public String toString(final boolean hex) {
    assert (isInitialized());
    return toString0(nativeHandle_, hex);
  }

  @Override
  public String toString() {
    return toString(false);
  }

  /**
   * Three-way key comparison
   *
   * @param other A slice to compare against
   *
   * @return Should return either:
   *    1) &lt; 0 if this &lt; other
   *    2) == 0 if this == other
   *    3) &gt; 0 if this &gt; other
   */
  public int compare(final AbstractSlice<?> other) {
    assert (other != null);
    assert (isInitialized());
    return compare0(nativeHandle_, other.nativeHandle_);
  }

  /**
   * Hash code derived from the non-hex string representation of the
   * data, provided so that the {@link #equals(Object)} override is
   * accompanied by a matching {@code hashCode()}.
   *
   * @return the hash code of {@code toString()}
   */
  @Override
  public int hashCode() {
    return toString().hashCode();
  }

  /**
   * If other is a slice object, then
   * we defer to {@link #compare(AbstractSlice) compare}
   * to check equality, otherwise we return false.
   *
   * @param other Object to test for equality
   *
   * @return true when {@code this.compare(other) == 0},
   *   false otherwise.
   */
  @Override
  public boolean equals(final Object other) {
    // instanceof is already false for null, so no separate null check
    if (other instanceof AbstractSlice) {
      return compare((AbstractSlice<?>)other) == 0;
    } else {
      return false;
    }
  }

  /**
   * Determines whether this slice starts with
   * another slice
   *
   * @param prefix Another slice which may or may not
   *   be a prefix of this slice.
   *
   * @return true when this slice starts with the
   *   {@code prefix} slice
   */
  public boolean startsWith(final AbstractSlice<?> prefix) {
    if (prefix != null) {
      assert (isInitialized());
      return startsWith0(nativeHandle_, prefix.nativeHandle_);
    } else {
      return false;
    }
  }

  /**
   * Deletes underlying C++ slice pointer.
   *
   * <p>Note that this function should be called only after all
   * RocksDB instances referencing the slice are closed.
   * Otherwise an undefined behavior will occur.</p>
   */
  @Override
  protected void disposeInternal() {
    assert(isInitialized());
    disposeInternal(nativeHandle_);
  }

  protected native void createNewSliceFromString(String str);
  private native int size0(long handle);
  private native boolean empty0(long handle);
  private native String toString0(long handle, boolean hex);
  private native int compare0(long handle, long otherHandle);
  private native boolean startsWith0(long handle, long otherHandle);
  private native void disposeInternal(long handle);
}

@ -0,0 +1,24 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
package org.rocksdb;
/**
 * Base class for comparators whose compare method is handed
 * {@link org.rocksdb.Slice} arguments, i.e. byte[] based access
 * to the keys.
 *
 * byte[] based slices perform better when small keys
 * are involved. For larger keys consider
 * {@link org.rocksdb.DirectComparator} instead.
 */
public abstract class Comparator extends AbstractComparator<Slice> {

  /**
   * Hands the native handle of the given options to
   * {@code createNewComparator0}.
   *
   * @param copt the options for this comparator; must not be null
   */
  public Comparator(final ComparatorOptions copt) {
    // the no-arg superclass constructor runs implicitly
    final long optionsHandle = copt.nativeHandle_;
    createNewComparator0(optionsHandle);
  }

  private native void createNewComparator0(final long comparatorOptionsHandle);
}

@ -0,0 +1,57 @@
package org.rocksdb;
/**
 * Options controlling the behaviour of Java implementations
 * of AbstractComparator.
 *
 * Note that dispose() must be called before a ComparatorOptions
 * instance becomes out-of-scope to release the allocated memory in C++.
 */
public class ComparatorOptions extends RocksObject {

  /**
   * Constructs the options by calling {@code newComparatorOptions()}.
   */
  public ComparatorOptions() {
    // the no-arg superclass constructor runs implicitly
    newComparatorOptions();
  }

  /**
   * Reports whether an adaptive mutex is used. An adaptive mutex spins
   * in the user space before resorting to kernel, which could reduce
   * context switches when the mutex is not heavily contended; if the
   * mutex is hot, spin time could end up being wasted.
   * Default: false
   *
   * @return true if adaptive mutex is used.
   */
  public boolean useAdaptiveMutex() {
    assert isInitialized();
    final boolean adaptive = useAdaptiveMutex(nativeHandle_);
    return adaptive;
  }

  /**
   * Chooses whether an adaptive mutex is used. An adaptive mutex spins
   * in the user space before resorting to kernel, which could reduce
   * context switches when the mutex is not heavily contended; if the
   * mutex is hot, spin time could end up being wasted.
   * Default: false
   *
   * @param useAdaptiveMutex true if adaptive mutex is used.
   * @return the reference to the current comparator options.
   */
  public ComparatorOptions setUseAdaptiveMutex(final boolean useAdaptiveMutex) {
    assert isInitialized();
    setUseAdaptiveMutex(nativeHandle_, useAdaptiveMutex);
    return this;
  }

  /**
   * Passes the native handle to the native {@code disposeInternal}.
   */
  @Override
  protected void disposeInternal() {
    assert isInitialized();
    disposeInternal(nativeHandle_);
  }

  private native void newComparatorOptions();
  private native boolean useAdaptiveMutex(final long handle);
  private native void setUseAdaptiveMutex(final long handle,
      final boolean useAdaptiveMutex);
  private native void disposeInternal(long handle);
}

@ -0,0 +1,24 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
package org.rocksdb;
/**
 * Base class for comparators whose compare method is handed
 * {@link org.rocksdb.DirectSlice} arguments, i.e. ByteBuffer based
 * access to the keys.
 *
 * ByteBuffer based slices perform better when large keys
 * are involved. For smaller keys consider
 * {@link org.rocksdb.Comparator} instead.
 */
public abstract class DirectComparator extends AbstractComparator<DirectSlice> {

  /**
   * Hands the native handle of the given options to
   * {@code createNewDirectComparator0}.
   *
   * @param copt the options for this comparator; must not be null
   */
  public DirectComparator(final ComparatorOptions copt) {
    // the no-arg superclass constructor runs implicitly
    final long optionsHandle = copt.nativeHandle_;
    createNewDirectComparator0(optionsHandle);
  }

  private native void createNewDirectComparator0(final long comparatorOptionsHandle);
}

@ -0,0 +1,113 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
package org.rocksdb;
import java.nio.ByteBuffer;
/**
 * Slice type that exposes its underlying data
 * through a direct ByteBuffer.
 *
 * ByteBuffer backed slices typically perform better with
 * larger keys and values. For smaller keys and
 * values consider {@link org.rocksdb.Slice} instead.
 */
public class DirectSlice extends AbstractSlice<ByteBuffer> {

  /**
   * Called from JNI to construct a new Java DirectSlice
   * without an underlying C++ object set
   * at creation time.
   *
   * Note: {@code disOwnNativeHandle()} is intentionally called from this
   * default constructor, and the constructor is private, so developers
   * cannot construct their own default DirectSlice objects (at present).
   * Since they cannot, they create no underlying C++ DirectSlice objects
   * this way, and there is nothing to free (dispose) from Java.
   */
  private DirectSlice() {
    disOwnNativeHandle();
  }

  /**
   * Constructs a slice whose data is taken from a String.
   *
   * @param str The string
   */
  public DirectSlice(final String str) {
    createNewSliceFromString(str);
  }

  /**
   * Constructs a slice whose data is read from the provided
   * ByteBuffer up to a certain length.
   *
   * @param data The buffer containing the data
   * @param length The length of the data to use for the slice
   */
  public DirectSlice(final ByteBuffer data, final int length) {
    createNewDirectSlice0(data, length);
  }

  /**
   * Constructs a slice whose data is read from the provided
   * ByteBuffer.
   *
   * @param data The buffer containing the data
   */
  public DirectSlice(final ByteBuffer data) {
    createNewDirectSlice1(data);
  }

  /**
   * Retrieves the byte at a specific offset
   * from the underlying data.
   *
   * @param offset The (zero-based) offset of the byte to retrieve
   *
   * @return the requested byte
   */
  public byte get(int offset) {
    assert isInitialized();
    final byte value = get0(nativeHandle_, offset);
    return value;
  }

  /**
   * Clears the backing slice.
   */
  public void clear() {
    assert isInitialized();
    clear0(nativeHandle_);
  }

  /**
   * Drops the first {@code n} bytes
   * of the backing slice.
   *
   * @param n The number of bytes to drop
   */
  public void removePrefix(final int n) {
    assert isInitialized();
    removePrefix0(nativeHandle_, n);
  }

  @Override protected final native ByteBuffer data0(long handle);
  private native void createNewDirectSlice0(ByteBuffer data, int length);
  private native void createNewDirectSlice1(ByteBuffer data);
  private native byte get0(long handle, int offset);
  private native void clear0(long handle);
  private native void removePrefix0(long handle, int length);
}

@ -15,9 +15,21 @@ package org.rocksdb;
*/
public class HashLinkedListMemTableConfig extends MemTableConfig {
/** Default value assigned to the bucket count by the constructor. */
public static final long DEFAULT_BUCKET_COUNT = 50000;
/** Default huge page TLB size. */
public static final long DEFAULT_HUGE_PAGE_TLB_SIZE = 0;
/** Default number of entries in one bucket before logging starts. */
public static final int DEFAULT_BUCKET_ENTRIES_LOG_THRES = 4096;
/** Default for logging the distribution of number of entries on flush. */
public static final boolean
    DEFAULT_IF_LOG_BUCKET_DIST_WHEN_FLUSH = true;
/** Default entries-per-bucket threshold before a SkipList is used. */
public static final int DEFAULT_THRESHOLD_USE_SKIPLIST = 256;
/**
 * Misspelled name of {@link #DEFAULT_THRESHOLD_USE_SKIPLIST}, kept so
 * that existing references continue to compile.
 *
 * @deprecated use {@link #DEFAULT_THRESHOLD_USE_SKIPLIST} instead.
 */
@Deprecated
public static final int DEFAUL_THRESHOLD_USE_SKIPLIST =
    DEFAULT_THRESHOLD_USE_SKIPLIST;
/**
 * Creates a HashLinkedListMemTableConfig with every setting
 * initialised to its DEFAULT_* constant.
 */
public HashLinkedListMemTableConfig() {
  this.bucketCount_ = DEFAULT_BUCKET_COUNT;
  this.hugePageTlbSize_ = DEFAULT_HUGE_PAGE_TLB_SIZE;
  this.bucketEntriesLoggingThreshold_ = DEFAULT_BUCKET_ENTRIES_LOG_THRES;
  this.ifLogBucketDistWhenFlush_ = DEFAULT_IF_LOG_BUCKET_DIST_WHEN_FLUSH;
  this.thresholdUseSkiplist_ = DEFAUL_THRESHOLD_USE_SKIPLIST;
}
/**
@ -42,13 +54,119 @@ public class HashLinkedListMemTableConfig extends MemTableConfig {
return bucketCount_;
}
/**
 * <p>Sets the size of the huge page TLB; when {@code size <= 0} the
 * hashtable bytes are allocated from malloc instead.</p>
 *
 * <p>Huge pages have to be reserved by the user before they can be
 * allocated, e.g. {@code sysctl -w vm.nr_hugepages=20}</p>
 *
 * <p>See linux documentation/vm/hugetlbpage.txt</p>
 *
 * @param size if set to {@code <= 0} hashtable bytes from malloc
 * @return the reference to the current HashLinkedListMemTableConfig.
 */
public HashLinkedListMemTableConfig setHugePageTlbSize(long size) {
  this.hugePageTlbSize_ = size;
  return this;
}
/**
 * Gets the currently configured hugePageTlbSize.
 *
 * @return the hugePageTlbSize.
 */
public long hugePageTlbSize() {
  return this.hugePageTlbSize_;
}
/**
 * Sets the per-bucket entry count above which the bucket is
 * logged about.
 *
 * @param threshold - number of entries in a single bucket before
 *     logging starts.
 * @return the reference to the current HashLinkedListMemTableConfig.
 */
public HashLinkedListMemTableConfig
    setBucketEntriesLoggingThreshold(int threshold) {
  this.bucketEntriesLoggingThreshold_ = threshold;
  return this;
}
/**
 * Gets the maximum number of entries in one bucket before
 * logging starts.
 *
 * @return maximum number of entries in one bucket before logging
 *     starts.
 */
public int bucketEntriesLoggingThreshold() {
  return this.bucketEntriesLoggingThreshold_;
}
/**
 * If true, the distribution of number of entries will be logged.
 *
 * @param logDistribution - boolean parameter indicating whether the
 *     distribution of number of entries shall be logged.
 * @return the reference to the current HashLinkedListMemTableConfig.
 */
public HashLinkedListMemTableConfig
    setIfLogBucketDistWhenFlush(boolean logDistribution) {
  this.ifLogBucketDistWhenFlush_ = logDistribution;
  return this;
}
/**
 * Tells whether the distribution of number of entries
 * is logged on flush.
 *
 * @return if distribution of number of entries shall be logged.
 */
public boolean ifLogBucketDistWhenFlush() {
  return this.ifLogBucketDistWhenFlush_;
}
/**
 * Sets the maximum number of entries in one bucket. Exceeding this
 * value leads to a switch from LinkedList to SkipList.
 *
 * @param threshold maximum number of entries before SkipList is
 *     used.
 * @return the reference to the current HashLinkedListMemTableConfig.
 */
public HashLinkedListMemTableConfig
    setThresholdUseSkiplist(int threshold) {
  this.thresholdUseSkiplist_ = threshold;
  return this;
}
/**
 * Gets the entries-per-bucket threshold at which the LinkedList of
 * a bucket is replaced by a SkipList.
 *
 * @return entries per bucket threshold before SkipList is used.
 */
public int thresholdUseSkiplist() {
  return this.thresholdUseSkiplist_;
}
@Override protected long newMemTableFactoryHandle()
throws RocksDBException {
return newMemTableFactoryHandle(bucketCount_);
return newMemTableFactoryHandle(bucketCount_, hugePageTlbSize_,
bucketEntriesLoggingThreshold_, ifLogBucketDistWhenFlush_,
thresholdUseSkiplist_);
}
private native long newMemTableFactoryHandle(long bucketCount)
private native long newMemTableFactoryHandle(long bucketCount,
long hugePageTlbSize, int bucketEntriesLoggingThreshold,
boolean ifLogBucketDistWhenFlush, int thresholdUseSkiplist)
throws RocksDBException;
private long bucketCount_;
private long hugePageTlbSize_;
private int bucketEntriesLoggingThreshold_;
private boolean ifLogBucketDistWhenFlush_;
private int thresholdUseSkiplist_;
}

@ -18,6 +18,9 @@ public class HashSkipListMemTableConfig extends MemTableConfig {
public static final int DEFAULT_BRANCHING_FACTOR = 4;
public static final int DEFAULT_HEIGHT = 4;
/**
* HashSkipListMemTableConfig constructor
*/
public HashSkipListMemTableConfig() {
bucketCount_ = DEFAULT_BUCKET_COUNT;
branchingFactor_ = DEFAULT_BRANCHING_FACTOR;

@ -193,6 +193,27 @@ public class Options extends RocksObject {
return maxWriteBufferNumber(nativeHandle_);
}
/**
 * Use the specified comparator for key ordering.
 *
 * The comparator should not be disposed before every options instance
 * using it has been disposed. If dispose() is not called, the
 * comparator object will be GC'd automatically.
 *
 * A comparator instance can be re-used in multiple options instances.
 *
 * @param comparator java instance.
 * @return the instance of the current Options.
 * @see RocksDB#open(Options, String)
 */
public Options setComparator(AbstractComparator comparator) {
  assert isInitialized();
  final long comparatorHandle = comparator.nativeHandle_;
  setComparatorHandle(nativeHandle_, comparatorHandle);
  // remember the comparator in the comparator_ field of these options
  this.comparator_ = comparator;
  return this;
}
private native void setComparatorHandle(long optHandle, long comparatorHandle);
/**
* If true, an error will be thrown during RocksDB.open() if the
* database already exists.
@ -2318,6 +2339,7 @@ public class Options extends RocksObject {
long cacheSize_;
int numShardBits_;
AbstractComparator comparator_;
RocksEnv env_;
MemTableConfig memTableConfig_;
TableFormatConfig tableFormatConfig_;

@ -4,12 +4,46 @@ package org.rocksdb;
* The config for skip-list memtable representation.
*/
public class SkipListMemTableConfig extends MemTableConfig {
public static final long DEFAULT_LOOKAHEAD = 0;
/**
 * Creates a SkipListMemTableConfig whose lookahead is
 * initialised to DEFAULT_LOOKAHEAD.
 */
public SkipListMemTableConfig() {
  this.lookahead_ = DEFAULT_LOOKAHEAD;
}
/**
 * Sets the lookahead for the SkipList.
 *
 * @param lookahead If non-zero, each iterator's seek operation
 *     will start the search from the previously visited record
 *     (doing at most 'lookahead' steps). This is an
 *     optimization for the access pattern including many
 *     seeks with consecutive keys.
 * @return the current instance of SkipListMemTableConfig
 */
public SkipListMemTableConfig setLookahead(long lookahead) {
  this.lookahead_ = lookahead;
  return this;
}
/**
 * Gets the lookahead value that is currently set.
 *
 * @return lookahead value
 */
public long lookahead() {
  return this.lookahead_;
}
@Override protected long newMemTableFactoryHandle() {
return newMemTableFactoryHandle0();
return newMemTableFactoryHandle0(lookahead_);
}
private native long newMemTableFactoryHandle0();
private native long newMemTableFactoryHandle0(long lookahead);
private long lookahead_;
}

@ -0,0 +1,84 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
package org.rocksdb;
/**
 * Base class for slices which will receive
 * byte[] based access to the underlying data.
 *
 * byte[] backed slices typically perform better with
 * small keys and values. When using larger keys and
 * values consider using {@link org.rocksdb.DirectSlice}
 */
public class Slice extends AbstractSlice<byte[]> {

  /**
   * Called from JNI to construct a new Java Slice
   * without an underlying C++ object set
   * at creation time.
   *
   * Note: {@code disOwnNativeHandle()} is intentionally called from this
   * default constructor, and the constructor is private, so developers
   * cannot construct their own default Slice objects (at present).
   * Since they cannot, they create no underlying C++ Slice objects
   * this way, and there is nothing to free (dispose) from Java.
   */
  private Slice() {
    super();
    disOwnNativeHandle();
  }

  /**
   * Constructs a slice
   * where the data is taken from
   * a String.
   *
   * @param str The string
   */
  public Slice(final String str) {
    super();
    createNewSliceFromString(str);
  }

  /**
   * Constructs a slice
   * where the data is a copy of
   * the byte array from a specific offset.
   *
   * @param data The byte array to copy from
   * @param offset The offset within {@code data} to start from
   */
  public Slice(final byte[] data, final int offset) {
    super();
    createNewSlice0(data, offset);
  }

  /**
   * Constructs a slice
   * where the data is a copy of
   * the byte array.
   *
   * @param data The byte array to copy
   */
  public Slice(final byte[] data) {
    super();
    createNewSlice1(data);
  }

  /**
   * Deletes underlying C++ slice pointer
   * and any buffered data.
   *
   * <p>Note that this function should be called only after all
   * RocksDB instances referencing the slice are closed.
   * Otherwise an undefined behavior will occur.</p>
   */
  @Override
  protected void disposeInternal() {
    // disposeInternalBuf is handed the handle of the native slice, so it
    // must run while that slice still exists; super.disposeInternal()
    // deletes the slice and therefore has to come last.
    disposeInternalBuf(nativeHandle_);
    super.disposeInternal();
  }

  @Override protected final native byte[] data0(long handle);
  // the second argument is the offset passed by Slice(byte[], int)
  private native void createNewSlice0(byte[] data, int offset);
  private native void createNewSlice1(byte[] data);
  private native void disposeInternalBuf(long handle);
}

@ -5,6 +5,10 @@ package org.rocksdb;
*/
public class VectorMemTableConfig extends MemTableConfig {
public static final int DEFAULT_RESERVED_SIZE = 0;
/**
 * Creates a VectorMemTableConfig whose reserved size is
 * initialised to DEFAULT_RESERVED_SIZE.
 */
public VectorMemTableConfig() {
  this.reservedSize_ = DEFAULT_RESERVED_SIZE;
}

@ -0,0 +1,166 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
package org.rocksdb.test;
import org.rocksdb.*;
import java.io.IOException;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Random;
import static org.rocksdb.test.Types.byteToInt;
import static org.rocksdb.test.Types.intToByte;
/**
* Abstract tests for both Comparator and DirectComparator
*/
public abstract class AbstractComparatorTest {
/**
* Get a comparator which will expect Integer keys
* and determine an ascending order
*
* @return An integer ascending order key comparator
*/
public abstract AbstractComparator getAscendingIntKeyComparator();
/**
 * Test which stores random keys into the database
 * using the comparator from {@link #getAscendingIntKeyComparator()};
 * it then checks that these keys are read back in
 * ascending order
 *
 * @param db_path A path where we can store database
 *     files temporarily
 *
 * @throws IOException if the database files cannot be removed
 * @throws AssertionError if a check fails or RocksDB reports an error
 */
public void testRoundtrip(final Path db_path) throws IOException {
  Options opt = null;
  RocksDB db = null;
  try {
    opt = new Options();
    opt.setCreateIfMissing(true);
    opt.setComparator(getAscendingIntKeyComparator());

    // store 10,000 random integer keys
    final int ITERATIONS = 10000;
    db = RocksDB.open(opt, db_path.toString());
    final Random random = new Random();
    for (int i = 0; i < ITERATIONS; i++) {
      final byte key[] = intToByte(random.nextInt());
      if (i > 0 && db.get(key) != null) { // does key already exist (avoid duplicates)
        i--; // generate a different key
      } else {
        db.put(key, "value".getBytes());
      }
    }
    db.close();
    db = null; // already closed; keeps the finally block from closing it again

    // re-open db and read from start to end
    // integer keys should be in ascending
    // order as defined by SimpleIntComparator
    db = RocksDB.open(opt, db_path.toString());
    final RocksIterator it = db.newIterator();
    int lastKey = Integer.MIN_VALUE;
    int count = 0;
    try {
      for (it.seekToFirst(); it.isValid(); it.next()) {
        final int thisKey = byteToInt(it.key());
        // the first key has no predecessor (and may itself be MIN_VALUE)
        assert(count == 0 || thisKey > lastKey);
        lastKey = thisKey;
        count++;
      }
    } finally {
      // release the iterator before the database it reads from is closed
      it.dispose();
    }
    db.close();
    db = null;

    assert(count == ITERATIONS);
  } catch (final RocksDBException e) {
    System.err.format("[ERROR]: %s%n", e);
    e.printStackTrace();
    // a database error means the round trip was not verified: fail the test
    throw new AssertionError(e);
  } finally {
    if (db != null) {
      db.close();
    }
    if (opt != null) {
      opt.dispose();
    }
    removeDb(db_path); // cleanup after ourselves!
  }
}
/**
* Compares integer keys
* so that they are in ascending order
*
* @param a 4-bytes representing an integer key
* @param b 4-bytes representing an integer key
*
* @return negative if a < b, 0 if a == b, positive otherwise
*/
protected final int compareIntKeys(final byte[] a, final byte[] b) {
final int iA = byteToInt(a);
final int iB = byteToInt(b);
// protect against int key calculation overflow
final double diff = (double)iA - iB;
final int result;
if (diff < Integer.MIN_VALUE) {
result = Integer.MIN_VALUE;
} else if(diff > Integer.MAX_VALUE) {
result = Integer.MAX_VALUE;
} else {
result = (int)diff;
}
return result;
}
/**
* Utility method for deleting database files
*
* @param db_path The path to the database to remove
* from the filesystem
*/
private static void removeDb(final Path db_path) throws IOException {
Files.walkFileTree(db_path, new SimpleFileVisitor<Path>() {
@Override
public FileVisitResult visitFile(final Path file, final BasicFileAttributes attrs)
throws IOException {
Files.delete(file);
return FileVisitResult.CONTINUE;
}
@Override
public FileVisitResult visitFileFailed(final Path file, IOException exc)
throws IOException {
// try to delete the file anyway, even if its attributes
// could not be read, since delete-only access is
// theoretically possible
Files.delete(file);
return FileVisitResult.CONTINUE;
}
@Override
public FileVisitResult postVisitDirectory(final Path dir, IOException exc)
throws IOException {
if (exc == null) {
Files.delete(dir);
return FileVisitResult.CONTINUE;
} else {
// directory iteration failed; propagate exception
throw exc;
}
}
});
}
}

@ -0,0 +1,34 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
package org.rocksdb.test;
import org.rocksdb.ComparatorOptions;
import org.rocksdb.RocksDB;
import java.util.Random;
/**
 * Checks that the useAdaptiveMutex option of ComparatorOptions
 * can be set and read back.
 */
public class ComparatorOptionsTest {

  static {
    RocksDB.loadLibrary();
  }

  public static void main(String[] args) {
    final ComparatorOptions copt = new ComparatorOptions();
    try {
      { // UseAdaptiveMutex test
        copt.setUseAdaptiveMutex(true);
        assert(copt.useAdaptiveMutex() == true);

        copt.setUseAdaptiveMutex(false);
        assert(copt.useAdaptiveMutex() == false);
      }
    } finally {
      // release the native options object even if an assertion failed
      copt.dispose();
    }
    System.out.println("Passed ComparatorOptionsTest");
  }
}

@ -0,0 +1,45 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
package org.rocksdb.test;
import org.rocksdb.*;
import java.io.IOException;
import java.nio.file.FileSystems;
/**
 * Runs the shared round-trip test with a Java Comparator
 * that orders 4-byte integer keys ascending.
 */
public class ComparatorTest {
  private static final String db_path = "/tmp/comparator_db";

  static {
    RocksDB.loadLibrary();
  }

  public static void main(String[] args) throws IOException {
    // test the round-tripability of keys written and read with the Comparator
    new AscendingIntKeyRoundtrip()
        .testRoundtrip(FileSystems.getDefault().getPath(db_path));

    System.out.println("Passed ComparatorTest");
  }

  /**
   * Supplies the Slice based comparator under test to the
   * shared round-trip test.
   */
  private static final class AscendingIntKeyRoundtrip
      extends AbstractComparatorTest {

    @Override
    public AbstractComparator getAscendingIntKeyComparator() {
      return new Comparator(new ComparatorOptions()) {

        @Override
        public String name() {
          return "test.AscendingIntKeyComparator";
        }

        @Override
        public int compare(final Slice a, final Slice b) {
          // both slices carry the raw key bytes
          final byte[] left = a.data();
          final byte[] right = b.data();
          return compareIntKeys(left, right);
        }
      };
    }
  }
}

@ -0,0 +1,48 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
package org.rocksdb.test;
import org.rocksdb.*;
import java.io.IOException;
import java.nio.file.FileSystems;
/**
 * Runs the shared round-trip test with a Java DirectComparator
 * that orders 4-byte integer keys ascending.
 */
public class DirectComparatorTest {
  private static final String db_path = "/tmp/direct_comparator_db";

  static {
    RocksDB.loadLibrary();
  }

  public static void main(String[] args) throws IOException {
    // test the round-tripability of keys written and read with the DirectComparator
    new AscendingIntKeyRoundtrip()
        .testRoundtrip(FileSystems.getDefault().getPath(db_path));

    System.out.println("Passed DirectComparatorTest");
  }

  /**
   * Supplies the DirectSlice based comparator under test to the
   * shared round-trip test.
   */
  private static final class AscendingIntKeyRoundtrip
      extends AbstractComparatorTest {

    @Override
    public AbstractComparator getAscendingIntKeyComparator() {
      return new DirectComparator(new ComparatorOptions()) {

        @Override
        public String name() {
          return "test.AscendingIntKeyDirectComparator";
        }

        @Override
        public int compare(final DirectSlice a, final DirectSlice b) {
          // copy the first four bytes of each key out of its buffer
          final byte[] left = new byte[4];
          final byte[] right = new byte[4];
          a.data().get(left);
          b.data().get(right);
          return compareIntKeys(left, right);
        }
      };
    }
  }
}

@ -0,0 +1,107 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
package org.rocksdb.test;
import org.rocksdb.*;
/**
 * Checks the default values and the setters of every mem-table
 * configuration and that each one can be applied to Options.
 */
public class MemTableTest {

  static {
    RocksDB.loadLibrary();
  }

  public static void main(String[] args) {
    checkHashSkipList();
    checkSkipList();
    checkHashLinkedList();
    checkVector();
    System.out.println("Mem-table test passed");
  }

  /** Test HashSkipListMemTableConfig */
  private static void checkHashSkipList() {
    Options options = new Options();
    HashSkipListMemTableConfig config = new HashSkipListMemTableConfig();

    assert(config.bucketCount() == 1000000);
    config.setBucketCount(2000000);
    assert(config.bucketCount() == 2000000);

    assert(config.height() == 4);
    config.setHeight(5);
    assert(config.height() == 5);

    assert(config.branchingFactor() == 4);
    config.setBranchingFactor(6);
    assert(config.branchingFactor() == 6);

    try {
      options.setMemTableConfig(config);
    } catch (RocksDBException e) {
      assert(false);
    }
    config = null;
    options.dispose();
    collectGarbage();
  }

  /** Test SkipList */
  private static void checkSkipList() {
    Options options = new Options();
    SkipListMemTableConfig config = new SkipListMemTableConfig();

    assert(config.lookahead() == 0);
    config.setLookahead(20);
    assert(config.lookahead() == 20);

    try {
      options.setMemTableConfig(config);
    } catch (RocksDBException e) {
      assert(false);
    }
    config = null;
    options.dispose();
    collectGarbage();
  }

  /** Test HashLinkedListMemTableConfig */
  private static void checkHashLinkedList() {
    Options options = new Options();
    HashLinkedListMemTableConfig config = new HashLinkedListMemTableConfig();

    assert(config.bucketCount() == 50000);
    config.setBucketCount(100000);
    assert(config.bucketCount() == 100000);

    assert(config.hugePageTlbSize() == 0);
    config.setHugePageTlbSize(1);
    assert(config.hugePageTlbSize() == 1);

    assert(config.bucketEntriesLoggingThreshold() == 4096);
    config.setBucketEntriesLoggingThreshold(200);
    assert(config.bucketEntriesLoggingThreshold() == 200);

    assert(config.ifLogBucketDistWhenFlush() == true);
    config.setIfLogBucketDistWhenFlush(false);
    assert(config.ifLogBucketDistWhenFlush() == false);

    assert(config.thresholdUseSkiplist() == 256);
    config.setThresholdUseSkiplist(29);
    assert(config.thresholdUseSkiplist() == 29);

    try {
      options.setMemTableConfig(config);
    } catch (RocksDBException e) {
      assert(false);
    }
    config = null;
    options.dispose();
    collectGarbage();
  }

  /** test VectorMemTableConfig */
  private static void checkVector() {
    Options options = new Options();
    VectorMemTableConfig config = new VectorMemTableConfig();

    assert(config.reservedSize() == 0);
    config.setReservedSize(123);
    assert(config.reservedSize() == 123);

    try {
      options.setMemTableConfig(config);
    } catch (RocksDBException e) {
      assert(false);
    }
    config = null;
    options.dispose();
    collectGarbage();
  }

  /** Requests a garbage collection followed by finalization. */
  private static void collectGarbage() {
    System.gc();
    System.runFinalization();
  }
}

@ -0,0 +1,43 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
package org.rocksdb.test;
/**
* Simple type conversion methods
* for use in tests
*/
public class Types {
/**
* Convert first 4 bytes of a byte array to an int
*
* @param data The byte array
*
* @return An integer
*/
public static int byteToInt(final byte data[]) {
return (data[0] & 0xff) |
((data[1] & 0xff) << 8) |
((data[2] & 0xff) << 16) |
((data[3] & 0xff) << 24);
}
/**
* Convert an int to 4 bytes
*
* @param v The int
*
* @return A byte array containing 4 bytes
*/
public static byte[] intToByte(final int v) {
return new byte[] {
(byte)((v >>> 0) & 0xff),
(byte)((v >>> 8) & 0xff),
(byte)((v >>> 16) & 0xff),
(byte)((v >>> 24) & 0xff)
};
}
}

@ -0,0 +1,66 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// This file implements the "bridge" between Java and C++ for
// rocksdb::Comparator.
#include <stdio.h>
#include <stdlib.h>
#include <jni.h>
#include <string>
#include <functional>
#include "include/org_rocksdb_AbstractComparator.h"
#include "include/org_rocksdb_Comparator.h"
#include "include/org_rocksdb_DirectComparator.h"
#include "rocksjni/comparatorjnicallback.h"
#include "rocksjni/portal.h"
// <editor-fold desc="org.rocksdb.AbstractComparator">
/*
* Class: org_rocksdb_AbstractComparator
* Method: disposeInternal
* Signature: (J)V
*/
void Java_org_rocksdb_AbstractComparator_disposeInternal(
    JNIEnv* env, jobject jobj, jlong handle) {
  // the handle is the address of the native callback object;
  // destroy it through its base class
  rocksdb::BaseComparatorJniCallback* callback =
      reinterpret_cast<rocksdb::BaseComparatorJniCallback*>(handle);
  delete callback;
}
// </editor-fold>
// <editor-fold desc="org.rocksdb.Comparator">
/*
* Class: org_rocksdb_Comparator
* Method: createNewComparator0
* Signature: (J)V
*/
void Java_org_rocksdb_Comparator_createNewComparator0(
    JNIEnv* env, jobject jobj, jlong copt_handle) {
  // copt_handle is the address of the native comparator options
  const rocksdb::ComparatorJniCallbackOptions* options =
      reinterpret_cast<rocksdb::ComparatorJniCallbackOptions*>(copt_handle);

  // create the native callback for this Java object and
  // hand its address to the Java side
  rocksdb::AbstractComparatorJni::setHandle(
      env, jobj, new rocksdb::ComparatorJniCallback(env, jobj, options));
}
// </editor-fold>
// <editor-fold desc="org.rocksdb.DirectComparator">
/*
* Class: org_rocksdb_DirectComparator
* Method: createNewDirectComparator0
* Signature: (J)V
*/
void Java_org_rocksdb_DirectComparator_createNewDirectComparator0(
    JNIEnv* env, jobject jobj, jlong copt_handle) {
  // copt_handle is the address of the native comparator options
  const rocksdb::ComparatorJniCallbackOptions* options =
      reinterpret_cast<rocksdb::ComparatorJniCallbackOptions*>(copt_handle);

  // create the native callback for this Java object and
  // hand its address to the Java side
  rocksdb::AbstractComparatorJni::setHandle(
      env, jobj, new rocksdb::DirectComparatorJniCallback(env, jobj, options));
}
// </editor-fold>

@ -0,0 +1,176 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// This file implements the callback "bridge" between Java and C++ for
// rocksdb::Comparator.
#include "rocksjni/comparatorjnicallback.h"
#include "rocksjni/portal.h"
namespace rocksdb {
BaseComparatorJniCallback::BaseComparatorJniCallback(
    JNIEnv* env, jobject jComparator,
    const ComparatorJniCallbackOptions* copt)
    : mtx_compare(new port::Mutex(copt->use_adaptive_mutex)),
    mtx_findShortestSeparator(new port::Mutex(copt->use_adaptive_mutex)) {
  // The comparator may be invoked from several threads, so keep
  // the JavaVM (not the per-thread JNIEnv) for later use
  const jint vm_status = env->GetJavaVM(&m_jvm);
  assert(vm_status == JNI_OK);

  // A global reference keeps the Java comparator usable
  // across the individual callback invocations
  m_jComparator = env->NewGlobalRef(jComparator);

  // The name of a comparator does not change during its lifetime,
  // so it is fetched once and cached; copyString releases the
  // UTF characters it obtains from the Java string
  jstring jname = static_cast<jstring>(env->CallObjectMethod(
      m_jComparator, AbstractComparatorJni::getNameMethodId(env)));
  m_name = JniUtil::copyString(env, jname);

  // Cache the ids of the Java methods that are called back
  m_jCompareMethodId = AbstractComparatorJni::getCompareMethodId(env);
  m_jFindShortestSeparatorMethodId =
      AbstractComparatorJni::getFindShortestSeparatorMethodId(env);
  m_jFindShortSuccessorMethodId =
      AbstractComparatorJni::getFindShortSuccessorMethodId(env);
}
/**
* Attach/Get a JNIEnv for the current native thread
*/
JNIEnv* BaseComparatorJniCallback::getJniEnv() const {
  // AttachCurrentThread writes the JNIEnv of the calling thread
  JNIEnv* thread_env;
  const jint attach_status = m_jvm->AttachCurrentThread(
      reinterpret_cast<void**>(&thread_env), nullptr);
  assert(attach_status == JNI_OK);
  return thread_env;
}
const char* BaseComparatorJniCallback::Name() const {
  // the name was fetched from the Java comparator in the constructor
  const std::string& cached_name = m_name;
  return cached_name.c_str();
}
int BaseComparatorJniCallback::Compare(const Slice& a, const Slice& b) const {
JNIEnv* m_env = getJniEnv();
// TODO(adamretter): slice objects can potentially be cached using thread
// local variables to avoid locking. Could make this configurable depending on
// performance.
mtx_compare->Lock();
AbstractSliceJni::setHandle(m_env, m_jSliceA, &a);
AbstractSliceJni::setHandle(m_env, m_jSliceB, &b);
jint result =
m_env->CallIntMethod(m_jComparator, m_jCompareMethodId, m_jSliceA,
m_jSliceB);
mtx_compare->Unlock();
m_jvm->DetachCurrentThread();
return result;
}
void BaseComparatorJniCallback::FindShortestSeparator(
std::string* start, const Slice& limit) const {
if (start == nullptr) {
return;
}
JNIEnv* m_env = getJniEnv();
const char* startUtf = start->c_str();
jstring jsStart = m_env->NewStringUTF(startUtf);
// TODO(adamretter): slice object can potentially be cached using thread local
// variable to avoid locking. Could make this configurable depending on
// performance.
mtx_findShortestSeparator->Lock();
AbstractSliceJni::setHandle(m_env, m_jSliceLimit, &limit);
jstring jsResultStart =
(jstring)m_env->CallObjectMethod(m_jComparator,
m_jFindShortestSeparatorMethodId, jsStart, m_jSliceLimit);
mtx_findShortestSeparator->Unlock();
m_env->DeleteLocalRef(jsStart);
if (jsResultStart != nullptr) {
// update start with result
*start =
JniUtil::copyString(m_env, jsResultStart); // also releases jsResultStart
}
m_jvm->DetachCurrentThread();
}
void BaseComparatorJniCallback::FindShortSuccessor(std::string* key) const {
if (key == nullptr) {
return;
}
JNIEnv* m_env = getJniEnv();
const char* keyUtf = key->c_str();
jstring jsKey = m_env->NewStringUTF(keyUtf);
jstring jsResultKey =
(jstring)m_env->CallObjectMethod(m_jComparator,
m_jFindShortSuccessorMethodId, jsKey);
m_env->DeleteLocalRef(jsKey);
if (jsResultKey != nullptr) {
// updates key with result, also releases jsResultKey.
*key = JniUtil::copyString(m_env, jsResultKey);
}
m_jvm->DetachCurrentThread();
}
BaseComparatorJniCallback::~BaseComparatorJniCallback() {
  JNIEnv* m_env = getJniEnv();

  // drop the global reference taken on the Java comparator
  // in the constructor
  m_env->DeleteGlobalRef(m_jComparator);

  // Note: do not need to explicitly detach, as this function is effectively
  // called from the Java class's disposeInternal method, and so already
  // has an attached thread, getJniEnv above is just a no-op Attach to get
  // the env jvm->DetachCurrentThread();

  // free the mutexes that were allocated with new in the constructor
  delete mtx_compare;
  delete mtx_findShortestSeparator;
}
ComparatorJniCallback::ComparatorJniCallback(
    JNIEnv* env, jobject jComparator,
    const ComparatorJniCallbackOptions* copt)
    : BaseComparatorJniCallback(env, jComparator, copt) {
  // Create the reusable org.rocksdb.Slice objects that are handed
  // to the Java callbacks, each one pinned by a global reference
  jobject slice_a = SliceJni::construct0(env);
  m_jSliceA = env->NewGlobalRef(slice_a);

  jobject slice_b = SliceJni::construct0(env);
  m_jSliceB = env->NewGlobalRef(slice_b);

  jobject slice_limit = SliceJni::construct0(env);
  m_jSliceLimit = env->NewGlobalRef(slice_limit);
}
ComparatorJniCallback::~ComparatorJniCallback() {
  // release the global references on the reusable Java slices
  JNIEnv* const env = getJniEnv();
  jobject const slice_refs[] = {m_jSliceA, m_jSliceB, m_jSliceLimit};
  for (jobject slice_ref : slice_refs) {
    env->DeleteGlobalRef(slice_ref);
  }
}
DirectComparatorJniCallback::DirectComparatorJniCallback(
    JNIEnv* env, jobject jComparator,
    const ComparatorJniCallbackOptions* copt)
    : BaseComparatorJniCallback(env, jComparator, copt) {
  // Create the reusable org.rocksdb.DirectSlice objects that are handed
  // to the Java callbacks, each one pinned by a global reference
  jobject slice_a = DirectSliceJni::construct0(env);
  m_jSliceA = env->NewGlobalRef(slice_a);

  jobject slice_b = DirectSliceJni::construct0(env);
  m_jSliceB = env->NewGlobalRef(slice_b);

  jobject slice_limit = DirectSliceJni::construct0(env);
  m_jSliceLimit = env->NewGlobalRef(slice_limit);
}
DirectComparatorJniCallback::~DirectComparatorJniCallback() {
  // release the global references on the reusable Java slices
  JNIEnv* const env = getJniEnv();
  jobject const slice_refs[] = {m_jSliceA, m_jSliceB, m_jSliceLimit};
  for (jobject slice_ref : slice_refs) {
    env->DeleteGlobalRef(slice_ref);
  }
}
} // namespace rocksdb

@ -0,0 +1,95 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// This file implements the callback "bridge" between Java and C++ for
// rocksdb::Comparator and rocksdb::DirectComparator.
#ifndef JAVA_ROCKSJNI_COMPARATORJNICALLBACK_H_
#define JAVA_ROCKSJNI_COMPARATORJNICALLBACK_H_
#include <jni.h>
#include <string>
#include "rocksdb/comparator.h"
#include "rocksdb/slice.h"
#include "port/port.h"
namespace rocksdb {
// Options that control how the Java comparator callback bridge
// synchronises access to its shared state.
struct ComparatorJniCallbackOptions {
  // When true an adaptive mutex is used: it spins in user space
  // before resorting to the kernel. That can save context switches
  // while the mutex is lightly contended, but wastes spin time
  // when the mutex is hot.
  // Default: false
  bool use_adaptive_mutex = false;

  ComparatorJniCallbackOptions() = default;
};
/**
* This class acts as a bridge between C++
* and Java. The methods in this class are
* called back from the RocksDB storage engine (C++);
* they in turn call back to the appropriate Java method,
* which enables Comparators to be implemented in Java.
*
* The design of this Comparator caches the Java Slice
* objects that are used in the compare and findShortestSeparator
* method callbacks. Instead of creating new objects for each callback
* of those functions, the cached objects are reused via setHandle,
* which is a lot faster. Unfortunately this means that we have to
* introduce independent locking in regions of each of those methods
* via the mutexes mtx_compare and mtx_findShortestSeparator respectively.
*/
class BaseComparatorJniCallback : public Comparator {
public:
// Remembers the JavaVM of env, takes a global reference on jComparator,
// caches the comparator's name and the ids of the Java callback methods,
// and creates both mutexes according to copt->use_adaptive_mutex.
BaseComparatorJniCallback(
JNIEnv* env, jobject jComparator,
const ComparatorJniCallbackOptions* copt);
// Releases the global reference on the Java comparator.
virtual ~BaseComparatorJniCallback();
// Returns the name that was cached from the Java comparator.
virtual const char* Name() const;
// Points the cached Java slices A and B at a and b and returns the
// result of the Java compare method.
virtual int Compare(const Slice& a, const Slice& b) const;
// Passes *start (as a Java string) and limit (via the cached limit slice)
// to the Java findShortestSeparator method; *start is replaced when that
// method returns a non-null string. Does nothing when start is null.
virtual void FindShortestSeparator(
std::string* start, const Slice& limit) const;
// Passes *key (as a Java string) to the Java findShortSuccessor method;
// *key is replaced when that method returns a non-null string.
// Does nothing when key is null.
virtual void FindShortSuccessor(std::string* key) const;
private:
// used for synchronisation in compare method
port::Mutex* mtx_compare;
// used for synchronisation in findShortestSeparator method
port::Mutex* mtx_findShortestSeparator;
// the Java VM, used to attach the calling thread in the callbacks
JavaVM* m_jvm;
// global reference to the Java comparator object
jobject m_jComparator;
// name of the comparator, cached in the constructor
std::string m_name;
// cached ids of the Java callback methods
jmethodID m_jCompareMethodId;
jmethodID m_jFindShortestSeparatorMethodId;
jmethodID m_jFindShortSuccessorMethodId;
protected:
// Attaches the current native thread to the Java VM and returns its JNIEnv.
JNIEnv* getJniEnv() const;
// Cached Java slice objects reused by the callbacks; the subclasses
// create them in their constructors and release them in their destructors.
jobject m_jSliceA;
jobject m_jSliceB;
jobject m_jSliceLimit;
};
// Callback bridge whose cached Java slice objects are
// instances of org.rocksdb.Slice.
class ComparatorJniCallback : public BaseComparatorJniCallback {
public:
// Creates the three cached org.rocksdb.Slice objects and holds them
// through global references.
ComparatorJniCallback(
JNIEnv* env, jobject jComparator,
const ComparatorJniCallbackOptions* copt);
// Deletes the global references on the cached slice objects.
~ComparatorJniCallback();
};
// Callback bridge whose cached Java slice objects are
// instances of org.rocksdb.DirectSlice.
class DirectComparatorJniCallback : public BaseComparatorJniCallback {
public:
// Creates the three cached org.rocksdb.DirectSlice objects and holds them
// through global references.
DirectComparatorJniCallback(
JNIEnv* env, jobject jComparator,
const ComparatorJniCallbackOptions* copt);
// Deletes the global references on the cached slice objects.
~DirectComparatorJniCallback();
};
} // namespace rocksdb
#endif // JAVA_ROCKSJNI_COMPARATORJNICALLBACK_H_

@ -34,16 +34,26 @@ jlong Java_org_rocksdb_HashSkipListMemTableConfig_newMemTableFactoryHandle(
/*
* Class: org_rocksdb_HashLinkedListMemTableConfig
* Method: newMemTableFactoryHandle
* Signature: (J)J
* Signature: (JJIZI)J
*/
jlong Java_org_rocksdb_HashLinkedListMemTableConfig_newMemTableFactoryHandle(
JNIEnv* env, jobject jobj, jlong jbucket_count) {
rocksdb::Status s = rocksdb::check_if_jlong_fits_size_t(jbucket_count);
if (s.ok()) {
JNIEnv* env, jobject jobj, jlong jbucket_count, jlong jhuge_page_tlb_size,
jint jbucket_entries_logging_threshold,
jboolean jif_log_bucket_dist_when_flash, jint jthreshold_use_skiplist) {
rocksdb::Status statusBucketCount =
rocksdb::check_if_jlong_fits_size_t(jbucket_count);
rocksdb::Status statusHugePageTlb =
rocksdb::check_if_jlong_fits_size_t(jhuge_page_tlb_size);
if (statusBucketCount.ok() && statusHugePageTlb.ok()) {
return reinterpret_cast<jlong>(rocksdb::NewHashLinkListRepFactory(
static_cast<size_t>(jbucket_count)));
static_cast<size_t>(jbucket_count),
static_cast<size_t>(jhuge_page_tlb_size),
static_cast<int32_t>(jbucket_entries_logging_threshold),
static_cast<bool>(jif_log_bucket_dist_when_flash),
static_cast<int32_t>(jthreshold_use_skiplist)));
}
rocksdb::RocksDBExceptionJni::ThrowNew(env, s);
rocksdb::RocksDBExceptionJni::ThrowNew(env,
!statusBucketCount.ok()?statusBucketCount:statusHugePageTlb);
return 0;
}
@ -66,9 +76,15 @@ jlong Java_org_rocksdb_VectorMemTableConfig_newMemTableFactoryHandle(
/*
* Class: org_rocksdb_SkipListMemTableConfig
* Method: newMemTableFactoryHandle0
* Signature: ()J
* Signature: (J)J
*/
jlong Java_org_rocksdb_SkipListMemTableConfig_newMemTableFactoryHandle0(
JNIEnv* env, jobject jobj) {
return reinterpret_cast<jlong>(new rocksdb::SkipListFactory());
JNIEnv* env, jobject jobj, jlong jlookahead) {
rocksdb::Status s = rocksdb::check_if_jlong_fits_size_t(jlookahead);
if (s.ok()) {
return reinterpret_cast<jlong>(new rocksdb::SkipListFactory(
static_cast<size_t>(jlookahead)));
}
rocksdb::RocksDBExceptionJni::ThrowNew(env, s);
return 0;
}

@ -14,6 +14,9 @@
#include "include/org_rocksdb_Options.h"
#include "include/org_rocksdb_WriteOptions.h"
#include "include/org_rocksdb_ReadOptions.h"
#include "include/org_rocksdb_ComparatorOptions.h"
#include "rocksjni/comparatorjnicallback.h"
#include "rocksjni/portal.h"
#include "rocksdb/db.h"
#include "rocksdb/options.h"
@ -166,6 +169,17 @@ jlong Java_org_rocksdb_Options_statisticsPtr(
return reinterpret_cast<jlong>(st);
}
/*
* Class: org_rocksdb_Options
* Method: setComparatorHandle
* Signature: (JJ)V
*/
void Java_org_rocksdb_Options_setComparatorHandle(
    JNIEnv* env, jobject jobj, jlong jopt_handle, jlong jcomparator_handle) {
  // both handles are addresses of native objects
  rocksdb::Options* options =
      reinterpret_cast<rocksdb::Options*>(jopt_handle);
  rocksdb::Comparator* comparator =
      reinterpret_cast<rocksdb::Comparator*>(jcomparator_handle);
  options->comparator = comparator;
}
/*
* Class: org_rocksdb_Options
* Method: maxWriteBufferNumber
@ -1785,3 +1799,49 @@ void Java_org_rocksdb_ReadOptions_setTailing(
reinterpret_cast<rocksdb::ReadOptions*>(jhandle)->tailing =
static_cast<bool>(jtailing);
}
/////////////////////////////////////////////////////////////////////
// rocksdb::ComparatorOptions
/*
* Class: org_rocksdb_ComparatorOptions
* Method: newComparatorOptions
* Signature: ()V
*/
void Java_org_rocksdb_ComparatorOptions_newComparatorOptions(
    JNIEnv* env, jobject jobj) {
  // allocate the native options and store their address
  // in the Java object
  rocksdb::ComparatorJniCallbackOptions* native_options =
      new rocksdb::ComparatorJniCallbackOptions();
  rocksdb::ComparatorOptionsJni::setHandle(env, jobj, native_options);
}
/*
* Class: org_rocksdb_ComparatorOptions
* Method: useAdaptiveMutex
* Signature: (J)Z
*/
jboolean Java_org_rocksdb_ComparatorOptions_useAdaptiveMutex(
    JNIEnv * env, jobject jobj, jlong jhandle) {
  // jhandle is the address of the native options object
  const rocksdb::ComparatorJniCallbackOptions* native_options =
      reinterpret_cast<rocksdb::ComparatorJniCallbackOptions*>(jhandle);
  return native_options->use_adaptive_mutex;
}
/*
* Class: org_rocksdb_ComparatorOptions
* Method: setUseAdaptiveMutex
* Signature: (JZ)V
*/
void Java_org_rocksdb_ComparatorOptions_setUseAdaptiveMutex(
    JNIEnv * env, jobject jobj, jlong jhandle, jboolean juse_adaptive_mutex) {
  // jhandle is the address of the native options object
  rocksdb::ComparatorJniCallbackOptions* native_options =
      reinterpret_cast<rocksdb::ComparatorJniCallbackOptions*>(jhandle);
  native_options->use_adaptive_mutex =
      static_cast<bool>(juse_adaptive_mutex);
}
/*
* Class: org_rocksdb_ComparatorOptions
* Method: disposeInternal
* Signature: (J)V
*/
void Java_org_rocksdb_ComparatorOptions_disposeInternal(
    JNIEnv * env, jobject jobj, jlong jhandle) {
  // destroy the native options object ...
  rocksdb::ComparatorJniCallbackOptions* native_options =
      reinterpret_cast<rocksdb::ComparatorJniCallbackOptions*>(jhandle);
  delete native_options;

  // ... and clear the handle kept by the Java object
  rocksdb::ComparatorOptionsJni::setHandle(env, jobj, nullptr);
}

@ -12,10 +12,13 @@
#include <jni.h>
#include <limits>
#include <string>
#include "rocksdb/db.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/status.h"
#include "rocksdb/utilities/backupable_db.h"
#include "rocksjni/comparatorjnicallback.h"
namespace rocksdb {
@ -362,6 +365,168 @@ class ColumnFamilyHandleJni {
}
};
// The portal class for org.rocksdb.ComparatorOptions
class ComparatorOptionsJni {
 public:
  // Looks up the Java class org.rocksdb.ComparatorOptions.
  static jclass getJClass(JNIEnv* env) {
    jclass clazz = env->FindClass("org/rocksdb/ComparatorOptions");
    assert(clazz != nullptr);
    return clazz;
  }

  // Returns the id of the long field nativeHandle_ of
  // org.rocksdb.ComparatorOptions, which holds the address of the
  // rocksdb::ComparatorJniCallbackOptions. The id is looked up once.
  static jfieldID getHandleFieldID(JNIEnv* env) {
    static jfieldID handle_field = env->GetFieldID(
        getJClass(env), "nativeHandle_", "J");
    assert(handle_field != nullptr);
    return handle_field;
  }

  // Stores the address of op in the nativeHandle_ field of jobj.
  static void setHandle(
      JNIEnv* env, jobject jobj,
      const rocksdb::ComparatorJniCallbackOptions* op) {
    const jlong address = reinterpret_cast<jlong>(op);
    env->SetLongField(jobj, getHandleFieldID(env), address);
  }
};
// The portal class for org.rocksdb.AbstractComparator
class AbstractComparatorJni {
 public:
  // Looks up the Java class org.rocksdb.AbstractComparator.
  static jclass getJClass(JNIEnv* env) {
    jclass clazz = env->FindClass("org/rocksdb/AbstractComparator");
    assert(clazz != nullptr);
    return clazz;
  }

  // Returns the id of the long field nativeHandle_ of
  // org.rocksdb.AbstractComparator, which holds the address of the
  // native comparator callback. The id is looked up once.
  static jfieldID getHandleFieldID(JNIEnv* env) {
    static jfieldID handle_field = env->GetFieldID(
        getJClass(env), "nativeHandle_", "J");
    assert(handle_field != nullptr);
    return handle_field;
  }

  // Returns the id of the Java method `name` of
  // org.rocksdb.AbstractComparator.
  static jmethodID getNameMethodId(JNIEnv* env) {
    static jmethodID name_method = env->GetMethodID(
        getJClass(env), "name", "()Ljava/lang/String;");
    assert(name_method != nullptr);
    return name_method;
  }

  // Returns the id of the Java method `compare` of
  // org.rocksdb.AbstractComparator.
  static jmethodID getCompareMethodId(JNIEnv* env) {
    static jmethodID compare_method = env->GetMethodID(
        getJClass(env), "compare",
        "(Lorg/rocksdb/AbstractSlice;Lorg/rocksdb/AbstractSlice;)I");
    assert(compare_method != nullptr);
    return compare_method;
  }

  // Returns the id of the Java method `findShortestSeparator` of
  // org.rocksdb.AbstractComparator.
  static jmethodID getFindShortestSeparatorMethodId(JNIEnv* env) {
    static jmethodID separator_method = env->GetMethodID(
        getJClass(env), "findShortestSeparator",
        "(Ljava/lang/String;Lorg/rocksdb/AbstractSlice;)Ljava/lang/String;");
    assert(separator_method != nullptr);
    return separator_method;
  }

  // Returns the id of the Java method `findShortSuccessor` of
  // org.rocksdb.AbstractComparator.
  static jmethodID getFindShortSuccessorMethodId(JNIEnv* env) {
    static jmethodID successor_method = env->GetMethodID(
        getJClass(env), "findShortSuccessor",
        "(Ljava/lang/String;)Ljava/lang/String;");
    assert(successor_method != nullptr);
    return successor_method;
  }

  // Reads the nativeHandle_ field of jobj and returns it as a pointer
  // to the native comparator callback.
  static rocksdb::BaseComparatorJniCallback* getHandle(
      JNIEnv* env, jobject jobj) {
    const jlong address = env->GetLongField(jobj, getHandleFieldID(env));
    return reinterpret_cast<rocksdb::BaseComparatorJniCallback*>(address);
  }

  // Stores the address of op in the nativeHandle_ field of jobj.
  static void setHandle(
      JNIEnv* env, jobject jobj, const rocksdb::BaseComparatorJniCallback* op) {
    const jlong address = reinterpret_cast<jlong>(op);
    env->SetLongField(jobj, getHandleFieldID(env), address);
  }
};
// The portal class for org.rocksdb.AbstractSlice
class AbstractSliceJni {
 public:
  // Looks up the Java class org.rocksdb.AbstractSlice.
  static jclass getJClass(JNIEnv* env) {
    jclass clazz = env->FindClass("org/rocksdb/AbstractSlice");
    assert(clazz != nullptr);
    return clazz;
  }

  // Returns the id of the long field nativeHandle_ of
  // org.rocksdb.AbstractSlice, which holds the address of the
  // rocksdb::Slice. The id is looked up once.
  static jfieldID getHandleFieldID(JNIEnv* env) {
    static jfieldID handle_field = env->GetFieldID(
        getJClass(env), "nativeHandle_", "J");
    assert(handle_field != nullptr);
    return handle_field;
  }

  // Reads the nativeHandle_ field of jobj and returns it as a pointer
  // to rocksdb::Slice.
  static rocksdb::Slice* getHandle(JNIEnv* env, jobject jobj) {
    const jlong address = env->GetLongField(jobj, getHandleFieldID(env));
    return reinterpret_cast<rocksdb::Slice*>(address);
  }

  // Stores the address of op in the nativeHandle_ field of jobj.
  static void setHandle(
      JNIEnv* env, jobject jobj, const rocksdb::Slice* op) {
    const jlong address = reinterpret_cast<jlong>(op);
    env->SetLongField(jobj, getHandleFieldID(env), address);
  }
};
// The portal class for org.rocksdb.Slice
class SliceJni {
 public:
  // Looks up the Java class org.rocksdb.Slice.
  static jclass getJClass(JNIEnv* env) {
    jclass clazz = env->FindClass("org/rocksdb/Slice");
    assert(clazz != nullptr);
    return clazz;
  }

  // Creates a new org.rocksdb.Slice through its no-argument
  // constructor; the constructor id is looked up once.
  static jobject construct0(JNIEnv* env) {
    static jmethodID default_ctor =
        env->GetMethodID(getJClass(env), "<init>", "()V");
    assert(default_ctor != nullptr);
    return env->NewObject(getJClass(env), default_ctor);
  }
};
// The portal class for org.rocksdb.DirectSlice
class DirectSliceJni {
 public:
  // Looks up the Java class org.rocksdb.DirectSlice.
  static jclass getJClass(JNIEnv* env) {
    jclass clazz = env->FindClass("org/rocksdb/DirectSlice");
    assert(clazz != nullptr);
    return clazz;
  }

  // Creates a new org.rocksdb.DirectSlice through its no-argument
  // constructor; the constructor id is looked up once.
  static jobject construct0(JNIEnv* env) {
    static jmethodID default_ctor =
        env->GetMethodID(getJClass(env), "<init>", "()V");
    assert(default_ctor != nullptr);
    return env->NewObject(getJClass(env), default_ctor);
  }
};
class ListJni {
public:
// Get the java class id of java.util.List.
@ -425,5 +590,20 @@ class ListJni {
return mid;
}
};
class JniUtil {
public:
/**
* Copies a jstring to a std::string
* and releases the original jstring
*/
static std::string copyString(JNIEnv* env, jstring js) {
const char *utf = env->GetStringUTFChars(js, NULL);
std::string name(utf);
env->ReleaseStringUTFChars(js, utf);
return name;
}
};
} // namespace rocksdb
#endif // JAVA_ROCKSJNI_PORTAL_H_

@ -0,0 +1,251 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// This file implements the "bridge" between Java and C++ for
// rocksdb::Slice.
#include <stdio.h>
#include <stdlib.h>
#include <jni.h>
#include <string>
#include "include/org_rocksdb_AbstractSlice.h"
#include "include/org_rocksdb_Slice.h"
#include "include/org_rocksdb_DirectSlice.h"
#include "rocksdb/slice.h"
#include "rocksjni/portal.h"
// <editor-fold desc="org.rocksdb.AbstractSlice">
/*
* Class: org_rocksdb_AbstractSlice
* Method: createNewSliceFromString
* Signature: (Ljava/lang/String;)V
*/
void Java_org_rocksdb_AbstractSlice_createNewSliceFromString(
    JNIEnv* env, jobject jobj, jstring str) {
  const std::string s = rocksdb::JniUtil::copyString(env, str);

  // A rocksdb::Slice only points at its data, so the characters must
  // outlive this function: copy them into a heap buffer instead of
  // referring to the local string, which is destroyed on return.
  // NOTE(review): AbstractSlice's disposeInternal only deletes the Slice
  // object, so this buffer is not freed there - confirm who should own it.
  const std::string::size_type len = s.size();
  char* buf = new char[len + 1];
  s.copy(buf, len);
  buf[len] = '\0';

  const rocksdb::Slice* slice = new rocksdb::Slice(buf, len);
  rocksdb::AbstractSliceJni::setHandle(env, jobj, slice);
}
/*
* Class: org_rocksdb_AbstractSlice
* Method: size0
* Signature: (J)I
*/
jint Java_org_rocksdb_AbstractSlice_size0(
    JNIEnv* env, jobject jobj, jlong handle) {
  // handle is the address of the native slice
  const rocksdb::Slice* native_slice =
      reinterpret_cast<rocksdb::Slice*>(handle);
  // the size is converted to the Java int type
  return static_cast<jint>(native_slice->size());
}
/*
* Class: org_rocksdb_AbstractSlice
* Method: empty0
* Signature: (J)Z
*/
jboolean Java_org_rocksdb_AbstractSlice_empty0(
    JNIEnv* env, jobject jobj, jlong handle) {
  // handle is the address of the native slice
  const rocksdb::Slice* native_slice =
      reinterpret_cast<rocksdb::Slice*>(handle);
  return static_cast<jboolean>(native_slice->empty());
}
/*
* Class: org_rocksdb_AbstractSlice
* Method: toString0
* Signature: (JZ)Ljava/lang/String;
*/
// Converts the native Slice addressed by `handle` to a Java string via
// Slice::ToString(hex) and NewStringUTF.
// NOTE(review): NewStringUTF takes a NUL-terminated C string, so with
// hex == false any slice containing a zero byte would presumably be cut short
// -- confirm whether callers rely on this for binary keys.
jstring Java_org_rocksdb_AbstractSlice_toString0(
    JNIEnv* env, jobject jobj, jlong handle, jboolean hex) {
  const auto* native_slice = reinterpret_cast<const rocksdb::Slice*>(handle);
  const std::string text = native_slice->ToString(hex);
  const char* c_text = text.c_str();
  return env->NewStringUTF(c_text);
}
/*
* Class: org_rocksdb_AbstractSlice
* Method: compare0
* Signature: (JJ)I
*/
// Compares the Slice addressed by `handle` against the one addressed by
// `otherHandle` and returns the result of Slice::compare.
jint Java_org_rocksdb_AbstractSlice_compare0(
    JNIEnv* env, jobject jobj, jlong handle, jlong otherHandle) {
  const auto& lhs = *reinterpret_cast<const rocksdb::Slice*>(handle);
  const auto& rhs = *reinterpret_cast<const rocksdb::Slice*>(otherHandle);
  return lhs.compare(rhs);
}
/*
* Class: org_rocksdb_AbstractSlice
* Method: startsWith0
* Signature: (JJ)Z
*/
// Returns the result of Slice::starts_with for the Slice addressed by
// `handle`, using the Slice addressed by `otherHandle` as the prefix.
jboolean Java_org_rocksdb_AbstractSlice_startsWith0(
    JNIEnv* env, jobject jobj, jlong handle, jlong otherHandle) {
  const auto& subject = *reinterpret_cast<const rocksdb::Slice*>(handle);
  const auto& prefix = *reinterpret_cast<const rocksdb::Slice*>(otherHandle);
  return static_cast<jboolean>(subject.starts_with(prefix));
}
/*
* Class: org_rocksdb_AbstractSlice
* Method: disposeInternal
* Signature: (J)V
*/
// Destroys the native Slice object addressed by `handle`. Only the Slice
// itself is deleted here; the bytes it refers to are not touched.
void Java_org_rocksdb_AbstractSlice_disposeInternal(
    JNIEnv* env, jobject jobj, jlong handle) {
  const auto* native_slice = reinterpret_cast<rocksdb::Slice*>(handle);
  delete native_slice;
}
// </editor-fold>
// <editor-fold desc="org.rocksdb.Slice">
/*
* Class: org_rocksdb_Slice
* Method: createNewSlice0
* Signature: ([BI)V
*/
// Creates a native rocksdb::Slice over a copy of data[offset..end) and stores
// its address in the Java object's native handle.
//
// If `offset` lies outside [0, data.length] an
// ArrayIndexOutOfBoundsException is raised and no Slice is created; without
// this check a negative length would be passed to new[].
void Java_org_rocksdb_Slice_createNewSlice0(
    JNIEnv * env, jobject jobj, jbyteArray data, jint offset) {
  const jsize dataSize = env->GetArrayLength(data);
  if (offset < 0 || offset > dataSize) {
    jclass ex = env->FindClass("java/lang/ArrayIndexOutOfBoundsException");
    // If the class lookup itself failed an exception is already pending.
    if (ex != nullptr) {
      env->ThrowNew(ex, "offset is outside the bounds of the data array");
    }
    return;
  }
  const jsize len = dataSize - offset;
  // Allocate as char[] so the allocation type matches the delete[] on
  // Slice::data_ in Java_org_rocksdb_Slice_disposeInternalBuf.
  char* buf = new char[len];
  env->GetByteArrayRegion(data, offset, len, reinterpret_cast<jbyte*>(buf));
  const rocksdb::Slice* slice = new rocksdb::Slice(buf, len);
  rocksdb::AbstractSliceJni::setHandle(env, jobj, slice);
}
/*
* Class: org_rocksdb_Slice
* Method: createNewSlice1
* Signature: ([B)V
*/
// Creates a native rocksdb::Slice over a copy of the whole byte array and
// stores its address in the Java object's native handle.
//
// The bytes are copied straight into the destination buffer with
// GetByteArrayRegion, which avoids obtaining (and having to NULL-check and
// release) a separate element pointer from the JVM.
void Java_org_rocksdb_Slice_createNewSlice1(
    JNIEnv * env, jobject jobj, jbyteArray data) {
  const jsize len = env->GetArrayLength(data);
  char* buf = new char[len];
  env->GetByteArrayRegion(data, 0, len, reinterpret_cast<jbyte*>(buf));
  const rocksdb::Slice* slice = new rocksdb::Slice(buf, len);
  rocksdb::AbstractSliceJni::setHandle(env, jobj, slice);
  // NOTE: buf will be deleted in the org.rocksdb.Slice#dispose method
}
/*
* Class: org_rocksdb_Slice
* Method: data0
* Signature: (J)[B
*/
// Returns a new Java byte[] holding a copy of the bytes referenced by the
// native Slice addressed by `handle`.
//
// Returns NULL if the JVM could not allocate the array; writing into a NULL
// array reference must be avoided.
jbyteArray Java_org_rocksdb_Slice_data0(
    JNIEnv* env, jobject jobj, jlong handle) {
  const rocksdb::Slice* slice = reinterpret_cast<rocksdb::Slice*>(handle);
  const int len = slice->size();
  const jbyteArray data = env->NewByteArray(len);
  if (data == nullptr) {
    return nullptr;
  }
  // SetByteArrayRegion only reads from the source buffer, so constness can
  // be kept.
  env->SetByteArrayRegion(data, 0, len,
      reinterpret_cast<const jbyte*>(slice->data()));
  return data;
}
/*
* Class: org_rocksdb_Slice
* Method: disposeInternalBuf
* Signature: (J)V
*/
// Frees, with delete[], the byte buffer that the native Slice addressed by
// `handle` points at. The Slice object itself is left alone.
void Java_org_rocksdb_Slice_disposeInternalBuf(
    JNIEnv * env, jobject jobj, jlong handle) {
  const auto* native_slice = reinterpret_cast<const rocksdb::Slice*>(handle);
  const char* owned_bytes = native_slice->data_;
  delete [] owned_bytes;
}
// </editor-fold>
// <editor-fold desc="org.rocksdb.DirectSlice">
/*
* Class: org_rocksdb_DirectSlice
* Method: createNewDirectSlice0
* Signature: (Ljava/nio/ByteBuffer;I)V
*/
// Creates a native rocksdb::Slice that refers to the first `length` bytes at
// the address of the given direct buffer (no copy is made) and stores its
// address in the Java object's native handle.
// NOTE(review): the result of GetDirectBufferAddress is used unchecked --
// confirm that callers only ever pass direct ByteBuffers.
void Java_org_rocksdb_DirectSlice_createNewDirectSlice0(
    JNIEnv* env, jobject jobj, jobject data, jint length) {
  void* const address = env->GetDirectBufferAddress(data);
  const char* bytes = static_cast<const char*>(address);
  const rocksdb::Slice* native_slice = new rocksdb::Slice(bytes, length);
  rocksdb::AbstractSliceJni::setHandle(env, jobj, native_slice);
}
/*
* Class: org_rocksdb_DirectSlice
* Method: createNewDirectSlice1
* Signature: (Ljava/nio/ByteBuffer;)V
*/
// Creates a native rocksdb::Slice from the address of the given direct
// buffer, using the Slice constructor that takes only a character pointer,
// and stores its address in the Java object's native handle.
// NOTE(review): that constructor presumably determines the length by
// scanning for a terminating NUL -- confirm the buffer is NUL-terminated.
void Java_org_rocksdb_DirectSlice_createNewDirectSlice1(
    JNIEnv* env, jobject jobj, jobject data) {
  void* const address = env->GetDirectBufferAddress(data);
  const char* bytes = static_cast<const char*>(address);
  const rocksdb::Slice* native_slice = new rocksdb::Slice(bytes);
  rocksdb::AbstractSliceJni::setHandle(env, jobj, native_slice);
}
/*
* Class: org_rocksdb_DirectSlice
* Method: data0
* Signature: (J)Ljava/lang/Object;
*/
// Wraps the bytes referenced by the native Slice addressed by `handle` in a
// new direct ByteBuffer (address = data(), capacity = size()) and returns it.
jobject Java_org_rocksdb_DirectSlice_data0(
    JNIEnv* env, jobject jobj, jlong handle) {
  const auto* native_slice = reinterpret_cast<const rocksdb::Slice*>(handle);
  // NewDirectByteBuffer wants a mutable address, hence the const_cast.
  void* const address = const_cast<char*>(native_slice->data());
  const jlong capacity = native_slice->size();
  return env->NewDirectByteBuffer(address, capacity);
}
/*
* Class: org_rocksdb_DirectSlice
* Method: get0
* Signature: (JI)B
*/
// Returns the byte at position `offset` of the native Slice addressed by
// `handle`, as produced by Slice::operator[].
jbyte Java_org_rocksdb_DirectSlice_get0(
    JNIEnv* env, jobject jobj, jlong handle, jint offset) {
  const auto& native_slice = *reinterpret_cast<rocksdb::Slice*>(handle);
  return static_cast<jbyte>(native_slice[offset]);
}
/*
* Class: org_rocksdb_DirectSlice
* Method: clear0
* Signature: (J)V
*/
// Resets the native Slice addressed by `handle` via Slice::clear().
//
// The referenced bytes are deliberately NOT freed here. The DirectSlice
// constructors in this file point the Slice at the address returned by
// GetDirectBufferAddress, i.e. memory belonging to the Java ByteBuffer that
// was never allocated with new[]; calling delete[] on it is undefined
// behaviour.
// NOTE(review): if some DirectSlice instances do own a new[]-allocated
// buffer, ownership needs to be tracked explicitly so that only those are
// released -- confirm against the Java class.
void Java_org_rocksdb_DirectSlice_clear0(
    JNIEnv* env, jobject jobj, jlong handle) {
  rocksdb::Slice* slice = reinterpret_cast<rocksdb::Slice*>(handle);
  slice->clear();
}
/*
* Class: org_rocksdb_DirectSlice
* Method: removePrefix0
* Signature: (JI)V
*/
// Drops the first `length` bytes from the native Slice addressed by `handle`
// by delegating to Slice::remove_prefix.
void Java_org_rocksdb_DirectSlice_removePrefix0(
    JNIEnv* env, jobject jobj, jlong handle, jint length) {
  auto& native_slice = *reinterpret_cast<rocksdb::Slice*>(handle);
  native_slice.remove_prefix(length);
}
// </editor-fold>

@ -47,8 +47,9 @@ class AdaptiveTableFactory : public TableFactory {
const CompressionOptions& compression_opts) const override;
// Sanitizes the specified DB Options.
Status SanitizeDBOptions(const DBOptions* db_opts) const override {
if (db_opts->allow_mmap_reads == false) {
Status SanitizeOptions(const DBOptions& db_opts,
const ColumnFamilyOptions& cf_opts) const override {
if (db_opts.allow_mmap_reads == false) {
return Status::NotSupported(
"AdaptiveTable with allow_mmap_reads == false is not supported.");
}

@ -63,6 +63,22 @@ TableBuilder* BlockBasedTableFactory::NewTableBuilder(
return table_builder;
}
// Validates that the block-based table options are consistent with the given
// column family options.
//
// Returns InvalidArgument when:
//  - a hash-search index is requested but no prefix_extractor is configured;
//  - cache_index_and_filter_blocks is set while no_block_cache is also set.
// Returns OK otherwise. db_opts is not consulted by these checks.
Status BlockBasedTableFactory::SanitizeOptions(
    const DBOptions& db_opts,
    const ColumnFamilyOptions& cf_opts) const {
  if (table_options_.index_type == BlockBasedTableOptions::kHashSearch &&
      cf_opts.prefix_extractor == nullptr) {
    return Status::InvalidArgument("Hash index is specified for block-based "
                                   "table, but prefix_extractor is not given");
  }
  if (table_options_.cache_index_and_filter_blocks &&
      table_options_.no_block_cache) {
    // The two literals are concatenated; only the first carries the comma.
    return Status::InvalidArgument("Enable cache_index_and_filter_blocks, "
                                   "but block cache is disabled");
  }
  return Status::OK();
}
std::string BlockBasedTableFactory::GetPrintableTableOptions() const {
std::string ret;
ret.reserve(20000);

@ -46,9 +46,8 @@ class BlockBasedTableFactory : public TableFactory {
const CompressionOptions& compression_opts) const override;
// Sanitizes the specified DB Options.
Status SanitizeDBOptions(const DBOptions* db_opts) const override {
return Status::OK();
}
Status SanitizeOptions(const DBOptions& db_opts,
const ColumnFamilyOptions& cf_opts) const override;
std::string GetPrintableTableOptions() const override;

@ -483,8 +483,8 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
}
// Will use block cache for index/filter blocks access?
if (table_options.block_cache &&
table_options.cache_index_and_filter_blocks) {
if (table_options.cache_index_and_filter_blocks) {
assert(table_options.block_cache != nullptr);
// Hack: Call NewIndexIterator() to implicitly add index to the block_cache
unique_ptr<Iterator> iter(new_table->NewIndexIterator(ReadOptions()));
s = iter->status();
@ -506,19 +506,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
// Set filter block
if (rep->filter_policy) {
// First try reading full_filter, then reading block_based_filter
for (auto filter_block_prefix : { kFullFilterBlockPrefix,
kFilterBlockPrefix }) {
std::string key = filter_block_prefix;
key.append(rep->filter_policy->Name());
BlockHandle handle;
if (FindMetaBlock(meta_iter.get(), key, &handle).ok()) {
rep->filter.reset(ReadFilter(handle, rep,
filter_block_prefix, nullptr));
break;
}
}
rep->filter.reset(ReadFilter(rep, meta_iter.get(), nullptr));
}
} else {
delete index_reader;
@ -726,33 +714,43 @@ Status BlockBasedTable::PutDataBlockToCache(
}
FilterBlockReader* BlockBasedTable::ReadFilter(
const BlockHandle& filter_handle, BlockBasedTable::Rep* rep,
const std::string& filter_block_prefix, size_t* filter_size) {
Rep* rep, Iterator* meta_index_iter, size_t* filter_size) {
// TODO: We might want to unify with ReadBlockFromFile() if we start
// requiring checksum verification in Table::Open.
ReadOptions opt;
BlockContents block;
if (!ReadBlockContents(rep->file.get(), rep->footer, opt, filter_handle,
&block, rep->ioptions.env, false).ok()) {
return nullptr;
}
for (auto prefix : {kFullFilterBlockPrefix, kFilterBlockPrefix}) {
std::string filter_block_key = prefix;
filter_block_key.append(rep->filter_policy->Name());
BlockHandle handle;
if (FindMetaBlock(meta_index_iter, filter_block_key, &handle).ok()) {
BlockContents block;
if (!ReadBlockContents(rep->file.get(), rep->footer, ReadOptions(),
handle, &block, rep->ioptions.env, false).ok()) {
// Error reading the block
return nullptr;
}
if (filter_size) {
*filter_size = block.data.size();
}
if (filter_size) {
*filter_size = block.data.size();
}
assert(rep->filter_policy);
if (kFilterBlockPrefix == filter_block_prefix) {
return new BlockBasedFilterBlockReader(
rep->ioptions.prefix_extractor, rep->table_options, std::move(block));
} else if (kFullFilterBlockPrefix == filter_block_prefix) {
auto filter_bits_reader = rep->filter_policy->
GetFilterBitsReader(block.data);
if (filter_bits_reader != nullptr) {
return new FullFilterBlockReader(rep->ioptions.prefix_extractor,
rep->table_options, std::move(block),
filter_bits_reader);
assert(rep->filter_policy);
if (kFilterBlockPrefix == prefix) {
return new BlockBasedFilterBlockReader(
rep->ioptions.prefix_extractor, rep->table_options,
std::move(block));
} else if (kFullFilterBlockPrefix == prefix) {
auto filter_bits_reader = rep->filter_policy->
GetFilterBitsReader(block.data);
if (filter_bits_reader != nullptr) {
return new FullFilterBlockReader(rep->ioptions.prefix_extractor,
rep->table_options,
std::move(block),
filter_bits_reader);
}
} else {
assert(false);
return nullptr;
}
}
}
return nullptr;
@ -760,8 +758,11 @@ FilterBlockReader* BlockBasedTable::ReadFilter(
BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
bool no_io) const {
// filter pre-populated
if (rep_->filter != nullptr) {
// If cache_index_and_filter_blocks is false, filter should be pre-populated.
// We will return rep_->filter anyway. rep_->filter can be nullptr if filter
// read fails at Open() time. We don't want to reload again since it will
// most probably fail again.
if (!rep_->table_options.cache_index_and_filter_blocks) {
return {rep_->filter.get(), nullptr /* cache handle */};
}
@ -775,8 +776,7 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
auto key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size,
rep_->footer.metaindex_handle(),
cache_key
);
cache_key);
Statistics* statistics = rep_->ioptions.statistics;
auto cache_handle =
@ -797,22 +797,12 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
auto s = ReadMetaBlock(rep_, &meta, &iter);
if (s.ok()) {
// First try reading full_filter, then reading block_based_filter
for (auto filter_block_prefix : {kFullFilterBlockPrefix,
kFilterBlockPrefix}) {
std::string filter_block_key = filter_block_prefix;
filter_block_key.append(rep_->filter_policy->Name());
BlockHandle handle;
if (FindMetaBlock(iter.get(), filter_block_key, &handle).ok()) {
filter = ReadFilter(handle, rep_, filter_block_prefix, &filter_size);
if (filter == nullptr) break; // err happen in ReadFilter
assert(filter_size > 0);
cache_handle = block_cache->Insert(
key, filter, filter_size, &DeleteCachedEntry<FilterBlockReader>);
RecordTick(statistics, BLOCK_CACHE_ADD);
break;
}
filter = ReadFilter(rep_, iter.get(), &filter_size);
if (filter != nullptr) {
assert(filter_size > 0);
cache_handle = block_cache->Insert(
key, filter, filter_size, &DeleteCachedEntry<FilterBlockReader>);
RecordTick(statistics, BLOCK_CACHE_ADD);
}
}
}

@ -183,10 +183,10 @@ class BlockBasedTable : public TableReader {
std::unique_ptr<Iterator>* iter);
// Create the filter from the filter block.
static FilterBlockReader* ReadFilter(const BlockHandle& filter_handle,
Rep* rep,
const std::string& filter_block_prefix,
size_t* filter_size = nullptr);
static FilterBlockReader* ReadFilter(
Rep* rep,
Iterator* meta_index_iter,
size_t* filter_size = nullptr);
static void SetupCacheKeyPrefix(Rep* rep);

@ -64,7 +64,8 @@ class CuckooTableFactory : public TableFactory {
const CompressionType, const CompressionOptions&) const override;
// Sanitizes the specified DB Options.
Status SanitizeDBOptions(const DBOptions* db_opts) const override {
Status SanitizeOptions(const DBOptions& db_opts,
const ColumnFamilyOptions& cf_opts) const override {
return Status::OK();
}

@ -170,8 +170,9 @@ class PlainTableFactory : public TableFactory {
static const char kValueTypeSeqId0 = 0xFF;
// Sanitizes the specified DB Options.
Status SanitizeDBOptions(const DBOptions* db_opts) const override {
if (db_opts->allow_mmap_reads == false) {
Status SanitizeOptions(const DBOptions& db_opts,
const ColumnFamilyOptions& cf_opts) const override {
if (db_opts.allow_mmap_reads == false) {
return Status::NotSupported(
"PlainTable with allow_mmap_reads == false is not supported.");
}

@ -1461,8 +1461,6 @@ TEST(BlockBasedTableTest, BlockCacheDisabledTest) {
options.create_if_missing = true;
options.statistics = CreateDBStatistics();
BlockBasedTableOptions table_options;
// Intentionally commented out: table_options.cache_index_and_filter_blocks =
// true;
table_options.block_cache = NewLRUCache(1024);
table_options.filter_policy.reset(NewBloomFilterPolicy(10));
options.table_factory.reset(new BlockBasedTableFactory(table_options));
@ -1521,7 +1519,7 @@ TEST(BlockBasedTableTest, FilterBlockInBlockCache) {
c.Finish(options, ioptions, table_options,
GetPlainInternalComparator(options.comparator), &keys, &kvmap);
// preloading filter/index blocks is prohibited.
auto reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
auto* reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
ASSERT_TRUE(!reader->TEST_filter_block_preloaded());
ASSERT_TRUE(!reader->TEST_index_reader_preloaded());
@ -1567,28 +1565,11 @@ TEST(BlockBasedTableTest, FilterBlockInBlockCache) {
// release the iterator so that the block cache can reset correctly.
iter.reset();
// -- PART 2: Open without block cache
table_options.no_block_cache = true;
table_options.block_cache.reset();
options.table_factory.reset(new BlockBasedTableFactory(table_options));
options.statistics = CreateDBStatistics(); // reset the stats
const ImmutableCFOptions ioptions1(options);
c.Reopen(ioptions1);
table_options.no_block_cache = false;
{
iter.reset(c.NewIterator());
iter->SeekToFirst();
ASSERT_EQ("key", iter->key().ToString());
BlockCachePropertiesSnapshot props(options.statistics.get());
// Nothing is affected at all
props.AssertEqual(0, 0, 0, 0);
}
// -- PART 3: Open with very small block cache
// -- PART 2: Open with very small block cache
// In this test, no block will ever get hit since the block cache is
// too small to fit even one entry.
table_options.block_cache = NewLRUCache(1);
options.statistics = CreateDBStatistics();
options.table_factory.reset(new BlockBasedTableFactory(table_options));
const ImmutableCFOptions ioptions2(options);
c.Reopen(ioptions2);
@ -1598,7 +1579,6 @@ TEST(BlockBasedTableTest, FilterBlockInBlockCache) {
0, 0, 0);
}
{
// Both index and data block get accessed.
// It first cache index block then data block. But since the cache size
@ -1618,6 +1598,37 @@ TEST(BlockBasedTableTest, FilterBlockInBlockCache) {
props.AssertEqual(2, 0, 0 + 1, // data block miss
0);
}
iter.reset();
// -- PART 3: Open table with bloom filter enabled but not in SST file
table_options.block_cache = NewLRUCache(4096);
table_options.cache_index_and_filter_blocks = false;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
TableConstructor c3(BytewiseComparator());
std::string user_key = "k01";
InternalKey internal_key(user_key, 0, kTypeValue);
c3.Add(internal_key.Encode().ToString(), "hello");
ImmutableCFOptions ioptions3(options);
// Generate table without filter policy
c3.Finish(options, ioptions3, table_options,
GetPlainInternalComparator(options.comparator), &keys, &kvmap);
// Open table with filter policy
table_options.filter_policy.reset(NewBloomFilterPolicy(1));
options.table_factory.reset(new BlockBasedTableFactory(table_options));
options.statistics = CreateDBStatistics();
ImmutableCFOptions ioptions4(options);
ASSERT_OK(c3.Reopen(ioptions4));
reader = dynamic_cast<BlockBasedTable*>(c3.GetTableReader());
ASSERT_TRUE(!reader->TEST_filter_block_preloaded());
std::string value;
GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
GetContext::kNotFound, user_key, &value,
nullptr, nullptr);
ASSERT_OK(reader->Get(ReadOptions(), user_key, &get_context));
ASSERT_EQ(value, "hello");
BlockCachePropertiesSnapshot props(options.statistics.get());
props.AssertFilterBlockStat(0, 0);
}
TEST(BlockBasedTableTest, BlockCacheLeak) {

@ -7,6 +7,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <chrono>
#include <deque>
#include <set>
#include <dirent.h>
@ -1350,25 +1351,13 @@ class PosixEnv : public Env {
}
virtual uint64_t NowMicros() {
struct timeval tv;
// TODO(kailiu) MAC DON'T HAVE THIS
gettimeofday(&tv, nullptr);
return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
return std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now().time_since_epoch()).count();
}
virtual uint64_t NowNanos() {
#ifdef OS_LINUX
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return static_cast<uint64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#elif __MACH__
clock_serv_t cclock;
mach_timespec_t ts;
host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock);
clock_get_time(cclock, &ts);
mach_port_deallocate(mach_task_self(), cclock);
#endif
return static_cast<uint64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
return std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::steady_clock::now().time_since_epoch()).count();
}
virtual void SleepForMicroseconds(int micros) {

@ -126,6 +126,10 @@ void MutableCFOptions::Dump(Logger* log) const {
}
result.resize(result.size() - 2);
Log(log, "max_bytes_for_level_multiplier_additional: %s", result.c_str());
Log(log, " max_mem_compaction_level: %d",
max_mem_compaction_level);
Log(log, " max_sequential_skip_in_iterations: %" PRIu64,
max_sequential_skip_in_iterations);
}
} // namespace rocksdb

@ -37,7 +37,10 @@ struct MutableCFOptions {
max_bytes_for_level_base(options.max_bytes_for_level_base),
max_bytes_for_level_multiplier(options.max_bytes_for_level_multiplier),
max_bytes_for_level_multiplier_additional(
options.max_bytes_for_level_multiplier_additional)
options.max_bytes_for_level_multiplier_additional),
max_mem_compaction_level(options.max_mem_compaction_level),
max_sequential_skip_in_iterations(
options.max_sequential_skip_in_iterations)
{
RefreshDerivedOptions(ioptions);
}
@ -62,7 +65,9 @@ struct MutableCFOptions {
target_file_size_base(0),
target_file_size_multiplier(0),
max_bytes_for_level_base(0),
max_bytes_for_level_multiplier(0)
max_bytes_for_level_multiplier(0),
max_mem_compaction_level(0),
max_sequential_skip_in_iterations(0)
{}
// Must be called after any change to MutableCFOptions
@ -105,6 +110,10 @@ struct MutableCFOptions {
uint64_t max_bytes_for_level_base;
int max_bytes_for_level_multiplier;
std::vector<int> max_bytes_for_level_multiplier_additional;
int max_mem_compaction_level;
// Misc options
uint64_t max_sequential_skip_in_iterations;
// Derived options
// Per-level target file size.

@ -144,6 +144,19 @@ bool ParseCompactionOptions(const std::string& name, const std::string& value,
start = end + 1;
}
}
} else if (name == "max_mem_compaction_level") {
new_options->max_mem_compaction_level = ParseInt(value);
} else {
return false;
}
return true;
}
template<typename OptionsType>
bool ParseMiscOptions(const std::string& name, const std::string& value,
OptionsType* new_options) {
if (name == "max_sequential_skip_in_iterations") {
new_options->max_sequential_skip_in_iterations = ParseUint64(value);
} else {
return false;
}
@ -160,6 +173,7 @@ bool GetMutableOptionsFromStrings(
for (const auto& o : options_map) {
if (ParseMemtableOptions(o.first, o.second, new_options)) {
} else if (ParseCompactionOptions(o.first, o.second, new_options)) {
} else if (ParseMiscOptions(o.first, o.second, new_options)) {
} else {
return false;
}
@ -228,6 +242,7 @@ bool GetColumnFamilyOptionsFromMap(
try {
if (ParseMemtableOptions(o.first, o.second, new_options)) {
} else if (ParseCompactionOptions(o.first, o.second, new_options)) {
} else if (ParseMiscOptions(o.first, o.second, new_options)) {
} else if (o.first == "min_write_buffer_number_to_merge") {
new_options->min_write_buffer_number_to_merge = ParseInt(o.second);
} else if (o.first == "compression") {
@ -270,8 +285,6 @@ bool GetColumnFamilyOptionsFromMap(
ParseInt(o.second.substr(start, o.second.size() - start));
} else if (o.first == "num_levels") {
new_options->num_levels = ParseInt(o.second);
} else if (o.first == "max_mem_compaction_level") {
new_options->max_mem_compaction_level = ParseInt(o.second);
} else if (o.first == "purge_redundant_kvs_while_flush") {
new_options->purge_redundant_kvs_while_flush =
ParseBoolean(o.first, o.second);
@ -286,8 +299,6 @@ bool GetColumnFamilyOptionsFromMap(
} else if (o.first == "compaction_options_fifo") {
new_options->compaction_options_fifo.max_table_files_size
= ParseUint64(o.second);
} else if (o.first == "max_sequential_skip_in_iterations") {
new_options->max_sequential_skip_in_iterations = ParseUint64(o.second);
} else if (o.first == "inplace_update_support") {
new_options->inplace_update_support = ParseBoolean(o.first, o.second);
} else if (o.first == "inplace_update_num_locks") {

@ -26,13 +26,14 @@ namespace rocksdb {
// (2) a ThreadLocalPtr is destroyed
typedef void (*UnrefHandler)(void* ptr);
// Thread local storage that only stores value of pointer type. The storage
// distinguish data coming from different thread and different ThreadLocalPtr
// instances. For example, if a regular thread_local variable A is declared
// in DBImpl, two DBImpl objects would share the same A. ThreadLocalPtr avoids
// the confliction. The total storage size equals to # of threads * # of
// ThreadLocalPtr instances. It is not efficient in terms of space, but it
// should serve most of our use cases well and keep code simple.
// ThreadLocalPtr stores only values of pointer type. Different from
// the usual thread-local-storage, ThreadLocalPtr has the ability to
// distinguish data coming from different threads and different
// ThreadLocalPtr instances. For example, if a regular thread_local
// variable A is declared in DBImpl, two DBImpl objects would share
// the same A. However, a ThreadLocalPtr that is defined under the
// scope of DBImpl can avoid such conflicts. As a result, its memory
// usage would be O(# of threads * # of ThreadLocalPtr instances).
class ThreadLocalPtr {
public:
explicit ThreadLocalPtr(UnrefHandler handler = nullptr);

Loading…
Cancel
Save