From ddb8039e3aa1fe99d4a17c21cc970831258d1bb8 Mon Sep 17 00:00:00 2001 From: Naveen Date: Mon, 18 Aug 2014 14:03:46 -0700 Subject: [PATCH 01/19] RocksDB static build Make file changes to download and build the dependencies .Load the shared library when RocksDB is initialized --- Makefile | 32 +++++++++++++++- java/org/rocksdb/NativeLibraryLoader.java | 46 +++++++++++++++++++++++ java/org/rocksdb/Options.java | 3 ++ java/org/rocksdb/RocksDB.java | 24 ++++++++++-- 4 files changed, 100 insertions(+), 5 deletions(-) create mode 100644 java/org/rocksdb/NativeLibraryLoader.java diff --git a/Makefile b/Makefile index 1bd202bc9..fd7c8c7d1 100644 --- a/Makefile +++ b/Makefile @@ -175,7 +175,7 @@ endif # PLATFORM_SHARED_EXT .PHONY: blackbox_crash_test check clean coverage crash_test ldb_tests \ release tags valgrind_check whitebox_crash_test format static_lib shared_lib all \ - dbg + dbg rocksdbjavastatic rocksdbjava all: $(LIBRARY) $(PROGRAMS) $(TESTS) @@ -480,6 +480,36 @@ ROCKSDBJNILIB = librocksdbjni.jnilib JAVA_INCLUDE = -I/System/Library/Frameworks/JavaVM.framework/Headers/ endif + +rocksdbjavastatic: + #build zlib + curl -O http://zlib.net/zlib-1.2.8.tar.gz + tar xvzf zlib-1.2.8.tar.gz + cd zlib-1.2.8 && CFLAGS='-fPIC' ./configure --static && make + cp zlib-1.2.8/libz.a . + rm -rf zlib-1.2.8.tar.gz zlib-1.2.8 + + #build bzip + curl -O http://www.bzip.org/1.0.6/bzip2-1.0.6.tar.gz + tar xvzf bzip2-1.0.6.tar.gz + cd bzip2-1.0.6 && make CFLAGS='-fPIC -Wall -Winline -O2 -g -D_FILE_OFFSET_BITS=64' + cp bzip2-1.0.6/libbz2.a . + rm -rf bzip2-1.0.6.tar.gz bzip2-1.0.6 + + #build snappy + curl -O https://snappy.googlecode.com/files/snappy-1.1.1.tar.gz + tar xvzf snappy-1.1.1.tar.gz + cd snappy-1.1.1 && ./configure --with-pic --enable-static + cd snappy-1.1.1 && make + cp snappy-1.1.1/.libs/libsnappy.a . + rm -rf snappy-1.1.1 snappy-1.1.1.tar.gz + OPT="-fPIC -DNDEBUG -O2" $(MAKE) $(LIBRARY) -j + cd java;$(MAKE) java; + rm -f ./java/$(ROCKSDBJNILIB) + $(CXX) $(CXXFLAGS) -I./java/. $(JAVA_INCLUDE) -shared -fPIC -o ./java/$(ROCKSDBJNILIB) $(JNI_NATIVE_SOURCES) $(LIBOBJECTS) $(COVERAGEFLAGS) libz.a libbz2.a libsnappy.a + cd java;jar -cf $(ROCKSDB_JAR) org/rocksdb/*.class org/rocksdb/util/*.class HISTORY*.md $(ROCKSDBJNILIB) + + rocksdbjava: OPT="-fPIC -DNDEBUG -O2" $(MAKE) $(LIBRARY) -j32 cd java;$(MAKE) java; diff --git a/java/org/rocksdb/NativeLibraryLoader.java b/java/org/rocksdb/NativeLibraryLoader.java new file mode 100644 index 000000000..f49f54488 --- /dev/null +++ b/java/org/rocksdb/NativeLibraryLoader.java @@ -0,0 +1,46 @@ +package org.rocksdb; + +import java.io.*; + +public class NativeLibraryLoader +{ + + private static String sharedLibraryName = "librocksdbjni.so"; + private static String tempFilePrefix = "librocksdbjni"; + private static String tempFileSuffix = ".so"; + /** + * Private constructor - this class will never be instanced + */ + private NativeLibraryLoader() { + } + + public static void loadLibraryFromJar() throws IOException { + + File temp = File.createTempFile(tempFilePrefix, tempFileSuffix); + temp.deleteOnExit(); + + if (!temp.exists()) { + throw new FileNotFoundException("File " + temp.getAbsolutePath() + " does not exist."); + } + + byte[] buffer = new byte[1024]; + int readBytes; + + InputStream is = ClassLoader.getSystemClassLoader().getResourceAsStream(sharedLibraryName); + if (is == null) { + throw new FileNotFoundException(sharedLibraryName + " was not found inside JAR."); + } + + OutputStream os = new FileOutputStream(temp); + try { + while ((readBytes = is.read(buffer)) != -1) { + os.write(buffer, 0, readBytes); + } + } finally { + os.close(); + is.close(); + } + + System.load(temp.getAbsolutePath()); + } +} diff --git a/java/org/rocksdb/Options.java b/java/org/rocksdb/Options.java index 95f994606..420bfebba 100644 --- a/java/org/rocksdb/Options.java +++ b/java/org/rocksdb/Options.java @@ -13,6 +13,9 @@ package org.rocksdb; * native resources will be released as part of the process. */ public class Options extends RocksObject { + static{ + RocksDB.loadLibrary(); + } static final long DEFAULT_CACHE_SIZE = 8 << 20; static final int DEFAULT_NUM_SHARD_BITS = -1; /** diff --git a/java/org/rocksdb/RocksDB.java b/java/org/rocksdb/RocksDB.java index c7b06cc6d..bd9a4f648 100644 --- a/java/org/rocksdb/RocksDB.java +++ b/java/org/rocksdb/RocksDB.java @@ -11,6 +11,7 @@ import java.util.HashMap; import java.io.Closeable; import java.io.IOException; import org.rocksdb.util.Environment; +import org.rocksdb.NativeLibraryLoader; /** * A RocksDB is a persistent ordered map from keys to values. It is safe for @@ -18,16 +19,24 @@ import org.rocksdb.util.Environment; * All methods of this class could potentially throw RocksDBException, which * indicates sth wrong at the rocksdb library side and the call failed. */ -public class RocksDB extends RocksObject { +public class RocksDB extends org.rocksdb.RocksObject +{ + public static final int NOT_FOUND = -1; private static final String[] compressionLibs_ = { "snappy", "z", "bzip2", "lz4", "lz4hc"}; + static { + loadLibrary(); + } + + /** * Loads the necessary library files. * Calling this method twice will have no effect. */ - public static synchronized void loadLibrary() { + public static synchronized void loadLibrary() + { // loading possibly necessary libraries. for (String lib : compressionLibs_) { try { @@ -36,8 +45,15 @@ public class RocksDB extends RocksObject { // since it may be optional, we ignore its loading failure here. } } - // However, if any of them is required. We will see error here. - System.loadLibrary("rocksdbjni"); + + try + { + NativeLibraryLoader.loadLibraryFromJar(); + } + catch (IOException e) + { + e.printStackTrace(); + } } /** From 343e98a7d13391ab95dd0b64c1422da597926a96 Mon Sep 17 00:00:00 2001 From: Naveen Date: Mon, 18 Aug 2014 14:08:10 -0700 Subject: [PATCH 02/19] Reverting import change --- java/org/rocksdb/RocksDB.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/org/rocksdb/RocksDB.java b/java/org/rocksdb/RocksDB.java index bd9a4f648..6825bf3c5 100644 --- a/java/org/rocksdb/RocksDB.java +++ b/java/org/rocksdb/RocksDB.java @@ -19,7 +19,7 @@ import org.rocksdb.NativeLibraryLoader; * All methods of this class could potentially throw RocksDBException, which * indicates sth wrong at the rocksdb library side and the call failed. */ -public class RocksDB extends org.rocksdb.RocksObject +public class RocksDB extends RocksObject { public static final int NOT_FOUND = -1; From 1d284db2135ecb10d8bcd7cd4c1c10fc9f57f621 Mon Sep 17 00:00:00 2001 From: Naveen Date: Mon, 8 Sep 2014 17:44:52 -0700 Subject: [PATCH 03/19] Addressing review comments --- Makefile | 27 ++++++++++++----------- java/org/rocksdb/NativeLibraryLoader.java | 10 ++++----- java/org/rocksdb/Options.java | 2 +- java/org/rocksdb/RocksDB.java | 9 +++----- 4 files changed, 23 insertions(+), 25 deletions(-) diff --git a/Makefile b/Makefile index fd7c8c7d1..4c58e0b0a 100644 --- a/Makefile +++ b/Makefile @@ -245,7 +245,7 @@ unity: unity.cc unity.o clean: -rm -f $(PROGRAMS) $(TESTS) $(LIBRARY) $(SHARED) $(MEMENVLIBRARY) build_config.mk unity.cc -rm -rf ios-x86/* ios-arm/* - -find . -name "*.[od]" -exec rm {} \; + -find . -name "*.[oda]" -exec rm {} \; -find . -type f -regex ".*\.\(\(gcda\)\|\(gcno\)\)" -exec rm {} \; tags: ctags * -R @@ -480,29 +480,30 @@ ROCKSDBJNILIB = librocksdbjni.jnilib JAVA_INCLUDE = -I/System/Library/Frameworks/JavaVM.framework/Headers/ endif - -rocksdbjavastatic: - #build zlib +libz.a: + -rm -rf zlib-1.2.8 curl -O http://zlib.net/zlib-1.2.8.tar.gz tar xvzf zlib-1.2.8.tar.gz cd zlib-1.2.8 && CFLAGS='-fPIC' ./configure --static && make - cp zlib-1.2.8/libz.a . - rm -rf zlib-1.2.8.tar.gz zlib-1.2.8 - - #build bzip - curl -O http://www.bzip.org/1.0.6/bzip2-1.0.6.tar.gz + cp zlib-1.2.8/libz.a . + +libbz2.a: + -rm -rf bzip2-1.0.6 + curl -O http://www.bzip.org/1.0.6/bzip2-1.0.6.tar.gz tar xvzf bzip2-1.0.6.tar.gz cd bzip2-1.0.6 && make CFLAGS='-fPIC -Wall -Winline -O2 -g -D_FILE_OFFSET_BITS=64' cp bzip2-1.0.6/libbz2.a . - rm -rf bzip2-1.0.6.tar.gz bzip2-1.0.6 - #build snappy +libsnappy.a: + -rm -rf snappy-1.1.1 curl -O https://snappy.googlecode.com/files/snappy-1.1.1.tar.gz tar xvzf snappy-1.1.1.tar.gz - cd snappy-1.1.1 && ./configure --with-pic --enable-static + cd snappy-1.1.1 && ./configure --with-pic --enable-static cd snappy-1.1.1 && make cp snappy-1.1.1/.libs/libsnappy.a . - rm -rf snappy-1.1.1 snappy-1.1.1.tar.gz + + +rocksdbjavastatic: libz.a libbz2.a libsnappy.a OPT="-fPIC -DNDEBUG -O2" $(MAKE) $(LIBRARY) -j cd java;$(MAKE) java; rm -f ./java/$(ROCKSDBJNILIB) diff --git a/java/org/rocksdb/NativeLibraryLoader.java b/java/org/rocksdb/NativeLibraryLoader.java index f49f54488..f6b8520f5 100644 --- a/java/org/rocksdb/NativeLibraryLoader.java +++ b/java/org/rocksdb/NativeLibraryLoader.java @@ -2,8 +2,7 @@ package org.rocksdb; import java.io.*; -public class NativeLibraryLoader -{ +public class NativeLibraryLoader { private static String sharedLibraryName = "librocksdbjni.so"; private static String tempFilePrefix = "librocksdbjni"; @@ -14,13 +13,14 @@ public class NativeLibraryLoader private NativeLibraryLoader() { } - public static void loadLibraryFromJar() throws IOException { + public static void loadLibraryFromJar() + throws IOException { File temp = File.createTempFile(tempFilePrefix, tempFileSuffix); temp.deleteOnExit(); if (!temp.exists()) { - throw new FileNotFoundException("File " + temp.getAbsolutePath() + " does not exist."); + throw new RuntimeException("File " + temp.getAbsolutePath() + " does not exist."); } byte[] buffer = new byte[1024]; @@ -28,7 +28,7 @@ public class NativeLibraryLoader InputStream is = ClassLoader.getSystemClassLoader().getResourceAsStream(sharedLibraryName); if (is == null) { - throw new FileNotFoundException(sharedLibraryName + " was not found inside JAR."); + throw new RuntimeException(sharedLibraryName + " was not found inside JAR."); } OutputStream os = new FileOutputStream(temp); diff --git a/java/org/rocksdb/Options.java b/java/org/rocksdb/Options.java index 420bfebba..4ed0b8025 100644 --- a/java/org/rocksdb/Options.java +++ b/java/org/rocksdb/Options.java @@ -13,7 +13,7 @@ package org.rocksdb; * native resources will be released as part of the process. */ public class Options extends RocksObject { - static{ + static { RocksDB.loadLibrary(); } static final long DEFAULT_CACHE_SIZE = 8 << 20; diff --git a/java/org/rocksdb/RocksDB.java b/java/org/rocksdb/RocksDB.java index 6825bf3c5..132b9ac39 100644 --- a/java/org/rocksdb/RocksDB.java +++ b/java/org/rocksdb/RocksDB.java @@ -19,8 +19,7 @@ import org.rocksdb.NativeLibraryLoader; * All methods of this class could potentially throw RocksDBException, which * indicates sth wrong at the rocksdb library side and the call failed. */ -public class RocksDB extends RocksObject -{ +public class RocksDB extends RocksObject { public static final int NOT_FOUND = -1; private static final String[] compressionLibs_ = { @@ -35,8 +34,7 @@ public class RocksDB extends RocksObject * Loads the necessary library files. * Calling this method twice will have no effect. */ - public static synchronized void loadLibrary() - { + public static synchronized void loadLibrary() { // loading possibly necessary libraries. for (String lib : compressionLibs_) { try { @@ -45,14 +43,13 @@ public class RocksDB extends RocksObject // since it may be optional, we ignore its loading failure here. } } - try { NativeLibraryLoader.loadLibraryFromJar(); } catch (IOException e) { - e.printStackTrace(); + throw new RuntimeException("Unable to load the RocksDB shared library" + e); } } From 4436f17bd80fd3e953787821a0e46b7d8369e4f3 Mon Sep 17 00:00:00 2001 From: liuchang0812 Date: Sun, 21 Sep 2014 22:09:48 +0800 Subject: [PATCH 04/19] fixed #303: replace %ld with % PRId64 --- table/cuckoo_table_reader_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/table/cuckoo_table_reader_test.cc b/table/cuckoo_table_reader_test.cc index 3b170b638..6dd5e5525 100644 --- a/table/cuckoo_table_reader_test.cc +++ b/table/cuckoo_table_reader_test.cc @@ -522,7 +522,7 @@ void ReadKeys(uint64_t num, uint32_t batch_size) { float time_per_op = (env->NowMicros() - start_time) * 1.0 / num; fprintf(stderr, "Time taken per op is %.3fus (%.1f Mqps) with batch size of %u, " - "# of found keys %ld\n", + "# of found keys %" PRId64 "\n", time_per_op, 1.0 / time_per_op, batch_size, found_count); } } // namespace. From 6a031b6a81d9b3592d5cba41d18069999f499938 Mon Sep 17 00:00:00 2001 From: liuchang0812 Date: Sun, 21 Sep 2014 22:20:00 +0800 Subject: [PATCH 05/19] remove unused variable --- util/logging.h | 1 - 1 file changed, 1 deletion(-) diff --git a/util/logging.h b/util/logging.h index ce0269726..7ca8ae0a3 100644 --- a/util/logging.h +++ b/util/logging.h @@ -19,7 +19,6 @@ namespace rocksdb { class Slice; -class WritableFile; // Append a human-readable size in bytes int AppendHumanBytes(uint64_t bytes, char* output, int len); From 7e0dcb953f44e1567762a0a77c26cdde0d4b9e9c Mon Sep 17 00:00:00 2001 From: whu_liuchang Date: Mon, 22 Sep 2014 23:24:53 +0800 Subject: [PATCH 06/19] Update logging.cc fix cast style to cpp static_cast --- util/logging.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/logging.cc b/util/logging.cc index 4dfb9a449..1f5b8e2a7 100644 --- a/util/logging.cc +++ b/util/logging.cc @@ -45,7 +45,7 @@ int AppendHumanBytes(uint64_t bytes, char* output, int len) { void AppendNumberTo(std::string* str, uint64_t num) { char buf[30]; - snprintf(buf, sizeof(buf), "%llu", (unsigned long long) num); + snprintf(buf, sizeof(buf), "%llu", static_cast(unsigned long long) num); str->append(buf); } From a7574d4fa17c6110b878bd91c674fb012982b7ec Mon Sep 17 00:00:00 2001 From: whu_liuchang Date: Mon, 22 Sep 2014 23:37:00 +0800 Subject: [PATCH 07/19] Update logging.cc --- util/logging.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/logging.cc b/util/logging.cc index 1f5b8e2a7..c54ea1cdc 100644 --- a/util/logging.cc +++ b/util/logging.cc @@ -45,7 +45,7 @@ int AppendHumanBytes(uint64_t bytes, char* output, int len) { void AppendNumberTo(std::string* str, uint64_t num) { char buf[30]; - snprintf(buf, sizeof(buf), "%llu", static_cast(unsigned long long) num); + snprintf(buf, sizeof(buf), "%llu", static_cast(num)); str->append(buf); } From 787cb4db29b995297935819cf7f2f07f6ffd9977 Mon Sep 17 00:00:00 2001 From: liuchang0812 Date: Tue, 23 Sep 2014 01:10:46 +0800 Subject: [PATCH 08/19] remove cast, replace %llu with % PRIu64 --- util/logging.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/logging.cc b/util/logging.cc index c54ea1cdc..98d96b82b 100644 --- a/util/logging.cc +++ b/util/logging.cc @@ -45,7 +45,7 @@ int AppendHumanBytes(uint64_t bytes, char* output, int len) { void AppendNumberTo(std::string* str, uint64_t num) { char buf[30]; - snprintf(buf, sizeof(buf), "%llu", static_cast(num)); + snprintf(buf, sizeof(buf), "%" PRIu64, num); str->append(buf); } From 5e6aee4325ae9dabe38905828f5b63944a40c06a Mon Sep 17 00:00:00 2001 From: Lei Jin Date: Mon, 22 Sep 2014 10:36:53 -0700 Subject: [PATCH 09/19] dont create backup_input if compaction filter v2 is not used Summary: Compaction creates backup_input iterator even though it only needed when compaction filter v2 is enabled Test Plan: make all check Reviewers: sdong, yhchiang, igor Reviewed By: igor Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D23769 --- db/db_impl.cc | 50 ++++++++++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 6038c2ce5..260939810 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -3117,9 +3117,6 @@ Status DBImpl::DoCompactionWork(CompactionState* compact, const uint64_t start_micros = env_->NowMicros(); unique_ptr input(versions_->MakeInputIterator(compact->compaction)); input->SeekToFirst(); - shared_ptr backup_input( - versions_->MakeInputIterator(compact->compaction)); - backup_input->SeekToFirst(); Status status; ParsedInternalKey ikey; @@ -3132,14 +3129,30 @@ Status DBImpl::DoCompactionWork(CompactionState* compact, auto compaction_filter_v2 = compaction_filter_from_factory_v2.get(); - // temp_backup_input always point to the start of the current buffer - // temp_backup_input = backup_input; - // iterate through input, - // 1) buffer ineligible keys and value keys into 2 separate buffers; - // 2) send value_buffer to compaction filter and alternate the values; - // 3) merge value_buffer with ineligible_value_buffer; - // 4) run the modified "compaction" using the old for loop. - if (compaction_filter_v2) { + if (!compaction_filter_v2) { + status = ProcessKeyValueCompaction( + is_snapshot_supported, + visible_at_tip, + earliest_snapshot, + latest_snapshot, + deletion_state, + bottommost_level, + imm_micros, + input.get(), + compact, + false, + log_buffer); + } else { + // temp_backup_input always point to the start of the current buffer + // temp_backup_input = backup_input; + // iterate through input, + // 1) buffer ineligible keys and value keys into 2 separate buffers; + // 2) send value_buffer to compaction filter and alternate the values; + // 3) merge value_buffer with ineligible_value_buffer; + // 4) run the modified "compaction" using the old for loop. + shared_ptr backup_input( + versions_->MakeInputIterator(compact->compaction)); + backup_input->SeekToFirst(); while (backup_input->Valid() && !shutting_down_.Acquire_Load() && !cfd->IsDropped()) { // FLUSH preempts compaction @@ -3267,21 +3280,6 @@ Status DBImpl::DoCompactionWork(CompactionState* compact, log_buffer); } // checking for compaction filter v2 - if (!compaction_filter_v2) { - status = ProcessKeyValueCompaction( - is_snapshot_supported, - visible_at_tip, - earliest_snapshot, - latest_snapshot, - deletion_state, - bottommost_level, - imm_micros, - input.get(), - compact, - false, - log_buffer); - } - if (status.ok() && (shutting_down_.Acquire_Load() || cfd->IsDropped())) { status = Status::ShutdownInProgress( "Database shutdown or Column family drop during compaction"); From 57a32f147f08b9ddccaf40c53f082e695355915a Mon Sep 17 00:00:00 2001 From: Lei Jin Date: Mon, 22 Sep 2014 11:15:03 -0700 Subject: [PATCH 10/19] change target_file_size_base to uint64_t Summary: It contrains the file size to be 4G max with int Test Plan: tried to grep instance and made sure other related variables are also uint64 Reviewers: sdong, yhchiang, igor Reviewed By: igor Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D23697 --- HISTORY.md | 1 + db/db_bench.cc | 2 +- include/rocksdb/options.h | 2 +- util/options.cc | 30 +++++++++++++++--------------- util/options_helper.cc | 2 +- 5 files changed, 19 insertions(+), 18 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index b64e12b42..a8b89f54f 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -8,6 +8,7 @@ * We have refactored our system of stalling writes. Any stall-related statistics' meanings are changed. Instead of per-write stall counts, we now count stalls per-epoch, where epochs are periods between flushes and compactions. You'll find more information in our Tuning Perf Guide once we release RocksDB 3.6. * When disableDataSync=true, we no longer sync the MANIFEST file. * Add identity_as_first_hash property to CuckooTable. SST file needs to be rebuilt to be opened by reader properly. +* Change target_file_size_base type to uint64_t from int. ----- Past Releases ----- diff --git a/db/db_bench.cc b/db/db_bench.cc index 08e61e46b..d90c628a9 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -307,7 +307,7 @@ DEFINE_string(wal_dir, "", "If not empty, use the given dir for WAL"); DEFINE_int32(num_levels, 7, "The total number of levels"); -DEFINE_int32(target_file_size_base, 2 * 1048576, "Target file size at level-1"); +DEFINE_int64(target_file_size_base, 2 * 1048576, "Target file size at level-1"); DEFINE_int32(target_file_size_multiplier, 1, "A multiplier to compute target level-N file size (N >= 2)"); diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 2c9734d24..84a0422c1 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -287,7 +287,7 @@ struct ColumnFamilyOptions { // and each file on level-3 will be 200MB. // by default target_file_size_base is 2MB. - int target_file_size_base; + uint64_t target_file_size_base; // by default target_file_size_multiplier is 1, which means // by default files in different levels will have similar size. int target_file_size_multiplier; diff --git a/util/options.cc b/util/options.cc index f0042cbda..32612d6a7 100644 --- a/util/options.cc +++ b/util/options.cc @@ -273,8 +273,8 @@ void DBOptions::Dump(Logger* log) const { Log(log, " Options.disableDataSync: %d", disableDataSync); Log(log, " Options.use_fsync: %d", use_fsync); Log(log, " Options.max_log_file_size: %zu", max_log_file_size); - Log(log, "Options.max_manifest_file_size: %lu", - (unsigned long)max_manifest_file_size); + Log(log, "Options.max_manifest_file_size: %" PRIu64, + max_manifest_file_size); Log(log, " Options.log_file_time_to_roll: %zu", log_file_time_to_roll); Log(log, " Options.keep_log_file_num: %zu", keep_log_file_num); Log(log, " Options.allow_os_buffer: %d", allow_os_buffer); @@ -290,16 +290,16 @@ void DBOptions::Dump(Logger* log) const { table_cache_numshardbits); Log(log, " Options.table_cache_remove_scan_count_limit: %d", table_cache_remove_scan_count_limit); - Log(log, " Options.delete_obsolete_files_period_micros: %lu", - (unsigned long)delete_obsolete_files_period_micros); + Log(log, " Options.delete_obsolete_files_period_micros: %" PRIu64, + delete_obsolete_files_period_micros); Log(log, " Options.max_background_compactions: %d", max_background_compactions); Log(log, " Options.max_background_flushes: %d", max_background_flushes); - Log(log, " Options.WAL_ttl_seconds: %lu", - (unsigned long)WAL_ttl_seconds); - Log(log, " Options.WAL_size_limit_MB: %lu", - (unsigned long)WAL_size_limit_MB); + Log(log, " Options.WAL_ttl_seconds: %" PRIu64, + WAL_ttl_seconds); + Log(log, " Options.WAL_size_limit_MB: %" PRIu64, + WAL_size_limit_MB); Log(log, " Options.manifest_preallocation_size: %zu", manifest_preallocation_size); Log(log, " Options.allow_os_buffer: %d", @@ -322,8 +322,8 @@ void DBOptions::Dump(Logger* log) const { use_adaptive_mutex); Log(log, " Options.rate_limiter: %p", rate_limiter.get()); - Log(log, " Options.bytes_per_sync: %lu", - (unsigned long)bytes_per_sync); + Log(log, " Options.bytes_per_sync: %" PRIu64, + bytes_per_sync); } // DBOptions::Dump void ColumnFamilyOptions::Dump(Logger* log) const { @@ -371,20 +371,20 @@ void ColumnFamilyOptions::Dump(Logger* log) const { level0_stop_writes_trigger); Log(log," Options.max_mem_compaction_level: %d", max_mem_compaction_level); - Log(log," Options.target_file_size_base: %d", + Log(log," Options.target_file_size_base: %" PRIu64, target_file_size_base); Log(log," Options.target_file_size_multiplier: %d", target_file_size_multiplier); - Log(log," Options.max_bytes_for_level_base: %lu", - (unsigned long)max_bytes_for_level_base); + Log(log," Options.max_bytes_for_level_base: %" PRIu64, + max_bytes_for_level_base); Log(log," Options.max_bytes_for_level_multiplier: %d", max_bytes_for_level_multiplier); for (int i = 0; i < num_levels; i++) { Log(log,"Options.max_bytes_for_level_multiplier_addtl[%d]: %d", i, max_bytes_for_level_multiplier_additional[i]); } - Log(log," Options.max_sequential_skip_in_iterations: %lu", - (unsigned long)max_sequential_skip_in_iterations); + Log(log," Options.max_sequential_skip_in_iterations: %" PRIu64, + max_sequential_skip_in_iterations); Log(log," Options.expanded_compaction_factor: %d", expanded_compaction_factor); Log(log," Options.source_compaction_factor: %d", diff --git a/util/options_helper.cc b/util/options_helper.cc index db066f747..d552a2b9e 100644 --- a/util/options_helper.cc +++ b/util/options_helper.cc @@ -177,7 +177,7 @@ bool GetOptionsFromStrings( } else if (o.first == "max_mem_compaction_level") { new_options->max_mem_compaction_level = ParseInt(o.second); } else if (o.first == "target_file_size_base") { - new_options->target_file_size_base = ParseInt(o.second); + new_options->target_file_size_base = ParseUint64(o.second); } else if (o.first == "target_file_size_multiplier") { new_options->target_file_size_multiplier = ParseInt(o.second); } else if (o.first == "max_bytes_for_level_base") { From d0de413f4dc94e539401608e2007540a5ea01098 Mon Sep 17 00:00:00 2001 From: sdong Date: Mon, 22 Sep 2014 11:37:35 -0700 Subject: [PATCH 11/19] WriteBatchWithIndex to allow different Comparators for different column families Summary: Previously, one single column family is given to WriteBatchWithIndex to index keys for all column families. An extra map from column family ID to comparator is maintained which can override the default comparator given in the constructor. A WriteBatchWithIndex::SetComparatorForCF() is added for user to add comparators per column family. Also move more codes into anonymous namespace. Test Plan: Add a unit test Reviewers: ljin, igor Reviewed By: igor Subscribers: dhruba, leveldb, yhchiang Differential Revision: https://reviews.facebook.net/D23355 --- db/column_family.cc | 13 ++ db/column_family.h | 4 + db/write_batch_test.cc | 5 +- .../utilities/write_batch_with_index.h | 15 ++- .../write_batch_with_index.cc | 79 +++++++----- .../write_batch_with_index_test.cc | 114 +++++++++++++++++- 6 files changed, 189 insertions(+), 41 deletions(-) diff --git a/db/column_family.cc b/db/column_family.cc index ff6b8fe6c..8b4e007ed 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -86,6 +86,10 @@ ColumnFamilyHandleImpl::~ColumnFamilyHandleImpl() { uint32_t ColumnFamilyHandleImpl::GetID() const { return cfd()->GetID(); } +const Comparator* ColumnFamilyHandleImpl::user_comparator() const { + return cfd()->user_comparator(); +} + ColumnFamilyOptions SanitizeOptions(const InternalKeyComparator* icmp, const ColumnFamilyOptions& src) { ColumnFamilyOptions result = src; @@ -726,4 +730,13 @@ uint32_t GetColumnFamilyID(ColumnFamilyHandle* column_family) { return column_family_id; } +const Comparator* GetColumnFamilyUserComparator( + ColumnFamilyHandle* column_family) { + if (column_family != nullptr) { + auto cfh = reinterpret_cast(column_family); + return cfh->user_comparator(); + } + return nullptr; +} + } // namespace rocksdb diff --git a/db/column_family.h b/db/column_family.h index f1ef13cf1..65b4b53ba 100644 --- a/db/column_family.h +++ b/db/column_family.h @@ -49,6 +49,7 @@ class ColumnFamilyHandleImpl : public ColumnFamilyHandle { // destroy without mutex virtual ~ColumnFamilyHandleImpl(); virtual ColumnFamilyData* cfd() const { return cfd_; } + virtual const Comparator* user_comparator() const; virtual uint32_t GetID() const; @@ -448,4 +449,7 @@ class ColumnFamilyMemTablesImpl : public ColumnFamilyMemTables { extern uint32_t GetColumnFamilyID(ColumnFamilyHandle* column_family); +extern const Comparator* GetColumnFamilyUserComparator( + ColumnFamilyHandle* column_family); + } // namespace rocksdb diff --git a/db/write_batch_test.cc b/db/write_batch_test.cc index d8fa52d40..ba7451078 100644 --- a/db/write_batch_test.cc +++ b/db/write_batch_test.cc @@ -289,6 +289,9 @@ class ColumnFamilyHandleImplDummy : public ColumnFamilyHandleImpl { explicit ColumnFamilyHandleImplDummy(int id) : ColumnFamilyHandleImpl(nullptr, nullptr, nullptr), id_(id) {} uint32_t GetID() const override { return id_; } + const Comparator* user_comparator() const override { + return BytewiseComparator(); + } private: uint32_t id_; @@ -320,7 +323,7 @@ TEST(WriteBatchTest, ColumnFamiliesBatchTest) { } TEST(WriteBatchTest, ColumnFamiliesBatchWithIndexTest) { - WriteBatchWithIndex batch(BytewiseComparator(), 20); + WriteBatchWithIndex batch; ColumnFamilyHandleImplDummy zero(0), two(2), three(3), eight(8); batch.Put(&zero, Slice("foo"), Slice("bar")); batch.Put(&two, Slice("twofoo"), Slice("bar2")); diff --git a/include/rocksdb/utilities/write_batch_with_index.h b/include/rocksdb/utilities/write_batch_with_index.h index c09f53d11..85c80850f 100644 --- a/include/rocksdb/utilities/write_batch_with_index.h +++ b/include/rocksdb/utilities/write_batch_with_index.h @@ -11,8 +11,9 @@ #pragma once -#include "rocksdb/status.h" +#include "rocksdb/comparator.h" #include "rocksdb/slice.h" +#include "rocksdb/status.h" #include "rocksdb/write_batch.h" namespace rocksdb { @@ -56,12 +57,14 @@ class WBWIIterator { // A user can call NewIterator() to create an iterator. class WriteBatchWithIndex { public: - // index_comparator indicates the order when iterating data in the write - // batch. Technically, it doesn't have to be the same as the one used in - // the DB. + // backup_index_comparator: the backup comparator used to compare keys + // within the same column family, if column family is not given in the + // interface, or we can't find a column family from the column family handle + // passed in, backup_index_comparator will be used for the column family. // reserved_bytes: reserved bytes in underlying WriteBatch - explicit WriteBatchWithIndex(const Comparator* index_comparator, - size_t reserved_bytes = 0); + explicit WriteBatchWithIndex( + const Comparator* backup_index_comparator = BytewiseComparator(), + size_t reserved_bytes = 0); virtual ~WriteBatchWithIndex(); WriteBatch* GetWriteBatch(); diff --git a/utilities/write_batch_with_index/write_batch_with_index.cc b/utilities/write_batch_with_index/write_batch_with_index.cc index 68b3d3970..2caa2e4cc 100644 --- a/utilities/write_batch_with_index/write_batch_with_index.cc +++ b/utilities/write_batch_with_index/write_batch_with_index.cc @@ -20,7 +20,6 @@ class ReadableWriteBatch : public WriteBatch { Status GetEntryFromDataOffset(size_t data_offset, WriteType* type, Slice* Key, Slice* value, Slice* blob) const; }; -} // namespace // Key used by skip list, as the binary searchable index of WriteBatchWithIndex. struct WriteBatchIndexEntry { @@ -38,44 +37,28 @@ struct WriteBatchIndexEntry { class WriteBatchEntryComparator { public: - WriteBatchEntryComparator(const Comparator* comparator, + WriteBatchEntryComparator(const Comparator* default_comparator, const ReadableWriteBatch* write_batch) - : comparator_(comparator), write_batch_(write_batch) {} + : default_comparator_(default_comparator), write_batch_(write_batch) {} // Compare a and b. Return a negative value if a is less than b, 0 if they // are equal, and a positive value if a is greater than b int operator()(const WriteBatchIndexEntry* entry1, const WriteBatchIndexEntry* entry2) const; + void SetComparatorForCF(uint32_t column_family_id, + const Comparator* comparator) { + cf_comparator_map_[column_family_id] = comparator; + } + private: - const Comparator* comparator_; + const Comparator* default_comparator_; + std::unordered_map cf_comparator_map_; const ReadableWriteBatch* write_batch_; }; typedef SkipList WriteBatchEntrySkipList; -struct WriteBatchWithIndex::Rep { - Rep(const Comparator* index_comparator, size_t reserved_bytes = 0) - : write_batch(reserved_bytes), - comparator(index_comparator, &write_batch), - skip_list(comparator, &arena) {} - ReadableWriteBatch write_batch; - WriteBatchEntryComparator comparator; - Arena arena; - WriteBatchEntrySkipList skip_list; - - WriteBatchIndexEntry* GetEntry(ColumnFamilyHandle* column_family) { - return GetEntryWithCfId(GetColumnFamilyID(column_family)); - } - - WriteBatchIndexEntry* GetEntryWithCfId(uint32_t column_family_id) { - auto* mem = arena.Allocate(sizeof(WriteBatchIndexEntry)); - auto* index_entry = new (mem) - WriteBatchIndexEntry(write_batch.GetDataSize(), column_family_id); - return index_entry; - } -}; - class WBWIIteratorImpl : public WBWIIterator { public: WBWIIteratorImpl(uint32_t column_family_id, @@ -138,6 +121,35 @@ class WBWIIteratorImpl : public WBWIIterator { } } }; +} // namespace + +struct WriteBatchWithIndex::Rep { + Rep(const Comparator* index_comparator, size_t reserved_bytes = 0) + : write_batch(reserved_bytes), + comparator(index_comparator, &write_batch), + skip_list(comparator, &arena) {} + ReadableWriteBatch write_batch; + WriteBatchEntryComparator comparator; + Arena arena; + WriteBatchEntrySkipList skip_list; + + WriteBatchIndexEntry* GetEntry(ColumnFamilyHandle* column_family) { + uint32_t cf_id = GetColumnFamilyID(column_family); + const auto* cf_cmp = GetColumnFamilyUserComparator(column_family); + if (cf_cmp != nullptr) { + comparator.SetComparatorForCF(cf_id, cf_cmp); + } + + return GetEntryWithCfId(cf_id); + } + + WriteBatchIndexEntry* GetEntryWithCfId(uint32_t column_family_id) { + auto* mem = arena.Allocate(sizeof(WriteBatchIndexEntry)); + auto* index_entry = new (mem) + WriteBatchIndexEntry(write_batch.GetDataSize(), column_family_id); + return index_entry; + } +}; Status ReadableWriteBatch::GetEntryFromDataOffset(size_t data_offset, WriteType* type, Slice* Key, @@ -179,9 +191,9 @@ Status ReadableWriteBatch::GetEntryFromDataOffset(size_t data_offset, return Status::OK(); } -WriteBatchWithIndex::WriteBatchWithIndex(const Comparator* index_comparator, - size_t reserved_bytes) - : rep(new Rep(index_comparator, reserved_bytes)) {} +WriteBatchWithIndex::WriteBatchWithIndex( + const Comparator* default_index_comparator, size_t reserved_bytes) + : rep(new Rep(default_index_comparator, reserved_bytes)) {} WriteBatchWithIndex::~WriteBatchWithIndex() { delete rep; } @@ -287,7 +299,14 @@ int WriteBatchEntryComparator::operator()( key2 = *(entry2->search_key); } - int cmp = comparator_->Compare(key1, key2); + int cmp; + auto comparator_for_cf = cf_comparator_map_.find(entry1->column_family); + if (comparator_for_cf != cf_comparator_map_.end()) { + cmp = comparator_for_cf->second->Compare(key1, key2); + } else { + cmp = default_comparator_->Compare(key1, key2); + } + if (cmp != 0) { return cmp; } else if (entry1->offset > entry2->offset) { diff --git a/utilities/write_batch_with_index/write_batch_with_index_test.cc b/utilities/write_batch_with_index/write_batch_with_index_test.cc index fdceed4c4..ad8c110c1 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_test.cc +++ b/utilities/write_batch_with_index/write_batch_with_index_test.cc @@ -19,12 +19,16 @@ namespace rocksdb { namespace { class ColumnFamilyHandleImplDummy : public ColumnFamilyHandleImpl { public: - explicit ColumnFamilyHandleImplDummy(int id) - : ColumnFamilyHandleImpl(nullptr, nullptr, nullptr), id_(id) {} + explicit ColumnFamilyHandleImplDummy(int id, const Comparator* comparator) + : ColumnFamilyHandleImpl(nullptr, nullptr, nullptr), + id_(id), + comparator_(comparator) {} uint32_t GetID() const override { return id_; } + const Comparator* user_comparator() const override { return comparator_; } private: uint32_t id_; + const Comparator* comparator_; }; struct Entry { @@ -90,8 +94,9 @@ TEST(WriteBatchWithIndexTest, TestValueAsSecondaryIndex) { index_map[e.value].push_back(&e); } - WriteBatchWithIndex batch(BytewiseComparator(), 20); - ColumnFamilyHandleImplDummy data(6), index(8); + WriteBatchWithIndex batch(nullptr, 20); + ColumnFamilyHandleImplDummy data(6, BytewiseComparator()); + ColumnFamilyHandleImplDummy index(8, BytewiseComparator()); for (auto& e : entries) { if (e.type == kPutRecord) { batch.Put(&data, e.key, e.value); @@ -230,6 +235,107 @@ TEST(WriteBatchWithIndexTest, TestValueAsSecondaryIndex) { } } +class ReverseComparator : public Comparator { + public: + ReverseComparator() {} + + virtual const char* Name() const override { + return "rocksdb.ReverseComparator"; + } + + virtual int Compare(const Slice& a, const Slice& b) const override { + return 0 - BytewiseComparator()->Compare(a, b); + } + + virtual void FindShortestSeparator(std::string* start, + const Slice& limit) const {} + virtual void FindShortSuccessor(std::string* key) const {} +}; + +TEST(WriteBatchWithIndexTest, TestComparatorForCF) { + ReverseComparator reverse_cmp; + ColumnFamilyHandleImplDummy cf1(6, nullptr); + ColumnFamilyHandleImplDummy reverse_cf(66, &reverse_cmp); + ColumnFamilyHandleImplDummy cf2(88, BytewiseComparator()); + WriteBatchWithIndex batch(BytewiseComparator(), 20); + + batch.Put(&cf1, "ddd", ""); + batch.Put(&cf2, "aaa", ""); + batch.Put(&cf2, "eee", ""); + batch.Put(&cf1, "ccc", ""); + batch.Put(&reverse_cf, "a11", ""); + batch.Put(&cf1, "bbb", ""); + batch.Put(&reverse_cf, "a33", ""); + batch.Put(&reverse_cf, "a22", ""); + + { + std::unique_ptr iter(batch.NewIterator(&cf1)); + iter->Seek(""); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("bbb", iter->Entry().key.ToString()); + iter->Next(); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("ccc", iter->Entry().key.ToString()); + iter->Next(); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("ddd", iter->Entry().key.ToString()); + iter->Next(); + ASSERT_OK(iter->status()); + ASSERT_TRUE(!iter->Valid()); + } + + { + std::unique_ptr iter(batch.NewIterator(&cf2)); + iter->Seek(""); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("aaa", iter->Entry().key.ToString()); + iter->Next(); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("eee", iter->Entry().key.ToString()); + iter->Next(); + ASSERT_OK(iter->status()); + ASSERT_TRUE(!iter->Valid()); + } + + { + std::unique_ptr iter(batch.NewIterator(&reverse_cf)); + iter->Seek(""); + ASSERT_OK(iter->status()); + ASSERT_TRUE(!iter->Valid()); + + iter->Seek("z"); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("a33", iter->Entry().key.ToString()); + iter->Next(); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("a22", iter->Entry().key.ToString()); + iter->Next(); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("a11", iter->Entry().key.ToString()); + iter->Next(); + ASSERT_OK(iter->status()); + ASSERT_TRUE(!iter->Valid()); + + iter->Seek("a22"); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("a22", iter->Entry().key.ToString()); + + iter->Seek("a13"); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("a11", iter->Entry().key.ToString()); + } +} + } // namespace int main(int argc, char** argv) { return rocksdb::test::RunAllTests(); } From 53b00399548eb8eb1cab396e17890aca6c5f497e Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Mon, 22 Sep 2014 15:00:03 -0700 Subject: [PATCH 12/19] Fix release compile --- util/options_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/options_test.cc b/util/options_test.cc index f640b991f..eee285e2a 100644 --- a/util/options_test.cc +++ b/util/options_test.cc @@ -177,7 +177,7 @@ TEST(OptionsTest, GetOptionsFromStringsTest) { ASSERT_EQ(new_opt.level0_slowdown_writes_trigger, 9); ASSERT_EQ(new_opt.level0_stop_writes_trigger, 10); ASSERT_EQ(new_opt.max_mem_compaction_level, 11); - ASSERT_EQ(new_opt.target_file_size_base, 12); + ASSERT_EQ(new_opt.target_file_size_base, static_cast(12)); ASSERT_EQ(new_opt.target_file_size_multiplier, 13); ASSERT_EQ(new_opt.max_bytes_for_level_base, 14U); ASSERT_EQ(new_opt.max_bytes_for_level_multiplier, 15); From 3d74f09979a2eadc1711a13ca4e221b53a6c44b3 Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Mon, 22 Sep 2014 15:19:20 -0700 Subject: [PATCH 13/19] Fix compile --- table/full_filter_block_test.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/table/full_filter_block_test.cc b/table/full_filter_block_test.cc index 12e783b4a..7bf61f238 100644 --- a/table/full_filter_block_test.cc +++ b/table/full_filter_block_test.cc @@ -30,7 +30,8 @@ class TestFilterBitsBuilder : public FilterBitsBuilder { for (size_t i = 0; i < hash_entries_.size(); i++) { EncodeFixed32(data + i * 4, hash_entries_[i]); } - buf->reset(data); + const char* const_data = data; + buf->reset(const_data); return Slice(data, len); } From 55af370756af6f11edd79e431a4f9cc0a04e784b Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Tue, 23 Sep 2014 13:02:23 -0700 Subject: [PATCH 14/19] Remove TODO for checking index checksums --- table/block_based_table_reader.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 2e883632f..eb3de7a3b 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -498,7 +498,6 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, // pre-load these blocks, which will kept in member variables in Rep // and with a same life-time as this table object. IndexReader* index_reader = nullptr; - // TODO: we never really verify check sum for index block s = new_table->CreateIndexReader(&index_reader, meta_iter.get()); if (s.ok()) { From 0a29ce53938a7c5db3484c196fb98e5e61a952df Mon Sep 17 00:00:00 2001 From: Lei Jin Date: Tue, 23 Sep 2014 14:18:57 -0700 Subject: [PATCH 15/19] re-enable BlockBasedTable::SetupForCompaction() Summary: It was commented out in D22545 by accident. Keep the option in ImmutableOptions for now. I can make it dynamic in https://reviews.facebook.net/D23349 Test Plan: make release Reviewers: sdong, yhchiang, igor Reviewed By: igor Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D23865 --- include/rocksdb/immutable_options.h | 2 ++ include/rocksdb/options.h | 14 ++++++++------ table/block_based_table_reader.cc | 4 +--- util/options.cc | 3 ++- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/rocksdb/immutable_options.h b/include/rocksdb/immutable_options.h index de4480cff..54b676626 100644 --- a/include/rocksdb/immutable_options.h +++ b/include/rocksdb/immutable_options.h @@ -77,6 +77,8 @@ struct ImmutableCFOptions { std::vector compression_per_level; CompressionOptions compression_opts; + + Options::AccessHint access_hint_on_compaction_start; }; } // namespace rocksdb diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 84a0422c1..a60f94268 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -58,6 +58,7 @@ enum CompactionStyle : char { kCompactionStyleFIFO = 0x2, // FIFO compaction style }; + struct CompactionOptionsFIFO { // once the total sum of table files reaches this, we will delete the oldest // table file @@ -783,12 +784,13 @@ struct DBOptions { // Specify the file access pattern once a compaction is started. // It will be applied to all input files of a compaction. // Default: NORMAL - enum { - NONE, - NORMAL, - SEQUENTIAL, - WILLNEED - } access_hint_on_compaction_start; + enum AccessHint { + NONE, + NORMAL, + SEQUENTIAL, + WILLNEED + }; + AccessHint access_hint_on_compaction_start; // Use adaptive mutex, which spins in the user space before resorting // to kernel. This could reduce context switch when the mutex is not diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index eb3de7a3b..09328dc3b 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -532,8 +532,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, } void BlockBasedTable::SetupForCompaction() { - /* - switch (.access_hint_on_compaction_start) { + switch (rep_->ioptions.access_hint_on_compaction_start) { case Options::NONE: break; case Options::NORMAL: @@ -549,7 +548,6 @@ void BlockBasedTable::SetupForCompaction() { assert(false); } compaction_optimized_ = true; - */ } std::shared_ptr BlockBasedTable::GetTableProperties() diff --git a/util/options.cc b/util/options.cc index 32612d6a7..28120659b 100644 --- a/util/options.cc +++ b/util/options.cc @@ -59,7 +59,8 @@ ImmutableCFOptions::ImmutableCFOptions(const Options& options) use_fsync(options.use_fsync), compression(options.compression), compression_per_level(options.compression_per_level), - compression_opts(options.compression_opts) {} + compression_opts(options.compression_opts), + access_hint_on_compaction_start(options.access_hint_on_compaction_start) {} ColumnFamilyOptions::ColumnFamilyOptions() : comparator(BytewiseComparator()), From cf7ace886573f387194d158e09497c05a30bd24a Mon Sep 17 00:00:00 2001 From: Naveen Date: Wed, 10 Sep 2014 12:12:31 -0700 Subject: [PATCH 16/19] Addressing review comments --- java/org/rocksdb/NativeLibraryLoader.java | 19 ++++++++++++------- java/org/rocksdb/RocksDB.java | 2 +- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/java/org/rocksdb/NativeLibraryLoader.java b/java/org/rocksdb/NativeLibraryLoader.java index f6b8520f5..880e90acc 100644 --- a/java/org/rocksdb/NativeLibraryLoader.java +++ b/java/org/rocksdb/NativeLibraryLoader.java @@ -2,16 +2,16 @@ package org.rocksdb; import java.io.*; + +/** + * This class is used to load the RocksDB shared library from within the jar. + * The shared library is extracted to a temp folder and loaded from there. + */ public class NativeLibraryLoader { private static String sharedLibraryName = "librocksdbjni.so"; private static String tempFilePrefix = "librocksdbjni"; private static String tempFileSuffix = ".so"; - /** - * Private constructor - this class will never be instanced - */ - private NativeLibraryLoader() { - } public static void loadLibraryFromJar() throws IOException { @@ -23,7 +23,7 @@ public class NativeLibraryLoader { throw new RuntimeException("File " + temp.getAbsolutePath() + " does not exist."); } - byte[] buffer = new byte[1024]; + byte[] buffer = new byte[102400]; int readBytes; InputStream is = ClassLoader.getSystemClassLoader().getResourceAsStream(sharedLibraryName); @@ -31,8 +31,8 @@ public class NativeLibraryLoader { throw new RuntimeException(sharedLibraryName + " was not found inside JAR."); } - OutputStream os = new FileOutputStream(temp); try { + OutputStream os = new FileOutputStream(temp); while ((readBytes = is.read(buffer)) != -1) { os.write(buffer, 0, readBytes); } @@ -43,4 +43,9 @@ public class NativeLibraryLoader { System.load(temp.getAbsolutePath()); } + /** + * Private constructor to disallow instantiation + */ + private NativeLibraryLoader() { + } } diff --git a/java/org/rocksdb/RocksDB.java b/java/org/rocksdb/RocksDB.java index 132b9ac39..f45a608e2 100644 --- a/java/org/rocksdb/RocksDB.java +++ b/java/org/rocksdb/RocksDB.java @@ -26,7 +26,7 @@ public class RocksDB extends RocksObject { "snappy", "z", "bzip2", "lz4", "lz4hc"}; static { - loadLibrary(); + RocksDB.loadLibrary(); } From fd7d3fe604191d9e076c9a76485111a0109acadf Mon Sep 17 00:00:00 2001 From: Naveen Date: Mon, 22 Sep 2014 18:20:02 -0700 Subject: [PATCH 17/19] Addressing review comments (adding a env variable to override temp directory) --- java/org/rocksdb/NativeLibraryLoader.java | 19 +++++++++++++------ java/org/rocksdb/RocksDB.java | 7 +++---- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/java/org/rocksdb/NativeLibraryLoader.java b/java/org/rocksdb/NativeLibraryLoader.java index 880e90acc..ad4315dd4 100644 --- a/java/org/rocksdb/NativeLibraryLoader.java +++ b/java/org/rocksdb/NativeLibraryLoader.java @@ -8,15 +8,18 @@ import java.io.*; * The shared library is extracted to a temp folder and loaded from there. */ public class NativeLibraryLoader { - private static String sharedLibraryName = "librocksdbjni.so"; private static String tempFilePrefix = "librocksdbjni"; private static String tempFileSuffix = ".so"; - public static void loadLibraryFromJar() + public static void loadLibraryFromJar(String tmpDir) throws IOException { + File temp; + if(tmpDir == null || tmpDir.equals("")) + temp = File.createTempFile(tempFilePrefix, tempFileSuffix); + else + temp = new File(tmpDir+"/"+sharedLibraryName); - File temp = File.createTempFile(tempFilePrefix, tempFileSuffix); temp.deleteOnExit(); if (!temp.exists()) { @@ -31,14 +34,18 @@ public class NativeLibraryLoader { throw new RuntimeException(sharedLibraryName + " was not found inside JAR."); } + OutputStream os = null; try { - OutputStream os = new FileOutputStream(temp); + os = new FileOutputStream(temp); while ((readBytes = is.read(buffer)) != -1) { os.write(buffer, 0, readBytes); } } finally { - os.close(); - is.close(); + if(os != null) + os.close(); + + if(is != null) + is.close(); } System.load(temp.getAbsolutePath()); diff --git a/java/org/rocksdb/RocksDB.java b/java/org/rocksdb/RocksDB.java index f45a608e2..387e34282 100644 --- a/java/org/rocksdb/RocksDB.java +++ b/java/org/rocksdb/RocksDB.java @@ -20,21 +20,20 @@ import org.rocksdb.NativeLibraryLoader; * indicates sth wrong at the rocksdb library side and the call failed. */ public class RocksDB extends RocksObject { - public static final int NOT_FOUND = -1; private static final String[] compressionLibs_ = { "snappy", "z", "bzip2", "lz4", "lz4hc"}; static { - RocksDB.loadLibrary(); + RocksDB.loadLibrary(); } - /** * Loads the necessary library files. * Calling this method twice will have no effect. */ public static synchronized void loadLibrary() { + String tmpDir = System.getenv("ROCKSDB_SHAREDLIB_DIR"); // loading possibly necessary libraries. for (String lib : compressionLibs_) { try { @@ -45,7 +44,7 @@ public class RocksDB extends RocksObject { } try { - NativeLibraryLoader.loadLibraryFromJar(); + NativeLibraryLoader.loadLibraryFromJar(tmpDir); } catch (IOException e) { From 51eeaf65e26bab134d7ebef1a69bc7356c815d60 Mon Sep 17 00:00:00 2001 From: Naveen Date: Tue, 23 Sep 2014 10:31:55 -0700 Subject: [PATCH 18/19] Addressing review comments --- java/org/rocksdb/NativeLibraryLoader.java | 2 +- java/org/rocksdb/RocksDB.java | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/java/org/rocksdb/NativeLibraryLoader.java b/java/org/rocksdb/NativeLibraryLoader.java index ad4315dd4..440056582 100644 --- a/java/org/rocksdb/NativeLibraryLoader.java +++ b/java/org/rocksdb/NativeLibraryLoader.java @@ -18,7 +18,7 @@ public class NativeLibraryLoader { if(tmpDir == null || tmpDir.equals("")) temp = File.createTempFile(tempFilePrefix, tempFileSuffix); else - temp = new File(tmpDir+"/"+sharedLibraryName); + temp = new File(tmpDir + "/" + sharedLibraryName); temp.deleteOnExit(); diff --git a/java/org/rocksdb/RocksDB.java b/java/org/rocksdb/RocksDB.java index 387e34282..ec1cb8a28 100644 --- a/java/org/rocksdb/RocksDB.java +++ b/java/org/rocksdb/RocksDB.java @@ -31,6 +31,9 @@ public class RocksDB extends RocksObject { /** * Loads the necessary library files. * Calling this method twice will have no effect. + * By default the method extracts the shared library for loading at + * java.io.tmpdir, however, you can override this temporary location by + * setting the environment variable ROCKSDB_SHAREDLIB_DIR. */ public static synchronized void loadLibrary() { String tmpDir = System.getenv("ROCKSDB_SHAREDLIB_DIR"); From cdaf44f9aeebb38281fc742c76cc0602516a55d8 Mon Sep 17 00:00:00 2001 From: sdong Date: Tue, 23 Sep 2014 13:43:03 -0700 Subject: [PATCH 19/19] Enlarge log size cap when printing file summary Summary: Now the file summary is too small for printing. Enlarge it. To enable it, allow to pass a size to log buffer. Test Plan: Add a unit test. make all check Reviewers: ljin, yhchiang Reviewed By: yhchiang Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D21723 --- db/compaction_picker.cc | 2 +- db/version_set.h | 4 ++-- util/env_test.cc | 35 +++++++++++++++++++++++++++++++++++ util/log_buffer.cc | 21 ++++++++++++++++----- util/log_buffer.h | 9 +++++++-- 5 files changed, 61 insertions(+), 10 deletions(-) diff --git a/db/compaction_picker.cc b/db/compaction_picker.cc index 04d5c6f47..7cd965c20 100644 --- a/db/compaction_picker.cc +++ b/db/compaction_picker.cc @@ -575,7 +575,7 @@ Compaction* UniversalCompactionPicker::PickCompaction(Version* version, return nullptr; } Version::FileSummaryStorage tmp; - LogToBuffer(log_buffer, "[%s] Universal: candidate files(%zu): %s\n", + LogToBuffer(log_buffer, 3072, "[%s] Universal: candidate files(%zu): %s\n", version->cfd_->GetName().c_str(), version->files_[level].size(), version->LevelFileSummary(&tmp, 0)); diff --git a/db/version_set.h b/db/version_set.h index 353adbfec..211fca179 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -183,10 +183,10 @@ class Version { // Return a human-readable short (single-line) summary of the number // of files per level. Uses *scratch as backing store. struct LevelSummaryStorage { - char buffer[100]; + char buffer[1000]; }; struct FileSummaryStorage { - char buffer[1000]; + char buffer[3000]; }; const char* LevelSummary(LevelSummaryStorage* scratch) const; // Return a human-readable short (single-line) summary of files diff --git a/util/env_test.cc b/util/env_test.cc index 3e811a98d..1779f1aa0 100644 --- a/util/env_test.cc +++ b/util/env_test.cc @@ -768,6 +768,41 @@ TEST(EnvPosixTest, LogBufferTest) { ASSERT_EQ(10, test_logger.char_x_count); } +class TestLogger2 : public Logger { + public: + explicit TestLogger2(size_t max_log_size) : max_log_size_(max_log_size) {} + virtual void Logv(const char* format, va_list ap) override { + char new_format[2000]; + std::fill_n(new_format, sizeof(new_format), '2'); + { + va_list backup_ap; + va_copy(backup_ap, ap); + int n = vsnprintf(new_format, sizeof(new_format) - 1, format, backup_ap); + // 48 bytes for extra information + bytes allocated + ASSERT_TRUE( + n <= 48 + static_cast(max_log_size_ - sizeof(struct timeval))); + ASSERT_TRUE(n > static_cast(max_log_size_ - sizeof(struct timeval))); + va_end(backup_ap); + } + } + size_t max_log_size_; +}; + +TEST(EnvPosixTest, LogBufferMaxSizeTest) { + char bytes9000[9000]; + std::fill_n(bytes9000, sizeof(bytes9000), '1'); + bytes9000[sizeof(bytes9000) - 1] = '\0'; + + for (size_t max_log_size = 256; max_log_size <= 1024; + max_log_size += 1024 - 256) { + TestLogger2 test_logger(max_log_size); + test_logger.SetInfoLogLevel(InfoLogLevel::INFO_LEVEL); + LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, &test_logger); + LogToBuffer(&log_buffer, max_log_size, "%s", bytes9000); + log_buffer.FlushBufferToLog(); + } +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/util/log_buffer.cc b/util/log_buffer.cc index 726c01442..ddddaec9f 100644 --- a/util/log_buffer.cc +++ b/util/log_buffer.cc @@ -13,17 +13,17 @@ LogBuffer::LogBuffer(const InfoLogLevel log_level, Logger*info_log) : log_level_(log_level), info_log_(info_log) {} -void LogBuffer::AddLogToBuffer(const char* format, va_list ap) { +void LogBuffer::AddLogToBuffer(size_t max_log_size, const char* format, + va_list ap) { if (!info_log_ || log_level_ < info_log_->GetInfoLogLevel()) { // Skip the level because of its level. return; } - const size_t kLogSizeLimit = 512; - char* alloc_mem = arena_.AllocateAligned(kLogSizeLimit); + char* alloc_mem = arena_.AllocateAligned(max_log_size); BufferedLog* buffered_log = new (alloc_mem) BufferedLog(); char* p = buffered_log->message; - char* limit = alloc_mem + kLogSizeLimit - 1; + char* limit = alloc_mem + max_log_size - 1; // store the time gettimeofday(&(buffered_log->now_tv), nullptr); @@ -61,11 +61,22 @@ void LogBuffer::FlushBufferToLog() { logs_.clear(); } +void LogToBuffer(LogBuffer* log_buffer, size_t max_log_size, const char* format, + ...) { + if (log_buffer != nullptr) { + va_list ap; + va_start(ap, format); + log_buffer->AddLogToBuffer(max_log_size, format, ap); + va_end(ap); + } +} + void LogToBuffer(LogBuffer* log_buffer, const char* format, ...) { + const size_t kDefaultMaxLogSize = 512; if (log_buffer != nullptr) { va_list ap; va_start(ap, format); - log_buffer->AddLogToBuffer(format, ap); + log_buffer->AddLogToBuffer(kDefaultMaxLogSize, format, ap); va_end(ap); } } diff --git a/util/log_buffer.h b/util/log_buffer.h index 2a24bf854..2d790086e 100644 --- a/util/log_buffer.h +++ b/util/log_buffer.h @@ -21,8 +21,9 @@ class LogBuffer { // info_log: logger to write the logs to LogBuffer(const InfoLogLevel log_level, Logger* info_log); - // Add a log entry to the buffer. - void AddLogToBuffer(const char* format, va_list ap); + // Add a log entry to the buffer. Use default max_log_size. + // max_log_size indicates maximize log size, including some metadata. + void AddLogToBuffer(size_t max_log_size, const char* format, va_list ap); size_t IsEmpty() const { return logs_.empty(); } @@ -44,6 +45,10 @@ class LogBuffer { // Add log to the LogBuffer for a delayed info logging. It can be used when // we want to add some logs inside a mutex. +// max_log_size indicates maximize log size, including some metadata. +extern void LogToBuffer(LogBuffer* log_buffer, size_t max_log_size, + const char* format, ...); +// Same as previous function, but with default max log size. extern void LogToBuffer(LogBuffer* log_buffer, const char* format, ...); } // namespace rocksdb