diff --git a/.travis.yml b/.travis.yml index 804554ca5..b6fa63c5d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -34,7 +34,7 @@ before_script: # as EnvPosixTest::AllocateTest expects within the Travis OpenVZ environment. script: - if [[ "${TRAVIS_OS_NAME}" == 'linux' ]]; then OPT=-DTRAVIS CLANG_FORMAT_DIFF=/tmp/clang-format-diff.py make format || true; fi - - OPT=-DTRAVIS V=1 make -j4 check && OPT=-DTRAVIS V=1 make clean jclean rocksdbjava jtest + - OPT=-DTRAVIS V=1 make -j4 check && OPT=-DTRAVIS V=1 make clean jclean rocksdbjava jtest && make clean && OPT="-DTRAVIS -DROCKSDB_LITE" V=1 make -j4 check notifications: email: diff --git a/CMakeLists.txt b/CMakeLists.txt index 15eb1817a..98efe3892 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,7 +31,7 @@ string(REGEX REPLACE "(..)/(..)/..(..).*" "\\1/\\2/\\3" DATE ${DATE}) string(REGEX REPLACE "(..):(.....).*" " \\1:\\2" TIME ${TIME}) string(CONCAT GIT_DATE_TIME ${DATE} ${TIME}) -execute_process(COMMAND $ENV{COMSPEC} " /C git rev-parse HEAD 2>nil" OUTPUT_VARIABLE GIT_SHA) +execute_process(COMMAND $ENV{COMSPEC} " /C git -C ${CMAKE_CURRENT_SOURCE_DIR} rev-parse HEAD" OUTPUT_VARIABLE GIT_SHA) string(REGEX REPLACE "[^0-9a-f]+" "" GIT_SHA ${GIT_SHA}) set(BUILD_VERSION_CC ${CMAKE_CURRENT_SOURCE_DIR}/util/build_version.cc) @@ -131,6 +131,9 @@ set(SOURCES db/write_batch_base.cc db/write_controller.cc db/write_thread.cc + memtable/hash_cuckoo_rep.cc + memtable/hash_linklist_rep.cc + memtable/hash_skiplist_rep.cc port/stack_trace.cc port/win/env_win.cc port/win/port_win.cc @@ -164,6 +167,7 @@ set(SOURCES table/plain_table_reader.cc table/table_properties.cc table/two_level_iterator.cc + tools/sst_dump_tool.cc tools/dump/db_dump_tool.cc util/arena.cc util/auto_roll_logger.cc @@ -184,14 +188,11 @@ set(SOURCES util/file_reader_writer.cc util/filter_policy.cc util/hash.cc - util/hash_cuckoo_rep.cc - util/hash_linklist_rep.cc - util/hash_skiplist_rep.cc util/histogram.cc util/instrumented_mutex.cc util/iostats_context.cc - util/ldb_cmd.cc - util/ldb_tool.cc + tools/ldb_cmd.cc + tools/ldb_tool.cc util/logging.cc util/log_buffer.cc util/memenv.cc @@ -207,7 +208,6 @@ set(SOURCES util/rate_limiter.cc util/skiplistrep.cc util/slice.cc - util/sst_dump_tool.cc util/statistics.cc util/status.cc util/status_message.cc @@ -294,6 +294,7 @@ set(TESTS db/db_universal_compaction_test.cc db/db_wal_test.cc db/db_tailing_iter_test.cc + db/db_table_properties_test.cc db/dbformat_test.cc db/deletefile_test.cc db/fault_injection_test.cc @@ -302,6 +303,7 @@ set(TESTS db/flush_job_test.cc db/listener_test.cc db/log_test.cc + db/manual_compaction_test.cc db/memtable_list_test.cc db/merge_test.cc db/merge_helper_test.cc @@ -326,7 +328,9 @@ set(TESTS table/merger_test.cc table/table_test.cc tools/db_sanity_test.cc + tools/ldb_cmd_test.cc tools/reduce_levels_test.cc + tools/sst_dump_test.cc util/arena_test.cc util/autovector_test.cc util/auto_roll_logger_test.cc @@ -341,14 +345,11 @@ set(TESTS util/file_reader_writer_test.cc util/heap_test.cc util/histogram_test.cc - util/ldb_cmd_test.cc - util/manual_compaction_test.cc util/memenv_test.cc util/mock_env_test.cc util/options_test.cc util/rate_limiter_test.cc util/slice_transform_test.cc - util/sst_dump_test.cc util/thread_list_test.cc util/thread_local_test.cc utilities/backupable/backupable_db_test.cc diff --git a/HISTORY.md b/HISTORY.md index 7493da314..990f7c071 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -5,6 +5,7 @@ * CompactionFilter::Context includes information of Column Family ID * The need-compaction hint given by 
TablePropertiesCollector::NeedCompact() will be persistent and recoverable after DB recovery. This introduces a breaking format change. If you use this experimental feature, including NewCompactOnDeletionCollectorFactory() in the new version, you may not be able to directly downgrade the DB back to version 4.0 or lower. * TablePropertiesCollectorFactory::CreateTablePropertiesCollector() now takes an option Context, containing the information of column family ID for the file being written. +* Remove DefaultCompactionFilterFactory. ## 4.1.0 (10/8/2015) ### New Features diff --git a/INSTALL.md b/INSTALL.md index 72865e2f2..bff75155f 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -87,4 +87,5 @@ your make commands, like this: `PORTABLE=1 make static_lib` * Run: `TARGET_OS=IOS make static_lib`. When building the project which uses rocksdb iOS library, make sure to define two important pre-processing macros: `ROCKSDB_LITE` and `IOS_CROSS_COMPILE`. * **Windows**: + * For building with MS Visual Studio 13 you will need Update 4 installed. * Read and follow the instructions at CMakeLists.txt diff --git a/Makefile b/Makefile index d9ad6572e..fb14eab92 100644 --- a/Makefile +++ b/Makefile @@ -33,7 +33,9 @@ quoted_perl_command = $(subst ','\'',$(perl_command)) # with debug level 0. To compile with level 0, run `make shared_lib`, # `make install-shared`, `make static_lib`, `make install-static` or # `make install` -DEBUG_LEVEL=1 + +# Set the default DEBUG_LEVEL to 1 +DEBUG_LEVEL?=1 ifeq ($(MAKECMDGOALS),dbg) DEBUG_LEVEL=2 @@ -230,6 +232,7 @@ TESTS = \ db_tailing_iter_test \ db_universal_compaction_test \ db_wal_test \ + db_table_properties_test \ block_hash_index_test \ autovector_test \ column_family_test \ @@ -550,8 +553,10 @@ check: all echo "===== Running $$t"; ./$$t || exit 1; done; \ fi rm -rf $(TMPD) +ifeq ($(filter -DROCKSDB_LITE,$(OPT)),) python tools/ldb_test.py sh tools/rocksdb_dump_test.sh +endif check_some: $(SUBSET) ldb_tests for t in $(SUBSET); do echo "===== Running $$t"; ./$$t || exit 1; done @@ -563,12 +568,12 @@ ldb_tests: ldb crash_test: whitebox_crash_test blackbox_crash_test blackbox_crash_test: db_stress - python -u tools/db_crashtest.py -s - python -u tools/db_crashtest.py + python -u tools/db_crashtest.py --simple blackbox + python -u tools/db_crashtest.py blackbox whitebox_crash_test: db_stress - python -u tools/db_crashtest2.py -s - python -u tools/db_crashtest2.py + python -u tools/db_crashtest.py --simple whitebox + python -u tools/db_crashtest.py whitebox asan_check: $(MAKE) clean @@ -739,6 +744,9 @@ db_universal_compaction_test: db/db_universal_compaction_test.o db/db_test_util. db_wal_test: db/db_wal_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) +db_table_properties_test: db/db_table_properties_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) + $(AM_LINK) + log_write_bench: util/log_write_bench.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) $(pg) @@ -904,7 +912,7 @@ options_test: util/options_test.o $(LIBOBJECTS) $(TESTHARNESS) event_logger_test: util/event_logger_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) -sst_dump_test: util/sst_dump_test.o $(LIBOBJECTS) $(TESTHARNESS) +sst_dump_test: tools/sst_dump_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) memenv_test : util/memenv_test.o $(LIBOBJECTS) $(TESTHARNESS) @@ -916,7 +924,7 @@ optimistic_transaction_test: utilities/transactions/optimistic_transaction_test. 
mock_env_test : util/mock_env_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) -manual_compaction_test: util/manual_compaction_test.o $(LIBOBJECTS) $(TESTHARNESS) +manual_compaction_test: db/manual_compaction_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) filelock_test: util/filelock_test.o $(LIBOBJECTS) $(TESTHARNESS) @@ -940,7 +948,7 @@ transaction_test: utilities/transactions/transaction_test.o $(LIBOBJECTS) $(TEST sst_dump: tools/sst_dump.o $(LIBOBJECTS) $(AM_LINK) -ldb_cmd_test: util/ldb_cmd_test.o $(LIBOBJECTS) $(TESTHARNESS) +ldb_cmd_test: tools/ldb_cmd_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) ldb: tools/ldb.o $(LIBOBJECTS) @@ -1035,19 +1043,22 @@ liblz4.a: cd lz4-r127/lib && make CFLAGS='-fPIC' all cp lz4-r127/lib/liblz4.a . -# A version of each $(LIBOBJECTS) compiled with -fPIC -java_libobjects = $(patsubst %,jl/%,$(LIBOBJECTS)) -CLEAN_FILES += jl +# A version of each $(LIBOBJECTS) compiled with -fPIC and a fixed set of static compression libraries +java_static_libobjects = $(patsubst %,jls/%,$(LIBOBJECTS)) +CLEAN_FILES += jls -$(java_libobjects): jl/%.o: %.cc - $(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) -fPIC -c $< -o $@ $(COVERAGEFLAGS) +JAVA_STATIC_FLAGS = -DZLIB -DBZIP2 -DSNAPPY -DLZ4 +JAVA_STATIC_INCLUDES = -I./zlib-1.2.8 -I./bzip2-1.0.6 -I./snappy-1.1.1 -I./lz4-r127/lib -rocksdbjavastatic: $(java_libobjects) libz.a libbz2.a libsnappy.a liblz4.a +$(java_static_libobjects): jls/%.o: %.cc libz.a libbz2.a libsnappy.a liblz4.a + $(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) $(JAVA_STATIC_FLAGS) $(JAVA_STATIC_INCLUDES) -fPIC -c $< -o $@ $(COVERAGEFLAGS) + +rocksdbjavastatic: $(java_static_libobjects) cd java;$(MAKE) javalib; rm -f ./java/target/$(ROCKSDBJNILIB) $(CXX) $(CXXFLAGS) -I./java/. $(JAVA_INCLUDE) -shared -fPIC \ -o ./java/target/$(ROCKSDBJNILIB) $(JNI_NATIVE_SOURCES) \ - $(java_libobjects) $(COVERAGEFLAGS) \ + $(java_static_libobjects) $(COVERAGEFLAGS) \ libz.a libbz2.a libsnappy.a liblz4.a $(JAVA_STATIC_LDFLAGS) cd java/target;strip -S -x $(ROCKSDBJNILIB) cd java;jar -cf target/$(ROCKSDB_JAR) HISTORY*.md @@ -1059,7 +1070,7 @@ rocksdbjavastatic: $(java_libobjects) libz.a libbz2.a libsnappy.a liblz4.a rocksdbjavastaticrelease: rocksdbjavastatic cd java/crossbuild && vagrant destroy -f && vagrant up linux32 && vagrant halt linux32 && vagrant up linux64 && vagrant halt linux64 cd java;jar -cf target/$(ROCKSDB_JAR_ALL) HISTORY*.md - cd java;jar -uf target/$(ROCKSDB_JAR_ALL) librocksdbjni-*.so librocksdbjni-*.jnilib + cd java/target;jar -uf $(ROCKSDB_JAR_ALL) librocksdbjni-*.so librocksdbjni-*.jnilib cd java/target/classes;jar -uf ../$(ROCKSDB_JAR_ALL) org/rocksdb/*.class org/rocksdb/util/*.class rocksdbjavastaticpublish: rocksdbjavastaticrelease @@ -1070,6 +1081,13 @@ rocksdbjavastaticpublish: rocksdbjavastaticrelease mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/rocksjni.pom -Dfile=java/target/rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-osx.jar -Dclassifier=osx mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/rocksjni.pom -Dfile=java/target/rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH).jar +# A version of each $(LIBOBJECTS) compiled with -fPIC +java_libobjects = $(patsubst %,jl/%,$(LIBOBJECTS)) +CLEAN_FILES += jl + +$(java_libobjects): jl/%.o: %.cc + $(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) -fPIC -c $< -o $@ $(COVERAGEFLAGS) 
+ rocksdbjava: $(java_libobjects) $(AM_V_GEN)cd java;$(MAKE) javalib; $(AM_V_at)rm -f ./java/target/$(ROCKSDBJNILIB) diff --git a/build_tools/rocksdb-lego-determinator b/build_tools/rocksdb-lego-determinator index 7f7288172..3fed164fd 100755 --- a/build_tools/rocksdb-lego-determinator +++ b/build_tools/rocksdb-lego-determinator @@ -388,6 +388,8 @@ TSAN_CRASH_TEST_COMMANDS="[ } ]" +TSAN_CRASH_TEST_COMMANDS=$DISABLE_COMMANDS + # # RocksDB format compatible # diff --git a/db/builder.cc b/db/builder.cc index 243f6f38a..bd695f1dd 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -26,6 +26,7 @@ #include "rocksdb/options.h" #include "rocksdb/table.h" #include "table/block_based_table_builder.h" +#include "table/internal_iterator.h" #include "util/file_reader_writer.h" #include "util/iostats_context_imp.h" #include "util/stop_watch.h" @@ -52,8 +53,9 @@ TableBuilder* NewTableBuilder( Status BuildTable( const std::string& dbname, Env* env, const ImmutableCFOptions& ioptions, - const EnvOptions& env_options, TableCache* table_cache, Iterator* iter, - FileMetaData* meta, const InternalKeyComparator& internal_comparator, + const EnvOptions& env_options, TableCache* table_cache, + InternalIterator* iter, FileMetaData* meta, + const InternalKeyComparator& internal_comparator, const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>* int_tbl_prop_collector_factories, uint32_t column_family_id, std::vector<SequenceNumber> snapshots, @@ -74,7 +76,7 @@ Status BuildTable( unique_ptr<WritableFileWriter> file_writer; { unique_ptr<WritableFile> file; - s = env->NewWritableFile(fname, &file, env_options); + s = NewWritableFile(env, fname, &file, env_options); if (!s.ok()) { return s; } @@ -141,7 +143,7 @@ Status BuildTable( if (s.ok() && !empty) { // Verify that the table is usable - std::unique_ptr<Iterator> it(table_cache->NewIterator( + std::unique_ptr<InternalIterator> it(table_cache->NewIterator( ReadOptions(), env_options, internal_comparator, meta->fd, nullptr, (internal_stats == nullptr) ? nullptr : internal_stats->GetFileReadHist(0), diff --git a/db/builder.h b/db/builder.h index 797e9de60..cdafa4ab3 100644 --- a/db/builder.h +++ b/db/builder.h @@ -31,6 +31,7 @@ class VersionEdit; class TableBuilder; class WritableFileWriter; class InternalStats; +class InternalIterator; TableBuilder* NewTableBuilder( const ImmutableCFOptions& options, @@ -49,8 +50,9 @@ TableBuilder* NewTableBuilder( // zero, and no Table file will be produced.
extern Status BuildTable( const std::string& dbname, Env* env, const ImmutableCFOptions& options, - const EnvOptions& env_options, TableCache* table_cache, Iterator* iter, - FileMetaData* meta, const InternalKeyComparator& internal_comparator, + const EnvOptions& env_options, TableCache* table_cache, + InternalIterator* iter, FileMetaData* meta, + const InternalKeyComparator& internal_comparator, const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>* int_tbl_prop_collector_factories, uint32_t column_family_id, std::vector<SequenceNumber> snapshots, diff --git a/db/column_family.cc b/db/column_family.cc index 88bf0339b..f6233a729 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -27,9 +27,9 @@ #include "db/version_set.h" #include "db/write_controller.h" #include "db/writebuffer.h" +#include "memtable/hash_skiplist_rep.h" #include "util/autovector.h" #include "util/compression.h" -#include "util/hash_skiplist_rep.h" #include "util/options_helper.h" #include "util/thread_status_util.h" #include "util/xfunc.h" diff --git a/db/compaction_iterator.cc b/db/compaction_iterator.cc index d242291dd..278c1cd75 100644 --- a/db/compaction_iterator.cc +++ b/db/compaction_iterator.cc @@ -6,11 +6,12 @@ // of patent rights can be found in the PATENTS file in the same directory. #include "db/compaction_iterator.h" +#include "table/internal_iterator.h" namespace rocksdb { CompactionIterator::CompactionIterator( - Iterator* input, const Comparator* cmp, MergeHelper* merge_helper, + InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper, SequenceNumber last_sequence, std::vector<SequenceNumber>* snapshots, Env* env, bool expect_valid_internal_key, Compaction* compaction, const CompactionFilter* compaction_filter, LogBuffer* log_buffer) diff --git a/db/compaction_iterator.h b/db/compaction_iterator.h index da242f6aa..bd256439c 100644 --- a/db/compaction_iterator.h +++ b/db/compaction_iterator.h @@ -37,7 +37,7 @@ struct CompactionIteratorStats { class CompactionIterator { public: - CompactionIterator(Iterator* input, const Comparator* cmp, + CompactionIterator(InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper, SequenceNumber last_sequence, std::vector<SequenceNumber>* snapshots, Env* env, bool expect_valid_internal_key, @@ -84,7 +84,7 @@ class CompactionIterator { inline SequenceNumber findEarliestVisibleSnapshot( SequenceNumber in, SequenceNumber* prev_snapshot); - Iterator* input_; + InternalIterator* input_; const Comparator* cmp_; MergeHelper* merge_helper_; const std::vector<SequenceNumber>* snapshots_; diff --git a/db/compaction_job.cc b/db/compaction_job.cc index fd8acaafd..ea052b84f 100644 --- a/db/compaction_job.cc +++ b/db/compaction_job.cc @@ -585,7 +585,7 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options, void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) { assert(sub_compact != nullptr); - std::unique_ptr<Iterator> input( + std::unique_ptr<InternalIterator> input( versions_->MakeInputIterator(sub_compact->compaction)); AutoThreadOperationStageUpdater stage_updater( @@ -601,10 +601,10 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) { if (measure_io_stats_) { prev_perf_level = GetPerfLevel(); SetPerfLevel(PerfLevel::kEnableTime); - prev_write_nanos = iostats_context.write_nanos; - prev_fsync_nanos = iostats_context.fsync_nanos; - prev_range_sync_nanos = iostats_context.range_sync_nanos; - prev_prepare_write_nanos = iostats_context.prepare_write_nanos; + prev_write_nanos = IOSTATS(write_nanos); + prev_fsync_nanos = IOSTATS(fsync_nanos); + prev_range_sync_nanos =
IOSTATS(range_sync_nanos); + prev_prepare_write_nanos = IOSTATS(prepare_write_nanos); } ColumnFamilyData* cfd = sub_compact->compaction->column_family_data(); @@ -728,13 +728,13 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) { if (measure_io_stats_) { sub_compact->compaction_job_stats.file_write_nanos += - iostats_context.write_nanos - prev_write_nanos; + IOSTATS(write_nanos) - prev_write_nanos; sub_compact->compaction_job_stats.file_fsync_nanos += - iostats_context.fsync_nanos - prev_fsync_nanos; + IOSTATS(fsync_nanos) - prev_fsync_nanos; sub_compact->compaction_job_stats.file_range_sync_nanos += - iostats_context.range_sync_nanos - prev_range_sync_nanos; + IOSTATS(range_sync_nanos) - prev_range_sync_nanos; sub_compact->compaction_job_stats.file_prepare_write_nanos += - iostats_context.prepare_write_nanos - prev_prepare_write_nanos; + IOSTATS(prepare_write_nanos) - prev_prepare_write_nanos; if (prev_perf_level != PerfLevel::kEnableTime) { SetPerfLevel(prev_perf_level); } @@ -811,7 +811,7 @@ Status CompactionJob::FinishCompactionOutputFile( if (s.ok() && current_entries > 0) { // Verify that the table is usable ColumnFamilyData* cfd = sub_compact->compaction->column_family_data(); - Iterator* iter = cfd->table_cache()->NewIterator( + InternalIterator* iter = cfd->table_cache()->NewIterator( ReadOptions(), env_options_, cfd->internal_comparator(), meta->fd, nullptr, cfd->internal_stats()->GetFileReadHist( compact_->compaction->output_level()), @@ -911,7 +911,7 @@ Status CompactionJob::OpenCompactionOutputFile( unique_ptr writable_file; std::string fname = TableFileName(db_options_.db_paths, file_number, sub_compact->compaction->output_path_id()); - Status s = env_->NewWritableFile(fname, &writable_file, env_options_); + Status s = NewWritableFile(env_, fname, &writable_file, env_options_); if (!s.ok()) { Log(InfoLogLevel::ERROR_LEVEL, db_options_.info_log, "[%s] [JOB %d] OpenCompactionOutputFiles for table #%" PRIu64 diff --git a/db/compaction_job.h b/db/compaction_job.h index 1054fecc9..ab71519f4 100644 --- a/db/compaction_job.h +++ b/db/compaction_job.h @@ -35,9 +35,9 @@ #include "rocksdb/env.h" #include "rocksdb/memtablerep.h" #include "rocksdb/transaction_log.h" +#include "table/scoped_arena_iterator.h" #include "util/autovector.h" #include "util/event_logger.h" -#include "util/scoped_arena_iterator.h" #include "util/stop_watch.h" #include "util/thread_local.h" diff --git a/db/compaction_job_stats_test.cc b/db/compaction_job_stats_test.cc index 8641c8a84..d6a82e18d 100644 --- a/db/compaction_job_stats_test.cc +++ b/db/compaction_job_stats_test.cc @@ -27,6 +27,7 @@ #include "db/job_context.h" #include "db/version_set.h" #include "db/write_batch_internal.h" +#include "memtable/hash_linklist_rep.h" #include "port/stack_trace.h" #include "rocksdb/cache.h" #include "rocksdb/compaction_filter.h" @@ -47,14 +48,13 @@ #include "table/block_based_table_factory.h" #include "table/mock_table.h" #include "table/plain_table_factory.h" +#include "table/scoped_arena_iterator.h" #include "util/compression.h" #include "util/hash.h" -#include "util/hash_linklist_rep.h" #include "util/logging.h" #include "util/mock_env.h" #include "util/mutexlock.h" #include "util/rate_limiter.h" -#include "util/scoped_arena_iterator.h" #include "util/statistics.h" #include "util/string_util.h" #include "util/sync_point.h" diff --git a/db/compaction_job_test.cc b/db/compaction_job_test.cc index b1a8909ef..b05694017 100644 --- a/db/compaction_job_test.cc +++ b/db/compaction_job_test.cc @@ 
-3,6 +3,8 @@ // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. +#ifndef ROCKSDB_LITE + #include #include #include @@ -695,3 +697,14 @@ int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } + +#else +#include + +int main(int argc, char** argv) { + fprintf(stderr, + "SKIPPED as CompactionJobStats is not supported in ROCKSDB_LITE\n"); + return 0; +} + +#endif // ROCKSDB_LITE diff --git a/db/comparator_db_test.cc b/db/comparator_db_test.cc index cb944a76a..530c91060 100644 --- a/db/comparator_db_test.cc +++ b/db/comparator_db_test.cc @@ -7,10 +7,11 @@ #include #include +#include "memtable/stl_wrappers.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "util/hash.h" -#include "util/stl_wrappers.h" +#include "util/kv_map.h" #include "util/string_util.h" #include "util/testharness.h" #include "util/testutil.h" diff --git a/db/db_bench.cc b/db/db_bench.cc index 109229730..9e11b56e8 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -486,6 +486,7 @@ DEFINE_int32(deletepercent, 2, "Percentage of deletes out of reads/writes/" DEFINE_uint64(delete_obsolete_files_period_micros, 0, "Ignored. Left here for backward compatibility"); +#ifndef ROCKSDB_LITE DEFINE_bool(optimistic_transaction_db, false, "Open a OptimisticTransactionDB instance. " "Required for randomtransaction benchmark."); @@ -509,6 +510,7 @@ DEFINE_int32(transaction_sleep, 0, DEFINE_uint64(transaction_lock_timeout, 100, "If using a transaction_db, specifies the lock wait timeout in" " milliseconds before failing a transaction waiting on a lock"); +#endif // ROCKSDB_LITE DEFINE_bool(compaction_measure_io_stats, false, "Measure times spents on I/Os while in compactions. "); @@ -645,7 +647,9 @@ DEFINE_int32(max_grandparent_overlap_factor, 10, "Control maximum bytes of " "overlaps in grandparent (i.e., level+2) before we stop building a" " single file in a level->level+1 compaction."); +#ifndef ROCKSDB_LITE DEFINE_bool(readonly, false, "Run read only benchmarks."); +#endif // ROCKSDB_LITE DEFINE_bool(disable_auto_compactions, false, "Do not auto trigger compactions"); @@ -983,7 +987,9 @@ static void AppendWithSpace(std::string* str, Slice msg) { struct DBWithColumnFamilies { std::vector cfh; DB* db; +#ifndef ROCKSDB_LITE OptimisticTransactionDB* opt_txn_db; +#endif // ROCKSDB_LITE std::atomic num_created; // Need to be updated after all the // new entries in cfh are set. size_t num_hot; // Number of column families to be queried at each moment. @@ -991,7 +997,12 @@ struct DBWithColumnFamilies { // Column families will be created and used to be queried. 
port::Mutex create_cf_mutex; // Only one thread can execute CreateNewCf() - DBWithColumnFamilies() : db(nullptr), opt_txn_db(nullptr) { + DBWithColumnFamilies() + : db(nullptr) +#ifndef ROCKSDB_LITE + , opt_txn_db(nullptr) +#endif // ROCKSDB_LITE + { cfh.clear(); num_created = 0; num_hot = 0; @@ -1000,7 +1011,9 @@ struct DBWithColumnFamilies { DBWithColumnFamilies(const DBWithColumnFamilies& other) : cfh(other.cfh), db(other.db), +#ifndef ROCKSDB_LITE opt_txn_db(other.opt_txn_db), +#endif // ROCKSDB_LITE num_created(other.num_created.load()), num_hot(other.num_hot) {} @@ -1008,13 +1021,18 @@ struct DBWithColumnFamilies { std::for_each(cfh.begin(), cfh.end(), [](ColumnFamilyHandle* cfhi) { delete cfhi; }); cfh.clear(); +#ifndef ROCKSDB_LITE if (opt_txn_db) { delete opt_txn_db; opt_txn_db = nullptr; } else { delete db; + db = nullptr; } +#else + delete db; db = nullptr; +#endif // ROCKSDB_LITE } ColumnFamilyHandle* GetCfh(int64_t rand_num) { @@ -1940,9 +1958,11 @@ class Benchmark { method = &Benchmark::Compress; } else if (name == "uncompress") { method = &Benchmark::Uncompress; +#ifndef ROCKSDB_LITE } else if (name == "randomtransaction") { method = &Benchmark::RandomTransaction; post_process_method = &Benchmark::RandomTransactionVerify; +#endif // ROCKSDB_LITE } else if (name == "randomreplacekeys") { fresh_db = true; method = &Benchmark::RandomReplaceKeys; @@ -2518,10 +2538,12 @@ class Benchmark { NewGenericRateLimiter(FLAGS_rate_limiter_bytes_per_sec)); } +#ifndef ROCKSDB_LITE if (FLAGS_readonly && FLAGS_transaction_db) { fprintf(stderr, "Cannot use readonly flag with transaction_db\n"); exit(1); } +#endif // ROCKSDB_LITE if (FLAGS_num_multi_db <= 1) { OpenDb(options, FLAGS_db, &db_); @@ -2554,6 +2576,7 @@ class Benchmark { column_families.push_back(ColumnFamilyDescriptor( ColumnFamilyName(i), ColumnFamilyOptions(options))); } +#ifndef ROCKSDB_LITE if (FLAGS_readonly) { s = DB::OpenForReadOnly(options, db_name, column_families, &db->cfh, &db->db); @@ -2574,9 +2597,13 @@ class Benchmark { } else { s = DB::Open(options, db_name, column_families, &db->cfh, &db->db); } +#else + s = DB::Open(options, db_name, column_families, &db->cfh, &db->db); +#endif // ROCKSDB_LITE db->cfh.resize(FLAGS_num_column_families); db->num_created = num_hot; db->num_hot = num_hot; +#ifndef ROCKSDB_LITE } else if (FLAGS_readonly) { s = DB::OpenForReadOnly(options, db_name, &db->db); } else if (FLAGS_optimistic_transaction_db) { @@ -2591,7 +2618,7 @@ class Benchmark { if (s.ok()) { db->db = ptr; } - +#endif // ROCKSDB_LITE } else { s = DB::Open(options, db_name, &db->db); } @@ -3636,6 +3663,7 @@ class Benchmark { } } +#ifndef ROCKSDB_LITE // This benchmark stress tests Transactions. For a given --duration (or // total number of --writes, a Transaction will perform a read-modify-write // to increment the value of a key in each of N(--transaction-sets) sets of @@ -3868,6 +3896,7 @@ class Benchmark { fprintf(stdout, "RandomTransactionVerify Success!\n"); } +#endif // ROCKSDB_LITE // Writes and deletes random keys without overwriting keys. 
// diff --git a/db/db_compaction_filter_test.cc b/db/db_compaction_filter_test.cc index a48c86516..7535ea53a 100644 --- a/db/db_compaction_filter_test.cc +++ b/db/db_compaction_filter_test.cc @@ -186,6 +186,7 @@ class ChangeFilterFactory : public CompactionFilterFactory { virtual const char* Name() const override { return "ChangeFilterFactory"; } }; +#ifndef ROCKSDB_LITE TEST_F(DBTestCompactionFilter, CompactionFilter) { Options options = CurrentOptions(); options.max_open_files = -1; @@ -228,7 +229,7 @@ TEST_F(DBTestCompactionFilter, CompactionFilter) { Arena arena; { ScopedArenaIterator iter( - dbfull()->TEST_NewInternalIterator(&arena, handles_[1])); + dbfull()->NewInternalIterator(&arena, handles_[1])); iter->SeekToFirst(); ASSERT_OK(iter->status()); while (iter->Valid()) { @@ -316,7 +317,7 @@ TEST_F(DBTestCompactionFilter, CompactionFilter) { count = 0; { ScopedArenaIterator iter( - dbfull()->TEST_NewInternalIterator(&arena, handles_[1])); + dbfull()->NewInternalIterator(&arena, handles_[1])); iter->SeekToFirst(); ASSERT_OK(iter->status()); while (iter->Valid()) { @@ -362,6 +363,7 @@ TEST_F(DBTestCompactionFilter, CompactionFilterDeletesAll) { delete itr; } +#endif // ROCKSDB_LITE TEST_F(DBTestCompactionFilter, CompactionFilterWithValueChange) { do { @@ -493,6 +495,7 @@ TEST_F(DBTestCompactionFilter, CompactionFilterWithMergeOperator) { ASSERT_EQ(newvalue, four); } +#ifndef ROCKSDB_LITE TEST_F(DBTestCompactionFilter, CompactionFilterContextManual) { KeepFilterFactory* filter = new KeepFilterFactory(true, true); @@ -533,7 +536,7 @@ TEST_F(DBTestCompactionFilter, CompactionFilterContextManual) { int count = 0; int total = 0; Arena arena; - ScopedArenaIterator iter(dbfull()->TEST_NewInternalIterator(&arena)); + ScopedArenaIterator iter(dbfull()->NewInternalIterator(&arena)); iter->SeekToFirst(); ASSERT_OK(iter->status()); while (iter->Valid()) { @@ -550,6 +553,7 @@ TEST_F(DBTestCompactionFilter, CompactionFilterContextManual) { ASSERT_EQ(count, 1); } } +#endif // ROCKSDB_LITE TEST_F(DBTestCompactionFilter, CompactionFilterContextCfId) { KeepFilterFactory* filter = new KeepFilterFactory(false, true); @@ -580,6 +584,7 @@ TEST_F(DBTestCompactionFilter, CompactionFilterContextCfId) { ASSERT_TRUE(filter->compaction_filter_created()); } +#ifndef ROCKSDB_LITE // Compaction filters should only be applied to records that are newer than the // latest snapshot. This test inserts records and applies a delete filter. TEST_F(DBTestCompactionFilter, CompactionFilterSnapshot) { @@ -615,6 +620,7 @@ TEST_F(DBTestCompactionFilter, CompactionFilterSnapshot) { ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_EQ(0U, CountLiveFiles()); } +#endif // ROCKSDB_LITE } // namespace rocksdb diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 8f8633e83..615adbdc0 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -14,8 +14,7 @@ namespace rocksdb { // SYNC_POINT is not supported in released Windows mode. 
-#if !(defined NDEBUG) || !defined(OS_WIN) - +#if (!(defined NDEBUG) || !defined(OS_WIN)) && !defined(ROCKSDB_LITE) class DBCompactionTest : public DBTestBase { public: @@ -1844,11 +1843,11 @@ TEST_P(DBCompactionTestWithParam, ForceBottommostLevelCompaction) { INSTANTIATE_TEST_CASE_P(DBCompactionTestWithParam, DBCompactionTestWithParam, ::testing::Values(1, 4)); -#endif // !(defined NDEBUG) || !defined(OS_WIN) +#endif // (!(defined NDEBUG) || !defined(OS_WIN)) && !defined(ROCKSDB_LITE) } // namespace rocksdb int main(int argc, char** argv) { -#if !(defined NDEBUG) || !defined(OS_WIN) +#if (!(defined NDEBUG) || !defined(OS_WIN)) && !defined(ROCKSDB_LITE) rocksdb::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff --git a/db/db_dynamic_level_test.cc b/db/db_dynamic_level_test.cc index 0a37c7002..03e632a77 100644 --- a/db/db_dynamic_level_test.cc +++ b/db/db_dynamic_level_test.cc @@ -10,7 +10,7 @@ // Introduction of SyncPoint effectively disabled building and running this test // in Release build. // which is a pity, it is a good test -#if !(defined NDEBUG) || !defined(OS_WIN) +#if (!(defined NDEBUG) || !defined(OS_WIN)) && !defined(ROCKSDB_LITE) #include "db/db_test_util.h" #include "port/stack_trace.h" @@ -484,10 +484,10 @@ TEST_F(DBTestDynamicLevel, MigrateToDynamicLevelMaxBytesBase) { } } // namespace rocksdb -#endif // !(defined NDEBUG) || !defined(OS_WIN) +#endif // (!(defined NDEBUG) || !defined(OS_WIN)) && !defined(ROCKSDB_LITE) int main(int argc, char** argv) { -#if !(defined NDEBUG) || !defined(OS_WIN) +#if (!(defined NDEBUG) || !defined(OS_WIN)) && !defined(ROCKSDB_LITE) rocksdb::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff --git a/db/db_impl.cc b/db/db_impl.cc index 2a3c5a5db..cfb2e7310 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -50,6 +50,8 @@ #include "db/write_batch_internal.h" #include "db/write_callback.h" #include "db/writebuffer.h" +#include "memtable/hash_linklist_rep.h" +#include "memtable/hash_skiplist_rep.h" #include "port/likely.h" #include "port/port.h" #include "rocksdb/cache.h" @@ -78,8 +80,6 @@ #include "util/db_info_dumper.h" #include "util/file_reader_writer.h" #include "util/file_util.h" -#include "util/hash_linklist_rep.h" -#include "util/hash_skiplist_rep.h" #include "util/iostats_context_imp.h" #include "util/log_buffer.h" #include "util/logging.h" @@ -246,8 +246,10 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname) unscheduled_flushes_(0), unscheduled_compactions_(0), bg_compaction_scheduled_(0), + num_running_compactions_(0), bg_manual_only_(0), bg_flush_scheduled_(0), + num_running_flushes_(0), manual_compaction_(nullptr), disable_delete_obsolete_files_(0), delete_obsolete_files_next_run_( @@ -408,7 +410,7 @@ Status DBImpl::NewDB() { { unique_ptr file; EnvOptions env_options = env_->OptimizeForManifestWrite(env_options_); - s = env_->NewWritableFile(manifest, &file, env_options); + s = NewWritableFile(env_, manifest, &file, env_options); if (!s.ok()) { return s; } @@ -2236,6 +2238,23 @@ Status DBImpl::RunManualCompaction(ColumnFamilyData* cfd, int input_level, return manual.status; } +InternalIterator* DBImpl::NewInternalIterator( + Arena* arena, ColumnFamilyHandle* column_family) { + ColumnFamilyData* cfd; + if (column_family == nullptr) { + cfd = default_cf_handle_->cfd(); + } else { + auto cfh = reinterpret_cast(column_family); + cfd = cfh->cfd(); + } + + mutex_.Lock(); + SuperVersion* super_version = 
cfd->GetSuperVersion()->Ref(); + mutex_.Unlock(); + ReadOptions roptions; + return NewInternalIterator(roptions, cfd, super_version, arena); +} + Status DBImpl::FlushMemTable(ColumnFamilyData* cfd, const FlushOptions& flush_options) { Status s; @@ -2455,6 +2474,7 @@ void DBImpl::BackgroundCallFlush() { LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, db_options_.info_log.get()); { InstrumentedMutexLock l(&mutex_); + num_running_flushes_++; auto pending_outputs_inserted_elem = CaptureCurrentFileNumberInPendingOutputs(); @@ -2500,6 +2520,8 @@ void DBImpl::BackgroundCallFlush() { mutex_.Lock(); } + assert(num_running_flushes_ > 0); + num_running_flushes_--; bg_flush_scheduled_--; // See if there's more work to be done MaybeScheduleFlushOrCompaction(); @@ -2520,6 +2542,7 @@ void DBImpl::BackgroundCallCompaction() { LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, db_options_.info_log.get()); { InstrumentedMutexLock l(&mutex_); + num_running_compactions_++; auto pending_outputs_inserted_elem = CaptureCurrentFileNumberInPendingOutputs(); @@ -2568,6 +2591,8 @@ void DBImpl::BackgroundCallCompaction() { mutex_.Lock(); } + assert(num_running_compactions_ > 0); + num_running_compactions_--; bg_compaction_scheduled_--; versions_->GetColumnFamilySet()->FreeDeadColumnFamilies(); @@ -2913,11 +2938,11 @@ static void CleanupIteratorState(void* arg1, void* arg2) { } } // namespace -Iterator* DBImpl::NewInternalIterator(const ReadOptions& read_options, - ColumnFamilyData* cfd, - SuperVersion* super_version, - Arena* arena) { - Iterator* internal_iter; +InternalIterator* DBImpl::NewInternalIterator(const ReadOptions& read_options, + ColumnFamilyData* cfd, + SuperVersion* super_version, + Arena* arena) { + InternalIterator* internal_iter; assert(arena != nullptr); // Need to create internal iterator from the arena. 
MergeIteratorBuilder merge_iter_builder(&cfd->internal_comparator(), arena); @@ -3216,7 +3241,8 @@ Status DBImpl::AddFile(ColumnFamilyHandle* column_family, file_info.num_entries = table_reader->GetTableProperties()->num_entries; ParsedInternalKey key; - std::unique_ptr iter(table_reader->NewIterator(ReadOptions())); + std::unique_ptr iter( + table_reader->NewIterator(ReadOptions())); // Get first (smallest) key from file iter->SeekToFirst(); @@ -3616,7 +3642,7 @@ Iterator* DBImpl::NewIterator(const ReadOptions& read_options, snapshot, sv->mutable_cf_options.max_sequential_skip_in_iterations, read_options.iterate_upper_bound); - Iterator* internal_iter = + InternalIterator* internal_iter = NewInternalIterator(read_options, cfd, sv, db_iter->GetArena()); db_iter->SetIterUnderDBIter(internal_iter); @@ -3683,8 +3709,8 @@ Status DBImpl::NewIterators( ArenaWrappedDBIter* db_iter = NewArenaWrappedDbIterator( env_, *cfd->ioptions(), cfd->user_comparator(), snapshot, sv->mutable_cf_options.max_sequential_skip_in_iterations); - Iterator* internal_iter = NewInternalIterator( - read_options, cfd, sv, db_iter->GetArena()); + InternalIterator* internal_iter = + NewInternalIterator(read_options, cfd, sv, db_iter->GetArena()); db_iter->SetIterUnderDBIter(internal_iter); iterators->push_back(db_iter); } @@ -4124,9 +4150,9 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) { if (creating_new_log) { EnvOptions opt_env_opt = env_->OptimizeForLogWrite(env_options_, db_options_); - s = env_->NewWritableFile( - LogFileName(db_options_.wal_dir, new_log_number), &lfile, - opt_env_opt); + s = NewWritableFile(env_, + LogFileName(db_options_.wal_dir, new_log_number), + &lfile, opt_env_opt); if (s.ok()) { // Our final size should be less than write_buffer_size // (compression, etc) but err on the side of caution. 
@@ -4203,6 +4229,29 @@ Status DBImpl::GetPropertiesOfAllTables(ColumnFamilyHandle* column_family, return s; } + +Status DBImpl::GetPropertiesOfTablesInRange(ColumnFamilyHandle* column_family, + const Range* range, std::size_t n, + TablePropertiesCollection* props) { + auto cfh = reinterpret_cast(column_family); + auto cfd = cfh->cfd(); + + // Increment the ref count + mutex_.Lock(); + auto version = cfd->current(); + version->Ref(); + mutex_.Unlock(); + + auto s = version->GetPropertiesOfTablesInRange(range, n, props); + + // Decrement the ref count + mutex_.Lock(); + version->Unref(); + mutex_.Unlock(); + + return s; +} + #endif // ROCKSDB_LITE const std::string& DBImpl::GetName() const { @@ -4742,9 +4791,9 @@ Status DB::Open(const DBOptions& db_options, const std::string& dbname, EnvOptions soptions(db_options); EnvOptions opt_env_options = impl->db_options_.env->OptimizeForLogWrite(soptions, impl->db_options_); - s = impl->db_options_.env->NewWritableFile( - LogFileName(impl->db_options_.wal_dir, new_log_number), &lfile, - opt_env_options); + s = NewWritableFile(impl->db_options_.env, + LogFileName(impl->db_options_.wal_dir, new_log_number), + &lfile, opt_env_options); if (s.ok()) { lfile->SetPreallocationBlockSize(1.1 * max_write_buffer_size); impl->logfile_number_ = new_log_number; diff --git a/db/db_impl.h b/db/db_impl.h index 35558773e..4a58f9318 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -36,11 +36,11 @@ #include "rocksdb/env.h" #include "rocksdb/memtablerep.h" #include "rocksdb/transaction_log.h" +#include "table/scoped_arena_iterator.h" #include "util/autovector.h" #include "util/event_logger.h" #include "util/hash.h" #include "util/instrumented_mutex.h" -#include "util/scoped_arena_iterator.h" #include "util/stop_watch.h" #include "util/thread_local.h" @@ -248,7 +248,13 @@ class DBImpl : public DB { const Slice* begin, const Slice* end, bool disallow_trivial_move = false); -#ifndef ROCKSDB_LITE + // Return an internal iterator over the current state of the database. + // The keys of this iterator are internal keys (see format.h). + // The returned iterator should be deleted when no longer needed. + InternalIterator* NewInternalIterator( + Arena* arena, ColumnFamilyHandle* column_family = nullptr); + +#ifndef NDEBUG // Extra methods (for testing) that are not in the public DB interface // Implemented in db_impl_debug.cc @@ -266,12 +272,6 @@ class DBImpl : public DB { // Wait for any compaction Status TEST_WaitForCompact(); - // Return an internal iterator over the current state of the database. - // The keys of this iterator are internal keys (see format.h). - // The returned iterator should be deleted when no longer needed. - Iterator* TEST_NewInternalIterator( - Arena* arena, ColumnFamilyHandle* column_family = nullptr); - // Return the maximum overlapping data (in bytes) at next level for any // file at a level >= 1. int64_t TEST_MaxNextLevelOverlappingBytes(ColumnFamilyHandle* column_family = @@ -305,7 +305,7 @@ class DBImpl : public DB { uint64_t TEST_LogfileNumber(); -#endif // ROCKSDB_LITE +#endif // NDEBUG // Returns the list of live files in 'live' and the list // of all files in the filesystem in 'candidate_files'. @@ -363,6 +363,20 @@ class DBImpl : public DB { // Same as above, should called without mutex held and not on write thread. ColumnFamilyHandle* GetColumnFamilyHandleUnlocked(uint32_t column_family_id); + // Returns the number of currently running flushes. + // REQUIREMENT: mutex_ must be held when calling this function. 
+ int num_running_flushes() { + mutex_.AssertHeld(); + return num_running_flushes_; + } + + // Returns the number of currently running compactions. + // REQUIREMENT: mutex_ must be held when calling this function. + int num_running_compactions() { + mutex_.AssertHeld(); + return num_running_compactions_; + } + protected: Env* const env_; const std::string dbname_; @@ -370,8 +384,10 @@ class DBImpl : public DB { const DBOptions db_options_; Statistics* stats_; - Iterator* NewInternalIterator(const ReadOptions&, ColumnFamilyData* cfd, - SuperVersion* super_version, Arena* arena); + InternalIterator* NewInternalIterator(const ReadOptions&, + ColumnFamilyData* cfd, + SuperVersion* super_version, + Arena* arena); void NotifyOnFlushCompleted(ColumnFamilyData* cfd, FileMetaData* file_meta, const MutableCFOptions& mutable_cf_options, @@ -685,6 +701,9 @@ class DBImpl : public DB { // count how many background compactions are running or have been scheduled int bg_compaction_scheduled_; + // stores the number of compactions are currently running + int num_running_compactions_; + // If non-zero, MaybeScheduleFlushOrCompaction() will only schedule manual // compactions (if manual_compaction_ is not null). This mechanism enables // manual compactions to wait until all other compactions are finished. @@ -693,6 +712,9 @@ class DBImpl : public DB { // number of background memtable flush jobs, submitted to the HIGH pool int bg_flush_scheduled_; + // stores the number of flushes are currently running + int num_running_flushes_; + // Information for a manual compaction struct ManualCompaction { ColumnFamilyData* cfd; @@ -788,6 +810,10 @@ class DBImpl : public DB { virtual Status GetPropertiesOfAllTables(ColumnFamilyHandle* column_family, TablePropertiesCollection* props) override; + virtual Status GetPropertiesOfTablesInRange( + ColumnFamilyHandle* column_family, const Range* range, std::size_t n, + TablePropertiesCollection* props) override; + #endif // ROCKSDB_LITE // Function that Get and KeyMayExist call with no_io true or false diff --git a/db/db_impl_debug.cc b/db/db_impl_debug.cc index dc40fefc6..73dca560d 100644 --- a/db/db_impl_debug.cc +++ b/db/db_impl_debug.cc @@ -7,7 +7,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
-#ifndef ROCKSDB_LITE +#ifndef NDEBUG #include "db/db_impl.h" #include "util/thread_status_updater.h" @@ -19,23 +19,6 @@ uint64_t DBImpl::TEST_GetLevel0TotalSize() { return default_cf_handle_->cfd()->current()->storage_info()->NumLevelBytes(0); } -Iterator* DBImpl::TEST_NewInternalIterator(Arena* arena, - ColumnFamilyHandle* column_family) { - ColumnFamilyData* cfd; - if (column_family == nullptr) { - cfd = default_cf_handle_->cfd(); - } else { - auto cfh = reinterpret_cast(column_family); - cfd = cfh->cfd(); - } - - mutex_.Lock(); - SuperVersion* super_version = cfd->GetSuperVersion()->Ref(); - mutex_.Unlock(); - ReadOptions roptions; - return NewInternalIterator(roptions, cfd, super_version, arena); -} - int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes( ColumnFamilyHandle* column_family) { ColumnFamilyData* cfd; @@ -153,4 +136,4 @@ uint64_t DBImpl::TEST_LogfileNumber() { } } // namespace rocksdb -#endif // ROCKSDB_LITE +#endif // NDEBUG diff --git a/db/db_iter.cc b/db/db_iter.cc index 065b8e4fc..c34341da9 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -15,11 +15,12 @@ #include "db/filename.h" #include "db/dbformat.h" +#include "port/port.h" #include "rocksdb/env.h" #include "rocksdb/options.h" #include "rocksdb/iterator.h" #include "rocksdb/merge_operator.h" -#include "port/port.h" +#include "table/internal_iterator.h" #include "util/arena.h" #include "util/logging.h" #include "util/mutexlock.h" @@ -58,9 +59,9 @@ class DBIter: public Iterator { kReverse }; - DBIter(Env* env, const ImmutableCFOptions& ioptions, - const Comparator* cmp, Iterator* iter, SequenceNumber s, - bool arena_mode, uint64_t max_sequential_skip_in_iterations, + DBIter(Env* env, const ImmutableCFOptions& ioptions, const Comparator* cmp, + InternalIterator* iter, SequenceNumber s, bool arena_mode, + uint64_t max_sequential_skip_in_iterations, const Slice* iterate_upper_bound = nullptr) : arena_mode_(arena_mode), env_(env), @@ -83,10 +84,10 @@ class DBIter: public Iterator { if (!arena_mode_) { delete iter_; } else { - iter_->~Iterator(); + iter_->~InternalIterator(); } } - virtual void SetIter(Iterator* iter) { + virtual void SetIter(InternalIterator* iter) { assert(iter_ == nullptr); iter_ = iter; } @@ -142,7 +143,7 @@ class DBIter: public Iterator { Logger* logger_; const Comparator* const user_comparator_; const MergeOperator* const user_merge_operator_; - Iterator* iter_; + InternalIterator* iter_; SequenceNumber const sequence_; Status status_; @@ -744,7 +745,7 @@ void DBIter::SeekToLast() { Iterator* NewDBIterator(Env* env, const ImmutableCFOptions& ioptions, const Comparator* user_key_comparator, - Iterator* internal_iter, + InternalIterator* internal_iter, const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations, const Slice* iterate_upper_bound) { @@ -757,7 +758,7 @@ ArenaWrappedDBIter::~ArenaWrappedDBIter() { db_iter_->~DBIter(); } void ArenaWrappedDBIter::SetDBIter(DBIter* iter) { db_iter_ = iter; } -void ArenaWrappedDBIter::SetIterUnderDBIter(Iterator* iter) { +void ArenaWrappedDBIter::SetIterUnderDBIter(InternalIterator* iter) { static_cast(db_iter_)->SetIter(iter); } diff --git a/db/db_iter.h b/db/db_iter.h index c676d6cda..97a0b6ff7 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -10,6 +10,7 @@ #pragma once #include #include "rocksdb/db.h" +#include "rocksdb/iterator.h" #include "db/dbformat.h" #include "util/arena.h" #include "util/autovector.h" @@ -18,18 +19,17 @@ namespace rocksdb { class Arena; class DBIter; +class InternalIterator; // Return a new iterator that converts 
internal keys (yielded by // "*internal_iter") that were live at the specified "sequence" number // into appropriate user keys. -extern Iterator* NewDBIterator( - Env* env, - const ImmutableCFOptions& options, - const Comparator *user_key_comparator, - Iterator* internal_iter, - const SequenceNumber& sequence, - uint64_t max_sequential_skip_in_iterations, - const Slice* iterate_upper_bound = nullptr); +extern Iterator* NewDBIterator(Env* env, const ImmutableCFOptions& options, + const Comparator* user_key_comparator, + InternalIterator* internal_iter, + const SequenceNumber& sequence, + uint64_t max_sequential_skip_in_iterations, + const Slice* iterate_upper_bound = nullptr); // A wrapper iterator which wraps DB Iterator and the arena, with which the DB // iterator is supposed be allocated. This class is used as an entry point of @@ -50,7 +50,7 @@ class ArenaWrappedDBIter : public Iterator { // Set the internal iterator wrapped inside the DB Iterator. Usually it is // a merging iterator. - virtual void SetIterUnderDBIter(Iterator* iter); + virtual void SetIterUnderDBIter(InternalIterator* iter); virtual bool Valid() const override; virtual void SeekToFirst() override; virtual void SeekToLast() override; @@ -60,6 +60,7 @@ class ArenaWrappedDBIter : public Iterator { virtual Slice key() const override; virtual Slice value() const override; virtual Status status() const override; + void RegisterCleanup(CleanupFunction function, void* arg1, void* arg2); private: diff --git a/db/db_iter_test.cc b/db/db_iter_test.cc index 68c5b158d..ed5c28bae 100644 --- a/db/db_iter_test.cc +++ b/db/db_iter_test.cc @@ -29,7 +29,7 @@ static uint64_t TestGetTickerCount(const Options& options, return options.statistics->getTickerCount(ticker_type); } -class TestIterator : public Iterator { +class TestIterator : public InternalIterator { public: explicit TestIterator(const Comparator* comparator) : initialized_(false), @@ -1864,11 +1864,12 @@ class DBIterWithMergeIterTest : public testing::Test { internal_iter2_->Add("d", kTypeValue, "7", 3u); internal_iter2_->Finish(); - std::vector child_iters; + std::vector child_iters; child_iters.push_back(internal_iter1_); child_iters.push_back(internal_iter2_); InternalKeyComparator icomp(BytewiseComparator()); - Iterator* merge_iter = NewMergingIterator(&icomp_, &child_iters[0], 2u); + InternalIterator* merge_iter = + NewMergingIterator(&icomp_, &child_iters[0], 2u); db_iter_.reset(NewDBIterator(env_, ImmutableCFOptions(options_), BytewiseComparator(), merge_iter, diff --git a/db/db_log_iter_test.cc b/db/db_log_iter_test.cc index e42dbcb44..cb5ccdc26 100644 --- a/db/db_log_iter_test.cc +++ b/db/db_log_iter_test.cc @@ -10,7 +10,7 @@ // Introduction of SyncPoint effectively disabled building and running this test // in Release build. 
// which is a pity, it is a good test -#if !(defined NDEBUG) || !defined(OS_WIN) +#if (!(defined NDEBUG) || !defined(OS_WIN)) && !defined(ROCKSDB_LITE) #include "db/db_test_util.h" #include "port/stack_trace.h" @@ -277,10 +277,10 @@ TEST_F(DBTestXactLogIterator, TransactionLogIteratorBlobs) { } } // namespace rocksdb -#endif // !(defined NDEBUG) || !defined(OS_WIN) +#endif // (!(defined NDEBUG) || !defined(OS_WIN)) && !defined(ROCKSDB_LITE) int main(int argc, char** argv) { -#if !(defined NDEBUG) || !defined(OS_WIN) +#if (!(defined NDEBUG) || !defined(OS_WIN)) && !defined(ROCKSDB_LITE) rocksdb::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff --git a/db/db_table_properties_test.cc b/db/db_table_properties_test.cc new file mode 100644 index 000000000..f1f4558c5 --- /dev/null +++ b/db/db_table_properties_test.cc @@ -0,0 +1,217 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include +#include + +#include "db/db_test_util.h" +#include "port/stack_trace.h" +#include "rocksdb/db.h" +#include "util/testharness.h" +#include "util/testutil.h" + +#ifndef ROCKSDB_LITE + +namespace rocksdb { + +// A helper function that ensures the table properties returned in +// `GetPropertiesOfAllTablesTest` is correct. +// This test assumes entries size is different for each of the tables. +namespace { + +void VerifyTableProperties(DB* db, uint64_t expected_entries_size) { + TablePropertiesCollection props; + ASSERT_OK(db->GetPropertiesOfAllTables(&props)); + + ASSERT_EQ(4U, props.size()); + std::unordered_set unique_entries; + + // Indirect test + uint64_t sum = 0; + for (const auto& item : props) { + unique_entries.insert(item.second->num_entries); + sum += item.second->num_entries; + } + + ASSERT_EQ(props.size(), unique_entries.size()); + ASSERT_EQ(expected_entries_size, sum); +} +} // namespace + +class DBTablePropertiesTest : public DBTestBase { + public: + DBTablePropertiesTest() : DBTestBase("/db_table_properties_test") {} + TablePropertiesCollection TestGetPropertiesOfTablesInRange( + std::vector ranges, std::size_t* num_properties = nullptr, + std::size_t* num_files = nullptr); +}; + +TEST_F(DBTablePropertiesTest, GetPropertiesOfAllTablesTest) { + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = 8; + Reopen(options); + // Create 4 tables + for (int table = 0; table < 4; ++table) { + for (int i = 0; i < 10 + table; ++i) { + db_->Put(WriteOptions(), ToString(table * 100 + i), "val"); + } + db_->Flush(FlushOptions()); + } + + // 1. Read table properties directly from file + Reopen(options); + VerifyTableProperties(db_, 10 + 11 + 12 + 13); + + // 2. Put two tables to table cache and + Reopen(options); + // fetch key from 1st and 2nd table, which will internally place that table to + // the table cache. + for (int i = 0; i < 2; ++i) { + Get(ToString(i * 100 + 0)); + } + + VerifyTableProperties(db_, 10 + 11 + 12 + 13); + + // 3. 
Put all tables to table cache + Reopen(options); + // fetch key from 1st and 2nd table, which will internally place that table to + // the table cache. + for (int i = 0; i < 4; ++i) { + Get(ToString(i * 100 + 0)); + } + VerifyTableProperties(db_, 10 + 11 + 12 + 13); +} + +TablePropertiesCollection +DBTablePropertiesTest::TestGetPropertiesOfTablesInRange( + std::vector ranges, std::size_t* num_properties, + std::size_t* num_files) { + // run the query + TablePropertiesCollection props; + EXPECT_OK(db_->GetPropertiesOfTablesInRange( + db_->DefaultColumnFamily(), &ranges[0], ranges.size(), &props)); + + // Make sure that we've received properties for those and for those files + // only which fall within requested ranges + std::vector vmd; + db_->GetLiveFilesMetaData(&vmd); + for (auto md : vmd) { + std::string fn = md.db_path + md.name; + bool in_range = false; + for (auto r : ranges) { + // smallestkey < limit && largestkey >= start + if (r.limit.compare(md.smallestkey) >= 0 && + r.start.compare(md.largestkey) <= 0) { + in_range = true; + EXPECT_GT(props.count(fn), 0); + } + } + if (!in_range) { + EXPECT_EQ(props.count(fn), 0); + } + } + + if (num_properties) { + *num_properties = props.size(); + } + + if (num_files) { + *num_files = vmd.size(); + } + return props; +} + +TEST_F(DBTablePropertiesTest, GetPropertiesOfTablesInRange) { + // Fixed random sead + Random rnd(301); + + Options options; + options.create_if_missing = true; + options.write_buffer_size = 4096; + options.max_write_buffer_number = 8; + options.level0_file_num_compaction_trigger = 2; + options.level0_slowdown_writes_trigger = 2; + options.level0_stop_writes_trigger = 4; + options.target_file_size_base = 2048; + options.max_bytes_for_level_base = 10240; + options.max_bytes_for_level_multiplier = 4; + options.soft_rate_limit = 1.1; + options.num_levels = 8; + + DestroyAndReopen(options); + + // build a decent LSM + for (int i = 0; i < 10000; i++) { + ASSERT_OK(Put(test::RandomKey(&rnd, 5), RandomString(&rnd, 102))); + } + Flush(); + db_->PauseBackgroundWork(); + + // Ensure that we have at least L0, L1 and L2 + ASSERT_GT(NumTableFilesAtLevel(0), 0); + ASSERT_GT(NumTableFilesAtLevel(1), 0); + ASSERT_GT(NumTableFilesAtLevel(2), 0); + + // Query the largest range + std::size_t num_properties, num_files; + TestGetPropertiesOfTablesInRange( + {Range(test::RandomKey(&rnd, 5, test::RandomKeyType::SMALLEST), + test::RandomKey(&rnd, 5, test::RandomKeyType::LARGEST))}, + &num_properties, &num_files); + ASSERT_EQ(num_properties, num_files); + + // Query the empty range + TestGetPropertiesOfTablesInRange( + {Range(test::RandomKey(&rnd, 5, test::RandomKeyType::LARGEST), + test::RandomKey(&rnd, 5, test::RandomKeyType::SMALLEST))}, + &num_properties, &num_files); + ASSERT_GT(num_files, 0); + ASSERT_EQ(num_properties, 0); + + // Query the middle rangee + TestGetPropertiesOfTablesInRange( + {Range(test::RandomKey(&rnd, 5, test::RandomKeyType::MIDDLE), + test::RandomKey(&rnd, 5, test::RandomKeyType::LARGEST))}, + &num_properties, &num_files); + ASSERT_GT(num_files, 0); + ASSERT_GT(num_files, num_properties); + ASSERT_GT(num_properties, 0); + + // Query a bunch of random ranges + for (int j = 0; j < 100; j++) { + // create a bunch of ranges + std::vector random_keys; + auto n = 2 * rnd.Uniform(50); + for (uint32_t i = 0; i < n; ++i) { + random_keys.push_back(test::RandomKey(&rnd, 5)); + } + + std::vector ranges; + auto it = random_keys.begin(); + while (it != random_keys.end()) { + ranges.push_back(Range(*it, *(it + 1))); + it += 2; + } + + 
TestGetPropertiesOfTablesInRange(std::move(ranges)); + } +} +} // namespace rocksdb + +#endif // ROCKSDB_LITE + +int main(int argc, char** argv) { +#if !(defined NDEBUG) || !defined(OS_WIN) + rocksdb::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +#else + return 0; +#endif +} diff --git a/db/db_tailing_iter_test.cc b/db/db_tailing_iter_test.cc index 914b25774..87e4f1cab 100644 --- a/db/db_tailing_iter_test.cc +++ b/db/db_tailing_iter_test.cc @@ -10,7 +10,7 @@ // Introduction of SyncPoint effectively disabled building and running this test // in Release build. // which is a pity, it is a good test -#if !(defined NDEBUG) || !defined(OS_WIN) +#if (!(defined NDEBUG) || !defined(OS_WIN)) && !defined(ROCKSDB_LITE) #include "db/db_test_util.h" #include "db/forward_iterator.h" @@ -646,10 +646,10 @@ TEST_F(DBTestTailingIterator, ManagedTailingIteratorSeekToSame) { } // namespace rocksdb -#endif // !(defined NDEBUG) || !defined(OS_WIN) +#endif // (!(defined NDEBUG) || !defined(OS_WIN)) && !defined(ROCKSDB_LITE) int main(int argc, char** argv) { -#if !(defined NDEBUG) || !defined(OS_WIN) +#if (!(defined NDEBUG) || !defined(OS_WIN)) && !defined(ROCKSDB_LITE) rocksdb::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff --git a/db/db_test.cc b/db/db_test.cc index beaa47af2..da0af1822 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -30,6 +30,7 @@ #include "db/job_context.h" #include "db/version_set.h" #include "db/write_batch_internal.h" +#include "memtable/hash_linklist_rep.h" #include "port/stack_trace.h" #include "rocksdb/cache.h" #include "rocksdb/compaction_filter.h" @@ -55,9 +56,9 @@ #include "table/block_based_table_factory.h" #include "table/mock_table.h" #include "table/plain_table_factory.h" +#include "table/scoped_arena_iterator.h" #include "util/file_reader_writer.h" #include "util/hash.h" -#include "util/hash_linklist_rep.h" #include "utilities/merge_operators.h" #include "util/logging.h" #include "util/compression.h" @@ -65,7 +66,6 @@ #include "util/rate_limiter.h" #include "util/statistics.h" #include "util/testharness.h" -#include "util/scoped_arena_iterator.h" #include "util/sync_point.h" #include "util/testutil.h" #include "util/mock_env.h" @@ -79,29 +79,12 @@ static long TestGetTickerCount(const Options& options, Tickers ticker_type) { return options.statistics->getTickerCount(ticker_type); } +#ifndef ROCKSDB_LITE // A helper function that ensures the table properties returned in // `GetPropertiesOfAllTablesTest` is correct. // This test assumes entries size is different for each of the tables. 
namespace { -void VerifyTableProperties(DB* db, uint64_t expected_entries_size) { - TablePropertiesCollection props; - ASSERT_OK(db->GetPropertiesOfAllTables(&props)); - - ASSERT_EQ(4U, props.size()); - std::unordered_set unique_entries; - - // Indirect test - uint64_t sum = 0; - for (const auto& item : props) { - unique_entries.insert(item.second->num_entries); - sum += item.second->num_entries; - } - - ASSERT_EQ(props.size(), unique_entries.size()); - ASSERT_EQ(expected_entries_size, sum); -} - uint64_t GetNumberOfSstFilesForColumnFamily(DB* db, std::string column_family_name) { std::vector metadata; @@ -114,6 +97,7 @@ uint64_t GetNumberOfSstFilesForColumnFamily(DB* db, } } // namespace +#endif // ROCKSDB_LITE class DBTest : public DBTestBase { public: @@ -132,46 +116,7 @@ class DBTestWithParam : public DBTest, uint32_t max_subcompactions_; }; -class BloomStatsTestWithParam - : public DBTest, - public testing::WithParamInterface> { - public: - BloomStatsTestWithParam() { - use_block_table_ = std::get<0>(GetParam()); - use_block_based_builder_ = std::get<1>(GetParam()); - - options_.create_if_missing = true; - options_.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(4)); - options_.memtable_prefix_bloom_bits = 8 * 1024; - if (use_block_table_) { - BlockBasedTableOptions table_options; - table_options.hash_index_allow_collision = false; - table_options.filter_policy.reset( - NewBloomFilterPolicy(10, use_block_based_builder_)); - options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); - } else { - PlainTableOptions table_options; - options_.table_factory.reset(NewPlainTableFactory(table_options)); - } - - perf_context.Reset(); - DestroyAndReopen(options_); - } - - ~BloomStatsTestWithParam() { - perf_context.Reset(); - Destroy(options_); - } - - // Required if inheriting from testing::WithParamInterface<> - static void SetUpTestCase() {} - static void TearDownTestCase() {} - - bool use_block_table_; - bool use_block_based_builder_; - Options options_; -}; - +#ifndef ROCKSDB_LITE TEST_F(DBTest, Empty) { do { Options options; @@ -233,6 +178,7 @@ TEST_F(DBTest, Empty) { ASSERT_EQ("0", num); } while (ChangeOptions()); } +#endif // ROCKSDB_LITE TEST_F(DBTest, WriteEmptyBatch) { Options options; @@ -255,6 +201,7 @@ TEST_F(DBTest, WriteEmptyBatch) { ASSERT_EQ("bar", Get(1, "foo")); } +#ifndef ROCKSDB_LITE TEST_F(DBTest, ReadOnlyDB) { ASSERT_OK(Put("foo", "v1")); ASSERT_OK(Put("bar", "v2")); @@ -476,42 +423,6 @@ TEST_F(DBTest, ParanoidFileChecks) { TestGetTickerCount(options, BLOCK_CACHE_ADD)); } -TEST_F(DBTest, GetPropertiesOfAllTablesTest) { - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = 8; - Reopen(options); - // Create 4 tables - for (int table = 0; table < 4; ++table) { - for (int i = 0; i < 10 + table; ++i) { - db_->Put(WriteOptions(), ToString(table * 100 + i), "val"); - } - db_->Flush(FlushOptions()); - } - - // 1. Read table properties directly from file - Reopen(options); - VerifyTableProperties(db_, 10 + 11 + 12 + 13); - - // 2. Put two tables to table cache and - Reopen(options); - // fetch key from 1st and 2nd table, which will internally place that table to - // the table cache. - for (int i = 0; i < 2; ++i) { - Get(ToString(i * 100 + 0)); - } - - VerifyTableProperties(db_, 10 + 11 + 12 + 13); - - // 3. Put all tables to table cache - Reopen(options); - // fetch key from 1st and 2nd table, which will internally place that table to - // the table cache. 
- for (int i = 0; i < 4; ++i) { - Get(ToString(i * 100 + 0)); - } - VerifyTableProperties(db_, 10 + 11 + 12 + 13); -} - namespace { void ResetTableProperties(TableProperties* tp) { tp->data_size = 0; @@ -658,9 +569,10 @@ TEST_F(DBTest, ReadLatencyHistogramByLevel) { DestroyAndReopen(options); int key_index = 0; Random rnd(301); - for (int num = 0; num < 7; num++) { + for (int num = 0; num < 8; num++) { Put("foo", "bar"); GenerateNewFile(&rnd, &key_index); + dbfull()->TEST_WaitForCompact(); } dbfull()->TEST_WaitForCompact(); @@ -668,7 +580,7 @@ TEST_F(DBTest, ReadLatencyHistogramByLevel) { ASSERT_TRUE(dbfull()->GetProperty("rocksdb.dbstats", &prop)); // Get() after flushes, See latency histogram tracked. - for (int key = 0; key < 500; key++) { + for (int key = 0; key < key_index; key++) { Get(Key(key)); } ASSERT_TRUE(dbfull()->GetProperty("rocksdb.dbstats", &prop)); @@ -679,7 +591,7 @@ TEST_F(DBTest, ReadLatencyHistogramByLevel) { // Reopen and issue Get(). See thee latency tracked Reopen(options); dbfull()->TEST_WaitForCompact(); - for (int key = 0; key < 500; key++) { + for (int key = 0; key < key_index; key++) { Get(Key(key)); } ASSERT_TRUE(dbfull()->GetProperty("rocksdb.dbstats", &prop)); @@ -710,7 +622,7 @@ TEST_F(DBTest, ReadLatencyHistogramByLevel) { ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); - for (int key = 0; key < 500; key++) { + for (int key = 0; key < key_index; key++) { Get(Key(key)); } ASSERT_TRUE(dbfull()->GetProperty("rocksdb.dbstats", &prop)); @@ -790,6 +702,7 @@ TEST_F(DBTest, AggregatedTablePropertiesAtLevel) { } } } +#endif // ROCKSDB_LITE class CoutingUserTblPropCollector : public TablePropertiesCollector { public: @@ -842,6 +755,7 @@ class CoutingUserTblPropCollectorFactory uint32_t num_created_; }; +#ifndef ROCKSDB_LITE TEST_F(DBTest, GetUserDefinedTableProperties) { Options options = CurrentOptions(); options.level0_file_num_compaction_trigger = (1<<30); @@ -881,6 +795,7 @@ TEST_F(DBTest, GetUserDefinedTableProperties) { dbfull()->TEST_CompactRange(0, nullptr, nullptr); ASSERT_GT(collector_factory->num_created_, 0); } +#endif // ROCKSDB_LITE TEST_F(DBTest, UserDefinedTablePropertiesContext) { Options options = CurrentOptions(); @@ -943,6 +858,7 @@ TEST_F(DBTest, UserDefinedTablePropertiesContext) { ASSERT_GT(collector_factory->num_created_, 0); } +#ifndef ROCKSDB_LITE TEST_F(DBTest, LevelLimitReopen) { Options options = CurrentOptions(); CreateAndReopenWithCF({"pikachu"}, options); @@ -964,6 +880,7 @@ TEST_F(DBTest, LevelLimitReopen) { options.max_bytes_for_level_multiplier_additional.resize(10, 1); ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options)); } +#endif // ROCKSDB_LITE TEST_F(DBTest, PutDeleteGet) { do { @@ -1110,6 +1027,7 @@ TEST_F(DBTest, GetFromVersions) { } while (ChangeOptions()); } +#ifndef ROCKSDB_LITE TEST_F(DBTest, GetSnapshot) { anon::OptionsOverride options_override; options_override.skip_policy = kSkipNoSnapshot; @@ -1135,6 +1053,7 @@ TEST_F(DBTest, GetSnapshot) { } } while (ChangeOptions()); } +#endif // ROCKSDB_LITE TEST_F(DBTest, GetLevel0Ordering) { do { @@ -1162,6 +1081,7 @@ TEST_F(DBTest, WrongLevel0Config) { ASSERT_OK(DB::Open(options, dbname_, &db_)); } +#ifndef ROCKSDB_LITE TEST_F(DBTest, GetOrderedByLevels) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); @@ -1233,6 +1153,7 @@ TEST_F(DBTest, GetEncountersEmptyLevel) { 
ASSERT_EQ(NumTableFilesAtLevel(0, 1), 1); // XXX } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction)); } +#endif // ROCKSDB_LITE // KeyMayExist can lead to a few false positives, but not false negatives. // To make test deterministic, use a much larger number of bits per key-20 than @@ -1365,6 +1286,7 @@ TEST_F(DBTest, NonBlockingIteration) { kSkipMmapReads)); } +#ifndef ROCKSDB_LITE TEST_F(DBTest, ManagedNonBlockingIteration) { do { ReadOptions non_blocking_opts, regular_opts; @@ -1429,6 +1351,7 @@ TEST_F(DBTest, ManagedNonBlockingIteration) { } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipHashCuckoo | kSkipMmapReads)); } +#endif // ROCKSDB_LITE // A delete is skipped for key if KeyMayExist(key) returns False // Tests Writebatch consistency and proper delete behaviour @@ -2333,6 +2256,7 @@ TEST_F(DBTest, FlushMultipleMemtable) { } while (ChangeCompactOptions()); } +#ifndef ROCKSDB_LITE TEST_F(DBTest, NumImmutableMemTable) { do { Options options = CurrentOptions(); @@ -2442,6 +2366,7 @@ TEST_F(DBTest, NumImmutableMemTable) { ASSERT_TRUE(GetPerfLevel() == kDisable); } while (ChangeCompactOptions()); } +#endif // ROCKSDB_LITE TEST_F(DBTest, FlushEmptyColumnFamily) { // Block flush thread and disable compaction thread @@ -2487,6 +2412,7 @@ TEST_F(DBTest, FlushEmptyColumnFamily) { sleeping_task_low.WaitUntilDone(); } +#ifndef ROCKSDB_LITE TEST_F(DBTest, GetProperty) { // Set sizes to both background thread pool to be 1 and block them. env_->SetBackgroundThreads(1, Env::HIGH); @@ -2792,6 +2718,7 @@ TEST_F(DBTest, EstimatePendingCompBytes) { "rocksdb.estimate-pending-compaction-bytes", &int_num)); ASSERT_EQ(int_num, 0U); } +#endif // ROCKSDB_LITE TEST_F(DBTest, FLUSH) { do { @@ -2851,7 +2778,7 @@ TEST_F(DBTest, RecoveryWithEmptyLog) { } while (ChangeOptions()); } - +#ifndef ROCKSDB_LITE TEST_F(DBTest, FlushSchedule) { Options options = CurrentOptions(); options.disable_auto_compactions = true; @@ -2892,7 +2819,7 @@ TEST_F(DBTest, FlushSchedule) { ASSERT_LE(pikachu_tables, static_cast(10)); ASSERT_GT(pikachu_tables, static_cast(0)); } - +#endif // ROCKSDB_LITE TEST_F(DBTest, ManifestRollOver) { do { @@ -2940,6 +2867,7 @@ TEST_F(DBTest, IdentityAcrossRestarts) { } while (ChangeCompactOptions()); } +#ifndef ROCKSDB_LITE TEST_F(DBTest, RecoverWithLargeLog) { do { { @@ -2966,6 +2894,7 @@ TEST_F(DBTest, RecoverWithLargeLog) { ASSERT_GT(NumTableFilesAtLevel(0, 1), 1); } while (ChangeCompactOptions()); } +#endif // ROCKSDB_LITE namespace { class KeepFilter : public CompactionFilter { @@ -3030,6 +2959,7 @@ class DelayFilterFactory : public CompactionFilterFactory { }; } // namespace +#ifndef ROCKSDB_LITE TEST_F(DBTest, CompressedCache) { if (!Snappy_Supported()) { return; @@ -3153,6 +3083,7 @@ static std::string CompressibleString(Random* rnd, int len) { test::CompressibleString(rnd, 0.8, len, &r); return r; } +#endif // ROCKSDB_LITE TEST_F(DBTest, FailMoreDbPaths) { Options options = CurrentOptions(); @@ -3183,6 +3114,7 @@ void CheckColumnFamilyMeta(const ColumnFamilyMetaData& cf_meta) { ASSERT_EQ(cf_meta.size, cf_size); } +#ifndef ROCKSDB_LITE TEST_F(DBTest, ColumnFamilyMetaDataTest) { Options options = CurrentOptions(); options.create_if_missing = true; @@ -3329,6 +3261,7 @@ TEST_F(DBTest, RepeatedWritesToSameKey) { } } while (ChangeCompactOptions()); } +#endif // ROCKSDB_LITE TEST_F(DBTest, SparseMerge) { do { @@ -3375,6 +3308,7 @@ TEST_F(DBTest, SparseMerge) { } while (ChangeCompactOptions()); } +#ifndef ROCKSDB_LITE static bool Between(uint64_t val, uint64_t 
low, uint64_t high) { bool result = (val >= low) && (val <= high); if (!result) { @@ -3587,6 +3521,7 @@ TEST_F(DBTest, ApproximateSizes_MixOfSmallAndLarge) { // ApproximateOffsetOf() is not yet implemented in plain table format. } while (ChangeOptions(kSkipPlainTable)); } +#endif // ROCKSDB_LITE TEST_F(DBTest, IteratorPinsRef) { do { @@ -3614,6 +3549,7 @@ TEST_F(DBTest, IteratorPinsRef) { } while (ChangeCompactOptions()); } +#ifndef ROCKSDB_LITE TEST_F(DBTest, Snapshot) { anon::OptionsOverride options_override; options_override.skip_policy = kSkipNoSnapshot; @@ -3716,6 +3652,7 @@ TEST_F(DBTest, HiddenValuesAreRemoved) { } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction | kSkipPlainTable | kSkipHashCuckoo)); } +#endif // ROCKSDB_LITE TEST_F(DBTest, CompactBetweenSnapshots) { anon::OptionsOverride options_override; @@ -3821,6 +3758,7 @@ TEST_F(DBTest, UnremovableSingleDelete) { kSkipUniversalCompaction | kSkipMergePut)); } +#ifndef ROCKSDB_LITE TEST_F(DBTest, DeletionMarkers1) { Options options = CurrentOptions(); options.max_background_flushes = 0; @@ -3930,6 +3868,7 @@ TEST_F(DBTest, OverlapInLevel0) { ASSERT_EQ("NOT_FOUND", Get(1, "600")); } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction)); } +#endif // ROCKSDB_LITE TEST_F(DBTest, ComparatorCheck) { class NewComparator : public Comparator { @@ -4122,6 +4061,7 @@ TEST_F(DBTest, DestroyDBMetaDatabase) { ASSERT_TRUE(!(DB::Open(options, metametadbname, &db)).ok()); } +#ifndef ROCKSDB_LITE // Check that number of files does not grow when writes are dropped TEST_F(DBTest, DropWrites) { do { @@ -4192,6 +4132,7 @@ TEST_F(DBTest, DropWritesFlush) { env_->drop_writes_.store(false, std::memory_order_release); } while (ChangeCompactOptions()); } +#endif // ROCKSDB_LITE // Check that CompactRange() returns failure if there is not enough space left // on device @@ -4241,6 +4182,7 @@ TEST_F(DBTest, NonWritableFileSystem) { } while (ChangeCompactOptions()); } +#ifndef ROCKSDB_LITE TEST_F(DBTest, ManifestWriteError) { // Test for the following problem: // (a) Compaction produces file F @@ -4282,6 +4224,7 @@ TEST_F(DBTest, ManifestWriteError) { ASSERT_EQ("bar", Get("foo")); } } +#endif // ROCKSDB_LITE TEST_F(DBTest, PutFailsParanoid) { // Test the following: @@ -4540,6 +4483,7 @@ TEST_F(DBTest, BloomFilterWrapper) { ASSERT_EQ(2U * maxKey, policy->GetCounter()); } +#ifndef ROCKSDB_LITE TEST_F(DBTest, SnapshotFiles) { do { Options options = CurrentOptions(); @@ -4667,6 +4611,7 @@ TEST_F(DBTest, SnapshotFiles) { dbfull()->DisableFileDeletions(); } while (ChangeCompactOptions()); } +#endif TEST_F(DBTest, CompactOnFlush) { anon::OptionsOverride options_override; @@ -4806,6 +4751,7 @@ TEST_F(DBTest, FlushOneColumnFamily) { } } +#ifndef ROCKSDB_LITE // In https://reviews.facebook.net/D20661 we change // recovery behavior: previously for each log file each column family // memtable was flushed, even it was empty. 
Now it's changed: @@ -5011,6 +4957,7 @@ TEST_F(DBTest, SharedWriteBuffer) { static_cast(4)); } } +#endif // ROCKSDB_LITE TEST_F(DBTest, PurgeInfoLogs) { Options options = CurrentOptions(); @@ -5083,6 +5030,7 @@ TEST_F(DBTest, SyncMultipleLogs) { ASSERT_OK(dbfull()->SyncWAL()); } +#ifndef ROCKSDB_LITE // // Test WAL recovery for the various modes available // @@ -5365,7 +5313,6 @@ TEST_F(DBTest, kSkipAnyCorruptedRecords) { } } - // Multi-threaded test: namespace { @@ -5532,6 +5479,7 @@ TEST_P(MultiThreadedDBTest, MultiThreaded) { INSTANTIATE_TEST_CASE_P( MultiThreaded, MultiThreadedDBTest, ::testing::ValuesIn(MultiThreadedDBTest::GenerateOptionConfigs())); +#endif // ROCKSDB_LITE // Group commit test: namespace { @@ -5672,6 +5620,7 @@ class ModelDB: public DB { return s; } +#ifndef ROCKSDB_LITE using DB::AddFile; virtual Status AddFile(ColumnFamilyHandle* column_family, const ExternalSstFileInfo* file_path, @@ -5691,6 +5640,13 @@ class ModelDB: public DB { return Status(); } + virtual Status GetPropertiesOfTablesInRange( + ColumnFamilyHandle* column_family, const Range* range, std::size_t n, + TablePropertiesCollection* props) override { + return Status(); + } +#endif // ROCKSDB_LITE + using DB::KeyMayExist; virtual bool KeyMayExist(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, @@ -5834,7 +5790,9 @@ class ModelDB: public DB { return Status::OK(); } +#ifndef ROCKSDB_LITE virtual Status DisableFileDeletions() override { return Status::OK(); } + virtual Status EnableFileDeletions(bool force) override { return Status::OK(); } @@ -5849,11 +5807,6 @@ class ModelDB: public DB { virtual Status DeleteFile(std::string name) override { return Status::OK(); } - virtual Status GetDbIdentity(std::string& identity) const override { - return Status::OK(); - } - - virtual SequenceNumber GetLatestSequenceNumber() const override { return 0; } virtual Status GetUpdatesSince( rocksdb::SequenceNumber, unique_ptr*, const TransactionLogIterator::ReadOptions& @@ -5861,13 +5814,20 @@ class ModelDB: public DB { return Status::NotSupported("Not supported in Model DB"); } - virtual ColumnFamilyHandle* DefaultColumnFamily() const override { - return nullptr; - } - virtual void GetColumnFamilyMetaData( ColumnFamilyHandle* column_family, ColumnFamilyMetaData* metadata) override {} +#endif // ROCKSDB_LITE + + virtual Status GetDbIdentity(std::string& identity) const override { + return Status::OK(); + } + + virtual SequenceNumber GetLatestSequenceNumber() const override { return 0; } + + virtual ColumnFamilyHandle* DefaultColumnFamily() const override { + return nullptr; + } private: class ModelIter: public Iterator { @@ -6128,6 +6088,7 @@ TEST_F(DBTest, MultiGetEmpty) { } while (ChangeCompactOptions()); } +#ifndef ROCKSDB_LITE namespace { void PrefixScanInit(DBTest *dbtest) { char buf[100]; @@ -6230,6 +6191,7 @@ TEST_F(DBTest, PrefixScan) { } // end of while XFUNC_TEST("", "dbtest_prefix", prefix_skip1, XFuncPoint::SetSkip, 0); } +#endif // ROCKSDB_LITE TEST_F(DBTest, BlockBasedTablePrefixIndexTest) { // create a DB with block prefix index @@ -6291,6 +6253,7 @@ TEST_F(DBTest, ChecksumTest) { ASSERT_EQ("h", Get("g")); } +#ifndef ROCKSDB_LITE TEST_P(DBTestWithParam, FIFOCompactionTest) { for (int iter = 0; iter < 2; ++iter) { // first iteration -- auto compaction @@ -6330,6 +6293,7 @@ TEST_P(DBTestWithParam, FIFOCompactionTest) { } } } +#endif // ROCKSDB_LITE // verify that we correctly deprecated timeout_hint_us TEST_F(DBTest, SimpleWriteTimeoutTest) { @@ -6340,6 +6304,7 @@ TEST_F(DBTest, 
SimpleWriteTimeoutTest) { ASSERT_NOK(Put(Key(1), Key(1) + std::string(100, 'v'), write_opt)); } +#ifndef ROCKSDB_LITE /* * This test is not reliable enough as it heavily depends on disk behavior. */ @@ -6432,6 +6397,7 @@ TEST_F(DBTest, TableOptionsSanitizeTest) { options.prefix_extractor.reset(NewFixedPrefixTransform(1)); ASSERT_OK(TryReopen(options)); } +#endif // ROCKSDB_LITE TEST_F(DBTest, SanitizeNumThreads) { for (int attempt = 0; attempt < 2; attempt++) { @@ -6677,6 +6643,7 @@ TEST_F(DBTest, DisableDataSyncTest) { } } +#ifndef ROCKSDB_LITE TEST_F(DBTest, DynamicMemtableOptions) { const uint64_t k64KB = 1 << 16; const uint64_t k128KB = 1 << 17; @@ -6818,6 +6785,7 @@ TEST_F(DBTest, DynamicMemtableOptions) { rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } +#endif // ROCKSDB_LITE #if ROCKSDB_USING_THREAD_STATUS namespace { @@ -6926,6 +6894,10 @@ TEST_F(DBTest, ThreadStatusFlush) { ASSERT_EQ("v1", Get(1, "foo")); VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0); + uint64_t num_running_flushes = 0; + db_->GetIntProperty(DB::Properties::kNumRunningFlushes, &num_running_flushes); + ASSERT_EQ(num_running_flushes, 0); + Put(1, "k1", std::string(100000, 'x')); // Fill memtable Put(1, "k2", std::string(100000, 'y')); // Trigger flush @@ -6933,10 +6905,11 @@ TEST_F(DBTest, ThreadStatusFlush) { // running when we perform VerifyOperationCount(). TEST_SYNC_POINT("DBTest::ThreadStatusFlush:1"); VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 1); + db_->GetIntProperty(DB::Properties::kNumRunningFlushes, &num_running_flushes); + ASSERT_EQ(num_running_flushes, 1); // This second sync point is to ensure the flush job will not // be completed until we already perform VerifyOperationCount(). TEST_SYNC_POINT("DBTest::ThreadStatusFlush:2"); - rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } @@ -6981,6 +6954,10 @@ TEST_P(DBTestWithParam, ThreadStatusSingleCompaction) { } // This makes sure a compaction won't be scheduled until // we have done with the above Put Phase. + uint64_t num_running_compactions = 0; + db_->GetIntProperty(DB::Properties::kNumRunningCompactions, + &num_running_compactions); + ASSERT_EQ(num_running_compactions, 0); TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:0"); ASSERT_GE(NumTableFilesAtLevel(0), options.level0_file_num_compaction_trigger); @@ -6995,6 +6972,9 @@ TEST_P(DBTestWithParam, ThreadStatusSingleCompaction) { // If thread tracking is not enabled, compaction count should be 0. VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 0); } + db_->GetIntProperty(DB::Properties::kNumRunningCompactions, + &num_running_compactions); + ASSERT_EQ(num_running_compactions, 1); // TODO(yhchiang): adding assert to verify each compaction stage. 
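The GetIntProperty() calls added to the thread-status tests above read the two counters introduced by this patch. As a usage sketch outside the test harness (illustration only; assumes an open rocksdb::DB* named db):

    // Illustration only. Reads the run-time counters this patch adds.
    uint64_t running_flushes = 0;
    uint64_t running_compactions = 0;
    db->GetIntProperty(rocksdb::DB::Properties::kNumRunningFlushes,
                       &running_flushes);
    db->GetIntProperty(rocksdb::DB::Properties::kNumRunningCompactions,
                       &running_compactions);
    // The equivalent string names, registered in internal_stats.cc further
    // down, are "rocksdb.num-running-flushes" and
    // "rocksdb.num-running-compactions".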
TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:2"); @@ -7243,6 +7223,7 @@ TEST_P(DBTestWithParam, PreShutdownCompactionMiddle) { #endif // ROCKSDB_USING_THREAD_STATUS +#ifndef ROCKSDB_LITE TEST_F(DBTest, FlushOnDestroy) { WriteOptions wo; wo.disableWAL = true; @@ -7696,6 +7677,7 @@ TEST_F(DBTest, DynamicCompactionOptions) { rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } +#endif // ROCKSDB_LITE TEST_F(DBTest, FileCreationRandomFailure) { Options options; @@ -7757,6 +7739,7 @@ TEST_F(DBTest, FileCreationRandomFailure) { } } +#ifndef ROCKSDB_LITE TEST_F(DBTest, DynamicMiscOptions) { // Test max_sequential_skip_in_iterations Options options; @@ -7806,6 +7789,7 @@ TEST_F(DBTest, DynamicMiscOptions) { // No reseek assert_reseek_count(300, 1); } +#endif // ROCKSDB_LITE TEST_F(DBTest, DontDeletePendingOutputs) { Options options; @@ -7839,6 +7823,7 @@ TEST_F(DBTest, DontDeletePendingOutputs) { Compact("a", "b"); } +#ifndef ROCKSDB_LITE TEST_F(DBTest, DontDeleteMovedFile) { // This test triggers move compaction and verifies that the file is not // deleted when it's part of move compaction @@ -7941,6 +7926,7 @@ TEST_F(DBTest, OptimizeFiltersForHits) { ASSERT_EQ(Get(1, Key(i)), "val"); } } +#endif // ROCKSDB_LITE TEST_F(DBTest, L0L1L2AndUpHitCounter) { Options options = CurrentOptions(); @@ -8040,6 +8026,7 @@ TEST_F(DBTest, MutexWaitStats) { ThreadStatus::STATE_MUTEX_WAIT, 0); } +#ifndef ROCKSDB_LITE // This reproduces a bug where we don't delete a file because when it was // supposed to be deleted, it was blocked by pending_outputs // Consider: @@ -8135,6 +8122,7 @@ TEST_F(DBTest, DeleteObsoleteFilesPendingOutputs) { ASSERT_EQ(Status::NotFound(), env_->FileExists(dbname_ + file_on_L2)); listener->VerifyMatchedCount(1); } +#endif // ROCKSDB_LITE TEST_F(DBTest, CloseSpeedup) { Options options = CurrentOptions(); @@ -8256,6 +8244,7 @@ TEST_F(DBTest, MergeTestTime) { #endif // ROCKSDB_USING_THREAD_STATUS } +#ifndef ROCKSDB_LITE TEST_P(DBTestWithParam, MergeCompactionTimeTest) { SetPerfLevel(kEnableTime); Options options; @@ -8306,6 +8295,7 @@ TEST_P(DBTestWithParam, FilterCompactionTimeTest) { ASSERT_NE(TestGetTickerCount(options, FILTER_OPERATION_TOTAL_TIME), 0); delete itr; } +#endif // ROCKSDB_LITE TEST_F(DBTest, TestLogCleanup) { Options options = CurrentOptions(); @@ -8322,6 +8312,7 @@ TEST_F(DBTest, TestLogCleanup) { } } +#ifndef ROCKSDB_LITE TEST_F(DBTest, EmptyCompactedDB) { Options options; options.max_open_files = -1; @@ -8332,6 +8323,7 @@ TEST_F(DBTest, EmptyCompactedDB) { ASSERT_TRUE(s.IsNotSupported()); Close(); } +#endif // ROCKSDB_LITE class CountingDeleteTabPropCollector : public TablePropertiesCollector { public: @@ -8373,6 +8365,7 @@ class CountingDeleteTabPropCollectorFactory } }; +#ifndef ROCKSDB_LITE TEST_F(DBTest, TablePropertiesNeedCompactTest) { Random rnd(301); @@ -8671,6 +8664,7 @@ TEST_F(DBTest, PromoteL0Failure) { status = experimental::PromoteL0(db_, db_->DefaultColumnFamily()); ASSERT_TRUE(status.IsInvalidArgument()); } +#endif // ROCKSDB_LITE // Github issue #596 TEST_F(DBTest, HugeNumberOfLevels) { @@ -8877,6 +8871,7 @@ TEST_F(DBTest, HardLimit) { rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } +#ifndef ROCKSDB_LITE TEST_F(DBTest, SoftLimit) { Options options; options.env = env_; @@ -8996,6 +8991,7 @@ TEST_F(DBTest, SoftLimit) { ASSERT_EQ(sleep_count.load(), 0); rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } +#endif // ROCKSDB_LITE TEST_F(DBTest, FailWhenCompressionNotSupportedTest) { CompressionType compressions[] = 
{kZlibCompression, kBZip2Compression, @@ -9017,6 +9013,7 @@ TEST_F(DBTest, FailWhenCompressionNotSupportedTest) { } } +#ifndef ROCKSDB_LITE TEST_F(DBTest, RowCache) { Options options = CurrentOptions(); options.statistics = rocksdb::CreateDBStatistics(); @@ -9035,6 +9032,7 @@ TEST_F(DBTest, RowCache) { ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1); } +#endif // ROCKSDB_LITE // TODO(3.13): fix the issue of Seek() + Prev() which might not necessary // return the biggest key which is smaller than the seek key. @@ -9095,6 +9093,7 @@ TEST_F(DBTest, DeletingOldWalAfterDrop) { EXPECT_GT(lognum2, lognum1); } +#ifndef ROCKSDB_LITE TEST_F(DBTest, RateLimitedDelete) { rocksdb::SyncPoint::GetInstance()->LoadDependency({ {"DBTest::RateLimitedDelete:1", @@ -9263,6 +9262,7 @@ TEST_F(DBTest, DestroyDBWithRateLimitedDelete) { // We have deleted the 4 sst files in the delete_scheduler ASSERT_EQ(bg_delete_file, 4); } +#endif // ROCKSDB_LITE TEST_F(DBTest, UnsupportedManualSync) { DestroyAndReopen(CurrentOptions()); @@ -9271,6 +9271,7 @@ TEST_F(DBTest, UnsupportedManualSync) { ASSERT_TRUE(s.IsNotSupported()); } +#ifndef ROCKSDB_LITE TEST_F(DBTest, OpenDBWithInfiniteMaxOpenFiles) { // Open DB with infinite max open files // - First iteration use 1 thread to open files @@ -9815,6 +9816,7 @@ TEST_F(DBTest, AddExternalSstFileMultiThreaded) { } while (ChangeOptions(kSkipPlainTable | kSkipUniversalCompaction | kSkipFIFOCompaction)); } +#endif // ROCKSDB_LITE // 1 Create some SST files by inserting K-V pairs into DB // 2 Close DB and change suffix from ".sst" to ".ldb" for every other SST file @@ -10222,6 +10224,47 @@ TEST_F(DBTest, WalFilterTestWithChangeBatch) { } #endif // ROCKSDB_LITE +#ifndef ROCKSDB_LITE +class BloomStatsTestWithParam + : public DBTest, + public testing::WithParamInterface> { + public: + BloomStatsTestWithParam() { + use_block_table_ = std::get<0>(GetParam()); + use_block_based_builder_ = std::get<1>(GetParam()); + + options_.create_if_missing = true; + options_.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(4)); + options_.memtable_prefix_bloom_bits = 8 * 1024; + if (use_block_table_) { + BlockBasedTableOptions table_options; + table_options.hash_index_allow_collision = false; + table_options.filter_policy.reset( + NewBloomFilterPolicy(10, use_block_based_builder_)); + options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); + } else { + PlainTableOptions table_options; + options_.table_factory.reset(NewPlainTableFactory(table_options)); + } + + perf_context.Reset(); + DestroyAndReopen(options_); + } + + ~BloomStatsTestWithParam() { + perf_context.Reset(); + Destroy(options_); + } + + // Required if inheriting from testing::WithParamInterface<> + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + bool use_block_table_; + bool use_block_based_builder_; + Options options_; +}; + // 1 Insert 2 K-V pairs into DB // 2 Call Get() for both keys - expext memtable bloom hit stat to be 2 // 3 Call Get() for nonexisting key - expect memtable bloom miss stat to be 1 @@ -10335,6 +10378,7 @@ INSTANTIATE_TEST_CASE_P(BloomStatsTestWithParam, BloomStatsTestWithParam, ::testing::Values(std::make_tuple(true, true), std::make_tuple(true, false), std::make_tuple(false, false))); +#endif // ROCKSDB_LITE } // namespace rocksdb #endif diff --git a/db/db_test_util.cc b/db/db_test_util.cc index b545f2104..ab0ab4d69 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -38,11 +38,10 @@ 
SpecialEnv::SpecialEnv(Env* base) table_write_callback_ = nullptr; } - -DBTestBase::DBTestBase(const std::string path) : option_config_(kDefault), - mem_env_(!getenv("MEM_ENV") ? nullptr : - new MockEnv(Env::Default())), - env_(new SpecialEnv(mem_env_ ? mem_env_ : Env::Default())) { +DBTestBase::DBTestBase(const std::string path) + : option_config_(kDefault), + mem_env_(!getenv("MEM_ENV") ? nullptr : new MockEnv(Env::Default())), + env_(new SpecialEnv(mem_env_ ? mem_env_ : Env::Default())) { env_->SetBackgroundThreads(1, Env::LOW); env_->SetBackgroundThreads(1, Env::HIGH); dbname_ = test::TmpDir(env_) + path; @@ -79,6 +78,23 @@ DBTestBase::~DBTestBase() { // test. Return false if there are no more configurations to test. bool DBTestBase::ChangeOptions(int skip_mask) { for (option_config_++; option_config_ < kEnd; option_config_++) { +#ifdef ROCKSDB_LITE + // These options are not supported in ROCKSDB_LITE + if (option_config_ == kHashSkipList || + option_config_ == kPlainTableFirstBytePrefix || + option_config_ == kPlainTableCappedPrefix || + option_config_ == kPlainTableCappedPrefixNonMmap || + option_config_ == kPlainTableAllBytesPrefix || + option_config_ == kVectorRep || option_config_ == kHashLinkList || + option_config_ == kHashCuckoo || + option_config_ == kUniversalCompaction || + option_config_ == kUniversalCompactionMultiLevel || + option_config_ == kUniversalSubcompactions || + option_config_ == kFIFOCompaction) { + continue; + } +#endif + if ((skip_mask & kSkipDeletesFilterFirst) && option_config_ == kDeletesFilterFirst) { continue; @@ -92,8 +108,7 @@ bool DBTestBase::ChangeOptions(int skip_mask) { continue; } if ((skip_mask & kSkipNoSeekToLast) && - (option_config_ == kHashLinkList || - option_config_ == kHashSkipList)) {; + (option_config_ == kHashLinkList || option_config_ == kHashSkipList)) { continue; } if ((skip_mask & kSkipPlainTable) && @@ -115,8 +130,7 @@ bool DBTestBase::ChangeOptions(int skip_mask) { option_config_ == kFIFOCompaction) { continue; } - if ((skip_mask & kSkipMmapReads) && - option_config_ == kWalDirAndMmapReads) { + if ((skip_mask & kSkipMmapReads) && option_config_ == kWalDirAndMmapReads) { continue; } break; @@ -205,10 +219,10 @@ Options DBTestBase::CurrentOptions( BlockBasedTableOptions table_options; bool set_block_based_table_factory = true; switch (option_config_) { +#ifndef ROCKSDB_LITE case kHashSkipList: options.prefix_extractor.reset(NewFixedPrefixTransform(1)); - options.memtable_factory.reset( - NewHashSkipListRepFactory(16)); + options.memtable_factory.reset(NewHashSkipListRepFactory(16)); break; case kPlainTableFirstBytePrefix: options.table_factory.reset(new PlainTableFactory()); @@ -238,6 +252,19 @@ Options DBTestBase::CurrentOptions( options.max_sequential_skip_in_iterations = 999999; set_block_based_table_factory = false; break; + case kVectorRep: + options.memtable_factory.reset(new VectorRepFactory(100)); + break; + case kHashLinkList: + options.prefix_extractor.reset(NewFixedPrefixTransform(1)); + options.memtable_factory.reset( + NewHashLinkListRepFactory(4, 0, 3, true, 4)); + break; + case kHashCuckoo: + options.memtable_factory.reset( + NewHashCuckooRepFactory(options.write_buffer_size)); + break; +#endif // ROCKSDB_LITE case kMergePut: options.merge_operator = MergeOperators::CreatePutOperator(); break; @@ -274,18 +301,6 @@ Options DBTestBase::CurrentOptions( case kDeletesFilterFirst: options.filter_deletes = true; break; - case kVectorRep: - options.memtable_factory.reset(new VectorRepFactory(100)); - break; - case 
kHashLinkList: - options.prefix_extractor.reset(NewFixedPrefixTransform(1)); - options.memtable_factory.reset( - NewHashLinkListRepFactory(4, 0, 3, true, 4)); - break; - case kHashCuckoo: - options.memtable_factory.reset( - NewHashCuckooRepFactory(options.write_buffer_size)); - break; case kUniversalCompaction: options.compaction_style = kCompactionStyleUniversal; options.num_levels = 1; @@ -296,7 +311,7 @@ Options DBTestBase::CurrentOptions( break; case kCompressedBlockCache: options.allow_mmap_writes = true; - table_options.block_cache_compressed = NewLRUCache(8*1024*1024); + table_options.block_cache_compressed = NewLRUCache(8 * 1024 * 1024); break; case kInfiniteMaxOpenFiles: options.max_open_files = -1; @@ -355,7 +370,7 @@ Options DBTestBase::CurrentOptions( } void DBTestBase::CreateColumnFamilies(const std::vector& cfs, - const Options& options) { + const Options& options) { ColumnFamilyOptions cf_opts(options); size_t cfi = handles_.size(); handles_.resize(cfi + cfs.size()); @@ -365,7 +380,7 @@ void DBTestBase::CreateColumnFamilies(const std::vector& cfs, } void DBTestBase::CreateAndReopenWithCF(const std::vector& cfs, - const Options& options) { + const Options& options) { CreateColumnFamilies(cfs, options); std::vector cfs_plus_default = cfs; cfs_plus_default.insert(cfs_plus_default.begin(), kDefaultColumnFamilyName); @@ -373,18 +388,17 @@ void DBTestBase::CreateAndReopenWithCF(const std::vector& cfs, } void DBTestBase::ReopenWithColumnFamilies(const std::vector& cfs, - const std::vector& options) { + const std::vector& options) { ASSERT_OK(TryReopenWithColumnFamilies(cfs, options)); } void DBTestBase::ReopenWithColumnFamilies(const std::vector& cfs, - const Options& options) { + const Options& options) { ASSERT_OK(TryReopenWithColumnFamilies(cfs, options)); } Status DBTestBase::TryReopenWithColumnFamilies( - const std::vector& cfs, - const std::vector& options) { + const std::vector& cfs, const std::vector& options) { Close(); EXPECT_EQ(cfs.size(), options.size()); std::vector column_families; @@ -396,8 +410,7 @@ Status DBTestBase::TryReopenWithColumnFamilies( } Status DBTestBase::TryReopenWithColumnFamilies( - const std::vector& cfs, - const Options& options) { + const std::vector& cfs, const Options& options) { Close(); std::vector v_opts(cfs.size(), options); return TryReopenWithColumnFamilies(cfs, v_opts); @@ -454,7 +467,7 @@ Status DBTestBase::Put(const Slice& k, const Slice& v, WriteOptions wo) { } Status DBTestBase::Put(int cf, const Slice& k, const Slice& v, - WriteOptions wo) { + WriteOptions wo) { if (kMergePut == option_config_) { return db_->Merge(wo, handles_[cf], k, v); } else { @@ -493,7 +506,7 @@ std::string DBTestBase::Get(const std::string& k, const Snapshot* snapshot) { } std::string DBTestBase::Get(int cf, const std::string& k, - const Snapshot* snapshot) { + const Snapshot* snapshot) { ReadOptions options; options.verify_checksums = true; options.snapshot = snapshot; @@ -552,9 +565,9 @@ std::string DBTestBase::AllEntriesFor(const Slice& user_key, int cf) { Arena arena; ScopedArenaIterator iter; if (cf == 0) { - iter.set(dbfull()->TEST_NewInternalIterator(&arena)); + iter.set(dbfull()->NewInternalIterator(&arena)); } else { - iter.set(dbfull()->TEST_NewInternalIterator(&arena, handles_[cf])); + iter.set(dbfull()->NewInternalIterator(&arena, handles_[cf])); } InternalKey target(user_key, kMaxSequenceNumber, kTypeValue); iter->Seek(target.Encode()); @@ -605,6 +618,7 @@ std::string DBTestBase::AllEntriesFor(const Slice& user_key, int cf) { return result; } 
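The memtable-rep option configs moved under the new guard above (hash skip list, hash link list, cuckoo, vector) only compile in non-LITE builds. A hedged sketch of selecting one directly, mirroring the kHashLinkList case; illustration only, not part of the patch:

    // Illustration only, non-LITE builds. Mirrors the kHashLinkList config.
    rocksdb::Options options;
    options.create_if_missing = true;
    options.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(1));
    options.memtable_factory.reset(
        rocksdb::NewHashLinkListRepFactory(4, 0, 3, true, 4));
    // With ROCKSDB_LITE defined, NewHashLinkListRepFactory is not compiled in,
    // which is why ChangeOptions() now skips these option configs.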
+#ifndef ROCKSDB_LITE int DBTestBase::NumSortedRuns(int cf) { ColumnFamilyMetaData cf_meta; if (cf == 0) { @@ -631,20 +645,6 @@ uint64_t DBTestBase::TotalSize(int cf) { return cf_meta.size; } -int DBTestBase::NumTableFilesAtLevel(int level, int cf) { - std::string property; - if (cf == 0) { - // default cfd - EXPECT_TRUE(db_->GetProperty( - "rocksdb.num-files-at-level" + NumberToString(level), &property)); - } else { - EXPECT_TRUE(db_->GetProperty( - handles_[cf], "rocksdb.num-files-at-level" + NumberToString(level), - &property)); - } - return atoi(property.c_str()); -} - uint64_t DBTestBase::SizeAtLevel(int level) { std::vector metadata; db_->GetLiveFilesMetaData(&metadata); @@ -671,6 +671,27 @@ int DBTestBase::TotalLiveFiles(int cf) { return num_files; } +size_t DBTestBase::CountLiveFiles() { + std::vector metadata; + db_->GetLiveFilesMetaData(&metadata); + return metadata.size(); +} +#endif // ROCKSDB_LITE + +int DBTestBase::NumTableFilesAtLevel(int level, int cf) { + std::string property; + if (cf == 0) { + // default cfd + EXPECT_TRUE(db_->GetProperty( + "rocksdb.num-files-at-level" + NumberToString(level), &property)); + } else { + EXPECT_TRUE(db_->GetProperty( + handles_[cf], "rocksdb.num-files-at-level" + NumberToString(level), + &property)); + } + return atoi(property.c_str()); +} + int DBTestBase::TotalTableFiles(int cf, int levels) { if (levels == -1) { levels = CurrentOptions().num_levels; @@ -713,12 +734,6 @@ size_t DBTestBase::CountFiles() { return files.size() + logfiles.size(); } -size_t DBTestBase::CountLiveFiles() { - std::vector metadata; - db_->GetLiveFilesMetaData(&metadata); - return metadata.size(); -} - uint64_t DBTestBase::Size(const Slice& start, const Slice& limit, int cf) { Range r(start, limit); uint64_t size; @@ -731,7 +746,7 @@ uint64_t DBTestBase::Size(const Slice& start, const Slice& limit, int cf) { } void DBTestBase::Compact(int cf, const Slice& start, const Slice& limit, - uint32_t target_path_id) { + uint32_t target_path_id) { CompactRangeOptions compact_options; compact_options.target_path_id = target_path_id; ASSERT_OK(db_->CompactRange(compact_options, handles_[cf], &start, &limit)); @@ -748,9 +763,8 @@ void DBTestBase::Compact(const Slice& start, const Slice& limit) { // Do n memtable compactions, each of which produces an sstable // covering the range [small,large]. -void DBTestBase::MakeTables( - int n, const std::string& small, - const std::string& large, int cf) { +void DBTestBase::MakeTables(int n, const std::string& small, + const std::string& large, int cf) { for (int i = 0; i < n; i++) { ASSERT_OK(Put(cf, small, "begin")); ASSERT_OK(Put(cf, large, "end")); @@ -761,8 +775,8 @@ void DBTestBase::MakeTables( // Prevent pushing of new sstables into deeper levels by adding // tables that cover a specified range to all levels. 
-void DBTestBase::FillLevels( - const std::string& smallest, const std::string& largest, int cf) { +void DBTestBase::FillLevels(const std::string& smallest, + const std::string& largest, int cf) { MakeTables(db_->NumberLevels(handles_[cf]), smallest, largest, cf); } @@ -779,7 +793,7 @@ void DBTestBase::MoveFilesToLevel(int level, int cf) { void DBTestBase::DumpFileCounts(const char* label) { fprintf(stderr, "---\n%s:\n", label); fprintf(stderr, "maxoverlap: %" PRIu64 "\n", - dbfull()->TEST_MaxNextLevelOverlappingBytes()); + dbfull()->TEST_MaxNextLevelOverlappingBytes()); for (int level = 0; level < db_->NumberLevels(); level++) { int num = NumTableFilesAtLevel(level); if (num > 0) { @@ -888,9 +902,10 @@ void DBTestBase::VerifyIterLast(std::string expected_key, int cf) { // sets newValue with delta // If previous value is not empty, // updates previous value with 'b' string of previous value size - 1. -UpdateStatus DBTestBase::updateInPlaceSmallerSize( - char* prevValue, uint32_t* prevSize, - Slice delta, std::string* newValue) { +UpdateStatus DBTestBase::updateInPlaceSmallerSize(char* prevValue, + uint32_t* prevSize, + Slice delta, + std::string* newValue) { if (prevValue == nullptr) { *newValue = std::string(delta.size(), 'c'); return UpdateStatus::UPDATED; @@ -902,9 +917,10 @@ UpdateStatus DBTestBase::updateInPlaceSmallerSize( } } -UpdateStatus DBTestBase::updateInPlaceSmallerVarintSize( - char* prevValue, uint32_t* prevSize, - Slice delta, std::string* newValue) { +UpdateStatus DBTestBase::updateInPlaceSmallerVarintSize(char* prevValue, + uint32_t* prevSize, + Slice delta, + std::string* newValue) { if (prevValue == nullptr) { *newValue = std::string(delta.size(), 'c'); return UpdateStatus::UPDATED; @@ -916,16 +932,17 @@ UpdateStatus DBTestBase::updateInPlaceSmallerVarintSize( } } -UpdateStatus DBTestBase::updateInPlaceLargerSize( - char* prevValue, uint32_t* prevSize, - Slice delta, std::string* newValue) { +UpdateStatus DBTestBase::updateInPlaceLargerSize(char* prevValue, + uint32_t* prevSize, + Slice delta, + std::string* newValue) { *newValue = std::string(delta.size(), 'c'); return UpdateStatus::UPDATED; } -UpdateStatus DBTestBase::updateInPlaceNoAction( - char* prevValue, uint32_t* prevSize, - Slice delta, std::string* newValue) { +UpdateStatus DBTestBase::updateInPlaceNoAction(char* prevValue, + uint32_t* prevSize, Slice delta, + std::string* newValue) { return UpdateStatus::UPDATE_FAILED; } @@ -934,9 +951,9 @@ void DBTestBase::validateNumberOfEntries(int numValues, int cf) { ScopedArenaIterator iter; Arena arena; if (cf != 0) { - iter.set(dbfull()->TEST_NewInternalIterator(&arena, handles_[cf])); + iter.set(dbfull()->NewInternalIterator(&arena, handles_[cf])); } else { - iter.set(dbfull()->TEST_NewInternalIterator(&arena)); + iter.set(dbfull()->NewInternalIterator(&arena)); } iter->SeekToFirst(); ASSERT_EQ(iter->status().ok(), true); @@ -953,9 +970,8 @@ void DBTestBase::validateNumberOfEntries(int numValues, int cf) { ASSERT_EQ(0, seq); } -void DBTestBase::CopyFile( - const std::string& source, const std::string& destination, - uint64_t size) { +void DBTestBase::CopyFile(const std::string& source, + const std::string& destination, uint64_t size) { const EnvOptions soptions; unique_ptr srcfile; ASSERT_OK(env_->NewSequentialFile(source, &srcfile, soptions)); diff --git a/db/db_test_util.h b/db/db_test_util.h index 3a079b2da..befd6cd8c 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -30,6 +30,7 @@ #include "db/db_test_util.h" #include "db/dbformat.h" #include 
"db/filename.h" +#include "memtable/hash_linklist_rep.h" #include "rocksdb/cache.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/convenience.h" @@ -43,11 +44,11 @@ #include "table/block_based_table_factory.h" #include "table/mock_table.h" #include "table/plain_table_factory.h" +#include "table/scoped_arena_iterator.h" #include "util/compression.h" -#include "util/hash_linklist_rep.h" #include "util/mock_env.h" #include "util/mutexlock.h" -#include "util/scoped_arena_iterator.h" + #include "util/string_util.h" // SyncPoint is not supported in Released Windows Mode. #if !(defined NDEBUG) || !defined(OS_WIN) @@ -131,9 +132,7 @@ class SpecialEnv : public EnvWrapper { public: SSTableFile(SpecialEnv* env, unique_ptr&& base) - : env_(env), - base_(std::move(base)) { - } + : env_(env), base_(std::move(base)) {} Status Append(const Slice& data) override { if (env_->table_write_callback_) { (*env_->table_write_callback_)(); @@ -148,9 +147,7 @@ class SpecialEnv : public EnvWrapper { return base_->Append(data); } } - Status Truncate(uint64_t size) override { - return base_->Truncate(size); - } + Status Truncate(uint64_t size) override { return base_->Truncate(size); } Status Close() override { // SyncPoint is not supported in Released Windows Mode. #if !(defined NDEBUG) || !defined(OS_WIN) @@ -180,7 +177,7 @@ class SpecialEnv : public EnvWrapper { class ManifestFile : public WritableFile { public: ManifestFile(SpecialEnv* env, unique_ptr&& b) - : env_(env), base_(std::move(b)) { } + : env_(env), base_(std::move(b)) {} Status Append(const Slice& data) override { if (env_->manifest_write_error_.load(std::memory_order_acquire)) { return Status::IOError("simulated writer error"); @@ -283,8 +280,7 @@ class SpecialEnv : public EnvWrapper { public: CountingFile(unique_ptr&& target, anon::AtomicCounter* counter) - : target_(std::move(target)), counter_(counter) { - } + : target_(std::move(target)), counter_(counter) {} virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override { counter_->Increment(); @@ -329,7 +325,6 @@ class SpecialEnv : public EnvWrapper { return s; } - virtual void SleepForMicroseconds(int micros) override { sleep_counter_.Increment(); if (no_sleep_) { @@ -406,7 +401,7 @@ class SpecialEnv : public EnvWrapper { std::atomic addon_time_; bool no_sleep_; - std::atomic is_wal_sync_thread_safe_ {true}; + std::atomic is_wal_sync_thread_safe_{true}; }; class DBTestBase : public testing::Test { @@ -509,9 +504,7 @@ class DBTestBase : public testing::Test { const Options& defaultOptions, const anon::OptionsOverride& options_override = anon::OptionsOverride()); - DBImpl* dbfull() { - return reinterpret_cast(db_); - } + DBImpl* dbfull() { return reinterpret_cast(db_); } void CreateColumnFamilies(const std::vector& cfs, const Options& options); @@ -525,9 +518,8 @@ class DBTestBase : public testing::Test { void ReopenWithColumnFamilies(const std::vector& cfs, const Options& options); - Status TryReopenWithColumnFamilies( - const std::vector& cfs, - const std::vector& options); + Status TryReopenWithColumnFamilies(const std::vector& cfs, + const std::vector& options); Status TryReopenWithColumnFamilies(const std::vector& cfs, const Options& options); @@ -574,16 +566,20 @@ class DBTestBase : public testing::Test { std::string AllEntriesFor(const Slice& user_key, int cf = 0); +#ifndef ROCKSDB_LITE int NumSortedRuns(int cf = 0); uint64_t TotalSize(int cf = 0); - int NumTableFilesAtLevel(int level, int cf = 0); - uint64_t SizeAtLevel(int level); int 
TotalLiveFiles(int cf = 0); + size_t CountLiveFiles(); +#endif // ROCKSDB_LITE + + int NumTableFilesAtLevel(int level, int cf = 0); + int TotalTableFiles(int cf = 0, int levels = -1); // Return spread of files per level @@ -591,8 +587,6 @@ class DBTestBase : public testing::Test { size_t CountFiles(); - size_t CountLiveFiles(); - uint64_t Size(const Slice& start, const Slice& limit, int cf = 0); void Compact(int cf, const Slice& start, const Slice& limit, @@ -643,21 +637,21 @@ class DBTestBase : public testing::Test { // sets newValue with delta // If previous value is not empty, // updates previous value with 'b' string of previous value size - 1. - static UpdateStatus updateInPlaceSmallerSize( - char* prevValue, uint32_t* prevSize, - Slice delta, std::string* newValue); + static UpdateStatus updateInPlaceSmallerSize(char* prevValue, + uint32_t* prevSize, Slice delta, + std::string* newValue); - static UpdateStatus updateInPlaceSmallerVarintSize( - char* prevValue, uint32_t* prevSize, - Slice delta, std::string* newValue); + static UpdateStatus updateInPlaceSmallerVarintSize(char* prevValue, + uint32_t* prevSize, + Slice delta, + std::string* newValue); - static UpdateStatus updateInPlaceLargerSize( - char* prevValue, uint32_t* prevSize, - Slice delta, std::string* newValue); + static UpdateStatus updateInPlaceLargerSize(char* prevValue, + uint32_t* prevSize, Slice delta, + std::string* newValue); - static UpdateStatus updateInPlaceNoAction( - char* prevValue, uint32_t* prevSize, - Slice delta, std::string* newValue); + static UpdateStatus updateInPlaceNoAction(char* prevValue, uint32_t* prevSize, + Slice delta, std::string* newValue); // Utility method to test InplaceUpdate void validateNumberOfEntries(int numValues, int cf = 0); diff --git a/db/db_universal_compaction_test.cc b/db/db_universal_compaction_test.cc index 236f5df1f..3323afe31 100644 --- a/db/db_universal_compaction_test.cc +++ b/db/db_universal_compaction_test.cc @@ -9,7 +9,7 @@ #include "db/db_test_util.h" #include "port/stack_trace.h" -#if !(defined NDEBUG) || !defined(OS_WIN) +#if (!(defined NDEBUG) || !defined(OS_WIN)) && !defined(ROCKSDB_LITE) #include "util/sync_point.h" namespace rocksdb { @@ -1210,10 +1210,10 @@ INSTANTIATE_TEST_CASE_P(DBTestUniversalManualCompactionOutputPathId, } // namespace rocksdb -#endif // !(defined NDEBUG) || !defined(OS_WIN) +#endif // (!(defined NDEBUG) || !defined(OS_WIN)) && !defined(ROCKSDB_LITE) int main(int argc, char** argv) { -#if !(defined NDEBUG) || !defined(OS_WIN) +#if (!(defined NDEBUG) || !defined(OS_WIN)) && !defined(ROCKSDB_LITE) rocksdb::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff --git a/db/filename.cc b/db/filename.cc index e15203733..f57b178b9 100644 --- a/db/filename.cc +++ b/db/filename.cc @@ -21,6 +21,7 @@ #include "util/file_reader_writer.h" #include "util/logging.h" #include "util/stop_watch.h" +#include "util/sync_point.h" namespace rocksdb { @@ -322,7 +323,9 @@ Status SetCurrentFile(Env* env, const std::string& dbname, std::string tmp = TempFileName(dbname, descriptor_number); Status s = WriteStringToFile(env, contents.ToString() + "\n", tmp, true); if (s.ok()) { + TEST_KILL_RANDOM("SetCurrentFile:0", rocksdb_kill_odds * REDUCE_ODDS2); s = env->RenameFile(tmp, CurrentFileName(dbname)); + TEST_KILL_RANDOM("SetCurrentFile:1", rocksdb_kill_odds * REDUCE_ODDS2); } if (s.ok()) { if (directory_to_fsync != nullptr) { @@ -351,6 +354,7 @@ Status SetIdentityFile(Env* env, const std::string& dbname) { Status 
SyncManifest(Env* env, const DBOptions* db_options, WritableFileWriter* file) { + TEST_KILL_RANDOM("SyncManifest:0", rocksdb_kill_odds * REDUCE_ODDS2); if (db_options->disableDataSync) { return Status::OK(); } else { diff --git a/db/flush_job.cc b/db/flush_job.cc index f2d142298..a20a0ba98 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -197,7 +197,7 @@ Status FlushJob::WriteLevel0Table(const autovector& mems, if (log_buffer_) { log_buffer_->FlushBufferToLog(); } - std::vector memtables; + std::vector memtables; ReadOptions ro; ro.total_order_seek = true; Arena arena; diff --git a/db/flush_job.h b/db/flush_job.h index 6d9f63ea1..dbc4113e1 100644 --- a/db/flush_job.h +++ b/db/flush_job.h @@ -27,12 +27,12 @@ #include "rocksdb/env.h" #include "rocksdb/memtablerep.h" #include "rocksdb/transaction_log.h" +#include "table/scoped_arena_iterator.h" #include "util/autovector.h" #include "util/event_logger.h" #include "util/instrumented_mutex.h" #include "util/stop_watch.h" #include "util/thread_local.h" -#include "util/scoped_arena_iterator.h" #include "db/internal_stats.h" #include "db/write_controller.h" #include "db/flush_scheduler.h" diff --git a/db/forward_iterator.cc b/db/forward_iterator.cc index c0d7647c5..2d68368ea 100644 --- a/db/forward_iterator.cc +++ b/db/forward_iterator.cc @@ -28,7 +28,7 @@ namespace rocksdb { // iter.SetFileIndex(file_index); // iter.Seek(target); // iter.Next() -class LevelIterator : public Iterator { +class LevelIterator : public InternalIterator { public: LevelIterator(const ColumnFamilyData* const cfd, const ReadOptions& read_options, @@ -113,7 +113,7 @@ class LevelIterator : public Iterator { bool valid_; uint32_t file_index_; Status status_; - std::unique_ptr file_iter_; + std::unique_ptr file_iter_; }; ForwardIterator::ForwardIterator(DBImpl* db, const ReadOptions& read_options, @@ -146,10 +146,10 @@ ForwardIterator::~ForwardIterator() { void ForwardIterator::Cleanup(bool release_sv) { if (mutable_iter_ != nullptr) { - mutable_iter_->~Iterator(); + mutable_iter_->~InternalIterator(); } for (auto* m : imm_iters_) { - m->~Iterator(); + m->~InternalIterator(); } imm_iters_.clear(); for (auto* f : l0_iters_) { diff --git a/db/forward_iterator.h b/db/forward_iterator.h index e6ef0bdfc..a159a6101 100644 --- a/db/forward_iterator.h +++ b/db/forward_iterator.h @@ -14,6 +14,7 @@ #include "rocksdb/iterator.h" #include "rocksdb/options.h" #include "db/dbformat.h" +#include "table/internal_iterator.h" #include "util/arena.h" namespace rocksdb { @@ -30,16 +31,15 @@ class MinIterComparator { explicit MinIterComparator(const Comparator* comparator) : comparator_(comparator) {} - bool operator()(Iterator* a, Iterator* b) { + bool operator()(InternalIterator* a, InternalIterator* b) { return comparator_->Compare(a->key(), b->key()) > 0; } private: const Comparator* comparator_; }; -typedef std::priority_queue, - MinIterComparator> MinIterHeap; +typedef std::priority_queue, + MinIterComparator> MinIterHeap; /** * ForwardIterator is a special type of iterator that only supports Seek() @@ -48,7 +48,7 @@ typedef std::priority_queue imm_iters_; - std::vector l0_iters_; + InternalIterator* mutable_iter_; + std::vector imm_iters_; + std::vector l0_iters_; std::vector level_iters_; - Iterator* current_; + InternalIterator* current_; bool valid_; // Internal iterator status; set only by one of the unsupported methods. 
diff --git a/db/internal_stats.cc b/db/internal_stats.cc index 4e37c1d08..6bf19701e 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -130,6 +130,8 @@ static const std::string aggregated_table_properties = "aggregated-table-properties"; static const std::string aggregated_table_properties_at_level = aggregated_table_properties + "-at-level"; +static const std::string num_running_compactions = "num-running-compactions"; +static const std::string num_running_flushes = "num-running-flushes"; const std::string DB::Properties::kNumFilesAtLevelPrefix = rocksdb_prefix + num_files_at_level_prefix; @@ -143,6 +145,10 @@ const std::string DB::Properties::kMemTableFlushPending = rocksdb_prefix + mem_table_flush_pending; const std::string DB::Properties::kCompactionPending = rocksdb_prefix + compaction_pending; +const std::string DB::Properties::kNumRunningCompactions = + rocksdb_prefix + num_running_compactions; +const std::string DB::Properties::kNumRunningFlushes = + rocksdb_prefix + num_running_flushes; const std::string DB::Properties::kBackgroundErrors = rocksdb_prefix + background_errors; const std::string DB::Properties::kCurSizeActiveMemTable = @@ -260,6 +266,10 @@ DBPropertyType GetPropertyType(const Slice& property, bool* is_int_property, return kTotalSstFilesSize; } else if (in == estimate_pending_comp_bytes) { return kEstimatePendingCompactionBytes; + } else if (in == num_running_flushes) { + return kNumRunningFlushes; + } else if (in == num_running_compactions) { + return kNumRunningCompactions; } return kUnknown; } @@ -388,11 +398,17 @@ bool InternalStats::GetIntProperty(DBPropertyType property_type, // Return number of mem tables that are ready to flush (made immutable) *value = (cfd_->imm()->IsFlushPending() ? 1 : 0); return true; + case kNumRunningFlushes: + *value = db->num_running_flushes(); + return true; case kCompactionPending: // 1 if the system already determines at least one compaction is needed. // 0 otherwise, *value = (cfd_->compaction_picker()->NeedsCompaction(vstorage) ? 1 : 0); return true; + case kNumRunningCompactions: + *value = db->num_running_compactions_; + return true; case kBackgroundErrors: // Accumulated number of errors in background flushes or compactions. *value = GetBackgroundErrorCount(); diff --git a/db/internal_stats.h b/db/internal_stats.h index eeb226e5e..0e3413bc7 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -38,7 +38,10 @@ enum DBPropertyType : uint32_t { // in memory that have already been flushed kMemtableFlushPending, // Return 1 if mem table flushing is pending, // otherwise 0. + kNumRunningFlushes, // Return the number of currently running flushes. kCompactionPending, // Return 1 if a compaction is pending. Otherwise 0. + kNumRunningCompactions, // Return the number of currently running + // compactions. kBackgroundErrors, // Return accumulated background errors encountered. kCurSizeActiveMemTable, // Return current size of the active memtable kCurSizeAllMemTables, // Return current size of unflushed @@ -49,10 +52,10 @@ enum DBPropertyType : uint32_t { // memtable. kNumEntriesInImmutableMemtable, // Return sum of number of entries in all // the immutable mem tables. - kNumDeletesInMutableMemtable, // Return number of entries in the mutable - // memtable. - kNumDeletesInImmutableMemtable, // Return sum of number of deletes in all - // the immutable mem tables. + kNumDeletesInMutableMemtable, // Return number of deletion entries in the + // mutable memtable. 
+ kNumDeletesInImmutableMemtable, // Return the total number of deletion + // entries in all the immutable mem tables. kEstimatedNumKeys, // Estimated total number of keys in the database. kEstimatedUsageByTableReaders, // Estimated memory by table readers. kIsFileDeletionEnabled, // Equals disable_delete_obsolete_files_, diff --git a/db/listener_test.cc b/db/listener_test.cc index 0880a6597..08fe096dc 100644 --- a/db/listener_test.cc +++ b/db/listener_test.cc @@ -8,6 +8,7 @@ #include "db/filename.h" #include "db/version_set.h" #include "db/write_batch_internal.h" +#include "memtable/hash_linklist_rep.h" #include "rocksdb/cache.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/db.h" @@ -22,7 +23,6 @@ #include "table/block_based_table_factory.h" #include "table/plain_table_factory.h" #include "util/hash.h" -#include "util/hash_linklist_rep.h" #include "util/logging.h" #include "util/mutexlock.h" #include "util/rate_limiter.h" diff --git a/util/manual_compaction_test.cc b/db/manual_compaction_test.cc similarity index 100% rename from util/manual_compaction_test.cc rename to db/manual_compaction_test.cc diff --git a/db/memtable.cc b/db/memtable.cc index 54c119ee2..e48e75e3b 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -21,6 +21,7 @@ #include "rocksdb/iterator.h" #include "rocksdb/merge_operator.h" #include "rocksdb/slice_transform.h" +#include "table/internal_iterator.h" #include "table/merger.h" #include "util/arena.h" #include "util/coding.h" @@ -202,7 +203,7 @@ const char* EncodeKey(std::string* scratch, const Slice& target) { return scratch->data(); } -class MemTableIterator: public Iterator { +class MemTableIterator : public InternalIterator { public: MemTableIterator( const MemTable& mem, const ReadOptions& read_options, Arena* arena) @@ -285,7 +286,8 @@ class MemTableIterator: public Iterator { void operator=(const MemTableIterator&); }; -Iterator* MemTable::NewIterator(const ReadOptions& read_options, Arena* arena) { +InternalIterator* MemTable::NewIterator(const ReadOptions& read_options, + Arena* arena) { assert(arena != nullptr); auto mem = arena->AllocateAligned(sizeof(MemTableIterator)); return new (mem) MemTableIterator(*this, read_options, arena); diff --git a/db/memtable.h b/db/memtable.h index f09082ce0..11aa8fed8 100644 --- a/db/memtable.h +++ b/db/memtable.h @@ -31,6 +31,7 @@ class Mutex; class MemTableIterator; class MergeContext; class WriteBuffer; +class InternalIterator; struct MemTableOptions { explicit MemTableOptions( @@ -140,7 +141,7 @@ class MemTable { // arena: If not null, the arena needs to be used to allocate the Iterator. // Calling ~Iterator of the iterator will destroy all the states but // those allocated in arena. - Iterator* NewIterator(const ReadOptions& read_options, Arena* arena); + InternalIterator* NewIterator(const ReadOptions& read_options, Arena* arena); // Add an entry into memtable that maps key to value at the // specified sequence number and with the specified type. 
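The MemTable::NewIterator() change above makes the iterator both InternalIterator-typed and arena-allocated. A minimal sketch of the ownership pattern the tests use (an Arena plus ScopedArenaIterator::set); this assumes an internal-API caller holding a rocksdb::MemTable* named mem, and is shown only to illustrate why no plain delete happens:

    // Illustration only; `mem` is assumed to be a rocksdb::MemTable*
    // (internal API).
    rocksdb::Arena arena;
    rocksdb::ReadOptions ro;
    rocksdb::ScopedArenaIterator iter;
    iter.set(mem->NewIterator(ro, &arena));  // iterator memory lives in `arena`
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      // iter->key() yields internal keys; on scope exit only the destructor
      // runs and the arena releases the memory, so no delete is issued.
    }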
diff --git a/db/memtable_list.cc b/db/memtable_list.cc index b2bbbd165..1734eda03 100644 --- a/db/memtable_list.cc +++ b/db/memtable_list.cc @@ -138,9 +138,9 @@ bool MemTableListVersion::GetFromList(std::list* list, return false; } -void MemTableListVersion::AddIterators(const ReadOptions& options, - std::vector* iterator_list, - Arena* arena) { +void MemTableListVersion::AddIterators( + const ReadOptions& options, std::vector* iterator_list, + Arena* arena) { for (auto& m : memlist_) { iterator_list->push_back(m->NewIterator(options, arena)); } diff --git a/db/memtable_list.h b/db/memtable_list.h index 63e27732b..117b4a506 100644 --- a/db/memtable_list.h +++ b/db/memtable_list.h @@ -74,7 +74,8 @@ class MemTableListVersion { } void AddIterators(const ReadOptions& options, - std::vector* iterator_list, Arena* arena); + std::vector* iterator_list, + Arena* arena); void AddIterators(const ReadOptions& options, MergeIteratorBuilder* merge_iter_builder); diff --git a/db/memtablerep_bench.cc b/db/memtablerep_bench.cc index a2a872226..5590bad5b 100644 --- a/db/memtablerep_bench.cc +++ b/db/memtablerep_bench.cc @@ -592,6 +592,7 @@ int main(int argc, char** argv) { std::unique_ptr factory; if (FLAGS_memtablerep == "skiplist") { factory.reset(new rocksdb::SkipListFactory); +#ifndef ROCKSDB_LITE } else if (FLAGS_memtablerep == "vector") { factory.reset(new rocksdb::VectorRepFactory); } else if (FLAGS_memtablerep == "hashskiplist") { @@ -613,6 +614,7 @@ int main(int argc, char** argv) { static_cast(FLAGS_hash_function_count))); options.prefix_extractor.reset( rocksdb::NewFixedPrefixTransform(FLAGS_prefix_length)); +#endif // ROCKSDB_LITE } else { fprintf(stdout, "Unknown memtablerep: %s\n", FLAGS_memtablerep.c_str()); exit(1); diff --git a/db/merge_helper.cc b/db/merge_helper.cc index f9cb67e9c..c443ca2d9 100644 --- a/db/merge_helper.cc +++ b/db/merge_helper.cc @@ -12,6 +12,7 @@ #include "rocksdb/comparator.h" #include "rocksdb/db.h" #include "rocksdb/merge_operator.h" +#include "table/internal_iterator.h" #include "util/perf_context_imp.h" #include "util/statistics.h" @@ -56,7 +57,8 @@ Status MergeHelper::TimedFullMerge(const Slice& key, const Slice* value, // keys_ stores the list of keys encountered while merging. // operands_ stores the list of merge operands encountered while merging. // keys_[i] corresponds to operands_[i] for each i. -Status MergeHelper::MergeUntil(Iterator* iter, const SequenceNumber stop_before, +Status MergeHelper::MergeUntil(InternalIterator* iter, + const SequenceNumber stop_before, const bool at_bottom) { // Get a copy of the internal key, before it's invalidated by iter->Next() // Also maintain the list of merge operands seen. diff --git a/db/merge_helper.h b/db/merge_helper.h index ade3d71a6..488c7ac2b 100644 --- a/db/merge_helper.h +++ b/db/merge_helper.h @@ -22,6 +22,7 @@ class Iterator; class Logger; class MergeOperator; class Statistics; +class InternalIterator; class MergeHelper { public: @@ -82,7 +83,8 @@ class MergeHelper { // with asserts removed). // // REQUIRED: The first key in the input is not corrupted. 
- Status MergeUntil(Iterator* iter, const SequenceNumber stop_before = 0, + Status MergeUntil(InternalIterator* iter, + const SequenceNumber stop_before = 0, const bool at_bottom = false); // Filters a merge operand using the compaction filter specified diff --git a/db/perf_context_test.cc b/db/perf_context_test.cc index 359562a16..00065dc92 100644 --- a/db/perf_context_test.cc +++ b/db/perf_context_test.cc @@ -5,6 +5,7 @@ // #include #include +#include #include #include "rocksdb/db.h" @@ -444,6 +445,7 @@ void ProfileQueries(bool enabled_time = false) { } } +#ifndef ROCKSDB_LITE TEST_F(PerfContextTest, KeyComparisonCount) { SetPerfLevel(kEnableCount); ProfileQueries(); @@ -454,6 +456,7 @@ TEST_F(PerfContextTest, KeyComparisonCount) { SetPerfLevel(kEnableTime); ProfileQueries(true); } +#endif // ROCKSDB_LITE // make perf_context_test // export ROCKSDB_TESTS=PerfContextTest.SeekKeyComparison @@ -539,6 +542,49 @@ TEST_F(PerfContextTest, SeekKeyComparison) { } } +TEST_F(PerfContextTest, DBMutexLockCounter) { + SetPerfLevel(kEnableTime); + int stats_code[] = {0, static_cast(DB_MUTEX_WAIT_MICROS)}; + for (int c = 0; c < 2; ++c) { + InstrumentedMutex mutex(nullptr, Env::Default(), stats_code[c]); + mutex.Lock(); + std::thread child_thread([&] { + SetPerfLevel(kEnableTime); + perf_context.Reset(); + ASSERT_EQ(perf_context.db_mutex_lock_nanos, 0); + mutex.Lock(); + mutex.Unlock(); + if (stats_code[c] == DB_MUTEX_WAIT_MICROS) { + // increment the counter only when it's a DB Mutex + ASSERT_GT(perf_context.db_mutex_lock_nanos, 0); + } else { + ASSERT_EQ(perf_context.db_mutex_lock_nanos, 0); + } + }); + Env::Default()->SleepForMicroseconds(100); + mutex.Unlock(); + child_thread.join(); + } +} + +TEST_F(PerfContextTest, FalseDBMutexWait) { + SetPerfLevel(kEnableTime); + int stats_code[] = {0, static_cast(DB_MUTEX_WAIT_MICROS)}; + for (int c = 0; c < 2; ++c) { + InstrumentedMutex mutex(nullptr, Env::Default(), stats_code[c]); + InstrumentedCondVar lock(&mutex); + perf_context.Reset(); + mutex.Lock(); + lock.TimedWait(100); + mutex.Unlock(); + if (stats_code[c] == static_cast(DB_MUTEX_WAIT_MICROS)) { + // increment the counter only when it's a DB Mutex + ASSERT_GT(perf_context.db_condition_wait_nanos, 0); + } else { + ASSERT_EQ(perf_context.db_condition_wait_nanos, 0); + } + } +} } int main(int argc, char** argv) { diff --git a/db/repair.cc b/db/repair.cc index 42c702d0b..ba63850be 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -81,8 +81,8 @@ #include "rocksdb/env.h" #include "rocksdb/options.h" #include "rocksdb/immutable_options.h" +#include "table/scoped_arena_iterator.h" #include "util/file_reader_writer.h" -#include "util/scoped_arena_iterator.h" namespace rocksdb { @@ -340,7 +340,7 @@ class Repairer { t->meta.fd = FileDescriptor(t->meta.fd.GetNumber(), t->meta.fd.GetPathId(), file_size); if (status.ok()) { - Iterator* iter = table_cache_->NewIterator( + InternalIterator* iter = table_cache_->NewIterator( ReadOptions(), env_options_, icmp_, t->meta.fd); bool empty = true; ParsedInternalKey parsed; diff --git a/db/table_cache.cc b/db/table_cache.cc index b240fc7d0..82b52ddb5 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -14,6 +14,7 @@ #include "db/version_edit.h" #include "rocksdb/statistics.h" +#include "table/internal_iterator.h" #include "table/iterator_wrapper.h" #include "table/table_builder.h" #include "table/table_reader.h" @@ -148,13 +149,11 @@ Status TableCache::FindTable(const EnvOptions& env_options, return s; } -Iterator* TableCache::NewIterator(const ReadOptions& options, - 
const EnvOptions& env_options, - const InternalKeyComparator& icomparator, - const FileDescriptor& fd, - TableReader** table_reader_ptr, - HistogramImpl* file_read_hist, - bool for_compaction, Arena* arena) { +InternalIterator* TableCache::NewIterator( + const ReadOptions& options, const EnvOptions& env_options, + const InternalKeyComparator& icomparator, const FileDescriptor& fd, + TableReader** table_reader_ptr, HistogramImpl* file_read_hist, + bool for_compaction, Arena* arena) { PERF_TIMER_GUARD(new_table_iterator_nanos); if (table_reader_ptr != nullptr) { @@ -171,7 +170,7 @@ Iterator* TableCache::NewIterator(const ReadOptions& options, env_options, icomparator, fd, /* sequential mode */ true, /* record stats */ false, nullptr, &table_reader_unique_ptr); if (!s.ok()) { - return NewErrorIterator(s, arena); + return NewErrorInternalIterator(s, arena); } table_reader = table_reader_unique_ptr.release(); } else { @@ -182,13 +181,13 @@ Iterator* TableCache::NewIterator(const ReadOptions& options, options.read_tier == kBlockCacheTier /* no_io */, !for_compaction /* record read_stats */, file_read_hist); if (!s.ok()) { - return NewErrorIterator(s, arena); + return NewErrorInternalIterator(s, arena); } table_reader = GetTableReaderFromHandle(handle); } } - Iterator* result = table_reader->NewIterator(options, arena); + InternalIterator* result = table_reader->NewIterator(options, arena); if (create_new_table_reader) { assert(handle == nullptr); diff --git a/db/table_cache.h b/db/table_cache.h index d9ae01348..631946e5f 100644 --- a/db/table_cache.h +++ b/db/table_cache.h @@ -29,6 +29,7 @@ class Arena; struct FileDescriptor; class GetContext; class HistogramImpl; +class InternalIterator; class TableCache { public: @@ -43,12 +44,12 @@ class TableCache { // the returned iterator. The returned "*tableptr" object is owned by // the cache and should not be deleted, and is valid for as long as the // returned iterator is live. - Iterator* NewIterator(const ReadOptions& options, const EnvOptions& toptions, - const InternalKeyComparator& internal_comparator, - const FileDescriptor& file_fd, - TableReader** table_reader_ptr = nullptr, - HistogramImpl* file_read_hist = nullptr, - bool for_compaction = false, Arena* arena = nullptr); + InternalIterator* NewIterator( + const ReadOptions& options, const EnvOptions& toptions, + const InternalKeyComparator& internal_comparator, + const FileDescriptor& file_fd, TableReader** table_reader_ptr = nullptr, + HistogramImpl* file_read_hist = nullptr, bool for_compaction = false, + Arena* arena = nullptr); // If a seek to internal key "k" in specified file finds an entry, // call (*handle_result)(arg, found_key, found_value) repeatedly until diff --git a/db/version_set.cc b/db/version_set.cc index 91471c49d..34e67aa0f 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -35,6 +35,7 @@ #include "db/writebuffer.h" #include "rocksdb/env.h" #include "rocksdb/merge_operator.h" +#include "table/internal_iterator.h" #include "table/table_reader.h" #include "table/merger.h" #include "table/two_level_iterator.h" @@ -420,7 +421,7 @@ namespace { // is the largest key that occurs in the file, and value() is an // 16-byte value containing the file number and file size, both // encoded using EncodeFixed64. 
-class LevelFileNumIterator : public Iterator { +class LevelFileNumIterator : public InternalIterator { public: LevelFileNumIterator(const InternalKeyComparator& icmp, const LevelFilesBrief* flevel) @@ -488,9 +489,9 @@ class LevelFileIteratorState : public TwoLevelIteratorState { file_read_hist_(file_read_hist), for_compaction_(for_compaction) {} - Iterator* NewSecondaryIterator(const Slice& meta_handle) override { + InternalIterator* NewSecondaryIterator(const Slice& meta_handle) override { if (meta_handle.size() != sizeof(FileDescriptor)) { - return NewErrorIterator( + return NewErrorInternalIterator( Status::Corruption("FileReader invoked with unexpected value")); } else { const FileDescriptor* fd = @@ -541,7 +542,7 @@ class BaseReferencedVersionBuilder { Status Version::GetTableProperties(std::shared_ptr* tp, const FileMetaData* file_meta, - const std::string* fname) { + const std::string* fname) const { auto table_cache = cfd_->table_cache(); auto ioptions = cfd_->ioptions(); Status s = table_cache->GetTableProperties( @@ -623,6 +624,38 @@ Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props, return Status::OK(); } +Status Version::GetPropertiesOfTablesInRange( + const Range* range, std::size_t n, TablePropertiesCollection* props) const { + for (int level = 0; level < storage_info_.num_non_empty_levels(); level++) { + for (decltype(n) i = 0; i < n; i++) { + // Convert user_key into a corresponding internal key. + InternalKey k1(range[i].start, kMaxSequenceNumber, kValueTypeForSeek); + InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek); + std::vector files; + storage_info_.GetOverlappingInputs(level, &k1, &k2, &files, -1, nullptr, + false); + for (const auto& file_meta : files) { + auto fname = + TableFileName(vset_->db_options_->db_paths, + file_meta->fd.GetNumber(), file_meta->fd.GetPathId()); + if (props->count(fname) == 0) { + // 1. If the table is already present in table cache, load table + // properties from there. + std::shared_ptr table_properties; + Status s = GetTableProperties(&table_properties, file_meta, &fname); + if (s.ok()) { + props->insert({fname, table_properties}); + } else { + return s; + } + } + } + } + } + + return Status::OK(); +} + Status Version::GetAggregatedTableProperties( std::shared_ptr* tp, int level) { TablePropertiesCollection props; @@ -1064,7 +1097,7 @@ void VersionStorageInfo::EstimateCompactionBytesNeeded( // We keep doing it to Level 2, 3, etc, until the last level and return the // accumulated bytes. - size_t bytes_compact_to_next_level = 0; + uint64_t bytes_compact_to_next_level = 0; // Level 0 bool level0_compact_triggered = false; if (static_cast(files_[0].size()) > @@ -1080,7 +1113,7 @@ void VersionStorageInfo::EstimateCompactionBytesNeeded( // Level 1 and up. 
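Version::GetPropertiesOfTablesInRange() above is the internal half of this feature; later hunks add the public DB::GetPropertiesOfTablesInRange() entry point and the StackableDB forwarder. A caller-side sketch, assuming an open non-LITE DB* named db and an illustrative key range:

```cpp
// Sketch only: collect table properties for SSTs overlapping ["a", "m").
#include <cinttypes>
#include <cstdio>
#include "rocksdb/db.h"
#include "rocksdb/table_properties.h"

void PrintTablePropsInRange(rocksdb::DB* db) {
  rocksdb::Range ranges[1];
  ranges[0] = rocksdb::Range("a", "m");
  rocksdb::TablePropertiesCollection props;
  rocksdb::Status s = db->GetPropertiesOfTablesInRange(
      db->DefaultColumnFamily(), ranges, 1, &props);
  if (!s.ok()) {
    fprintf(stderr, "%s\n", s.ToString().c_str());
    return;
  }
  for (const auto& entry : props) {
    // entry.first is the SST file name, entry.second its TableProperties.
    fprintf(stdout, "%s: %" PRIu64 " entries\n", entry.first.c_str(),
            entry.second->num_entries);
  }
}
```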
for (int level = base_level(); level <= MaxInputLevel(); level++) { - size_t level_size = 0; + uint64_t level_size = 0; for (auto* f : files_[level]) { level_size += f->fd.GetFileSize(); } @@ -1091,7 +1124,7 @@ void VersionStorageInfo::EstimateCompactionBytesNeeded( // Add size added by previous compaction level_size += bytes_compact_to_next_level; bytes_compact_to_next_level = 0; - size_t level_target = MaxBytesForLevel(level); + uint64_t level_target = MaxBytesForLevel(level); if (level_size > level_target) { bytes_compact_to_next_level = level_size - level_target; // Simplify to assume the actual compaction fan-out ratio is always @@ -1405,7 +1438,8 @@ bool VersionStorageInfo::OverlapInLevel(int level, // The file_index returns a pointer to any file in an overlapping range. void VersionStorageInfo::GetOverlappingInputs( int level, const InternalKey* begin, const InternalKey* end, - std::vector* inputs, int hint_index, int* file_index) { + std::vector* inputs, int hint_index, int* file_index, + bool expand_range) const { if (level >= num_non_empty_levels_) { // this level is empty, no overlapping inputs return; @@ -1438,7 +1472,7 @@ void VersionStorageInfo::GetOverlappingInputs( // "f" is completely after specified range; skip it } else { inputs->push_back(files_[level][i-1]); - if (level == 0) { + if (level == 0 && expand_range) { // Level-0 files may overlap each other. So check if the newly // added file has expanded the range. If so, restart search. if (begin != nullptr && user_cmp->Compare(file_start, user_begin) < 0) { @@ -1464,7 +1498,7 @@ void VersionStorageInfo::GetOverlappingInputs( // forwards to find all overlapping files. void VersionStorageInfo::GetOverlappingInputsBinarySearch( int level, const Slice& user_begin, const Slice& user_end, - std::vector* inputs, int hint_index, int* file_index) { + std::vector* inputs, int hint_index, int* file_index) const { assert(level > 0); int min = 0; int mid = 0; @@ -1512,8 +1546,7 @@ void VersionStorageInfo::GetOverlappingInputsBinarySearch( // Use FileLevel in searching, make it faster void VersionStorageInfo::ExtendOverlappingInputs( int level, const Slice& user_begin, const Slice& user_end, - std::vector* inputs, unsigned int midIndex) { - + std::vector* inputs, unsigned int midIndex) const { const Comparator* user_cmp = user_comparator_; const FdWithKeyRange* files = level_files_brief_[level].files; #ifndef NDEBUG @@ -2070,8 +2103,8 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data, "Creating manifest %" PRIu64 "\n", pending_manifest_file_number_); unique_ptr descriptor_file; EnvOptions opt_env_opts = env_->OptimizeForManifestWrite(env_options_); - s = env_->NewWritableFile( - DescriptorFileName(dbname_, pending_manifest_file_number_), + s = NewWritableFile( + env_, DescriptorFileName(dbname_, pending_manifest_file_number_), &descriptor_file, opt_env_opts); if (s.ok()) { descriptor_file->SetPreallocationBlockSize( @@ -2098,6 +2131,8 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data, "Unable to Encode VersionEdit:" + e->DebugString(true)); break; } + TEST_KILL_RANDOM("VersionSet::LogAndApply:BeforeAddRecord", + rocksdb_kill_odds * REDUCE_ODDS2); s = descriptor_log_->AddRecord(record); if (!s.ok()) { break; @@ -3119,7 +3154,7 @@ uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f, // "key" falls in the range for this table. Add the // approximate offset of "key" within the table. 
TableReader* table_reader_ptr; - Iterator* iter = v->cfd_->table_cache()->NewIterator( + InternalIterator* iter = v->cfd_->table_cache()->NewIterator( ReadOptions(), env_options_, v->cfd_->internal_comparator(), f.fd, &table_reader_ptr); if (table_reader_ptr != nullptr) { @@ -3166,7 +3201,7 @@ void VersionSet::AddLiveFiles(std::vector* live_list) { } } -Iterator* VersionSet::MakeInputIterator(Compaction* c) { +InternalIterator* VersionSet::MakeInputIterator(Compaction* c) { auto cfd = c->column_family_data(); ReadOptions read_options; read_options.verify_checksums = @@ -3182,7 +3217,7 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) { const size_t space = (c->level() == 0 ? c->input_levels(0)->num_files + c->num_input_levels() - 1 : c->num_input_levels()); - Iterator** list = new Iterator* [space]; + InternalIterator** list = new InternalIterator* [space]; size_t num = 0; for (size_t which = 0; which < c->num_input_levels(); which++) { if (c->input_levels(which)->num_files != 0) { @@ -3209,7 +3244,7 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) { } } assert(num <= space); - Iterator* result = + InternalIterator* result = NewMergingIterator(&c->column_family_data()->internal_comparator(), list, static_cast(num)); delete[] list; diff --git a/db/version_set.h b/db/version_set.h index 396460095..c250dddbd 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -48,7 +48,7 @@ class Writer; } class Compaction; -class Iterator; +class InternalIterator; class LogBuffer; class LookupKey; class MemTable; @@ -159,23 +159,26 @@ class VersionStorageInfo { int level, const InternalKey* begin, // nullptr means before all keys const InternalKey* end, // nullptr means after all keys std::vector* inputs, - int hint_index = -1, // index of overlap file - int* file_index = nullptr); // return index of overlap file + int hint_index = -1, // index of overlap file + int* file_index = nullptr, // return index of overlap file + bool expand_range = true) // if set, returns files which overlap the + const; // range and overlap each other. If false, + // then just files intersecting the range void GetOverlappingInputsBinarySearch( int level, const Slice& begin, // nullptr means before all keys const Slice& end, // nullptr means after all keys std::vector* inputs, - int hint_index, // index of overlap file - int* file_index); // return index of overlap file + int hint_index, // index of overlap file + int* file_index) const; // return index of overlap file void ExtendOverlappingInputs( int level, const Slice& begin, // nullptr means before all keys const Slice& end, // nullptr means after all keys std::vector* inputs, - unsigned int index); // start extending from this index + unsigned int index) const; // start extending from this index // Returns true iff some file in the specified level overlaps // some part of [*smallest_user_key,*largest_user_key]. @@ -456,15 +459,16 @@ class Version { // file-name conversion. Status GetTableProperties(std::shared_ptr* tp, const FileMetaData* file_meta, - const std::string* fname = nullptr); + const std::string* fname = nullptr) const; // REQUIRES: lock is held // On success, *props will be populated with all SSTables' table properties. // The keys of `props` are the sst file name, the values of `props` are the // tables' propertis, represented as shared_ptr. 
Status GetPropertiesOfAllTables(TablePropertiesCollection* props); - Status GetPropertiesOfAllTables(TablePropertiesCollection* props, int level); + Status GetPropertiesOfTablesInRange(const Range* range, std::size_t n, + TablePropertiesCollection* props) const; // REQUIRES: lock is held // On success, "tp" will contains the aggregated table property amoug @@ -502,7 +506,8 @@ class Version { return storage_info_.user_comparator_; } - bool PrefixMayMatch(const ReadOptions& read_options, Iterator* level_iter, + bool PrefixMayMatch(const ReadOptions& read_options, + InternalIterator* level_iter, const Slice& internal_prefix) const; // The helper function of UpdateAccumulatedStats, which may fill the missing @@ -643,7 +648,7 @@ class VersionSet { // Create an iterator that reads over the compaction inputs for "*c". // The caller should delete the iterator when no longer needed. - Iterator* MakeInputIterator(Compaction* c); + InternalIterator* MakeInputIterator(Compaction* c); // Add all files listed in any live version to *live. void AddLiveFiles(std::vector* live_list); diff --git a/db/write_batch_test.cc b/db/write_batch_test.cc index d8c6f8cb0..4f73c82c8 100644 --- a/db/write_batch_test.cc +++ b/db/write_batch_test.cc @@ -17,10 +17,10 @@ #include "rocksdb/env.h" #include "rocksdb/memtablerep.h" #include "rocksdb/utilities/write_batch_with_index.h" +#include "table/scoped_arena_iterator.h" #include "util/logging.h" #include "util/string_util.h" #include "util/testharness.h" -#include "util/scoped_arena_iterator.h" namespace rocksdb { diff --git a/include/rocksdb/compaction_filter.h b/include/rocksdb/compaction_filter.h index b3d42ca33..89558f208 100644 --- a/include/rocksdb/compaction_filter.h +++ b/include/rocksdb/compaction_filter.h @@ -116,20 +116,6 @@ class CompactionFilterFactory { virtual const char* Name() const = 0; }; -// Default implementation of CompactionFilterFactory which does not -// return any filter -class DefaultCompactionFilterFactory : public CompactionFilterFactory { - public: - virtual std::unique_ptr CreateCompactionFilter( - const CompactionFilter::Context& context) override { - return std::unique_ptr(nullptr); - } - - virtual const char* Name() const override { - return "DefaultCompactionFilterFactory"; - } -}; - } // namespace rocksdb #endif // STORAGE_ROCKSDB_INCLUDE_COMPACTION_FILTER_H_ diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 5a49638bd..e760f409e 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -324,34 +324,37 @@ class DB { // "rocksdb.compaction-pending" - 1 if at least one compaction is pending // "rocksdb.background-errors" - accumulated number of background errors // "rocksdb.cur-size-active-mem-table" -// "rocksdb.size-all-mem-tables" -// "rocksdb.num-entries-active-mem-table" -// "rocksdb.num-entries-imm-mem-tables" -// "rocksdb.num-deletes-active-mem-table" -// "rocksdb.num-deletes-imm-mem-tables" -// "rocksdb.estimate-num-keys" - estimated keys in the column family -// "rocksdb.estimate-table-readers-mem" - estimated memory used for reding -// SST tables, that is not counted as a part of block cache. -// "rocksdb.is-file-deletions-enabled" -// "rocksdb.num-snapshots" -// "rocksdb.oldest-snapshot-time" -// "rocksdb.num-live-versions" - `version` is an internal data structure. -// See version_set.h for details. More live versions often mean more SST -// files are held from being deleted, by iterators or unfinished -// compactions. 
-// "rocksdb.estimate-live-data-size" -// "rocksdb.total-sst-files-size" - total size of all used sst files, this may -// slow down online queries if there are too many files. -// "rocksdb.base-level" -// "rocksdb.estimate-pending-compaction-bytes" - estimated total number of -// bytes compaction needs to rewrite the data to get all levels down -// to under target size. Not valid for other compactions than level-based. -// "rocksdb.aggregated-table-properties" - returns a string representation of -// the aggregated table properties of the target column family. -// "rocksdb.aggregated-table-properties-at-level", same as the previous -// one but only returns the aggregated table properties of the specified -// level "N" at the target column family. -// replaced by the target level. + // "rocksdb.size-all-mem-tables" + // "rocksdb.num-entries-active-mem-table" + // "rocksdb.num-entries-imm-mem-tables" + // "rocksdb.num-deletes-active-mem-table" + // "rocksdb.num-deletes-imm-mem-tables" + // "rocksdb.estimate-num-keys" - estimated keys in the column family + // "rocksdb.estimate-table-readers-mem" - estimated memory used for reding + // SST tables, that is not counted as a part of block cache. + // "rocksdb.is-file-deletions-enabled" + // "rocksdb.num-snapshots" + // "rocksdb.oldest-snapshot-time" + // "rocksdb.num-live-versions" - `version` is an internal data structure. + // See version_set.h for details. More live versions often mean more SST + // files are held from being deleted, by iterators or unfinished + // compactions. + // "rocksdb.estimate-live-data-size" + // "rocksdb.total-sst-files-size" - total size of all used sst files, this + // may slow down online queries if there are too many files. + // "rocksdb.base-level" + // "rocksdb.estimate-pending-compaction-bytes" - estimated total number of + // bytes compaction needs to rewrite the data to get all levels down + // to under target size. Not valid for other compactions than + // level-based. + // "rocksdb.aggregated-table-properties" - returns a string representation + // of the aggregated table properties of the target column family. + // "rocksdb.aggregated-table-properties-at-level", same as the previous + // one but only returns the aggregated table properties of the specified + // level "N" at the target column family. + // "rocksdb.num-running-compactions" - the number of currently running + // compacitons. + // "rocksdb.num-running-flushes" - the number of currently running flushes. 
#ifndef ROCKSDB_LITE struct Properties { static const std::string kNumFilesAtLevelPrefix; @@ -361,7 +364,9 @@ class DB { static const std::string kDBStats; static const std::string kNumImmutableMemTable; static const std::string kMemTableFlushPending; + static const std::string kNumRunningFlushes; static const std::string kCompactionPending; + static const std::string kNumRunningCompactions; static const std::string kBackgroundErrors; static const std::string kCurSizeActiveMemTable; static const std::string kCurSizeAllMemTables; @@ -414,6 +419,8 @@ class DB { // "rocksdb.total-sst-files-size" // "rocksdb.base-level" // "rocksdb.estimate-pending-compaction-bytes" + // "rocksdb.num-running-compactions" + // "rocksdb.num-running-flushes" virtual bool GetIntProperty(ColumnFamilyHandle* column_family, const Slice& property, uint64_t* value) = 0; virtual bool GetIntProperty(const Slice& property, uint64_t* value) { @@ -709,6 +716,9 @@ class DB { virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) { return GetPropertiesOfAllTables(DefaultColumnFamily(), props); } + virtual Status GetPropertiesOfTablesInRange( + ColumnFamilyHandle* column_family, const Range* range, std::size_t n, + TablePropertiesCollection* props) = 0; #endif // ROCKSDB_LITE // Needed for StackableDB diff --git a/include/rocksdb/iterator.h b/include/rocksdb/iterator.h index 7538e9cfb..1e7600d84 100644 --- a/include/rocksdb/iterator.h +++ b/include/rocksdb/iterator.h @@ -24,10 +24,32 @@ namespace rocksdb { -class Iterator { +class Cleanable { public: - Iterator(); - virtual ~Iterator(); + Cleanable(); + ~Cleanable(); + // Clients are allowed to register function/arg1/arg2 triples that + // will be invoked when this iterator is destroyed. + // + // Note that unlike all of the preceding methods, this method is + // not abstract and therefore clients should not override it. + typedef void (*CleanupFunction)(void* arg1, void* arg2); + void RegisterCleanup(CleanupFunction function, void* arg1, void* arg2); + + protected: + struct Cleanup { + CleanupFunction function; + void* arg1; + void* arg2; + Cleanup* next; + }; + Cleanup cleanup_; +}; + +class Iterator : public Cleanable { + public: + Iterator() {} + virtual ~Iterator() {} // An iterator is either positioned at a key/value pair, or // not valid. This method returns true iff the iterator is valid. @@ -73,23 +95,7 @@ class Iterator { // satisfied without doing some IO, then this returns Status::Incomplete(). virtual Status status() const = 0; - // Clients are allowed to register function/arg1/arg2 triples that - // will be invoked when this iterator is destroyed. - // - // Note that unlike all of the preceding methods, this method is - // not abstract and therefore clients should not override it. 
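With RegisterCleanup() hoisted into Cleanable, both the public Iterator and the new InternalIterator share the same hook. A short sketch of registering a cleanup on a public iterator; PinnedBuffer and NewPinningIterator are illustrative names, not part of the API:

```cpp
// Sketch only: tie an application-owned resource to an iterator's lifetime.
#include "rocksdb/db.h"

struct PinnedBuffer {
  // Illustrative: whatever the iterator's results point into.
};

static void ReleasePinnedBuffer(void* arg1, void* /*arg2*/) {
  delete static_cast<PinnedBuffer*>(arg1);
}

rocksdb::Iterator* NewPinningIterator(rocksdb::DB* db) {
  PinnedBuffer* buf = new PinnedBuffer;
  rocksdb::Iterator* iter = db->NewIterator(rocksdb::ReadOptions());
  // Runs when the iterator is destroyed, regardless of who deletes it.
  iter->RegisterCleanup(&ReleasePinnedBuffer, buf, nullptr);
  return iter;
}
```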
- typedef void (*CleanupFunction)(void* arg1, void* arg2); - void RegisterCleanup(CleanupFunction function, void* arg1, void* arg2); - private: - struct Cleanup { - CleanupFunction function; - void* arg1; - void* arg2; - Cleanup* next; - }; - Cleanup cleanup_; - // No copying allowed Iterator(const Iterator&); void operator=(const Iterator&); diff --git a/include/rocksdb/perf_context.h b/include/rocksdb/perf_context.h index a7c993c7b..c2af729e3 100644 --- a/include/rocksdb/perf_context.h +++ b/include/rocksdb/perf_context.h @@ -27,42 +27,42 @@ struct PerfContext { uint64_t block_cache_hit_count; // total number of block cache hits uint64_t block_read_count; // total number of block reads (with IO) uint64_t block_read_byte; // total number of bytes from block reads - uint64_t block_read_time; // total time spent on block reads - uint64_t block_checksum_time; // total time spent on block checksum - uint64_t block_decompress_time; // total time spent on block decompression + uint64_t block_read_time; // total nanos spent on block reads + uint64_t block_checksum_time; // total nanos spent on block checksum + uint64_t block_decompress_time; // total nanos spent on block decompression // total number of internal keys skipped over during iteration (overwritten or // deleted, to be more specific, hidden by a put or delete of the same key) uint64_t internal_key_skipped_count; // total number of deletes and single deletes skipped over during iteration uint64_t internal_delete_skipped_count; - uint64_t get_snapshot_time; // total time spent on getting snapshot - uint64_t get_from_memtable_time; // total time spent on querying memtables + uint64_t get_snapshot_time; // total nanos spent on getting snapshot + uint64_t get_from_memtable_time; // total nanos spent on querying memtables uint64_t get_from_memtable_count; // number of mem tables queried - // total time spent after Get() finds a key + // total nanos spent after Get() finds a key uint64_t get_post_process_time; - uint64_t get_from_output_files_time; // total time reading from output files - // total time spent on seeking memtable + uint64_t get_from_output_files_time; // total nanos reading from output files + // total nanos spent on seeking memtable uint64_t seek_on_memtable_time; // number of seeks issued on memtable uint64_t seek_on_memtable_count; - // total time spent on seeking child iters + // total nanos spent on seeking child iters uint64_t seek_child_seek_time; // number of seek issued in child iterators uint64_t seek_child_seek_count; - uint64_t seek_min_heap_time; // total time spent on the merge heap - // total time spent on seeking the internal entries + uint64_t seek_min_heap_time; // total nanos spent on the merge heap + // total nanos spent on seeking the internal entries uint64_t seek_internal_seek_time; - // total time spent on iterating internal entries to find the next user entry + // total nanos spent on iterating internal entries to find the next user entry uint64_t find_next_user_entry_time; - // total time spent on writing to WAL + // total nanos spent on writing to WAL uint64_t write_wal_time; - // total time spent on writing to mem tables + // total nanos spent on writing to mem tables uint64_t write_memtable_time; - // total time spent on delaying write + // total nanos spent on delaying write uint64_t write_delay_time; - // total time spent on writing a record, excluding the above three times + // total nanos spent on writing a record, excluding the above three times uint64_t write_pre_and_post_process_time; 
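The counters reworded above to say "nanos" (plus db_mutex_lock_nanos just below) are read through the usual perf-context flow, which is what the new DBMutexLockCounter test exercises. A minimal sketch, assuming an open DB* named db and the perf_context.h / perf_level.h header split as laid out in this tree:

```cpp
// Sketch only: read the nanosecond counters around a single Get().
#include <cstdio>
#include <string>
#include "rocksdb/db.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/perf_level.h"

void TimedGet(rocksdb::DB* db, const rocksdb::Slice& key) {
  rocksdb::SetPerfLevel(rocksdb::kEnableTime);
  rocksdb::perf_context.Reset();
  std::string value;
  db->Get(rocksdb::ReadOptions(), key, &value);
  fprintf(stdout, "get_from_memtable_time=%llu db_mutex_lock_nanos=%llu\n",
          (unsigned long long)rocksdb::perf_context.get_from_memtable_time,
          (unsigned long long)rocksdb::perf_context.db_mutex_lock_nanos);
  rocksdb::SetPerfLevel(rocksdb::kDisable);
}
```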
uint64_t db_mutex_lock_nanos; // time spent on acquiring DB mutex. diff --git a/include/rocksdb/utilities/stackable_db.h b/include/rocksdb/utilities/stackable_db.h index aef192b07..50b5538f7 100644 --- a/include/rocksdb/utilities/stackable_db.h +++ b/include/rocksdb/utilities/stackable_db.h @@ -279,6 +279,13 @@ class StackableDB : public DB { return db_->GetPropertiesOfAllTables(column_family, props); } + using DB::GetPropertiesOfTablesInRange; + virtual Status GetPropertiesOfTablesInRange( + ColumnFamilyHandle* column_family, const Range* range, std::size_t n, + TablePropertiesCollection* props) override { + return db_->GetPropertiesOfTablesInRange(column_family, range, n, props); + } + virtual Status GetUpdatesSince( SequenceNumber seq_number, unique_ptr* iter, const TransactionLogIterator::ReadOptions& read_options) override { diff --git a/include/rocksdb/utilities/transaction.h b/include/rocksdb/utilities/transaction.h index 12be5fd24..cbeeceabc 100644 --- a/include/rocksdb/utilities/transaction.h +++ b/include/rocksdb/utilities/transaction.h @@ -83,10 +83,22 @@ class Transaction { // Returns the Snapshot created by the last call to SetSnapshot(). // // REQUIRED: The returned Snapshot is only valid up until the next time - // SetSnapshot()/SetSnapshotOnNextSavePoint() is called or the Transaction - // is deleted. + // SetSnapshot()/SetSnapshotOnNextSavePoint() is called, ClearSnapshot() + // is called, or the Transaction is deleted. virtual const Snapshot* GetSnapshot() const = 0; + // Clears the current snapshot (i.e. no snapshot will be 'set') + // + // This removes any snapshot that currently exists or is set to be created + // on the next update operation (SetSnapshotOnNextOperation). + // + // Calling ClearSnapshot() has no effect on keys written before this function + // has been called. + // + // If a reference to a snapshot was retrieved via GetSnapshot(), it will no + // longer be valid and should be discarded after a call to ClearSnapshot(). + virtual void ClearSnapshot() = 0; + // Write all batched keys to the db atomically. // // Returns OK on success. @@ -202,10 +214,6 @@ class Transaction { // // The returned iterator is only valid until Commit(), Rollback(), or // RollbackToSavePoint() is called. - // NOTE: Transaction::Put/Merge/Delete will currently invalidate this iterator - // until - // the following issue is fixed: - // https://github.com/facebook/rocksdb/issues/616 virtual Iterator* GetIterator(const ReadOptions& read_options) = 0; virtual Iterator* GetIterator(const ReadOptions& read_options, diff --git a/java/RELEASE.md b/java/RELEASE.md index 084460c88..cb9aaf987 100644 --- a/java/RELEASE.md +++ b/java/RELEASE.md @@ -15,7 +15,7 @@ Once you have these items, run this make command from RocksDB's root source dire This command will build RocksDB natively on OSX, and will then spin up two Vagrant Virtualbox Ubuntu images to build RocksDB for both 32-bit and 64-bit Linux. 
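Back to the Transaction::ClearSnapshot() addition above: a minimal sketch of setting and then dropping a snapshot inside one transaction, shown here with an OptimisticTransactionDB; txn_db and the keys are illustrative:

```cpp
// Sketch only: set a snapshot, then drop it part-way through a transaction.
#include "rocksdb/utilities/optimistic_transaction_db.h"
#include "rocksdb/utilities/transaction.h"

void SnapshotThenClear(rocksdb::OptimisticTransactionDB* txn_db) {
  rocksdb::WriteOptions write_options;
  rocksdb::OptimisticTransactionOptions txn_options;
  rocksdb::Transaction* txn =
      txn_db->BeginTransaction(write_options, txn_options);

  txn->SetSnapshot();          // later writes are checked against this point
  txn->Put("key1", "value1");  // unaffected by the ClearSnapshot() below

  txn->ClearSnapshot();        // no snapshot is 'set' from here on
  txn->Put("key2", "value2");

  rocksdb::Status s = txn->Commit();
  delete txn;
  (void)s;  // error handling elided
}
```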
-You can find all native binaries and JARs in the java directory upon completion: +You can find all native binaries and JARs in the java/target directory upon completion: librocksdbjni-linux32.so librocksdbjni-linux64.so diff --git a/java/crossbuild/Vagrantfile b/java/crossbuild/Vagrantfile index 8a52b9261..21cce1201 100644 --- a/java/crossbuild/Vagrantfile +++ b/java/crossbuild/Vagrantfile @@ -20,7 +20,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| end config.vm.provision :shell, path: "build-linux-centos.sh" - config.vm.synced_folder "../", "/rocksdb-build" + config.vm.synced_folder "../target", "/rocksdb-build" config.vm.synced_folder "../..", "/rocksdb", type: "rsync" config.vm.boot_timeout = 1200 end diff --git a/java/crossbuild/build-linux-centos.sh b/java/crossbuild/build-linux-centos.sh index 7fd8cfe76..2e8f81d94 100755 --- a/java/crossbuild/build-linux-centos.sh +++ b/java/crossbuild/build-linux-centos.sh @@ -1,9 +1,7 @@ #!/usr/bin/env bash # install all required packages for rocksdb that are available through yum ARCH=$(uname -i) -sudo yum -y install openssl java-1.7.0-openjdk-devel.$ARCH zlib zlib-devel bzip2 bzip2-devel -sudo yum -y install epel-release-5-4.noarch -sudo yum -y install snappy snappy-devel +sudo yum -y install openssl java-1.7.0-openjdk-devel.$ARCH # install gcc/g++ 4.8.2 via CERN (http://linux.web.cern.ch/linux/devtoolset/) sudo wget -O /etc/yum.repos.d/slc5-devtoolset.repo http://linuxsoft.cern.ch/cern/devtoolset/slc5-devtoolset.repo diff --git a/java/rocksjni/write_batch.cc b/java/rocksjni/write_batch.cc index aa0c2309a..dc3f6d2c6 100644 --- a/java/rocksjni/write_batch.cc +++ b/java/rocksjni/write_batch.cc @@ -20,8 +20,8 @@ #include "db/writebuffer.h" #include "rocksdb/env.h" #include "rocksdb/memtablerep.h" +#include "table/scoped_arena_iterator.h" #include "util/logging.h" -#include "util/scoped_arena_iterator.h" #include "util/testharness.h" /* diff --git a/java/rocksjni/write_batch_test.cc b/java/rocksjni/write_batch_test.cc index d54029141..98e53ff17 100644 --- a/java/rocksjni/write_batch_test.cc +++ b/java/rocksjni/write_batch_test.cc @@ -21,8 +21,8 @@ #include "rocksdb/status.h" #include "rocksdb/write_batch.h" #include "rocksjni/portal.h" +#include "table/scoped_arena_iterator.h" #include "util/logging.h" -#include "util/scoped_arena_iterator.h" #include "util/testharness.h" /* diff --git a/util/hash_cuckoo_rep.cc b/memtable/hash_cuckoo_rep.cc similarity index 99% rename from util/hash_cuckoo_rep.cc rename to memtable/hash_cuckoo_rep.cc index 6e5057a73..447113ddf 100644 --- a/util/hash_cuckoo_rep.cc +++ b/memtable/hash_cuckoo_rep.cc @@ -5,8 +5,7 @@ // #ifndef ROCKSDB_LITE - -#include "util/hash_cuckoo_rep.h" +#include "memtable/hash_cuckoo_rep.h" #include #include @@ -18,9 +17,10 @@ #include "db/memtable.h" #include "db/skiplist.h" +#include "memtable/stl_wrappers.h" +#include "port/port.h" #include "rocksdb/memtablerep.h" #include "util/murmurhash.h" -#include "util/stl_wrappers.h" namespace rocksdb { namespace { diff --git a/util/hash_cuckoo_rep.h b/memtable/hash_cuckoo_rep.h similarity index 98% rename from util/hash_cuckoo_rep.h rename to memtable/hash_cuckoo_rep.h index 9f374a978..6de4baa20 100644 --- a/util/hash_cuckoo_rep.h +++ b/memtable/hash_cuckoo_rep.h @@ -6,8 +6,9 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
-#ifndef ROCKSDB_LITE #pragma once +#ifndef ROCKSDB_LITE +#include "port/port.h" #include "rocksdb/slice_transform.h" #include "rocksdb/memtablerep.h" diff --git a/util/hash_linklist_rep.cc b/memtable/hash_linklist_rep.cc similarity index 99% rename from util/hash_linklist_rep.cc rename to memtable/hash_linklist_rep.cc index 1e6eadfe6..2e761ce15 100644 --- a/util/hash_linklist_rep.cc +++ b/memtable/hash_linklist_rep.cc @@ -5,7 +5,7 @@ // #ifndef ROCKSDB_LITE -#include "util/hash_linklist_rep.h" +#include "memtable/hash_linklist_rep.h" #include #include diff --git a/util/hash_linklist_rep.h b/memtable/hash_linklist_rep.h similarity index 100% rename from util/hash_linklist_rep.h rename to memtable/hash_linklist_rep.h index 629272394..1bab441ed 100644 --- a/util/hash_linklist_rep.h +++ b/memtable/hash_linklist_rep.h @@ -6,8 +6,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -#ifndef ROCKSDB_LITE #pragma once +#ifndef ROCKSDB_LITE #include "rocksdb/slice_transform.h" #include "rocksdb/memtablerep.h" diff --git a/util/hash_skiplist_rep.cc b/memtable/hash_skiplist_rep.cc similarity index 99% rename from util/hash_skiplist_rep.cc rename to memtable/hash_skiplist_rep.cc index 142903d42..cbdd88993 100644 --- a/util/hash_skiplist_rep.cc +++ b/memtable/hash_skiplist_rep.cc @@ -5,7 +5,7 @@ // #ifndef ROCKSDB_LITE -#include "util/hash_skiplist_rep.h" +#include "memtable/hash_skiplist_rep.h" #include diff --git a/util/hash_skiplist_rep.h b/memtable/hash_skiplist_rep.h similarity index 100% rename from util/hash_skiplist_rep.h rename to memtable/hash_skiplist_rep.h index 15d0fc77f..a6544ff04 100644 --- a/util/hash_skiplist_rep.h +++ b/memtable/hash_skiplist_rep.h @@ -6,8 +6,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
-#ifndef ROCKSDB_LITE #pragma once +#ifndef ROCKSDB_LITE #include "rocksdb/slice_transform.h" #include "rocksdb/memtablerep.h" diff --git a/util/stl_wrappers.h b/memtable/stl_wrappers.h similarity index 73% rename from util/stl_wrappers.h rename to memtable/stl_wrappers.h index 15b9bdf52..cef8301fa 100644 --- a/util/stl_wrappers.h +++ b/memtable/stl_wrappers.h @@ -30,17 +30,5 @@ struct Compare : private Base { } }; -struct LessOfComparator { - explicit LessOfComparator(const Comparator* c = BytewiseComparator()) - : cmp(c) {} - - bool operator()(const std::string& a, const std::string& b) const { - return cmp->Compare(Slice(a), Slice(b)) < 0; - } - - const Comparator* cmp; -}; - -typedef std::map KVMap; } } diff --git a/port/win/env_win.cc b/port/win/env_win.cc index 45847324b..c55cd3039 100644 --- a/port/win/env_win.cc +++ b/port/win/env_win.cc @@ -963,7 +963,7 @@ class WinWritableFile : public WritableFile { virtual Status Allocate(off_t offset, off_t len) override { Status status; - TEST_KILL_RANDOM(rocksdb_kill_odds); + TEST_KILL_RANDOM("WinWritableFile::Allocate", rocksdb_kill_odds); // Make sure that we reserve an aligned amount of space // since the reservation block size is driven outside so we want diff --git a/port/win/port_win.h b/port/win/port_win.h index edee22e7a..8f5d7e1e0 100644 --- a/port/win/port_win.h +++ b/port/win/port_win.h @@ -114,8 +114,11 @@ class CondVar; class Mutex { public: - /* implicit */ Mutex(bool adaptive = false) : locked_(false) { - } + /* implicit */ Mutex(bool adaptive = false) +#ifndef NDEBUG + : locked_(false) +#endif + { } ~Mutex(); diff --git a/src.mk b/src.mk index 39f9c1fa7..cb43744be 100644 --- a/src.mk +++ b/src.mk @@ -46,6 +46,9 @@ LIB_SOURCES = \ db/write_batch_base.cc \ db/write_controller.cc \ db/write_thread.cc \ + memtable/hash_cuckoo_rep.cc \ + memtable/hash_linklist_rep.cc \ + memtable/hash_skiplist_rep.cc \ port/stack_trace.cc \ port/port_posix.cc \ table/adaptive_table_factory.cc \ @@ -96,9 +99,6 @@ LIB_SOURCES = \ util/file_reader_writer.cc \ util/filter_policy.cc \ util/hash.cc \ - util/hash_cuckoo_rep.cc \ - util/hash_linklist_rep.cc \ - util/hash_skiplist_rep.cc \ util/histogram.cc \ util/instrumented_mutex.cc \ util/iostats_context.cc \ @@ -161,9 +161,9 @@ LIB_SOURCES = \ util/xxhash.cc \ TOOL_SOURCES = \ - util/ldb_cmd.cc \ - util/ldb_tool.cc \ - util/sst_dump_tool.cc \ + tools/ldb_cmd.cc \ + tools/ldb_tool.cc \ + tools/sst_dump_tool.cc \ MOCK_SOURCES = \ table/mock_table.cc \ @@ -190,6 +190,7 @@ TEST_BENCH_SOURCES = \ db/db_universal_compaction_test.cc \ db/db_tailing_iter_test.cc \ db/db_wal_test.cc \ + db/db_table_properties_test.cc \ db/deletefile_test.cc \ db/fault_injection_test.cc \ db/file_indexer_test.cc \ @@ -198,6 +199,7 @@ TEST_BENCH_SOURCES = \ db/listener_test.cc \ db/log_and_apply_bench.cc \ db/log_test.cc \ + db/manual_compaction_test.cc \ db/memtablerep_bench.cc \ db/merge_test.cc \ db/perf_context_test.cc \ @@ -223,7 +225,9 @@ TEST_BENCH_SOURCES = \ table/table_reader_bench.cc \ table/table_test.cc \ tools/db_sanity_test.cc \ + tools/ldb_cmd_test.cc \ tools/reduce_levels_test.cc \ + tools/sst_dump_test.cc \ util/arena_test.cc \ util/auto_roll_logger_test.cc \ util/autovector_test.cc \ @@ -252,19 +256,16 @@ TEST_BENCH_SOURCES = \ utilities/ttl/ttl_test.cc \ utilities/write_batch_with_index/write_batch_with_index_test.cc \ util/log_write_bench.cc \ - util/manual_compaction_test.cc \ util/memenv_test.cc \ util/mock_env_test.cc \ util/options_test.cc \ util/event_logger_test.cc \ util/rate_limiter_test.cc \ 
util/slice_transform_test.cc \ - util/sst_dump_test.cc \ util/testharness.cc \ util/testutil.cc \ util/thread_list_test.cc \ - util/thread_local_test.cc \ - util/ldb_cmd_test.cc + util/thread_local_test.cc JNI_NATIVE_SOURCES = \ java/rocksjni/backupenginejni.cc \ diff --git a/table/block.cc b/table/block.cc index 99c76f695..9e72a0bd9 100644 --- a/table/block.cc +++ b/table/block.cc @@ -316,14 +316,14 @@ Block::Block(BlockContents&& contents) } } -Iterator* Block::NewIterator( - const Comparator* cmp, BlockIter* iter, bool total_order_seek) { +InternalIterator* Block::NewIterator(const Comparator* cmp, BlockIter* iter, + bool total_order_seek) { if (size_ < 2*sizeof(uint32_t)) { if (iter != nullptr) { iter->SetStatus(Status::Corruption("bad block contents")); return iter; } else { - return NewErrorIterator(Status::Corruption("bad block contents")); + return NewErrorInternalIterator(Status::Corruption("bad block contents")); } } const uint32_t num_restarts = NumRestarts(); @@ -332,7 +332,7 @@ Iterator* Block::NewIterator( iter->SetStatus(Status::OK()); return iter; } else { - return NewEmptyIterator(); + return NewEmptyInternalIterator(); } } else { BlockHashIndex* hash_index_ptr = diff --git a/table/block.h b/table/block.h index 2ce48d3fd..0a37b90fa 100644 --- a/table/block.h +++ b/table/block.h @@ -19,6 +19,7 @@ #include "db/dbformat.h" #include "table/block_prefix_index.h" #include "table/block_hash_index.h" +#include "table/internal_iterator.h" #include "format.h" @@ -66,8 +67,9 @@ class Block { // If total_order_seek is true, hash_index_ and prefix_index_ are ignored. // This option only applies for index block. For data block, hash_index_ // and prefix_index_ are null, so this option does not matter. - Iterator* NewIterator(const Comparator* comparator, - BlockIter* iter = nullptr, bool total_order_seek = true); + InternalIterator* NewIterator(const Comparator* comparator, + BlockIter* iter = nullptr, + bool total_order_seek = true); void SetBlockHashIndex(BlockHashIndex* hash_index); void SetBlockPrefixIndex(BlockPrefixIndex* prefix_index); @@ -87,7 +89,7 @@ class Block { void operator=(const Block&); }; -class BlockIter : public Iterator { +class BlockIter : public InternalIterator { public: BlockIter() : comparator_(nullptr), diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index b11327248..ad383726a 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -32,6 +32,7 @@ #include "table/block_hash_index.h" #include "table/block_prefix_index.h" #include "table/format.h" +#include "table/internal_iterator.h" #include "table/meta_blocks.h" #include "table/two_level_iterator.h" #include "table/get_context.h" @@ -146,8 +147,8 @@ class BlockBasedTable::IndexReader { // Create an iterator for index access. // An iter is passed in, if it is not null, update this one and return it // If it is null, create a new Iterator - virtual Iterator* NewIterator( - BlockIter* iter = nullptr, bool total_order_seek = true) = 0; + virtual InternalIterator* NewIterator(BlockIter* iter = nullptr, + bool total_order_seek = true) = 0; // The size of the index. 
virtual size_t size() const = 0; @@ -187,8 +188,8 @@ class BinarySearchIndexReader : public IndexReader { return s; } - virtual Iterator* NewIterator( - BlockIter* iter = nullptr, bool dont_care = true) override { + virtual InternalIterator* NewIterator(BlockIter* iter = nullptr, + bool dont_care = true) override { return index_block_->NewIterator(comparator_, iter, true); } @@ -219,7 +220,8 @@ class HashIndexReader : public IndexReader { const Footer& footer, RandomAccessFileReader* file, Env* env, const Comparator* comparator, const BlockHandle& index_handle, - Iterator* meta_index_iter, IndexReader** index_reader, + InternalIterator* meta_index_iter, + IndexReader** index_reader, bool hash_index_allow_collision) { std::unique_ptr index_block; auto s = ReadBlockFromFile(file, footer, ReadOptions(), index_handle, @@ -298,8 +300,8 @@ class HashIndexReader : public IndexReader { return Status::OK(); } - virtual Iterator* NewIterator( - BlockIter* iter = nullptr, bool total_order_seek = true) override { + virtual InternalIterator* NewIterator(BlockIter* iter = nullptr, + bool total_order_seek = true) override { return index_block_->NewIterator(comparator_, iter, total_order_seek); } @@ -512,7 +514,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, // Read meta index std::unique_ptr meta; - std::unique_ptr meta_iter; + std::unique_ptr meta_iter; s = ReadMetaBlock(rep, &meta, &meta_iter); if (!s.ok()) { return s; @@ -580,7 +582,8 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, assert(table_options.block_cache != nullptr); // Hack: Call NewIndexIterator() to implicitly add index to the // block_cache - unique_ptr iter(new_table->NewIndexIterator(ReadOptions())); + unique_ptr iter( + new_table->NewIndexIterator(ReadOptions())); s = iter->status(); if (s.ok()) { @@ -652,10 +655,9 @@ size_t BlockBasedTable::ApproximateMemoryUsage() const { // Load the meta-block from the file. On success, return the loaded meta block // and its iterator. -Status BlockBasedTable::ReadMetaBlock( - Rep* rep, - std::unique_ptr* meta_block, - std::unique_ptr* iter) { +Status BlockBasedTable::ReadMetaBlock(Rep* rep, + std::unique_ptr* meta_block, + std::unique_ptr* iter) { // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates // it is an empty block. // TODO: we never really verify check sum for meta index block @@ -898,8 +900,8 @@ BlockBasedTable::CachableEntry BlockBasedTable::GetFilter( return { filter, cache_handle }; } -Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options, - BlockIter* input_iter) { +InternalIterator* BlockBasedTable::NewIndexIterator( + const ReadOptions& read_options, BlockIter* input_iter) { // index reader has already been pre-populated. if (rep_->index_reader) { return rep_->index_reader->NewIterator( @@ -922,7 +924,7 @@ Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options, input_iter->SetStatus(Status::Incomplete("no blocking io")); return input_iter; } else { - return NewErrorIterator(Status::Incomplete("no blocking io")); + return NewErrorInternalIterator(Status::Incomplete("no blocking io")); } } @@ -942,7 +944,7 @@ Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options, input_iter->SetStatus(s); return input_iter; } else { - return NewErrorIterator(s); + return NewErrorInternalIterator(s); } } @@ -965,8 +967,8 @@ Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options, // into an iterator over the contents of the corresponding block. 
// If input_iter is null, new a iterator // If input_iter is not null, update this iter and return it -Iterator* BlockBasedTable::NewDataBlockIterator(Rep* rep, - const ReadOptions& ro, const Slice& index_value, +InternalIterator* BlockBasedTable::NewDataBlockIterator( + Rep* rep, const ReadOptions& ro, const Slice& index_value, BlockIter* input_iter) { PERF_TIMER_GUARD(new_table_block_iter_nanos); @@ -987,7 +989,7 @@ Iterator* BlockBasedTable::NewDataBlockIterator(Rep* rep, input_iter->SetStatus(s); return input_iter; } else { - return NewErrorIterator(s); + return NewErrorInternalIterator(s); } } @@ -1040,7 +1042,7 @@ Iterator* BlockBasedTable::NewDataBlockIterator(Rep* rep, input_iter->SetStatus(Status::Incomplete("no blocking io")); return input_iter; } else { - return NewErrorIterator(Status::Incomplete("no blocking io")); + return NewErrorInternalIterator(Status::Incomplete("no blocking io")); } } std::unique_ptr block_value; @@ -1051,7 +1053,7 @@ Iterator* BlockBasedTable::NewDataBlockIterator(Rep* rep, } } - Iterator* iter; + InternalIterator* iter; if (block.value != nullptr) { iter = block.value->NewIterator(&rep->internal_comparator, input_iter); if (block.cache_handle != nullptr) { @@ -1065,7 +1067,7 @@ Iterator* BlockBasedTable::NewDataBlockIterator(Rep* rep, input_iter->SetStatus(s); iter = input_iter; } else { - iter = NewErrorIterator(s); + iter = NewErrorInternalIterator(s); } } return iter; @@ -1080,7 +1082,7 @@ class BlockBasedTable::BlockEntryIteratorState : public TwoLevelIteratorState { table_(table), read_options_(read_options) {} - Iterator* NewSecondaryIterator(const Slice& index_value) override { + InternalIterator* NewSecondaryIterator(const Slice& index_value) override { return NewDataBlockIterator(table_->rep_, read_options_, index_value); } @@ -1138,7 +1140,7 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) { // Then, try find it within each block if (may_match) { - unique_ptr iiter(NewIndexIterator(no_io_read_options)); + unique_ptr iiter(NewIndexIterator(no_io_read_options)); iiter->Seek(internal_prefix); if (!iiter->Valid()) { @@ -1184,8 +1186,8 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) { return may_match; } -Iterator* BlockBasedTable::NewIterator(const ReadOptions& read_options, - Arena* arena) { +InternalIterator* BlockBasedTable::NewIterator(const ReadOptions& read_options, + Arena* arena) { return NewTwoLevelIterator(new BlockEntryIteratorState(this, read_options), NewIndexIterator(read_options), arena); } @@ -1326,7 +1328,7 @@ Status BlockBasedTable::Prefetch(const Slice* const begin, bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options, const Slice& key) { - std::unique_ptr iiter(NewIndexIterator(options)); + std::unique_ptr iiter(NewIndexIterator(options)); iiter->Seek(key); assert(iiter->Valid()); CachableEntry block; @@ -1361,8 +1363,8 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options, // 3. options // 4. internal_comparator // 5. index_type -Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader, - Iterator* preloaded_meta_index_iter) { +Status BlockBasedTable::CreateIndexReader( + IndexReader** index_reader, InternalIterator* preloaded_meta_index_iter) { // Some old version of block-based tables don't have index type present in // table properties. If that's the case we can safely use the kBinarySearch. 
auto index_type_on_file = BlockBasedTableOptions::kBinarySearch; @@ -1396,7 +1398,7 @@ Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader, } case BlockBasedTableOptions::kHashSearch: { std::unique_ptr meta_guard; - std::unique_ptr meta_iter_guard; + std::unique_ptr meta_iter_guard; auto meta_index_iter = preloaded_meta_index_iter; if (meta_index_iter == nullptr) { auto s = ReadMetaBlock(rep_, &meta_guard, &meta_iter_guard); @@ -1430,7 +1432,7 @@ Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader, } uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key) { - unique_ptr index_iter(NewIndexIterator(ReadOptions())); + unique_ptr index_iter(NewIndexIterator(ReadOptions())); index_iter->Seek(key); uint64_t result; @@ -1484,7 +1486,7 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) { "Metaindex Details:\n" "--------------------------------------\n"); std::unique_ptr meta; - std::unique_ptr meta_iter; + std::unique_ptr meta_iter; Status s = ReadMetaBlock(rep_, &meta, &meta_iter); if (s.ok()) { for (meta_iter->SeekToFirst(); meta_iter->Valid(); meta_iter->Next()) { @@ -1567,7 +1569,8 @@ Status BlockBasedTable::DumpIndexBlock(WritableFile* out_file) { "Index Details:\n" "--------------------------------------\n"); - std::unique_ptr blockhandles_iter(NewIndexIterator(ReadOptions())); + std::unique_ptr blockhandles_iter( + NewIndexIterator(ReadOptions())); Status s = blockhandles_iter->status(); if (!s.ok()) { out_file->Append("Can not read Index Block \n\n"); @@ -1608,7 +1611,8 @@ Status BlockBasedTable::DumpIndexBlock(WritableFile* out_file) { } Status BlockBasedTable::DumpDataBlocks(WritableFile* out_file) { - std::unique_ptr blockhandles_iter(NewIndexIterator(ReadOptions())); + std::unique_ptr blockhandles_iter( + NewIndexIterator(ReadOptions())); Status s = blockhandles_iter->status(); if (!s.ok()) { out_file->Append("Can not read Index Block \n\n"); @@ -1630,7 +1634,7 @@ Status BlockBasedTable::DumpDataBlocks(WritableFile* out_file) { out_file->Append("\n"); out_file->Append("--------------------------------------\n"); - std::unique_ptr datablock_iter; + std::unique_ptr datablock_iter; datablock_iter.reset( NewDataBlockIterator(rep_, ReadOptions(), blockhandles_iter->value())); s = datablock_iter->status(); diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index d81f610b8..4e095cb66 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -43,6 +43,7 @@ struct BlockBasedTableOptions; struct EnvOptions; struct ReadOptions; class GetContext; +class InternalIterator; using std::unique_ptr; @@ -79,7 +80,8 @@ class BlockBasedTable : public TableReader { // Returns a new iterator over the table contents. // The result of NewIterator() is initially invalid (caller must // call one of the Seek methods on the iterator before using it). 
- Iterator* NewIterator(const ReadOptions&, Arena* arena = nullptr) override; + InternalIterator* NewIterator(const ReadOptions&, + Arena* arena = nullptr) override; Status Get(const ReadOptions& readOptions, const Slice& key, GetContext* get_context) override; @@ -129,9 +131,9 @@ class BlockBasedTable : public TableReader { class BlockEntryIteratorState; // input_iter: if it is not null, update this one and return it as Iterator - static Iterator* NewDataBlockIterator(Rep* rep, const ReadOptions& ro, - const Slice& index_value, - BlockIter* input_iter = nullptr); + static InternalIterator* NewDataBlockIterator( + Rep* rep, const ReadOptions& ro, const Slice& index_value, + BlockIter* input_iter = nullptr); // For the following two functions: // if `no_io == true`, we will not try to read filter/index from sst file @@ -148,8 +150,8 @@ class BlockBasedTable : public TableReader { // 2. index is not present in block cache. // 3. We disallowed any io to be performed, that is, read_options == // kBlockCacheTier - Iterator* NewIndexIterator(const ReadOptions& read_options, - BlockIter* input_iter = nullptr); + InternalIterator* NewIndexIterator(const ReadOptions& read_options, + BlockIter* input_iter = nullptr); // Read block cache from block caches (if set): block_cache and // block_cache_compressed. @@ -186,17 +188,16 @@ class BlockBasedTable : public TableReader { // Optionally, user can pass a preloaded meta_index_iter for the index that // need to access extra meta blocks for index construction. This parameter // helps avoid re-reading meta index block if caller already created one. - Status CreateIndexReader(IndexReader** index_reader, - Iterator* preloaded_meta_index_iter = nullptr); + Status CreateIndexReader( + IndexReader** index_reader, + InternalIterator* preloaded_meta_index_iter = nullptr); bool FullFilterKeyMayMatch(FilterBlockReader* filter, const Slice& user_key) const; // Read the meta block from sst. - static Status ReadMetaBlock( - Rep* rep, - std::unique_ptr* meta_block, - std::unique_ptr* iter); + static Status ReadMetaBlock(Rep* rep, std::unique_ptr* meta_block, + std::unique_ptr* iter); // Create the filter from the filter block. 
static FilterBlockReader* ReadFilter(Rep* rep, size_t* filter_size = nullptr); diff --git a/table/block_hash_index.cc b/table/block_hash_index.cc index fd1329660..b38cc8a57 100644 --- a/table/block_hash_index.cc +++ b/table/block_hash_index.cc @@ -10,6 +10,7 @@ #include "rocksdb/comparator.h" #include "rocksdb/iterator.h" #include "rocksdb/slice_transform.h" +#include "table/internal_iterator.h" #include "util/coding.h" namespace rocksdb { @@ -53,8 +54,9 @@ Status CreateBlockHashIndex(const SliceTransform* hash_key_extractor, } BlockHashIndex* CreateBlockHashIndexOnTheFly( - Iterator* index_iter, Iterator* data_iter, const uint32_t num_restarts, - const Comparator* comparator, const SliceTransform* hash_key_extractor) { + InternalIterator* index_iter, InternalIterator* data_iter, + const uint32_t num_restarts, const Comparator* comparator, + const SliceTransform* hash_key_extractor) { assert(hash_key_extractor); auto hash_index = new BlockHashIndex( hash_key_extractor, diff --git a/table/block_hash_index.h b/table/block_hash_index.h index 582910796..fc110d54a 100644 --- a/table/block_hash_index.h +++ b/table/block_hash_index.h @@ -14,7 +14,7 @@ namespace rocksdb { class Comparator; -class Iterator; +class InternalIterator; class Slice; class SliceTransform; @@ -79,7 +79,8 @@ Status CreateBlockHashIndex(const SliceTransform* hash_key_extractor, // @params hash_key_extractor: extract the hashable part of a given key. // On error, nullptr will be returned. BlockHashIndex* CreateBlockHashIndexOnTheFly( - Iterator* index_iter, Iterator* data_iter, const uint32_t num_restarts, - const Comparator* comparator, const SliceTransform* hash_key_extractor); + InternalIterator* index_iter, InternalIterator* data_iter, + const uint32_t num_restarts, const Comparator* comparator, + const SliceTransform* hash_key_extractor); } // namespace rocksdb diff --git a/table/block_hash_index_test.cc b/table/block_hash_index_test.cc index b001c203a..ffca663d1 100644 --- a/table/block_hash_index_test.cc +++ b/table/block_hash_index_test.cc @@ -11,6 +11,7 @@ #include "rocksdb/iterator.h" #include "rocksdb/slice_transform.h" #include "table/block_hash_index.h" +#include "table/internal_iterator.h" #include "util/testharness.h" #include "util/testutil.h" @@ -18,7 +19,7 @@ namespace rocksdb { typedef std::map Data; -class MapIterator : public Iterator { +class MapIterator : public InternalIterator { public: explicit MapIterator(const Data& data) : data_(data), pos_(data_.end()) {} diff --git a/table/block_test.cc b/table/block_test.cc index c86f38da5..e9c0179c1 100644 --- a/table/block_test.cc +++ b/table/block_test.cc @@ -96,7 +96,7 @@ TEST_F(BlockTest, SimpleTest) { // read contents of block sequentially int count = 0; - Iterator* iter = reader.NewIterator(options.comparator); + InternalIterator *iter = reader.NewIterator(options.comparator); for (iter->SeekToFirst();iter->Valid(); count++, iter->Next()) { // read kv from block @@ -170,10 +170,10 @@ void CheckBlockContents(BlockContents contents, const int max_key, delete iter2; } - std::unique_ptr hash_iter( + std::unique_ptr hash_iter( reader1.NewIterator(BytewiseComparator(), nullptr, false)); - std::unique_ptr regular_iter( + std::unique_ptr regular_iter( reader2.NewIterator(BytewiseComparator())); // Seek existent keys diff --git a/table/cuckoo_table_reader.cc b/table/cuckoo_table_reader.cc index 8c0329c66..2d413f043 100644 --- a/table/cuckoo_table_reader.cc +++ b/table/cuckoo_table_reader.cc @@ -17,6 +17,7 @@ #include #include "rocksdb/iterator.h" #include 
"rocksdb/table.h" +#include "table/internal_iterator.h" #include "table/meta_blocks.h" #include "table/cuckoo_table_factory.h" #include "table/get_context.h" @@ -173,7 +174,7 @@ void CuckooTableReader::Prepare(const Slice& key) { } } -class CuckooTableIterator : public Iterator { +class CuckooTableIterator : public InternalIterator { public: explicit CuckooTableIterator(CuckooTableReader* reader); ~CuckooTableIterator() {} @@ -348,16 +349,17 @@ Slice CuckooTableIterator::value() const { return curr_value_; } -extern Iterator* NewErrorIterator(const Status& status, Arena* arena); +extern InternalIterator* NewErrorInternalIterator(const Status& status, + Arena* arena); -Iterator* CuckooTableReader::NewIterator( +InternalIterator* CuckooTableReader::NewIterator( const ReadOptions& read_options, Arena* arena) { if (!status().ok()) { - return NewErrorIterator( + return NewErrorInternalIterator( Status::Corruption("CuckooTableReader status is not okay."), arena); } if (read_options.total_order_seek) { - return NewErrorIterator( + return NewErrorInternalIterator( Status::InvalidArgument("total_order_seek is not supported."), arena); } CuckooTableIterator* iter; diff --git a/table/cuckoo_table_reader.h b/table/cuckoo_table_reader.h index 6643be025..ee17dc44f 100644 --- a/table/cuckoo_table_reader.h +++ b/table/cuckoo_table_reader.h @@ -24,6 +24,7 @@ namespace rocksdb { class Arena; class TableReader; +class InternalIterator; class CuckooTableReader: public TableReader { public: @@ -43,7 +44,8 @@ class CuckooTableReader: public TableReader { Status Get(const ReadOptions& read_options, const Slice& key, GetContext* get_context) override; - Iterator* NewIterator(const ReadOptions&, Arena* arena = nullptr) override; + InternalIterator* NewIterator(const ReadOptions&, + Arena* arena = nullptr) override; void Prepare(const Slice& target) override; // Report an approximation of how much memory has been used. diff --git a/table/cuckoo_table_reader_test.cc b/table/cuckoo_table_reader_test.cc index f10fcc571..9758af3f2 100644 --- a/table/cuckoo_table_reader_test.cc +++ b/table/cuckoo_table_reader_test.cc @@ -148,7 +148,7 @@ class CuckooReaderTest : public testing::Test { CuckooTableReader reader(ioptions, std::move(file_reader), file_size, ucomp, GetSliceHash); ASSERT_OK(reader.status()); - Iterator* it = reader.NewIterator(ReadOptions(), nullptr); + InternalIterator* it = reader.NewIterator(ReadOptions(), nullptr); ASSERT_OK(it->status()); ASSERT_TRUE(!it->Valid()); it->SeekToFirst(); @@ -196,7 +196,7 @@ class CuckooReaderTest : public testing::Test { ASSERT_TRUE(keys[num_items/2] == it->key()); ASSERT_TRUE(values[num_items/2] == it->value()); ASSERT_OK(it->status()); - it->~Iterator(); + it->~InternalIterator(); } std::vector keys; diff --git a/table/internal_iterator.h b/table/internal_iterator.h new file mode 100644 index 000000000..51a163256 --- /dev/null +++ b/table/internal_iterator.h @@ -0,0 +1,75 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// + +#pragma once + +#include "rocksdb/iterator.h" +#include "rocksdb/status.h" + +namespace rocksdb { + +class InternalIterator : public Cleanable { + public: + InternalIterator() {} + virtual ~InternalIterator() {} + + // An iterator is either positioned at a key/value pair, or + // not valid. 
This method returns true iff the iterator is valid. + virtual bool Valid() const = 0; + + // Position at the first key in the source. The iterator is Valid() + // after this call iff the source is not empty. + virtual void SeekToFirst() = 0; + + // Position at the last key in the source. The iterator is + // Valid() after this call iff the source is not empty. + virtual void SeekToLast() = 0; + + // Position at the first key in the source that at or past target + // The iterator is Valid() after this call iff the source contains + // an entry that comes at or past target. + virtual void Seek(const Slice& target) = 0; + + // Moves to the next entry in the source. After this call, Valid() is + // true iff the iterator was not positioned at the last entry in the source. + // REQUIRES: Valid() + virtual void Next() = 0; + + // Moves to the previous entry in the source. After this call, Valid() is + // true iff the iterator was not positioned at the first entry in source. + // REQUIRES: Valid() + virtual void Prev() = 0; + + // Return the key for the current entry. The underlying storage for + // the returned slice is valid only until the next modification of + // the iterator. + // REQUIRES: Valid() + virtual Slice key() const = 0; + + // Return the value for the current entry. The underlying storage for + // the returned slice is valid only until the next modification of + // the iterator. + // REQUIRES: !AtEnd() && !AtStart() + virtual Slice value() const = 0; + + // If an error has occurred, return it. Else return an ok status. + // If non-blocking IO is requested and this operation cannot be + // satisfied without doing some IO, then this returns Status::Incomplete(). + virtual Status status() const = 0; + + private: + // No copying allowed + InternalIterator(const InternalIterator&) = delete; + InternalIterator& operator=(const InternalIterator&) = delete; +}; + +// Return an empty iterator (yields nothing). +extern InternalIterator* NewEmptyInternalIterator(); + +// Return an empty iterator with the specified status. +extern InternalIterator* NewErrorInternalIterator(const Status& status); + +} // namespace rocksdb diff --git a/table/iterator.cc b/table/iterator.cc index f97879aea..2db321edd 100644 --- a/table/iterator.cc +++ b/table/iterator.cc @@ -8,17 +8,18 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. 
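The interface defined above is the whole contract that table-level code now programs against. A minimal hedged sketch of an implementation, assuming an in-memory vector already sorted in bytewise key order; the class name and storage are hypothetical, not part of the patch:

    #include <cassert>
    #include <string>
    #include <utility>
    #include <vector>

    #include "table/internal_iterator.h"

    namespace rocksdb {

    // Toy InternalIterator over a sorted vector of (key, value) pairs.
    class VectorInternalIterator : public InternalIterator {
     public:
      explicit VectorInternalIterator(
          std::vector<std::pair<std::string, std::string>> data)
          : data_(std::move(data)), pos_(data_.size()) {}  // starts invalid

      bool Valid() const override { return pos_ < data_.size(); }
      void SeekToFirst() override { pos_ = 0; }
      void SeekToLast() override {
        pos_ = data_.empty() ? data_.size() : data_.size() - 1;
      }
      void Seek(const Slice& target) override {
        // First entry whose key is at or past target.
        pos_ = 0;
        while (Valid() && Slice(data_[pos_].first).compare(target) < 0) {
          ++pos_;
        }
      }
      void Next() override { assert(Valid()); ++pos_; }
      void Prev() override {
        assert(Valid());
        pos_ = (pos_ == 0) ? data_.size() : pos_ - 1;  // before-first becomes invalid
      }
      Slice key() const override { return Slice(data_[pos_].first); }
      Slice value() const override { return Slice(data_[pos_].second); }
      Status status() const override { return Status::OK(); }

     private:
      std::vector<std::pair<std::string, std::string>> data_;
      size_t pos_;
    };

    }  // namespace rocksdb

The tests touched elsewhere in this patch (for example merger_test.cc with test::VectorIterator) exercise exactly this contract.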
#include "rocksdb/iterator.h" +#include "table/internal_iterator.h" #include "table/iterator_wrapper.h" #include "util/arena.h" namespace rocksdb { -Iterator::Iterator() { +Cleanable::Cleanable() { cleanup_.function = nullptr; cleanup_.next = nullptr; } -Iterator::~Iterator() { +Cleanable::~Cleanable() { if (cleanup_.function != nullptr) { (*cleanup_.function)(cleanup_.arg1, cleanup_.arg2); for (Cleanup* c = cleanup_.next; c != nullptr; ) { @@ -30,7 +31,7 @@ Iterator::~Iterator() { } } -void Iterator::RegisterCleanup(CleanupFunction func, void* arg1, void* arg2) { +void Cleanable::RegisterCleanup(CleanupFunction func, void* arg1, void* arg2) { assert(func != nullptr); Cleanup* c; if (cleanup_.function == nullptr) { @@ -68,31 +69,62 @@ class EmptyIterator : public Iterator { private: Status status_; }; + +class EmptyInternalIterator : public InternalIterator { + public: + explicit EmptyInternalIterator(const Status& s) : status_(s) {} + virtual bool Valid() const override { return false; } + virtual void Seek(const Slice& target) override {} + virtual void SeekToFirst() override {} + virtual void SeekToLast() override {} + virtual void Next() override { assert(false); } + virtual void Prev() override { assert(false); } + Slice key() const override { + assert(false); + return Slice(); + } + Slice value() const override { + assert(false); + return Slice(); + } + virtual Status status() const override { return status_; } + + private: + Status status_; +}; } // namespace Iterator* NewEmptyIterator() { return new EmptyIterator(Status::OK()); } -Iterator* NewEmptyIterator(Arena* arena) { +Iterator* NewErrorIterator(const Status& status) { + return new EmptyIterator(status); +} + +InternalIterator* NewEmptyInternalIterator() { + return new EmptyInternalIterator(Status::OK()); +} + +InternalIterator* NewEmptyInternalIterator(Arena* arena) { if (arena == nullptr) { - return NewEmptyIterator(); + return NewEmptyInternalIterator(); } else { auto mem = arena->AllocateAligned(sizeof(EmptyIterator)); - return new (mem) EmptyIterator(Status::OK()); + return new (mem) EmptyInternalIterator(Status::OK()); } } -Iterator* NewErrorIterator(const Status& status) { - return new EmptyIterator(status); +InternalIterator* NewErrorInternalIterator(const Status& status) { + return new EmptyInternalIterator(status); } -Iterator* NewErrorIterator(const Status& status, Arena* arena) { +InternalIterator* NewErrorInternalIterator(const Status& status, Arena* arena) { if (arena == nullptr) { - return NewErrorIterator(status); + return NewErrorInternalIterator(status); } else { auto mem = arena->AllocateAligned(sizeof(EmptyIterator)); - return new (mem) EmptyIterator(status); + return new (mem) EmptyInternalIterator(status); } } diff --git a/table/iterator_wrapper.h b/table/iterator_wrapper.h index d64047bea..2eb33b537 100644 --- a/table/iterator_wrapper.h +++ b/table/iterator_wrapper.h @@ -9,7 +9,7 @@ #pragma once -#include "rocksdb/iterator.h" +#include "table/internal_iterator.h" namespace rocksdb { @@ -20,13 +20,15 @@ namespace rocksdb { class IteratorWrapper { public: IteratorWrapper(): iter_(nullptr), valid_(false) { } - explicit IteratorWrapper(Iterator* _iter) : iter_(nullptr) { Set(_iter); } + explicit IteratorWrapper(InternalIterator* _iter) : iter_(nullptr) { + Set(_iter); + } ~IteratorWrapper() {} - Iterator* iter() const { return iter_; } + InternalIterator* iter() const { return iter_; } // Takes ownership of "iter" and will delete it when destroyed, or // when Set() is invoked again. 
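The iterator.cc hunk above moves the cleanup-list machinery out of Iterator into the new Cleanable base, so both public Iterators and InternalIterators inherit RegisterCleanup(). A hedged sketch of what that enables; the scratch-buffer helper is hypothetical:

    #include <cstddef>
    #include <cstdlib>

    #include "table/internal_iterator.h"

    namespace {
    void FreeBuffer(void* arg1, void* /*arg2*/) { free(arg1); }
    }  // namespace

    // Tie a heap buffer's lifetime to an iterator: the registered function runs
    // from ~Cleanable() when the iterator is destroyed.
    void AttachScratchBuffer(rocksdb::InternalIterator* iter, std::size_t len) {
      void* scratch = malloc(len);
      iter->RegisterCleanup(&FreeBuffer, scratch, nullptr);
    }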
- void Set(Iterator* _iter) { + void Set(InternalIterator* _iter) { delete iter_; iter_ = _iter; if (iter_ == nullptr) { @@ -40,7 +42,7 @@ class IteratorWrapper { if (!is_arena_mode) { delete iter_; } else { - iter_->~Iterator(); + iter_->~InternalIterator(); } } @@ -64,16 +66,17 @@ class IteratorWrapper { } } - Iterator* iter_; + InternalIterator* iter_; bool valid_; Slice key_; }; class Arena; // Return an empty iterator (yields nothing) allocated from arena. -extern Iterator* NewEmptyIterator(Arena* arena); +extern InternalIterator* NewEmptyInternalIterator(Arena* arena); // Return an empty iterator with the specified status, allocated arena. -extern Iterator* NewErrorIterator(const Status& status, Arena* arena); +extern InternalIterator* NewErrorInternalIterator(const Status& status, + Arena* arena); } // namespace rocksdb diff --git a/table/merger.cc b/table/merger.cc index 242587ea8..49e512581 100644 --- a/table/merger.cc +++ b/table/merger.cc @@ -14,6 +14,7 @@ #include "rocksdb/comparator.h" #include "rocksdb/iterator.h" #include "rocksdb/options.h" +#include "table/internal_iterator.h" #include "table/iter_heap.h" #include "table/iterator_wrapper.h" #include "util/arena.h" @@ -32,10 +33,10 @@ typedef BinaryHeap MergerMinIterHeap; const size_t kNumIterReserve = 4; -class MergingIterator : public Iterator { +class MergingIterator : public InternalIterator { public: - MergingIterator(const Comparator* comparator, Iterator** children, int n, - bool is_arena_mode) + MergingIterator(const Comparator* comparator, InternalIterator** children, + int n, bool is_arena_mode) : is_arena_mode_(is_arena_mode), comparator_(comparator), current_(nullptr), @@ -53,7 +54,7 @@ class MergingIterator : public Iterator { current_ = CurrentForward(); } - virtual void AddIterator(Iterator* iter) { + virtual void AddIterator(InternalIterator* iter) { assert(direction_ == kForward); children_.emplace_back(iter); auto new_wrapper = children_.back(); @@ -288,11 +289,12 @@ void MergingIterator::InitMaxHeap() { } } -Iterator* NewMergingIterator(const Comparator* cmp, Iterator** list, int n, - Arena* arena) { +InternalIterator* NewMergingIterator(const Comparator* cmp, + InternalIterator** list, int n, + Arena* arena) { assert(n >= 0); if (n == 0) { - return NewEmptyIterator(arena); + return NewEmptyInternalIterator(arena); } else if (n == 1) { return list[0]; } else { @@ -313,7 +315,7 @@ MergeIteratorBuilder::MergeIteratorBuilder(const Comparator* comparator, merge_iter = new (mem) MergingIterator(comparator, nullptr, 0, true); } -void MergeIteratorBuilder::AddIterator(Iterator* iter) { +void MergeIteratorBuilder::AddIterator(InternalIterator* iter) { if (!use_merging_iter && first_iter != nullptr) { merge_iter->AddIterator(first_iter); use_merging_iter = true; @@ -325,7 +327,7 @@ void MergeIteratorBuilder::AddIterator(Iterator* iter) { } } -Iterator* MergeIteratorBuilder::Finish() { +InternalIterator* MergeIteratorBuilder::Finish() { if (!use_merging_iter) { return first_iter; } else { diff --git a/table/merger.h b/table/merger.h index 7dcf2afe7..5ea624648 100644 --- a/table/merger.h +++ b/table/merger.h @@ -14,7 +14,7 @@ namespace rocksdb { class Comparator; -class Iterator; +class InternalIterator; class Env; class Arena; @@ -26,9 +26,9 @@ class Arena; // key is present in K child iterators, it will be yielded K times. 
// // REQUIRES: n >= 0 -extern Iterator* NewMergingIterator(const Comparator* comparator, - Iterator** children, int n, - Arena* arena = nullptr); +extern InternalIterator* NewMergingIterator(const Comparator* comparator, + InternalIterator** children, int n, + Arena* arena = nullptr); class MergingIterator; @@ -41,18 +41,18 @@ class MergeIteratorBuilder { ~MergeIteratorBuilder() {} // Add iter to the merging iterator. - void AddIterator(Iterator* iter); + void AddIterator(InternalIterator* iter); // Get arena used to build the merging iterator. It is called one a child // iterator needs to be allocated. Arena* GetArena() { return arena; } // Return the result merging iterator. - Iterator* Finish(); + InternalIterator* Finish(); private: MergingIterator* merge_iter; - Iterator* first_iter; + InternalIterator* first_iter; bool use_merging_iter; Arena* arena; }; diff --git a/table/merger_test.cc b/table/merger_test.cc index 562c0ae85..e9397dc1d 100644 --- a/table/merger_test.cc +++ b/table/merger_test.cc @@ -88,7 +88,7 @@ class MergerTest : public testing::Test { void Generate(size_t num_iterators, size_t strings_per_iterator, int letters_per_string) { - std::vector small_iterators; + std::vector small_iterators; for (size_t i = 0; i < num_iterators; ++i) { auto strings = GenerateStrings(strings_per_iterator, letters_per_string); small_iterators.push_back(new test::VectorIterator(strings)); @@ -102,8 +102,8 @@ class MergerTest : public testing::Test { } Random rnd_; - std::unique_ptr merging_iterator_; - std::unique_ptr single_iterator_; + std::unique_ptr merging_iterator_; + std::unique_ptr single_iterator_; std::vector all_keys_; }; diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index 7bcdf7576..505dbacd0 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -12,6 +12,7 @@ #include "rocksdb/table_properties.h" #include "table/block.h" #include "table/format.h" +#include "table/internal_iterator.h" #include "table/table_properties_internal.h" #include "util/coding.h" @@ -152,7 +153,7 @@ Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, } Block properties_block(std::move(block_contents)); - std::unique_ptr iter( + std::unique_ptr iter( properties_block.NewIterator(BytewiseComparator())); auto new_table_properties = new TableProperties(); @@ -237,7 +238,7 @@ Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size, return s; } Block metaindex_block(std::move(metaindex_contents)); - std::unique_ptr meta_iter( + std::unique_ptr meta_iter( metaindex_block.NewIterator(BytewiseComparator())); // -- Read property block @@ -258,7 +259,7 @@ Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size, return s; } -Status FindMetaBlock(Iterator* meta_index_iter, +Status FindMetaBlock(InternalIterator* meta_index_iter, const std::string& meta_block_name, BlockHandle* block_handle) { meta_index_iter->Seek(meta_block_name); @@ -292,7 +293,7 @@ Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size, } Block metaindex_block(std::move(metaindex_contents)); - std::unique_ptr meta_iter; + std::unique_ptr meta_iter; meta_iter.reset(metaindex_block.NewIterator(BytewiseComparator())); return FindMetaBlock(meta_iter.get(), meta_block_name, block_handle); @@ -323,7 +324,7 @@ Status ReadMetaBlock(RandomAccessFileReader* file, uint64_t file_size, // Finding metablock Block metaindex_block(std::move(metaindex_contents)); - std::unique_ptr meta_iter; + std::unique_ptr meta_iter; 
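Stepping back to the merger.h/merger.cc changes above, which now merge InternalIterators: a hedged usage sketch. VectorInternalIterator is the hypothetical class from the earlier sketch, and the ownership comment is an assumption about the non-arena path, not something this hunk states:

    #include "rocksdb/comparator.h"
    #include "table/merger.h"

    void MergeTwoChildren() {
      rocksdb::InternalIterator* children[2] = {
          new rocksdb::VectorInternalIterator({{"a", "1"}, {"c", "3"}}),
          new rocksdb::VectorInternalIterator({{"b", "2"}, {"d", "4"}})};
      rocksdb::InternalIterator* merged = rocksdb::NewMergingIterator(
          rocksdb::BytewiseComparator(), children, 2);  // heap-orders entries by key
      for (merged->SeekToFirst(); merged->Valid(); merged->Next()) {
        // Yields "a", "b", "c", "d" in order; duplicate keys are yielded once per child.
      }
      delete merged;  // assumed to release the children in the non-arena case
    }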
meta_iter.reset(metaindex_block.NewIterator(BytewiseComparator())); BlockHandle block_handle; diff --git a/table/meta_blocks.h b/table/meta_blocks.h index 005bcaae2..085ae308e 100644 --- a/table/meta_blocks.h +++ b/table/meta_blocks.h @@ -11,12 +11,12 @@ #include "db/builder.h" #include "db/table_properties_collector.h" +#include "util/kv_map.h" #include "rocksdb/comparator.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "table/block_builder.h" #include "table/format.h" -#include "util/stl_wrappers.h" namespace rocksdb { @@ -27,6 +27,7 @@ class Footer; class Logger; class RandomAccessFile; struct TableProperties; +class InternalIterator; class MetaIndexBuilder { public: @@ -105,7 +106,7 @@ Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size, Logger* info_log, TableProperties** properties); // Find the meta block from the meta index block. -Status FindMetaBlock(Iterator* meta_index_iter, +Status FindMetaBlock(InternalIterator* meta_index_iter, const std::string& meta_block_name, BlockHandle* block_handle); diff --git a/table/mock_table.cc b/table/mock_table.cc index d75630374..f736060f6 100644 --- a/table/mock_table.cc +++ b/table/mock_table.cc @@ -28,7 +28,8 @@ stl_wrappers::KVMap MakeMockFile( return stl_wrappers::KVMap(l, stl_wrappers::LessOfComparator(&icmp_)); } -Iterator* MockTableReader::NewIterator(const ReadOptions&, Arena* arena) { +InternalIterator* MockTableReader::NewIterator(const ReadOptions&, + Arena* arena) { return new MockTableIterator(table_); } diff --git a/table/mock_table.h b/table/mock_table.h index e313fbc08..c13636af1 100644 --- a/table/mock_table.h +++ b/table/mock_table.h @@ -14,13 +14,14 @@ #include #include +#include "util/kv_map.h" #include "port/port.h" #include "rocksdb/comparator.h" #include "rocksdb/table.h" +#include "table/internal_iterator.h" #include "table/table_builder.h" #include "table/table_reader.h" #include "util/mutexlock.h" -#include "util/stl_wrappers.h" #include "util/testharness.h" #include "util/testutil.h" @@ -39,7 +40,7 @@ class MockTableReader : public TableReader { public: explicit MockTableReader(const stl_wrappers::KVMap& table) : table_(table) {} - Iterator* NewIterator(const ReadOptions&, Arena* arena) override; + InternalIterator* NewIterator(const ReadOptions&, Arena* arena) override; Status Get(const ReadOptions&, const Slice& key, GetContext* get_context) override; @@ -58,7 +59,7 @@ class MockTableReader : public TableReader { const stl_wrappers::KVMap& table_; }; -class MockTableIterator : public Iterator { +class MockTableIterator : public InternalIterator { public: explicit MockTableIterator(const stl_wrappers::KVMap& table) : table_(table) { itr_ = table_.end(); diff --git a/table/plain_table_reader.cc b/table/plain_table_reader.cc index 1aabbb98f..6d34378bb 100644 --- a/table/plain_table_reader.cc +++ b/table/plain_table_reader.cc @@ -22,6 +22,7 @@ #include "table/bloom_block.h" #include "table/filter_block.h" #include "table/format.h" +#include "table/internal_iterator.h" #include "table/meta_blocks.h" #include "table/two_level_iterator.h" #include "table/plain_table_factory.h" @@ -51,7 +52,7 @@ inline uint32_t GetFixed32Element(const char* base, size_t offset) { } // namespace // Iterator to iterate IndexedTable -class PlainTableIterator : public Iterator { +class PlainTableIterator : public InternalIterator { public: explicit PlainTableIterator(PlainTableReader* table, bool use_prefix_seek); ~PlainTableIterator(); @@ -186,10 +187,10 @@ Status PlainTableReader::Open(const 
ImmutableCFOptions& ioptions, void PlainTableReader::SetupForCompaction() { } -Iterator* PlainTableReader::NewIterator(const ReadOptions& options, - Arena* arena) { +InternalIterator* PlainTableReader::NewIterator(const ReadOptions& options, + Arena* arena) { if (options.total_order_seek && !IsTotalOrderMode()) { - return NewErrorIterator( + return NewErrorInternalIterator( Status::InvalidArgument("total_order_seek not supported"), arena); } if (arena == nullptr) { diff --git a/table/plain_table_reader.h b/table/plain_table_reader.h index b9d8cebba..8406fc7d1 100644 --- a/table/plain_table_reader.h +++ b/table/plain_table_reader.h @@ -38,6 +38,7 @@ class TableReader; class InternalKeyComparator; class PlainTableKeyDecoder; class GetContext; +class InternalIterator; using std::unique_ptr; using std::unordered_map; @@ -77,7 +78,8 @@ class PlainTableReader: public TableReader { size_t index_sparseness, size_t huge_page_tlb_size, bool full_scan_mode); - Iterator* NewIterator(const ReadOptions&, Arena* arena = nullptr) override; + InternalIterator* NewIterator(const ReadOptions&, + Arena* arena = nullptr) override; void Prepare(const Slice& target) override; diff --git a/util/scoped_arena_iterator.h b/table/scoped_arena_iterator.h similarity index 63% rename from util/scoped_arena_iterator.h rename to table/scoped_arena_iterator.h index 2021d2dc2..0372b5691 100644 --- a/util/scoped_arena_iterator.h +++ b/table/scoped_arena_iterator.h @@ -7,22 +7,23 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once -#include "rocksdb/iterator.h" +#include "table/internal_iterator.h" namespace rocksdb { class ScopedArenaIterator { public: - explicit ScopedArenaIterator(Iterator* iter = nullptr) : iter_(iter) {} + explicit ScopedArenaIterator(InternalIterator* iter = nullptr) + : iter_(iter) {} - Iterator* operator->() { return iter_; } + InternalIterator* operator->() { return iter_; } - void set(Iterator* iter) { iter_ = iter; } + void set(InternalIterator* iter) { iter_ = iter; } - Iterator* get() { return iter_; } + InternalIterator* get() { return iter_; } - ~ScopedArenaIterator() { iter_->~Iterator(); } + ~ScopedArenaIterator() { iter_->~InternalIterator(); } private: - Iterator* iter_; + InternalIterator* iter_; }; } // namespace rocksdb diff --git a/table/table_properties.cc b/table/table_properties.cc index 9193499fa..7a51779fe 100644 --- a/table/table_properties.cc +++ b/table/table_properties.cc @@ -8,6 +8,7 @@ #include "rocksdb/iterator.h" #include "rocksdb/env.h" #include "port/port.h" +#include "table/internal_iterator.h" #include "util/string_util.h" namespace rocksdb { @@ -114,7 +115,7 @@ extern const std::string kPropertiesBlockOldName = "rocksdb.stats"; // Seek to the properties block. // Return true if it successfully seeks to the properties block. -Status SeekToPropertiesBlock(Iterator* meta_iter, bool* is_found) { +Status SeekToPropertiesBlock(InternalIterator* meta_iter, bool* is_found) { *is_found = true; meta_iter->Seek(kPropertiesBlock); if (meta_iter->status().ok() && diff --git a/table/table_properties_internal.h b/table/table_properties_internal.h index 9ef8ad432..10f38cdf2 100644 --- a/table/table_properties_internal.h +++ b/table/table_properties_internal.h @@ -10,9 +10,11 @@ namespace rocksdb { +class InternalIterator; + // Seek to the properties block. // If it successfully seeks to the properties block, "is_found" will be // set to true. 
-Status SeekToPropertiesBlock(Iterator* meta_iter, bool* is_found); +Status SeekToPropertiesBlock(InternalIterator* meta_iter, bool* is_found); } // namespace rocksdb diff --git a/table/table_reader.h b/table/table_reader.h index 2058b868c..60a593b42 100644 --- a/table/table_reader.h +++ b/table/table_reader.h @@ -19,6 +19,7 @@ class Arena; struct ReadOptions; struct TableProperties; class GetContext; +class InternalIterator; // A Table is a sorted map from strings to strings. Tables are // immutable and persistent. A Table may be safely accessed from @@ -34,7 +35,8 @@ class TableReader { // When destroying the iterator, the caller will not call "delete" // but Iterator::~Iterator() directly. The destructor needs to destroy // all the states but those allocated in arena. - virtual Iterator* NewIterator(const ReadOptions&, Arena* arena = nullptr) = 0; + virtual InternalIterator* NewIterator(const ReadOptions&, + Arena* arena = nullptr) = 0; // Given a key, return an approximate byte offset in the file where // the data for that key begins (or would begin if the key were diff --git a/table/table_reader_bench.cc b/table/table_reader_bench.cc index b940d89de..c4106e4b3 100644 --- a/table/table_reader_bench.cc +++ b/table/table_reader_bench.cc @@ -19,6 +19,7 @@ int main() { #include "db/db_impl.h" #include "db/dbformat.h" #include "table/block_based_table_factory.h" +#include "table/internal_iterator.h" #include "table/plain_table_factory.h" #include "table/table_builder.h" #include "table/get_context.h" @@ -187,14 +188,17 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options, std::string end_key = MakeKey(r1, r2 + r2_len, through_db); uint64_t total_time = 0; uint64_t start_time = Now(env, measured_by_nanosecond); - Iterator* iter; + Iterator* iter = nullptr; + InternalIterator* iiter = nullptr; if (!through_db) { - iter = table_reader->NewIterator(read_options); + iiter = table_reader->NewIterator(read_options); } else { iter = db->NewIterator(read_options); } int count = 0; - for(iter->Seek(start_key); iter->Valid(); iter->Next()) { + for (through_db ? iter->Seek(start_key) : iiter->Seek(start_key); + through_db ? iter->Valid() : iiter->Valid(); + through_db ? 
iter->Next() : iiter->Next()) { if (if_query_empty_keys) { break; } diff --git a/table/table_test.cc b/table/table_test.cc index 928861ae2..58607bbb2 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -21,6 +21,7 @@ #include "db/memtable.h" #include "db/write_batch_internal.h" #include "db/writebuffer.h" +#include "memtable/stl_wrappers.h" #include "rocksdb/cache.h" #include "rocksdb/db.h" #include "rocksdb/env.h" @@ -36,13 +37,13 @@ #include "table/block_builder.h" #include "table/format.h" #include "table/get_context.h" +#include "table/internal_iterator.h" #include "table/meta_blocks.h" #include "table/plain_table_factory.h" +#include "table/scoped_arena_iterator.h" #include "util/compression.h" #include "util/random.h" -#include "util/scoped_arena_iterator.h" #include "util/statistics.h" -#include "util/stl_wrappers.h" #include "util/string_util.h" #include "util/testharness.h" #include "util/testutil.h" @@ -142,7 +143,7 @@ class Constructor { const InternalKeyComparator& internal_comparator, const stl_wrappers::KVMap& data) = 0; - virtual Iterator* NewIterator() const = 0; + virtual InternalIterator* NewIterator() const = 0; virtual const stl_wrappers::KVMap& data() { return data_; } @@ -188,7 +189,7 @@ class BlockConstructor: public Constructor { block_ = new Block(std::move(contents)); return Status::OK(); } - virtual Iterator* NewIterator() const override { + virtual InternalIterator* NewIterator() const override { return block_->NewIterator(comparator_); } @@ -201,13 +202,14 @@ class BlockConstructor: public Constructor { }; // A helper class that converts internal format keys into user keys -class KeyConvertingIterator: public Iterator { +class KeyConvertingIterator : public InternalIterator { public: - explicit KeyConvertingIterator(Iterator* iter, bool arena_mode = false) + explicit KeyConvertingIterator(InternalIterator* iter, + bool arena_mode = false) : iter_(iter), arena_mode_(arena_mode) {} virtual ~KeyConvertingIterator() { if (arena_mode_) { - iter_->~Iterator(); + iter_->~InternalIterator(); } else { delete iter_; } @@ -241,7 +243,7 @@ class KeyConvertingIterator: public Iterator { private: mutable Status status_; - Iterator* iter_; + InternalIterator* iter_; bool arena_mode_; // No copying allowed @@ -301,9 +303,9 @@ class TableConstructor: public Constructor { std::move(file_reader_), GetSink()->contents().size(), &table_reader_); } - virtual Iterator* NewIterator() const override { + virtual InternalIterator* NewIterator() const override { ReadOptions ro; - Iterator* iter = table_reader_->NewIterator(ro); + InternalIterator* iter = table_reader_->NewIterator(ro); if (convert_to_internal_key_) { return new KeyConvertingIterator(iter); } else { @@ -390,7 +392,7 @@ class MemTableConstructor: public Constructor { } return Status::OK(); } - virtual Iterator* NewIterator() const override { + virtual InternalIterator* NewIterator() const override { return new KeyConvertingIterator( memtable_->NewIterator(ReadOptions(), &arena_), true); } @@ -408,6 +410,23 @@ class MemTableConstructor: public Constructor { std::shared_ptr table_factory_; }; +class InternalIteratorFromIterator : public InternalIterator { + public: + explicit InternalIteratorFromIterator(Iterator* it) : it_(it) {} + virtual bool Valid() const override { return it_->Valid(); } + virtual void Seek(const Slice& target) override { it_->Seek(target); } + virtual void SeekToFirst() override { it_->SeekToFirst(); } + virtual void SeekToLast() override { it_->SeekToLast(); } + virtual void Next() override { 
it_->Next(); } + virtual void Prev() override { it_->Prev(); } + Slice key() const override { return it_->key(); } + Slice value() const override { return it_->value(); } + virtual Status status() const override { return it_->status(); } + + private: + unique_ptr it_; +}; + class DBConstructor: public Constructor { public: explicit DBConstructor(const Comparator* cmp) @@ -434,8 +453,9 @@ class DBConstructor: public Constructor { } return Status::OK(); } - virtual Iterator* NewIterator() const override { - return db_->NewIterator(ReadOptions()); + + virtual InternalIterator* NewIterator() const override { + return new InternalIteratorFromIterator(db_->NewIterator(ReadOptions())); } virtual DB* db() const override { return db_; } @@ -705,7 +725,7 @@ class HarnessTest : public testing::Test { void TestForwardScan(const std::vector& keys, const stl_wrappers::KVMap& data) { - Iterator* iter = constructor_->NewIterator(); + InternalIterator* iter = constructor_->NewIterator(); ASSERT_TRUE(!iter->Valid()); iter->SeekToFirst(); for (stl_wrappers::KVMap::const_iterator model_iter = data.begin(); @@ -715,7 +735,7 @@ class HarnessTest : public testing::Test { } ASSERT_TRUE(!iter->Valid()); if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) { - iter->~Iterator(); + iter->~InternalIterator(); } else { delete iter; } @@ -723,7 +743,7 @@ class HarnessTest : public testing::Test { void TestBackwardScan(const std::vector& keys, const stl_wrappers::KVMap& data) { - Iterator* iter = constructor_->NewIterator(); + InternalIterator* iter = constructor_->NewIterator(); ASSERT_TRUE(!iter->Valid()); iter->SeekToLast(); for (stl_wrappers::KVMap::const_reverse_iterator model_iter = data.rbegin(); @@ -733,7 +753,7 @@ class HarnessTest : public testing::Test { } ASSERT_TRUE(!iter->Valid()); if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) { - iter->~Iterator(); + iter->~InternalIterator(); } else { delete iter; } @@ -742,7 +762,7 @@ class HarnessTest : public testing::Test { void TestRandomAccess(Random* rnd, const std::vector& keys, const stl_wrappers::KVMap& data) { static const bool kVerbose = false; - Iterator* iter = constructor_->NewIterator(); + InternalIterator* iter = constructor_->NewIterator(); ASSERT_TRUE(!iter->Valid()); stl_wrappers::KVMap::const_iterator model_iter = data.begin(); if (kVerbose) fprintf(stderr, "---\n"); @@ -806,7 +826,7 @@ class HarnessTest : public testing::Test { } } if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) { - iter->~Iterator(); + iter->~InternalIterator(); } else { delete iter; } @@ -830,7 +850,7 @@ class HarnessTest : public testing::Test { } } - std::string ToString(const Iterator* it) { + std::string ToString(const InternalIterator* it) { if (!it->Valid()) { return "END"; } else { @@ -1191,7 +1211,7 @@ TEST_F(BlockBasedTableTest, TotalOrderSeekOnHashIndex) { auto* reader = c.GetTableReader(); ReadOptions ro; ro.total_order_seek = true; - std::unique_ptr iter(reader->NewIterator(ro)); + std::unique_ptr iter(reader->NewIterator(ro)); iter->Seek(InternalKey("b", 0, kTypeValue).Encode()); ASSERT_OK(iter->status()); @@ -1275,7 +1295,8 @@ TEST_F(TableTest, HashIndexTest) { auto props = reader->GetTableProperties(); ASSERT_EQ(5u, props->num_data_blocks); - std::unique_ptr hash_iter(reader->NewIterator(ReadOptions())); + std::unique_ptr hash_iter( + reader->NewIterator(ReadOptions())); // -- Find keys do not exist, but have common prefix. 
std::vector prefixes = {"001", "003", "005", "007", "009"}; @@ -1545,7 +1566,7 @@ TEST_F(BlockBasedTableTest, FilterBlockInBlockCache) { // -- PART 1: Open with regular block cache. // Since block_cache is disabled, no cache activities will be involved. - unique_ptr iter; + unique_ptr iter; int64_t last_cache_bytes_read = 0; // At first, no block will be accessed. @@ -1778,7 +1799,7 @@ TEST_F(BlockBasedTableTest, BlockCacheLeak) { const ImmutableCFOptions ioptions(opt); c.Finish(opt, ioptions, table_options, *ikc, &keys, &kvmap); - unique_ptr iter(c.NewIterator()); + unique_ptr iter(c.NewIterator()); iter->SeekToFirst(); while (iter->Valid()) { iter->key(); @@ -1974,6 +1995,7 @@ TEST_F(HarnessTest, Randomized) { } } +#ifndef ROCKSDB_LITE TEST_F(HarnessTest, RandomizedLongDB) { Random rnd(test::RandomSeed()); TestArgs args = { DB_TEST, false, 16, kNoCompression, 0 }; @@ -1997,6 +2019,7 @@ TEST_F(HarnessTest, RandomizedLongDB) { } ASSERT_GT(files, 0); } +#endif // ROCKSDB_LITE class MemTableTest : public testing::Test {}; diff --git a/table/two_level_iterator.cc b/table/two_level_iterator.cc index f540d3b16..dbc378529 100644 --- a/table/two_level_iterator.cc +++ b/table/two_level_iterator.cc @@ -19,10 +19,10 @@ namespace rocksdb { namespace { -class TwoLevelIterator: public Iterator { +class TwoLevelIterator : public InternalIterator { public: explicit TwoLevelIterator(TwoLevelIteratorState* state, - Iterator* first_level_iter, + InternalIterator* first_level_iter, bool need_free_iter_and_state); virtual ~TwoLevelIterator() { @@ -68,7 +68,7 @@ class TwoLevelIterator: public Iterator { } void SkipEmptyDataBlocksForward(); void SkipEmptyDataBlocksBackward(); - void SetSecondLevelIterator(Iterator* iter); + void SetSecondLevelIterator(InternalIterator* iter); void InitDataBlock(); TwoLevelIteratorState* state_; @@ -82,7 +82,7 @@ class TwoLevelIterator: public Iterator { }; TwoLevelIterator::TwoLevelIterator(TwoLevelIteratorState* state, - Iterator* first_level_iter, + InternalIterator* first_level_iter, bool need_free_iter_and_state) : state_(state), first_level_iter_(first_level_iter), @@ -168,7 +168,7 @@ void TwoLevelIterator::SkipEmptyDataBlocksBackward() { } } -void TwoLevelIterator::SetSecondLevelIterator(Iterator* iter) { +void TwoLevelIterator::SetSecondLevelIterator(InternalIterator* iter) { if (second_level_iter_.iter() != nullptr) { SaveError(second_level_iter_.status()); } @@ -186,7 +186,7 @@ void TwoLevelIterator::InitDataBlock() { // second_level_iter is already constructed with this iterator, so // no need to change anything } else { - Iterator* iter = state_->NewSecondaryIterator(handle); + InternalIterator* iter = state_->NewSecondaryIterator(handle); data_block_handle_.assign(handle.data(), handle.size()); SetSecondLevelIterator(iter); } @@ -195,9 +195,10 @@ void TwoLevelIterator::InitDataBlock() { } // namespace -Iterator* NewTwoLevelIterator(TwoLevelIteratorState* state, - Iterator* first_level_iter, Arena* arena, - bool need_free_iter_and_state) { +InternalIterator* NewTwoLevelIterator(TwoLevelIteratorState* state, + InternalIterator* first_level_iter, + Arena* arena, + bool need_free_iter_and_state) { if (arena == nullptr) { return new TwoLevelIterator(state, first_level_iter, need_free_iter_and_state); diff --git a/table/two_level_iterator.h b/table/two_level_iterator.h index 4c6b48c2c..ed5380bd4 100644 --- a/table/two_level_iterator.h +++ b/table/two_level_iterator.h @@ -23,7 +23,7 @@ struct TwoLevelIteratorState { : check_prefix_may_match(_check_prefix_may_match) {} 
virtual ~TwoLevelIteratorState() {} - virtual Iterator* NewSecondaryIterator(const Slice& handle) = 0; + virtual InternalIterator* NewSecondaryIterator(const Slice& handle) = 0; virtual bool PrefixMayMatch(const Slice& internal_key) = 0; // If call PrefixMayMatch() @@ -45,9 +45,8 @@ struct TwoLevelIteratorState { // all the states but those allocated in arena. // need_free_iter_and_state: free `state` and `first_level_iter` if // true. Otherwise, just call destructor. -extern Iterator* NewTwoLevelIterator(TwoLevelIteratorState* state, - Iterator* first_level_iter, - Arena* arena = nullptr, - bool need_free_iter_and_state = true); +extern InternalIterator* NewTwoLevelIterator( + TwoLevelIteratorState* state, InternalIterator* first_level_iter, + Arena* arena = nullptr, bool need_free_iter_and_state = true); } // namespace rocksdb diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 6ef20ff40..6f9a1e867 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -4,161 +4,183 @@ import re import sys import time import random -import getopt import logging import tempfile import subprocess import shutil +import argparse -# This script runs and kills db_stress multiple times. It checks consistency -# in case of unsafe crashes in RocksDB. +# params overwrite priority: +# for default: +# default_params < blackbox|whitebox_default_params < args +# for simple: +# simple_default_params < blackbox|whitebox_simple_default_params < args -def main(argv): - try: - opts, args = getopt.getopt(argv, "hsd:t:i:o:b:") - except getopt.GetoptError: - print("db_crashtest.py -d -t <#threads> " - "-i -o " - "-b [-s (simple mode)]\n") - sys.exit(2) - - # default values, will be overridden by cmdline args - interval = 120 # time for one db_stress instance to run - duration = 6000 # total time for this script to test db_stress - threads = 32 - # since we will be killing anyway, use large value for ops_per_thread - ops_per_thread = 100000000 - write_buf_size = 4 * 1024 * 1024 - simple_mode = False - write_buf_size_set = False - for opt, arg in opts: - if opt == '-h': - print("db_crashtest.py -d " - " -t <#threads> -i " - " -o -b " - " [-s (simple mode)]\n") - sys.exit() - elif opt == '-s': - simple_mode = True - if not write_buf_size_set: - write_buf_size = 32 * 1024 * 1024 - elif opt == "-d": - duration = int(arg) - elif opt == "-t": - threads = int(arg) - elif opt == "-i": - interval = int(arg) - elif opt == "-o": - ops_per_thread = int(arg) - elif opt == "-b": - write_buf_size = int(arg) - write_buf_size_set = True - else: - print("db_crashtest.py -d " - " -t <#threads> -i " - " -o -b \n") - sys.exit(2) - - exit_time = time.time() + duration +default_params = { + "block_size": 16384, + "cache_size": 1048576, + "delpercent": 5, + "destroy_db_initially": 0, + "disable_data_sync": 0, + "disable_wal": 0, + "filter_deletes": lambda: random.randint(0, 1), + "iterpercent": 10, + "max_background_compactions": 20, + "max_bytes_for_level_base": 10485760, + "max_key": 100000000, + "max_write_buffer_number": 3, + "memtablerep": "prefix_hash", + "mmap_read": lambda: random.randint(0, 1), + "open_files": 500000, + "prefix_size": 7, + "prefixpercent": 5, + "progress_reports": 0, + "readpercent": 45, + "reopen": 20, + "sync": 0, + "target_file_size_base": 2097152, + "target_file_size_multiplier": 2, + "threads": 32, + "verify_checksum": 1, + "write_buffer_size": 4 * 1024 * 1024, + "writepercent": 35, +} - print("Running blackbox-crash-test with \ninterval_between_crash=" - + str(interval) + "\ntotal-duration=" + 
str(duration) - + "\nthreads=" + str(threads) + "\nops_per_thread=" - + str(ops_per_thread) + "\nwrite_buffer_size=" - + str(write_buf_size) + "\n") +def get_dbname(test_name): test_tmpdir = os.environ.get("TEST_TMPDIR") if test_tmpdir is None or test_tmpdir == "": - dbname = tempfile.mkdtemp(prefix='rocksdb_crashtest_') + dbname = tempfile.mkdtemp(prefix='rocksdb_crashtest_' + test_name) else: - dbname = test_tmpdir + "/rocksdb_crashtest" + dbname = test_tmpdir + "/rocksdb_crashtest_" + test_name shutil.rmtree(dbname, True) + return dbname + +blackbox_default_params = { + 'db': lambda: get_dbname('blackbox'), + # total time for this script to test db_stress + "duration": 6000, + # time for one db_stress instance to run + "interval": 120, + # since we will be killing anyway, use large value for ops_per_thread + "ops_per_thread": 100000000, + "set_options_one_in": 10000, + "test_batches_snapshots": 1, +} + +whitebox_default_params = { + 'db': lambda: get_dbname('whitebox'), + "duration": 10000, + "log2_keys_per_lock": 10, + "nooverwritepercent": 1, + "ops_per_thread": 200000, + "test_batches_snapshots": lambda: random.randint(0, 1), + "write_buffer_size": 4 * 1024 * 1024, +} + +simple_default_params = { + "block_size": 16384, + "cache_size": 1048576, + "column_families": 1, + "delpercent": 5, + "destroy_db_initially": 0, + "disable_data_sync": 0, + "disable_wal": 0, + "filter_deletes": lambda: random.randint(0, 1), + "iterpercent": 10, + "max_background_compactions": 1, + "max_bytes_for_level_base": 67108864, + "max_key": 100000000, + "max_write_buffer_number": 3, + "memtablerep": "skip_list", + "mmap_read": lambda: random.randint(0, 1), + "prefix_size": 0, + "prefixpercent": 0, + "progress_reports": 0, + "readpercent": 50, + "reopen": 20, + "sync": 0, + "target_file_size_base": 16777216, + "target_file_size_multiplier": 1, + "test_batches_snapshots": 0, + "threads": 32, + "verify_checksum": 1, + "write_buffer_size": 32 * 1024 * 1024, + "writepercent": 35, +} + +blackbox_simple_default_params = { + 'db': lambda: get_dbname('blackbox'), + "duration": 6000, + "interval": 120, + "open_files": -1, + "ops_per_thread": 100000000, + "set_options_one_in": 0, + "test_batches_snapshots": 0, +} + +whitebox_simple_default_params = { + 'db': lambda: get_dbname('whitebox'), + "duration": 10000, + "log2_keys_per_lock": 10, + "nooverwritepercent": 1, + "open_files": 500000, + "ops_per_thread": 200000, + "write_buffer_size": 32 * 1024 * 1024, +} + + +def gen_cmd_params(args): + params = {} + + if args.simple: + params.update(simple_default_params) + if args.test_type == 'blackbox': + params.update(blackbox_simple_default_params) + if args.test_type == 'whitebox': + params.update(whitebox_simple_default_params) + + if not args.simple: + params.update(default_params) + if args.test_type == 'blackbox': + params.update(blackbox_default_params) + if args.test_type == 'whitebox': + params.update(whitebox_default_params) + + for k, v in vars(args).items(): + if v is not None: + params[k] = v + return params + + +def gen_cmd(params): + cmd = './db_stress ' + ' '.join( + '--{0}={1}'.format(k, v() if callable(v) else v) + for k, v in params.items() + if k not in set(['test_type', 'simple', 'duration', 'interval']) + and v is not None) + return cmd + + +# This script runs and kills db_stress multiple times. It checks consistency +# in case of unsafe crashes in RocksDB. 
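For reference, a hedged sketch (not part of the patch) of how the dictionaries and gen_cmd() above compose; the values shown are illustrative, and lambda-valued params are re-sampled whenever gen_cmd() formats them:

    # Overwrite priority, as documented at the top of the script:
    # default_params < blackbox|whitebox_default_params < command-line args.
    params = dict(default_params)
    params.update(blackbox_default_params)  # mode-specific values win over defaults
    params["duration"] = 600                # an explicit --duration arg wins over both
    cmd = gen_cmd(params)
    # cmd is a single "./db_stress --block_size=16384 ... --ops_per_thread=100000000 ..."
    # string; 'duration' and 'interval' are driver-only keys and are not emitted.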
+def blackbox_crash_main(args): + cmd_params = gen_cmd_params(args) + + exit_time = time.time() + cmd_params['duration'] + + print("Running blackbox-crash-test with \n" + + "interval_between_crash=" + str(cmd_params['interval']) + "\n" + + "total-duration=" + str(cmd_params['duration']) + "\n" + + "threads=" + str(cmd_params['threads']) + "\n" + + "ops_per_thread=" + str(cmd_params['ops_per_thread']) + "\n" + + "write_buffer_size=" + str(cmd_params['write_buffer_size']) + "\n") while time.time() < exit_time: run_had_errors = False - killtime = time.time() + interval - - if simple_mode: - cmd = re.sub('\s+', ' ', """ - ./db_stress - --column_families=1 - --test_batches_snapshots=0 - --ops_per_thread=%s - --threads=%s - --write_buffer_size=%s - --destroy_db_initially=0 - --reopen=20 - --readpercent=50 - --prefixpercent=0 - --writepercent=35 - --delpercent=5 - --iterpercent=10 - --db=%s - --max_key=100000000 - --mmap_read=%s - --block_size=16384 - --cache_size=1048576 - --open_files=-1 - --verify_checksum=1 - --sync=0 - --progress_reports=0 - --disable_wal=0 - --disable_data_sync=1 - --target_file_size_base=16777216 - --target_file_size_multiplier=1 - --max_write_buffer_number=3 - --max_background_compactions=1 - --max_bytes_for_level_base=67108864 - --filter_deletes=%s - --memtablerep=skip_list - --prefix_size=0 - --set_options_one_in=0 - """ % (ops_per_thread, - threads, - write_buf_size, - dbname, - random.randint(0, 1), - random.randint(0, 1))) - else: - cmd = re.sub('\s+', ' ', """ - ./db_stress - --test_batches_snapshots=1 - --ops_per_thread=%s - --threads=%s - --write_buffer_size=%s - --destroy_db_initially=0 - --reopen=20 - --readpercent=45 - --prefixpercent=5 - --writepercent=35 - --delpercent=5 - --iterpercent=10 - --db=%s - --max_key=100000000 - --mmap_read=%s - --block_size=16384 - --cache_size=1048576 - --open_files=500000 - --verify_checksum=1 - --sync=0 - --progress_reports=0 - --disable_wal=0 - --disable_data_sync=1 - --target_file_size_base=2097152 - --target_file_size_multiplier=2 - --max_write_buffer_number=3 - --max_background_compactions=20 - --max_bytes_for_level_base=10485760 - --filter_deletes=%s - --memtablerep=prefix_hash - --prefix_size=7 - --set_options_one_in=10000 - """ % (ops_per_thread, - threads, - write_buf_size, - dbname, - random.randint(0, 1), - random.randint(0, 1))) + killtime = time.time() + cmd_params['interval'] + + cmd = gen_cmd(cmd_params) child = subprocess.Popen([cmd], stderr=subprocess.PIPE, shell=True) @@ -199,5 +221,140 @@ def main(argv): # we need to clean up after ourselves -- only do this on test success shutil.rmtree(dbname, True) -if __name__ == "__main__": - sys.exit(main(sys.argv[1:])) + +# This python script runs db_stress multiple times. Some runs with +# kill_random_test that causes rocksdb to crash at various points in code. 
+def whitebox_crash_main(args): + cmd_params = gen_cmd_params(args) + + cur_time = time.time() + exit_time = cur_time + cmd_params['duration'] + half_time = cur_time + cmd_params['duration'] / 2 + + print("Running whitebox-crash-test with \n" + + "total-duration=" + str(cmd_params['duration']) + "\n" + + "threads=" + str(cmd_params['threads']) + "\n" + + "ops_per_thread=" + str(cmd_params['ops_per_thread']) + "\n" + + "write_buffer_size=" + str(cmd_params['write_buffer_size']) + "\n") + + total_check_mode = 4 + check_mode = 0 + kill_random_test = 97 + kill_mode = 0 + + while time.time() < exit_time: + if check_mode == 0: + additional_opts = { + # use large ops per thread since we will kill it anyway + "ops_per_thread": 100 * cmd_params['ops_per_thread'], + } + # run with kill_random_test + if kill_mode == 0: + additional_opts.update({ + "kill_random_test": kill_random_test, + }) + elif kill_mode == 1: + additional_opts.update({ + "kill_random_test": (kill_random_test / 3 + 1), + "kill_prefix_blacklist": "WritableFileWriter::Append," + + "WritableFileWriter::WriteBuffered", + }) + + # Run kill mode 0 and 1 by turn. + kill_mode = (kill_mode + 1) % 2 + elif check_mode == 1: + # normal run with universal compaction mode + additional_opts = { + "kill_random_test": None, + "ops_per_thread": cmd_params['ops_per_thread'], + "compaction_style": 1, + } + elif check_mode == 2: + # normal run with FIFO compaction mode + # ops_per_thread is divided by 5 because FIFO compaction + # style is quite a bit slower on reads with lot of files + additional_opts = { + "kill_random_test": None, + "ops_per_thread": cmd_params['ops_per_thread'] / 5, + "compaction_style": 2, + } + else: + # normal run + additional_opts = additional_opts = { + "kill_random_test": None, + "ops_per_thread": cmd_params['ops_per_thread'], + } + + cmd = gen_cmd(dict(cmd_params.items() + additional_opts.items())) + + print "Running:" + cmd + "\n" + + popen = subprocess.Popen([cmd], stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + shell=True) + stdoutdata, stderrdata = popen.communicate() + retncode = popen.returncode + msg = ("check_mode={0}, kill option={1}, exitcode={2}\n".format( + check_mode, additional_opts['kill_random_test'], retncode)) + print msg + print stdoutdata + + expected = False + if additional_opts['kill_random_test'] is None and (retncode == 0): + # we expect zero retncode if no kill option + expected = True + elif additional_opts['kill_random_test'] is not None and retncode < 0: + # we expect negative retncode if kill option was given + expected = True + + if not expected: + print "TEST FAILED. See kill option and exit code above!!!\n" + sys.exit(1) + + stdoutdata = stdoutdata.lower() + errorcount = (stdoutdata.count('error') - + stdoutdata.count('got errors 0 times')) + print "#times error occurred in output is " + str(errorcount) + "\n" + + if (errorcount > 0): + print "TEST FAILED. Output has 'error'!!!\n" + sys.exit(2) + if (stdoutdata.find('fail') >= 0): + print "TEST FAILED. Output has 'fail'!!!\n" + sys.exit(2) + + # First half of the duration, keep doing kill test. For the next half, + # try different modes. 
+ if time.time() > half_time: + # we need to clean up after ourselves -- only do this on test + # success + shutil.rmtree(dbname, True) + check_mode = (check_mode + 1) % total_check_mode + + time.sleep(1) # time to stabilize after a kill + + +def main(): + parser = argparse.ArgumentParser(description="This script runs and kills \ + db_stress multiple times") + parser.add_argument("test_type", choices=["blackbox", "whitebox"]) + parser.add_argument("--simple", action="store_true") + + all_params = dict(default_params.items() + + blackbox_default_params.items() + + whitebox_default_params.items() + + simple_default_params.items() + + blackbox_simple_default_params.items() + + whitebox_simple_default_params.items()) + + for k, v in all_params.items(): + parser.add_argument("--" + k, type=type(v() if callable(v) else v)) + args = parser.parse_args() + + if args.test_type == 'blackbox': + blackbox_crash_main(args) + if args.test_type == 'whitebox': + whitebox_crash_main(args) + +if __name__ == '__main__': + main() diff --git a/tools/db_crashtest2.py b/tools/db_crashtest2.py deleted file mode 100644 index a74053ef7..000000000 --- a/tools/db_crashtest2.py +++ /dev/null @@ -1,231 +0,0 @@ -#! /usr/bin/env python -import os -import re -import sys -import time -import random -import getopt -import logging -import tempfile -import subprocess -import shutil - -# This python script runs db_stress multiple times. Some runs with -# kill_random_test that causes rocksdb to crash at various points in code. - -def main(argv): - try: - opts, args = getopt.getopt(argv, "hsd:t:k:o:b:") - except getopt.GetoptError: - print str(getopt.GetoptError) - print "db_crashtest2.py -d -t <#threads> " \ - "-k -o "\ - "-b [-s (simple mode)]\n" - sys.exit(2) - - # default values, will be overridden by cmdline args - kill_random_test = 97 # kill with probability 1/97 by default - duration = 10000 # total time for this script to test db_stress - threads = 32 - ops_per_thread = 200000 - write_buf_size = 4 * 1024 * 1024 - simple_mode = False - write_buf_size_set = False - - for opt, arg in opts: - if opt == '-h': - print "db_crashtest2.py -d -t <#threads> " \ - "-k -o " \ - "-b [-s (simple mode)]\n" - sys.exit() - elif opt == '-s': - simple_mode = True - if not write_buf_size_set: - write_buf_size = 32 * 1024 * 1024 - elif opt == "-d": - duration = int(arg) - elif opt == "-t": - threads = int(arg) - elif opt == "-k": - kill_random_test = int(arg) - elif opt == "-o": - ops_per_thread = int(arg) - elif opt == "-b": - write_buf_size = int(arg) - write_buf_size_set = True - else: - print "unrecognized option " + str(opt) + "\n" - print "db_crashtest2.py -d -t <#threads> " \ - "-k -o " \ - "-b \n" - sys.exit(2) - - cur_time = time.time() - exit_time = cur_time + duration - half_time = cur_time + duration / 2 - - print "Running whitebox-crash-test with \ntotal-duration=" + str(duration) \ - + "\nthreads=" + str(threads) + "\nops_per_thread=" \ - + str(ops_per_thread) + "\nwrite_buffer_size=" \ - + str(write_buf_size) + "\n" - - total_check_mode = 4 - check_mode = 0 - - test_tmpdir = os.environ.get("TEST_TMPDIR") - if test_tmpdir is None or test_tmpdir == "": - dbname = tempfile.mkdtemp(prefix='rocksdb_crashtest2_') - else: - dbname = test_tmpdir + "/rocksdb_crashtest2" - shutil.rmtree(dbname, True) - - while time.time() < exit_time: - killoption = "" - if check_mode == 0: - # run with kill_random_test - killoption = " --kill_random_test=" + str(kill_random_test) - # use large ops per thread since we will kill it anyway - 
additional_opts = "--ops_per_thread=" + \ - str(100 * ops_per_thread) + killoption - elif check_mode == 1: - # normal run with universal compaction mode - additional_opts = "--ops_per_thread=" + str(ops_per_thread) + \ - " --compaction_style=1" - elif check_mode == 2: - # normal run with FIFO compaction mode - # ops_per_thread is divided by 5 because FIFO compaction - # style is quite a bit slower on reads with lot of files - additional_opts = "--ops_per_thread=" + str(ops_per_thread / 5) + \ - " --compaction_style=2" - else: - # normal run - additional_opts = "--ops_per_thread=" + str(ops_per_thread) - - if simple_mode: - cmd = re.sub('\s+', ' ', """ - ./db_stress - --column_families=1 - --threads=%s - --write_buffer_size=%s - --destroy_db_initially=0 - --reopen=20 - --prefixpercent=0 - --readpercent=50 - --writepercent=35 - --delpercent=5 - --iterpercent=10 - --db=%s - --max_key=100000000 - --mmap_read=%s - --block_size=16384 - --cache_size=1048576 - --open_files=500000 - --verify_checksum=1 - --sync=0 - --progress_reports=0 - --disable_wal=0 - --disable_data_sync=1 - --target_file_size_base=16777216 - --target_file_size_multiplier=1 - --max_write_buffer_number=3 - --max_background_compactions=1 - --max_bytes_for_level_base=67108864 - --filter_deletes=%s - --memtablerep=skip_list - --prefix_size=0 - %s - """ % (threads, - write_buf_size, - dbname, - random.randint(0, 1), - random.randint(0, 1), - additional_opts)) - else: - cmd = re.sub('\s+', ' ', """ - ./db_stress - --test_batches_snapshots=%s - --threads=%s - --write_buffer_size=%s - --destroy_db_initially=0 - --reopen=20 - --readpercent=45 - --prefixpercent=5 - --writepercent=35 - --delpercent=5 - --iterpercent=10 - --db=%s - --max_key=100000000 - --mmap_read=%s - --block_size=16384 - --cache_size=1048576 - --open_files=500000 - --verify_checksum=1 - --sync=0 - --progress_reports=0 - --disable_wal=0 - --disable_data_sync=1 - --target_file_size_base=2097152 - --target_file_size_multiplier=2 - --max_write_buffer_number=3 - --max_background_compactions=20 - --max_bytes_for_level_base=10485760 - --filter_deletes=%s - --memtablerep=prefix_hash - --prefix_size=7 - %s - """ % (random.randint(0, 1), - threads, - write_buf_size, - dbname, - random.randint(0, 1), - random.randint(0, 1), - additional_opts)) - - print "Running:" + cmd + "\n" - - popen = subprocess.Popen([cmd], stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - shell=True) - stdoutdata, stderrdata = popen.communicate() - retncode = popen.returncode - msg = ("check_mode={0}, kill option={1}, exitcode={2}\n".format( - check_mode, killoption, retncode)) - print msg - print stdoutdata - - expected = False - if (killoption == '') and (retncode == 0): - # we expect zero retncode if no kill option - expected = True - elif killoption != '' and retncode < 0: - # we expect negative retncode if kill option was given - expected = True - - if not expected: - print "TEST FAILED. See kill option and exit code above!!!\n" - sys.exit(1) - - stdoutdata = stdoutdata.lower() - errorcount = (stdoutdata.count('error') - - stdoutdata.count('got errors 0 times')) - print "#times error occurred in output is " + str(errorcount) + "\n" - - if (errorcount > 0): - print "TEST FAILED. Output has 'error'!!!\n" - sys.exit(2) - if (stdoutdata.find('fail') >= 0): - print "TEST FAILED. Output has 'fail'!!!\n" - sys.exit(2) - - # First half of the duration, keep doing kill test. For the next half, - # try different modes. 
- if time.time() > half_time: - # we need to clean up after ourselves -- only do this on test - # success - shutil.rmtree(dbname, True) - check_mode = (check_mode + 1) % total_check_mode - - time.sleep(1) # time to stabilize after a kill - -if __name__ == "__main__": - sys.exit(main(sys.argv[1:])) diff --git a/tools/db_stress.cc b/tools/db_stress.cc index 634045dfb..1b1672ca2 100644 --- a/tools/db_stress.cc +++ b/tools/db_stress.cc @@ -277,6 +277,11 @@ static const bool FLAGS_kill_random_test_dummy __attribute__((unused)) = RegisterFlagValidator(&FLAGS_kill_random_test, &ValidateInt32Positive); extern int rocksdb_kill_odds; +DEFINE_string(kill_prefix_blacklist, "", + "If non-empty, kill points with prefix in the list given will be" + " skipped. Items are comma-separated."); +extern std::vector rocksdb_kill_prefix_blacklist; + DEFINE_bool(disable_wal, false, "If true, do not write WAL for write."); DEFINE_int32(target_file_size_base, 64 * KB, @@ -356,6 +361,21 @@ enum rocksdb::CompressionType StringToCompressionType(const char* ctype) { fprintf(stdout, "Cannot parse compression type '%s'\n", ctype); return rocksdb::kSnappyCompression; //default value } + +std::vector SplitString(std::string src) { + std::vector ret; + if (src.empty()) { + return ret; + } + size_t pos = 0; + size_t pos_comma; + while ((pos_comma = src.find(',', pos)) != std::string::npos) { + ret.push_back(src.substr(pos, pos_comma - pos)); + pos = pos_comma + 1; + } + ret.push_back(src.substr(pos, src.length())); + return ret; +} } // namespace DEFINE_string(compression_type, "snappy", @@ -1921,6 +1941,14 @@ class StressTest { fprintf(stdout, "Memtablerep : %s\n", memtablerep); + fprintf(stdout, "Test kill odd : %d\n", rocksdb_kill_odds); + if (!rocksdb_kill_prefix_blacklist.empty()) { + fprintf(stdout, "Skipping kill points prefixes:\n"); + for (auto& p : rocksdb_kill_prefix_blacklist) { + fprintf(stdout, " %s\n", p.c_str()); + } + } + fprintf(stdout, "------------------------------------------------\n"); } @@ -1952,7 +1980,6 @@ class StressTest { options_.disableDataSync = FLAGS_disable_data_sync; options_.use_fsync = FLAGS_use_fsync; options_.allow_mmap_reads = FLAGS_mmap_read; - rocksdb_kill_odds = FLAGS_kill_random_test; options_.target_file_size_base = FLAGS_target_file_size_base; options_.target_file_size_multiplier = FLAGS_target_file_size_multiplier; options_.max_bytes_for_level_base = FLAGS_max_bytes_for_level_base; @@ -2186,6 +2213,9 @@ int main(int argc, char** argv) { FLAGS_db = default_db_path; } + rocksdb_kill_odds = FLAGS_kill_random_test; + rocksdb_kill_prefix_blacklist = SplitString(FLAGS_kill_prefix_blacklist); + rocksdb::StressTest stress; if (stress.Run()) { return 0; diff --git a/util/ldb_cmd.cc b/tools/ldb_cmd.cc similarity index 99% rename from util/ldb_cmd.cc rename to tools/ldb_cmd.cc index a441d7167..112014351 100644 --- a/util/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -4,7 +4,7 @@ // of patent rights can be found in the PATENTS file in the same directory. 
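For context, a small standalone sketch (not part of the patch) of how the new --kill_prefix_blacklist flag is meant to behave: the comma-separated value is split into entries, and a named kill point such as "WritableFileWriter::Sync:0" is skipped whenever one of the entries is a prefix of its name. SplitOnComma and IsBlacklisted below are illustrative helpers that mirror the SplitString() and TestKillRandom() logic in this diff.

#include <cstdio>
#include <string>
#include <vector>

// Same splitting behaviour as the SplitString() added to db_stress.cc.
static std::vector<std::string> SplitOnComma(const std::string& src) {
  std::vector<std::string> ret;
  if (src.empty()) {
    return ret;
  }
  size_t pos = 0;
  size_t pos_comma;
  while ((pos_comma = src.find(',', pos)) != std::string::npos) {
    ret.push_back(src.substr(pos, pos_comma - pos));
    pos = pos_comma + 1;
  }
  ret.push_back(src.substr(pos));
  return ret;
}

// Mirrors the prefix check TestKillRandom() performs against
// rocksdb_kill_prefix_blacklist before rolling the dice.
static bool IsBlacklisted(const std::string& kill_point,
                          const std::vector<std::string>& blacklist) {
  for (const auto& p : blacklist) {
    if (kill_point.substr(0, p.length()) == p) {
      return true;
    }
  }
  return false;
}

int main() {
  const auto blacklist =
      SplitOnComma("WritableFileWriter::,PosixMmapFile::Allocate");
  std::printf("%d\n", IsBlacklisted("WritableFileWriter::Sync:0", blacklist));  // 1
  std::printf("%d\n", IsBlacklisted("PosixMmapFile::Append:1", blacklist));     // 0
  return 0;
}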
// #ifndef ROCKSDB_LITE -#include "util/ldb_cmd.h" +#include "tools/ldb_cmd.h" #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS @@ -21,11 +21,11 @@ #include "rocksdb/write_batch.h" #include "rocksdb/cache.h" #include "rocksdb/table_properties.h" +#include "table/scoped_arena_iterator.h" #include "port/dirent.h" +#include "tools/sst_dump_tool_imp.h" #include "util/coding.h" -#include "util/sst_dump_tool_imp.h" #include "util/string_util.h" -#include "util/scoped_arena_iterator.h" #include "utilities/ttl/db_ttl_impl.h" #include @@ -809,7 +809,7 @@ void InternalDumpCommand::DoCommand() { uint64_t s1=0,s2=0; // Setup internal key iterator Arena arena; - ScopedArenaIterator iter(idb->TEST_NewInternalIterator(&arena)); + ScopedArenaIterator iter(idb->NewInternalIterator(&arena)); Status st = iter->status(); if (!st.ok()) { exec_state_ = diff --git a/util/ldb_cmd.h b/tools/ldb_cmd.h similarity index 99% rename from util/ldb_cmd.h rename to tools/ldb_cmd.h index d48fcf667..71d5f647b 100644 --- a/util/ldb_cmd.h +++ b/tools/ldb_cmd.h @@ -23,8 +23,8 @@ #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/utilities/db_ttl.h" +#include "tools/ldb_cmd_execute_result.h" #include "util/logging.h" -#include "util/ldb_cmd_execute_result.h" #include "util/string_util.h" #include "utilities/ttl/db_ttl_impl.h" diff --git a/util/ldb_cmd_execute_result.h b/tools/ldb_cmd_execute_result.h similarity index 100% rename from util/ldb_cmd_execute_result.h rename to tools/ldb_cmd_execute_result.h diff --git a/util/ldb_cmd_test.cc b/tools/ldb_cmd_test.cc similarity index 85% rename from util/ldb_cmd_test.cc rename to tools/ldb_cmd_test.cc index c918cf565..edb6a2106 100644 --- a/util/ldb_cmd_test.cc +++ b/tools/ldb_cmd_test.cc @@ -3,7 +3,9 @@ // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
// -#include "util/ldb_cmd.h" +#ifndef ROCKSDB_LITE + +#include "tools/ldb_cmd.h" #include "util/testharness.h" class LdbCmdTest : public testing::Test {}; @@ -42,3 +44,12 @@ int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } +#else +#include + +int main(int argc, char** argv) { + fprintf(stderr, "SKIPPED as LDBCommand is not supported in ROCKSDB_LITE\n"); + return 0; +} + +#endif // ROCKSDB_LITE diff --git a/util/ldb_tool.cc b/tools/ldb_tool.cc similarity index 99% rename from util/ldb_tool.cc rename to tools/ldb_tool.cc index fe84fa933..366202c2d 100644 --- a/util/ldb_tool.cc +++ b/tools/ldb_tool.cc @@ -5,7 +5,7 @@ // #ifndef ROCKSDB_LITE #include "rocksdb/ldb_tool.h" -#include "util/ldb_cmd.h" +#include "tools/ldb_cmd.h" namespace rocksdb { diff --git a/tools/reduce_levels_test.cc b/tools/reduce_levels_test.cc index f3091edbb..863d8607e 100644 --- a/tools/reduce_levels_test.cc +++ b/tools/reduce_levels_test.cc @@ -9,10 +9,10 @@ #include "rocksdb/db.h" #include "db/db_impl.h" #include "db/version_set.h" +#include "tools/ldb_cmd.h" #include "util/logging.h" #include "util/testutil.h" #include "util/testharness.h" -#include "util/ldb_cmd.h" namespace rocksdb { diff --git a/util/sst_dump_test.cc b/tools/sst_dump_test.cc similarity index 100% rename from util/sst_dump_test.cc rename to tools/sst_dump_test.cc diff --git a/util/sst_dump_tool.cc b/tools/sst_dump_tool.cc similarity index 98% rename from util/sst_dump_tool.cc rename to tools/sst_dump_tool.cc index de7f6f13c..ae22da36d 100644 --- a/util/sst_dump_tool.cc +++ b/tools/sst_dump_tool.cc @@ -5,7 +5,7 @@ // #ifndef ROCKSDB_LITE -#include "util/sst_dump_tool_imp.h" +#include "tools/sst_dump_tool_imp.h" #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS @@ -127,7 +127,7 @@ uint64_t SstFileReader::CalculateCompressedTableSize( tb_options, TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, dest_writer.get())); - unique_ptr iter(table_reader_->NewIterator(ReadOptions())); + unique_ptr iter(table_reader_->NewIterator(ReadOptions())); for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { if (!iter->status().ok()) { fputs(iter->status().ToString().c_str(), stderr); @@ -261,8 +261,8 @@ Status SstFileReader::ReadSequential(bool print_kv, return init_result_; } - Iterator* iter = table_reader_->NewIterator(ReadOptions(verify_checksum_, - false)); + InternalIterator* iter = + table_reader_->NewIterator(ReadOptions(verify_checksum_, false)); uint64_t i = 0; if (has_from) { InternalKey ikey; diff --git a/util/sst_dump_tool_imp.h b/tools/sst_dump_tool_imp.h similarity index 99% rename from util/sst_dump_tool_imp.h rename to tools/sst_dump_tool_imp.h index b7d9e4dc2..dd65d3b10 100644 --- a/util/sst_dump_tool_imp.h +++ b/tools/sst_dump_tool_imp.h @@ -2,8 +2,8 @@ // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
-#ifndef ROCKSDB_LITE #pragma once +#ifndef ROCKSDB_LITE #include "rocksdb/sst_dump_tool.h" @@ -29,8 +29,8 @@ #include "table/format.h" #include "table/meta_blocks.h" #include "table/plain_table_factory.h" +#include "tools/ldb_cmd.h" #include "util/file_reader_writer.h" -#include "util/ldb_cmd.h" #include "util/random.h" #include "util/testharness.h" #include "util/testutil.h" diff --git a/util/env_posix.cc b/util/env_posix.cc index 5c031a74d..7d241ca63 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -317,7 +317,7 @@ class PosixMmapReadableFile: public RandomAccessFile { *result = Slice(); return IOError(filename_, EINVAL); } else if (offset + n > length_) { - n = length_ - offset; + n = static_cast(length_ - offset); } *result = Slice(reinterpret_cast(mmapped_region_) + offset, n); return s; @@ -368,7 +368,7 @@ class PosixMmapFile : public WritableFile { } Status UnmapCurrentRegion() { - TEST_KILL_RANDOM(rocksdb_kill_odds); + TEST_KILL_RANDOM("PosixMmapFile::UnmapCurrentRegion:0", rocksdb_kill_odds); if (base_ != nullptr) { int munmap_status = munmap(base_, limit_ - base_); if (munmap_status != 0) { @@ -392,7 +392,7 @@ class PosixMmapFile : public WritableFile { #ifdef ROCKSDB_FALLOCATE_PRESENT assert(base_ == nullptr); - TEST_KILL_RANDOM(rocksdb_kill_odds); + TEST_KILL_RANDOM("PosixMmapFile::UnmapCurrentRegion:0", rocksdb_kill_odds); // we can't fallocate with FALLOC_FL_KEEP_SIZE here if (allow_fallocate_) { IOSTATS_TIMER_GUARD(allocate_nanos); @@ -407,13 +407,13 @@ class PosixMmapFile : public WritableFile { } } - TEST_KILL_RANDOM(rocksdb_kill_odds); + TEST_KILL_RANDOM("PosixMmapFile::Append:1", rocksdb_kill_odds); void* ptr = mmap(nullptr, map_size_, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, file_offset_); if (ptr == MAP_FAILED) { return Status::IOError("MMap failed on " + filename_); } - TEST_KILL_RANDOM(rocksdb_kill_odds); + TEST_KILL_RANDOM("PosixMmapFile::Append:2", rocksdb_kill_odds); base_ = reinterpret_cast(ptr); limit_ = base_ + map_size_; @@ -434,7 +434,7 @@ class PosixMmapFile : public WritableFile { size_t p1 = TruncateToPageBoundary(last_sync_ - base_); size_t p2 = TruncateToPageBoundary(dst_ - base_ - 1); last_sync_ = dst_; - TEST_KILL_RANDOM(rocksdb_kill_odds); + TEST_KILL_RANDOM("PosixMmapFile::Msync:0", rocksdb_kill_odds); if (msync(base_ + p1, p2 - p1 + page_size_, MS_SYNC) < 0) { return IOError(filename_, errno); } @@ -484,7 +484,7 @@ class PosixMmapFile : public WritableFile { if (!s.ok()) { return s; } - TEST_KILL_RANDOM(rocksdb_kill_odds); + TEST_KILL_RANDOM("PosixMmapFile::Append:0", rocksdb_kill_odds); } size_t n = (left <= avail) ? 
left : avail; @@ -576,7 +576,7 @@ class PosixMmapFile : public WritableFile { #ifdef ROCKSDB_FALLOCATE_PRESENT virtual Status Allocate(off_t offset, off_t len) override { - TEST_KILL_RANDOM(rocksdb_kill_odds); + TEST_KILL_RANDOM("PosixMmapFile::Allocate:0", rocksdb_kill_odds); int alloc_status = 0; if (allow_fallocate_) { alloc_status = @@ -722,7 +722,7 @@ class PosixWritableFile : public WritableFile { #ifdef ROCKSDB_FALLOCATE_PRESENT virtual Status Allocate(off_t offset, off_t len) override { - TEST_KILL_RANDOM(rocksdb_kill_odds); + TEST_KILL_RANDOM("PosixWritableFile::Allocate:0", rocksdb_kill_odds); IOSTATS_TIMER_GUARD(allocate_nanos); int alloc_status = 0; if (allow_fallocate_) { diff --git a/util/file_reader_writer.cc b/util/file_reader_writer.cc index 86d70b62d..ff459262c 100644 --- a/util/file_reader_writer.cc +++ b/util/file_reader_writer.cc @@ -57,7 +57,8 @@ Status WritableFileWriter::Append(const Slice& data) { pending_sync_ = true; pending_fsync_ = true; - TEST_KILL_RANDOM(rocksdb_kill_odds * REDUCE_ODDS2); + TEST_KILL_RANDOM("WritableFileWriter::Append:0", + rocksdb_kill_odds * REDUCE_ODDS2); { IOSTATS_TIMER_GUARD(prepare_write_nanos); @@ -114,7 +115,7 @@ Status WritableFileWriter::Append(const Slice& data) { s = WriteBuffered(src, left); } - TEST_KILL_RANDOM(rocksdb_kill_odds); + TEST_KILL_RANDOM("WritableFileWriter::Append:1", rocksdb_kill_odds); filesize_ += data.size(); return Status::OK(); } @@ -141,13 +142,14 @@ Status WritableFileWriter::Close() { s = interim; } - TEST_KILL_RANDOM(rocksdb_kill_odds); + TEST_KILL_RANDOM("WritableFileWriter::Close:0", rocksdb_kill_odds); interim = writable_file_->Close(); if (!interim.ok() && s.ok()) { s = interim; } writable_file_.reset(); + TEST_KILL_RANDOM("WritableFileWriter::Close:1", rocksdb_kill_odds); return s; } @@ -156,7 +158,8 @@ Status WritableFileWriter::Close() { // write out the cached data to the OS cache Status WritableFileWriter::Flush() { Status s; - TEST_KILL_RANDOM(rocksdb_kill_odds * REDUCE_ODDS2); + TEST_KILL_RANDOM("WritableFileWriter::Flush:0", + rocksdb_kill_odds * REDUCE_ODDS2); if (buf_.CurrentSize() > 0) { if (use_os_buffer_) { @@ -209,14 +212,14 @@ Status WritableFileWriter::Sync(bool use_fsync) { if (!s.ok()) { return s; } - TEST_KILL_RANDOM(rocksdb_kill_odds); + TEST_KILL_RANDOM("WritableFileWriter::Sync:0", rocksdb_kill_odds); if (!direct_io_ && pending_sync_) { s = SyncInternal(use_fsync); if (!s.ok()) { return s; } } - TEST_KILL_RANDOM(rocksdb_kill_odds); + TEST_KILL_RANDOM("WritableFileWriter::Sync:1", rocksdb_kill_odds); pending_sync_ = false; if (use_fsync) { pending_fsync_ = false; @@ -294,7 +297,7 @@ Status WritableFileWriter::WriteBuffered(const char* data, size_t size) { } IOSTATS_ADD(bytes_written, allowed); - TEST_KILL_RANDOM(rocksdb_kill_odds); + TEST_KILL_RANDOM("WritableFileWriter::WriteBuffered:0", rocksdb_kill_odds); left -= allowed; src += allowed; @@ -409,8 +412,7 @@ class ReadaheadRandomAccessFile : public RandomAccessFile { // if offset between [buffer_offset_, buffer_offset_ + buffer_len> if (offset >= buffer_offset_ && offset < buffer_len_ + buffer_offset_) { uint64_t offset_in_buffer = offset - buffer_offset_; - copied = std::min(static_cast(buffer_len_) - offset_in_buffer, - static_cast(n)); + copied = std::min(buffer_len_ - static_cast(offset_in_buffer), n); memcpy(scratch, buffer_.get() + offset_in_buffer, copied); if (copied == n) { // fully cached @@ -468,4 +470,12 @@ std::unique_ptr NewReadaheadRandomAccessFile( return result; } +Status NewWritableFile(Env* env, const 
std::string& fname, + unique_ptr* result, + const EnvOptions& options) { + Status s = env->NewWritableFile(fname, result, options); + TEST_KILL_RANDOM("NewWritableFile:0", rocksdb_kill_odds * REDUCE_ODDS2); + return s; +} + } // namespace rocksdb diff --git a/util/file_reader_writer.h b/util/file_reader_writer.h index 4134a0ea2..9a076af56 100644 --- a/util/file_reader_writer.h +++ b/util/file_reader_writer.h @@ -7,6 +7,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once +#include #include "rocksdb/env.h" #include "util/aligned_buffer.h" #include "port/port.h" @@ -36,8 +37,8 @@ class SequentialFileReader { return *this; } - SequentialFileReader(SequentialFileReader&) = delete; - SequentialFileReader& operator=(SequentialFileReader&) = delete; + SequentialFileReader(const SequentialFileReader&) = delete; + SequentialFileReader& operator=(const SequentialFileReader&) = delete; Status Read(size_t n, Slice* result, char* scratch); @@ -163,4 +164,8 @@ class WritableFileWriter { size_t RequestToken(size_t bytes, bool align); Status SyncInternal(bool use_fsync); }; + +extern Status NewWritableFile(Env* env, const std::string& fname, + unique_ptr* result, + const EnvOptions& options); } // namespace rocksdb diff --git a/util/file_util.cc b/util/file_util.cc index 1bcf3ed48..d4f7b4004 100644 --- a/util/file_util.cc +++ b/util/file_util.cc @@ -49,8 +49,7 @@ Status CopyFile(Env* env, const std::string& source, char buffer[4096]; Slice slice; while (size > 0) { - uint64_t bytes_to_read = - std::min(static_cast(sizeof(buffer)), size); + size_t bytes_to_read = std::min(sizeof(buffer), static_cast(size)); if (s.ok()) { s = src_reader->Read(bytes_to_read, &slice, buffer); } diff --git a/util/histogram.cc b/util/histogram.cc index 5a875e54d..4165121f0 100644 --- a/util/histogram.cc +++ b/util/histogram.cc @@ -82,7 +82,7 @@ void HistogramImpl::Clear() { memset(buckets_, 0, sizeof buckets_); } -bool HistogramImpl::Empty() { return sum_squares_ == 0; } +bool HistogramImpl::Empty() { return num_ == 0; } void HistogramImpl::Add(uint64_t value) { const size_t index = bucketMapper.IndexForValue(value); diff --git a/util/instrumented_mutex.cc b/util/instrumented_mutex.cc index 2e240cc82..bfb989a1d 100644 --- a/util/instrumented_mutex.cc +++ b/util/instrumented_mutex.cc @@ -3,13 +3,14 @@ // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
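Aside on the HistogramImpl::Empty() change above: sum_squares_ stays at zero when every recorded sample has value 0, so the old check could report a non-empty histogram as empty; judging emptiness by the sample count avoids that. A minimal standalone illustration follows (TinyHistogram is a stand-in, not the real class).

#include <cassert>
#include <cstdint>

struct TinyHistogram {
  uint64_t num_ = 0;
  double sum_squares_ = 0.0;
  void Add(uint64_t value) {
    num_ += 1;
    sum_squares_ += static_cast<double>(value) * static_cast<double>(value);
  }
  bool EmptyOld() const { return sum_squares_ == 0; }  // pre-change behaviour
  bool EmptyNew() const { return num_ == 0; }          // post-change behaviour
};

int main() {
  TinyHistogram h;
  h.Add(0);               // one sample whose value happens to be 0
  assert(h.EmptyOld());   // wrongly reported as empty
  assert(!h.EmptyNew());  // correctly reported as non-empty
  return 0;
}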
-#include "util/perf_context_imp.h" #include "util/instrumented_mutex.h" +#include "util/perf_context_imp.h" #include "util/thread_status_util.h" namespace rocksdb { void InstrumentedMutex::Lock() { - PERF_TIMER_GUARD(db_mutex_lock_nanos); + PERF_CONDITIONAL_TIMER_GUARD(db_mutex_lock_nanos, + stats_code_ == DB_MUTEX_WAIT_MICROS); uint64_t wait_time_micros = 0; if (env_ != nullptr && stats_ != nullptr) { { @@ -30,7 +31,8 @@ void InstrumentedMutex::LockInternal() { } void InstrumentedCondVar::Wait() { - PERF_TIMER_GUARD(db_condition_wait_nanos); + PERF_CONDITIONAL_TIMER_GUARD(db_condition_wait_nanos, + stats_code_ == DB_MUTEX_WAIT_MICROS); uint64_t wait_time_micros = 0; if (env_ != nullptr && stats_ != nullptr) { { @@ -51,7 +53,8 @@ void InstrumentedCondVar::WaitInternal() { } bool InstrumentedCondVar::TimedWait(uint64_t abs_time_us) { - PERF_TIMER_GUARD(db_condition_wait_nanos); + PERF_CONDITIONAL_TIMER_GUARD(db_condition_wait_nanos, + stats_code_ == DB_MUTEX_WAIT_MICROS); uint64_t wait_time_micros = 0; bool result = false; if (env_ != nullptr && stats_ != nullptr) { diff --git a/util/kv_map.h b/util/kv_map.h new file mode 100644 index 000000000..486db1918 --- /dev/null +++ b/util/kv_map.h @@ -0,0 +1,31 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +#pragma once + +#include +#include + +#include "rocksdb/comparator.h" +#include "rocksdb/slice.h" +#include "util/coding.h" +#include "util/murmurhash.h" + +namespace rocksdb { +namespace stl_wrappers { + +struct LessOfComparator { + explicit LessOfComparator(const Comparator* c = BytewiseComparator()) + : cmp(c) {} + + bool operator()(const std::string& a, const std::string& b) const { + return cmp->Compare(Slice(a), Slice(b)) < 0; + } + + const Comparator* cmp; +}; + +typedef std::map KVMap; +} +} diff --git a/util/perf_context_imp.h b/util/perf_context_imp.h index cde7ee33d..a5c4c39d9 100644 --- a/util/perf_context_imp.h +++ b/util/perf_context_imp.h @@ -13,6 +13,7 @@ namespace rocksdb { #if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE) #define PERF_TIMER_GUARD(metric) +#define PERF_CONDITIONAL_TIMER_GUARD(metric, condition) #define PERF_TIMER_MEASURE(metric) #define PERF_TIMER_STOP(metric) #define PERF_TIMER_START(metric) @@ -32,6 +33,12 @@ namespace rocksdb { PerfStepTimer perf_step_timer_ ## metric(&(perf_context.metric)); \ perf_step_timer_ ## metric.Start(); +#define PERF_CONDITIONAL_TIMER_GUARD(metric, condition) \ + PerfStepTimer perf_step_timer_##metric(&(perf_context.metric)); \ + if ((condition)) { \ + perf_step_timer_##metric.Start(); \ + } + // Update metric with time elapsed since last START. start time is reset // to current timestamp. 
#define PERF_TIMER_MEASURE(metric) \ diff --git a/util/sync_point.cc b/util/sync_point.cc index 7051b5103..53930e2e3 100644 --- a/util/sync_point.cc +++ b/util/sync_point.cc @@ -8,11 +8,19 @@ #include "util/random.h" int rocksdb_kill_odds = 0; +std::vector rocksdb_kill_prefix_blacklist; #ifndef NDEBUG namespace rocksdb { -void TestKillRandom(int odds, const std::string& srcfile, int srcline) { +void TestKillRandom(std::string kill_point, int odds, + const std::string& srcfile, int srcline) { + for (auto& p : rocksdb_kill_prefix_blacklist) { + if (kill_point.substr(0, p.length()) == p) { + return; + } + } + time_t curtime = time(nullptr); Random r((uint32_t)curtime); diff --git a/util/sync_point.h b/util/sync_point.h index 6a4629cb3..910d4a95f 100644 --- a/util/sync_point.h +++ b/util/sync_point.h @@ -15,26 +15,29 @@ // This is only set from db_stress.cc and for testing only. // If non-zero, kill at various points in source code with probability 1/this extern int rocksdb_kill_odds; +// If kill point has a prefix on this list, will skip killing. +extern std::vector rocksdb_kill_prefix_blacklist; #ifdef NDEBUG // empty in release build -#define TEST_KILL_RANDOM(rocksdb_kill_odds) +#define TEST_KILL_RANDOM(kill_point, rocksdb_kill_odds) #else namespace rocksdb { // Kill the process with probablity 1/odds for testing. -extern void TestKillRandom(int odds, const std::string& srcfile, int srcline); +extern void TestKillRandom(std::string kill_point, int odds, + const std::string& srcfile, int srcline); // To avoid crashing always at some frequently executed codepaths (during // kill random test), use this factor to reduce odds #define REDUCE_ODDS 2 #define REDUCE_ODDS2 4 -#define TEST_KILL_RANDOM(rocksdb_kill_odds) \ - { \ - if (rocksdb_kill_odds > 0) { \ - TestKillRandom(rocksdb_kill_odds, __FILE__, __LINE__); \ - } \ +#define TEST_KILL_RANDOM(kill_point, rocksdb_kill_odds) \ + { \ + if (rocksdb_kill_odds > 0) { \ + TestKillRandom(kill_point, rocksdb_kill_odds, __FILE__, __LINE__); \ + } \ } } // namespace rocksdb #endif diff --git a/util/testutil.cc b/util/testutil.cc index 5f7422172..b995a2e53 100644 --- a/util/testutil.cc +++ b/util/testutil.cc @@ -33,7 +33,7 @@ extern std::string RandomHumanReadableString(Random* rnd, int len) { return ret; } -std::string RandomKey(Random* rnd, int len) { +std::string RandomKey(Random* rnd, int len, RandomKeyType type) { // Make sure to generate a wide variety of characters so we // test the boundary conditions for short-key optimizations. 
static const char kTestChars[] = { @@ -41,7 +41,22 @@ std::string RandomKey(Random* rnd, int len) { }; std::string result; for (int i = 0; i < len; i++) { - result += kTestChars[rnd->Uniform(sizeof(kTestChars))]; + std::size_t indx = 0; + switch (type) { + case RandomKeyType::RANDOM: + indx = rnd->Uniform(sizeof(kTestChars)); + break; + case RandomKeyType::LARGEST: + indx = sizeof(kTestChars) - 1; + break; + case RandomKeyType::MIDDLE: + indx = sizeof(kTestChars) / 2; + break; + case RandomKeyType::SMALLEST: + indx = 0; + break; + } + result += kTestChars[indx]; } return result; } diff --git a/util/testutil.h b/util/testutil.h index 29806285e..0373532a8 100644 --- a/util/testutil.h +++ b/util/testutil.h @@ -17,6 +17,7 @@ #include "rocksdb/env.h" #include "rocksdb/iterator.h" #include "rocksdb/slice.h" +#include "table/internal_iterator.h" #include "util/mutexlock.h" #include "util/random.h" @@ -34,7 +35,9 @@ extern std::string RandomHumanReadableString(Random* rnd, int len); // Return a random key with the specified length that may contain interesting // characters (e.g. \x00, \xff, etc.). -extern std::string RandomKey(Random* rnd, int len); +enum RandomKeyType : char { RANDOM, LARGEST, SMALLEST, MIDDLE }; +extern std::string RandomKey(Random* rnd, int len, + RandomKeyType type = RandomKeyType::RANDOM); // Store in *dst a string of length "len" that will compress to // "N*compressed_fraction" bytes and return a Slice that references @@ -127,7 +130,7 @@ class SimpleSuffixReverseComparator : public Comparator { extern const Comparator* Uint64Comparator(); // Iterator over a vector of keys/values -class VectorIterator : public Iterator { +class VectorIterator : public InternalIterator { public: explicit VectorIterator(const std::vector& keys) : keys_(keys), current_(keys.size()) { @@ -187,7 +190,7 @@ class StringSink: public WritableFile { const std::string& contents() const { return contents_; } virtual Status Truncate(uint64_t size) override { - contents_.resize(size); + contents_.resize(static_cast(size)); return Status::OK(); } virtual Status Close() override { return Status::OK(); } @@ -240,13 +243,13 @@ class StringSource: public RandomAccessFile { return Status::InvalidArgument("invalid Read offset"); } if (offset + n > contents_.size()) { - n = contents_.size() - offset; + n = contents_.size() - static_cast(offset); } if (!mmap_) { - memcpy(scratch, &contents_[offset], n); + memcpy(scratch, &contents_[static_cast(offset)], n); *result = Slice(scratch, n); } else { - *result = Slice(&contents_[offset], n); + *result = Slice(&contents_[static_cast(offset)], n); } return Status::OK(); } diff --git a/util/vectorrep.cc b/util/vectorrep.cc index 017f89f7c..324439a1d 100644 --- a/util/vectorrep.cc +++ b/util/vectorrep.cc @@ -14,9 +14,9 @@ #include "util/arena.h" #include "db/memtable.h" +#include "memtable/stl_wrappers.h" #include "port/port.h" #include "util/mutexlock.h" -#include "util/stl_wrappers.h" namespace rocksdb { namespace { diff --git a/utilities/transactions/transaction_base.h b/utilities/transactions/transaction_base.h index e609e563b..47294b944 100644 --- a/utilities/transactions/transaction_base.h +++ b/utilities/transactions/transaction_base.h @@ -171,6 +171,11 @@ class TransactionBaseImpl : public Transaction { void SetSnapshot() override; void SetSnapshotOnNextOperation() override; + void ClearSnapshot() override { + snapshot_.reset(); + snapshot_needed_ = false; + } + void DisableIndexing() override { indexing_enabled_ = false; } void EnableIndexing() override { 
indexing_enabled_ = true; } diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index 3f792a99d..2d8fb2044 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -2161,6 +2161,53 @@ TEST_F(TransactionTest, DeferSnapshotSavePointTest) { delete txn1; } +TEST_F(TransactionTest, ClearSnapshotTest) { + WriteOptions write_options; + ReadOptions read_options, snapshot_read_options; + string value; + Status s; + + s = db->Put(write_options, "foo", "0"); + ASSERT_OK(s); + + Transaction* txn = db->BeginTransaction(write_options); + ASSERT_TRUE(txn); + + s = db->Put(write_options, "foo", "1"); + ASSERT_OK(s); + + snapshot_read_options.snapshot = txn->GetSnapshot(); + ASSERT_FALSE(snapshot_read_options.snapshot); + + // No snapshot created yet + s = txn->Get(snapshot_read_options, "foo", &value); + ASSERT_EQ(value, "1"); + + txn->SetSnapshot(); + snapshot_read_options.snapshot = txn->GetSnapshot(); + ASSERT_TRUE(snapshot_read_options.snapshot); + + s = db->Put(write_options, "foo", "2"); + ASSERT_OK(s); + + // Snapshot was created before change to '2' + s = txn->Get(snapshot_read_options, "foo", &value); + ASSERT_EQ(value, "1"); + + txn->ClearSnapshot(); + snapshot_read_options.snapshot = txn->GetSnapshot(); + ASSERT_FALSE(snapshot_read_options.snapshot); + + // Snapshot has now been cleared + s = txn->Get(snapshot_read_options, "foo", &value); + ASSERT_EQ(value, "2"); + + s = txn->Commit(); + ASSERT_OK(s); + + delete txn; +} + } // namespace rocksdb int main(int argc, char** argv) {
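// Note (not part of the patch): ClearSnapshotTest above exercises the new
// Transaction::ClearSnapshot() added to transaction_base.h. After the call,
// GetSnapshot() returns nullptr again and reads through the transaction see
// the latest committed data, roughly:
//   txn->SetSnapshot();    // GetSnapshot() != nullptr, reads pinned to snapshot
//   txn->ClearSnapshot();  // GetSnapshot() == nullptr, reads see newest data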