merge from master

main
Praveen Rao 9 years ago
commit 4f1c74a46e
  1. 1
      .gitignore
  2. 2
      .travis.yml
  3. 58
      CMakeLists.txt
  4. 22
      HISTORY.md
  5. 2
      INSTALL.md
  6. 2
      LICENSE
  7. 90
      Makefile
  8. 10
      USERS.md
  9. 176
      arcanist_util/config/FacebookArcanistConfiguration.php
  10. 2
      arcanist_util/cpp_linter/cpplint.py
  11. 10
      build_tools/build_detect_platform
  12. 16
      build_tools/dependencies.sh
  13. 16
      build_tools/dependencies_4.8.1.sh
  14. 73
      build_tools/fbcode_config.sh
  15. 75
      build_tools/fbcode_config4.8.1.sh
  16. 2
      build_tools/make_new_version.sh
  17. 198
      build_tools/precommit_checker.py
  18. 127
      build_tools/update_dependencies.sh
  19. 18
      db/auto_roll_logger.cc
  20. 27
      db/auto_roll_logger.h
  21. 62
      db/auto_roll_logger_test.cc
  22. 7
      db/builder.cc
  23. 4
      db/builder.h
  24. 7
      db/c.cc
  25. 96
      db/column_family.cc
  26. 4
      db/column_family.h
  27. 195
      db/column_family_test.cc
  28. 63
      db/compact_files_test.cc
  29. 2
      db/compacted_db_impl.cc
  30. 2
      db/compacted_db_impl.h
  31. 2
      db/compaction.cc
  32. 4
      db/compaction.h
  33. 2
      db/compaction_iterator.cc
  34. 2
      db/compaction_iterator.h
  35. 2
      db/compaction_iterator_test.cc
  36. 57
      db/compaction_job.cc
  37. 13
      db/compaction_job.h
  38. 2
      db/compaction_job_stats_test.cc
  39. 12
      db/compaction_job_test.cc
  40. 16
      db/compaction_picker.cc
  41. 4
      db/compaction_picker.h
  42. 83
      db/compaction_picker_test.cc
  43. 2
      db/comparator_db_test.cc
  44. 2
      db/convenience.cc
  45. 2
      db/corruption_test.cc
  46. 2
      db/cuckoo_table_db_test.cc
  47. 240
      db/db_block_cache_test.cc
  48. 2
      db/db_compaction_filter_test.cc
  49. 129
      db/db_compaction_test.cc
  50. 2
      db/db_dynamic_level_test.cc
  51. 2
      db/db_filesnapshot.cc
  52. 406
      db/db_impl.cc
  53. 24
      db/db_impl.h
  54. 2
      db/db_impl_debug.cc
  55. 2
      db/db_impl_experimental.cc
  56. 16
      db/db_impl_readonly.cc
  57. 2
      db/db_impl_readonly.h
  58. 5
      db/db_info_dumper.cc
  59. 2
      db/db_info_dumper.h
  60. 2
      db/db_inplace_update_test.cc
  61. 113
      db/db_iter.cc
  62. 21
      db/db_iter.h
  63. 188
      db/db_iter_test.cc
  64. 2
      db/db_log_iter_test.cc
  65. 1206
      db/db_properties_test.cc
  66. 2
      db/db_table_properties_test.cc
  67. 47
      db/db_tailing_iter_test.cc
  68. 2284
      db/db_test.cc
  69. 86
      db/db_test2.cc
  70. 120
      db/db_test_util.cc
  71. 17
      db/db_test_util.h
  72. 82
      db/db_universal_compaction_test.cc
  73. 2
      db/db_wal_test.cc
  74. 2
      db/dbformat.cc
  75. 2
      db/dbformat.h
  76. 2
      db/dbformat_test.cc
  77. 3
      db/deletefile_test.cc
  78. 2
      db/event_helpers.cc
  79. 2
      db/event_helpers.h
  80. 2
      db/experimental.cc
  81. 2
      db/fault_injection_test.cc
  82. 2
      db/file_indexer.cc
  83. 2
      db/file_indexer.h
  84. 2
      db/file_indexer_test.cc
  85. 2
      db/filename.cc
  86. 2
      db/filename.h
  87. 2
      db/filename_test.cc
  88. 16
      db/flush_job.cc
  89. 2
      db/flush_job.h
  90. 2
      db/flush_job_test.cc
  91. 2
      db/flush_scheduler.cc
  92. 2
      db/flush_scheduler.h
  93. 18
      db/forward_iterator.cc
  94. 4
      db/forward_iterator.h
  95. 2
      db/forward_iterator_bench.cc
  96. 33
      db/inlineskiplist.h
  97. 2
      db/inlineskiplist_test.cc
  98. 427
      db/internal_stats.cc
  99. 162
      db/internal_stats.h
  100. 11
      db/job_context.h
  101. Some files were not shown because too many files have changed in this diff Show More

1
.gitignore vendored

@ -42,6 +42,7 @@ unity.a
tags
rocksdb_dump
rocksdb_undump
db_test2
java/out
java/target

@ -34,7 +34,7 @@ before_script:
# as EnvPosixTest::AllocateTest expects within the Travis OpenVZ environment.
script:
- if [[ "${TRAVIS_OS_NAME}" == 'linux' ]]; then OPT=-DTRAVIS CLANG_FORMAT_DIFF=/tmp/clang-format-diff.py make format || true; fi
- OPT=-DTRAVIS V=1 make -j4 check && OPT=-DTRAVIS V=1 make clean jclean rocksdbjava jtest && make clean && OPT="-DTRAVIS -DROCKSDB_LITE" V=1 make -j4 check
- OPT=-DTRAVIS V=1 make -j4 check && OPT=-DTRAVIS V=1 make clean jclean rocksdbjava jtest && make clean && OPT="-DTRAVIS -DROCKSDB_LITE" V=1 make -j4 static_lib
notifications:
email:

@ -13,8 +13,8 @@
# cd build
# 3. Run cmake to generate project files for Windows, add more options to enable required third-party libraries.
# See thirdparty.inc for more information.
# sample command: cmake -G "Visual Studio 12 Win64" -DGFLAGS=1 -DSNAPPY=1 -DJEMALLOC=1 ..
# OR for VS Studio 15 cmake -G "Visual Studio 14 Win64" -DGFLAGS=1 -DSNAPPY=1 -DJEMALLOC=1 ..
# sample command: cmake -G "Visual Studio 12 Win64" -DGFLAGS=1 -DSNAPPY=1 -DJEMALLOC=1 -DJNI=1 ..
# OR for VS Studio 15 cmake -G "Visual Studio 14 Win64" -DGFLAGS=1 -DSNAPPY=1 -DJEMALLOC=1 -DJNI=1 ..
# 4. Then build the project in debug mode (you may want to add /m[:<N>] flag to run msbuild in <N> parallel threads
# or simply /m to use all available cores)
# msbuild rocksdb.sln
@ -30,10 +30,9 @@ cmake_minimum_required(VERSION 2.6)
project(rocksdb)
include(${CMAKE_CURRENT_SOURCE_DIR}/thirdparty.inc)
execute_process(COMMAND $ENV{COMSPEC} " /C date /T" OUTPUT_VARIABLE DATE)
execute_process(COMMAND $ENV{COMSPEC} " /C time /T" OUTPUT_VARIABLE TIME)
string(REGEX REPLACE "(..)/(..)/..(..).*" "\\1/\\2/\\3" DATE ${DATE})
execute_process(COMMAND powershell -Command "Get-Date -format MM_dd_yyyy" OUTPUT_VARIABLE DATE)
execute_process(COMMAND powershell -Command "Get-Date -format HH:mm:ss" OUTPUT_VARIABLE TIME)
string(REGEX REPLACE "(..)_(..)_..(..).*" "\\1/\\2/\\3" DATE ${DATE})
string(REGEX REPLACE "(..):(.....).*" " \\1:\\2" TIME ${TIME})
string(CONCAT GIT_DATE_TIME ${DATE} ${TIME})
@ -99,6 +98,7 @@ add_subdirectory(third-party/gtest-1.7.0/fused-src/gtest)
# Main library source code
set(SOURCES
db/auto_roll_logger.cc
db/builder.cc
db/c.cc
db/column_family.cc
@ -114,6 +114,7 @@ set(SOURCES
db/db_impl_debug.cc
db/db_impl_experimental.cc
db/db_impl_readonly.cc
db/db_info_dumper.cc
db/db_iter.cc
db/event_helpers.cc
db/experimental.cc
@ -145,9 +146,12 @@ set(SOURCES
db/write_batch_base.cc
db/write_controller.cc
db/write_thread.cc
db/xfunc_test_points.cc
memtable/hash_cuckoo_rep.cc
memtable/hash_linklist_rep.cc
memtable/hash_skiplist_rep.cc
memtable/skiplistrep.cc
memtable/vectorrep.cc
port/stack_trace.cc
port/win/env_win.cc
port/win/port_win.cc
@ -173,7 +177,6 @@ set(SOURCES
table/merger.cc
table/sst_file_writer.cc
table/meta_blocks.cc
table/mock_table.cc
table/plain_table_builder.cc
table/plain_table_factory.cc
table/plain_table_index.cc
@ -182,9 +185,9 @@ set(SOURCES
table/table_properties.cc
table/two_level_iterator.cc
tools/sst_dump_tool.cc
tools/db_bench_tool.cc
tools/dump/db_dump_tool.cc
util/arena.cc
util/auto_roll_logger.cc
util/bloom.cc
util/build_version.cc
util/cache.cc
@ -193,17 +196,18 @@ set(SOURCES
util/comparator.cc
util/concurrent_arena.cc
util/crc32c.cc
util/db_info_dumper.cc
util/delete_scheduler_impl.cc
util/delete_scheduler.cc
util/dynamic_bloom.cc
util/env.cc
util/env_hdfs.cc
util/event_logger.cc
util/file_util.cc
util/file_reader_writer.cc
util/sst_file_manager_impl.cc
util/filter_policy.cc
util/hash.cc
util/histogram.cc
util/histogram_windowing.cc
util/instrumented_mutex.cc
util/iostats_context.cc
tools/ldb_cmd.cc
@ -211,7 +215,6 @@ set(SOURCES
util/logging.cc
util/log_buffer.cc
util/memenv.cc
util/mock_env.cc
util/murmurhash.cc
util/mutable_cf_options.cc
util/options.cc
@ -223,7 +226,6 @@ set(SOURCES
util/perf_level.cc
util/random.cc
util/rate_limiter.cc
util/skiplistrep.cc
util/slice.cc
util/statistics.cc
util/status.cc
@ -237,11 +239,12 @@ set(SOURCES
util/thread_status_updater.cc
util/thread_status_util.cc
util/thread_status_util_debug.cc
util/vectorrep.cc
util/transaction_test_util.cc
util/xfunc.cc
util/xxhash.cc
utilities/backupable/backupable_db.cc
utilities/checkpoint/checkpoint.cc
utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc
utilities/document/document_db.cc
utilities/document/json_document.cc
utilities/document/json_document_builder.cc
@ -275,6 +278,8 @@ set(SOURCES
# and linked to tests. Add test only code that is not #ifdefed for Release here.
set(TESTUTIL_SOURCE
db/db_test_util.cc
table/mock_table.cc
util/mock_env.cc
util/thread_status_updater_debug.cc
)
@ -287,8 +292,19 @@ set_target_properties(rocksdb${ARTIFACT_SUFFIX} PROPERTIES COMPILE_FLAGS "-DROCK
add_dependencies(rocksdb${ARTIFACT_SUFFIX} GenerateBuildVersion)
target_link_libraries(rocksdb${ARTIFACT_SUFFIX} ${LIBS})
# Optionally build the Java (JNI) bindings: pass -DJNI=1 on the cmake command
# line to add the java/ subdirectory to the build. Any other value, or leaving
# JNI undefined, disables it. The value is quoted so that an empty definition
# (-DJNI=) is treated as "disabled" instead of producing a malformed if()
# expression.
if (DEFINED JNI AND "${JNI}" EQUAL 1)
  message(STATUS "JNI library is enabled")
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/java)
else()
  message(STATUS "JNI library is disabled")
endif()
set(APPS
db/db_bench.cc
tools/db_bench.cc
db/memtablerep_bench.cc
table/table_reader_bench.cc
tools/db_stress.cc
@ -303,6 +319,7 @@ set(APPS
set(C_TESTS db/c_test.c)
set(TESTS
db/auto_roll_logger_test.cc
db/column_family_test.cc
db/compact_files_test.cc
db/compaction_iterator_test.cc
@ -312,17 +329,20 @@ set(TESTS
db/comparator_db_test.cc
db/corruption_test.cc
db/cuckoo_table_db_test.cc
db/db_iter_test.cc
db/db_test.cc
db/db_compaction_filter_test.cc
db/db_compaction_test.cc
db/db_dynamic_level_test.cc
db/db_inplace_update_test.cc
db/db_iter_test.cc
db/db_log_iter_test.cc
db/db_properties_test.cc
db/db_table_properties_test.cc
db/db_tailing_iter_test.cc
db/db_test.cc
db/db_test2.cc
db/db_block_cache_test.cc
db/db_universal_compaction_test.cc
db/db_wal_test.cc
db/db_tailing_iter_test.cc
db/db_table_properties_test.cc
db/dbformat_test.cc
db/deletefile_test.cc
db/fault_injection_test.cc
@ -363,7 +383,6 @@ set(TESTS
tools/sst_dump_test.cc
util/arena_test.cc
util/autovector_test.cc
util/auto_roll_logger_test.cc
util/bloom_test.cc
util/cache_test.cc
util/coding_test.cc
@ -376,6 +395,7 @@ set(TESTS
util/file_reader_writer_test.cc
util/heap_test.cc
util/histogram_test.cc
util/iostats_context_test.cc
util/memenv_test.cc
util/mock_env_test.cc
util/options_test.cc

@ -1,6 +1,26 @@
# Rocksdb Change Log
## 4.6.0 (3/10/2016)
### Public API Changes
* Change default of BlockBasedTableOptions.format_version to 2. It means default DB created by 4.6 or up cannot be opened by RocksDB version 3.9 or earlier.
* Added strict_capacity_limit option to NewLRUCache. If the flag is set to true, inserting into the cache will fail if not enough capacity can be freed. The signature of Cache::Insert() is updated accordingly.
* Tickers [NUMBER_DB_NEXT, NUMBER_DB_PREV, NUMBER_DB_NEXT_FOUND, NUMBER_DB_PREV_FOUND, ITER_BYTES_READ] are not updated immediately. They are updated when the Iterator is deleted.
* Add monotonically increasing counter (DB property "rocksdb.current-super-version-number") that increments upon any change to the LSM tree.
### New Features
* Add CompactionPri::kMinOverlappingRatio, a compaction picking mode friendly to write amplification.
* Deprecate Iterator::IsKeyPinned() and replace it with Iterator::GetProperty() with prop_name="rocksdb.iterator.is.key.pinned"
## 4.5.0 (2/5/2016)
### Public API Changes
* Add a new perf context level between kEnableCount and kEnableTime. Level 2 now does not include timers for mutexes.
* Statistics of mutex operation durations will not be measured by default. If you want to have them enabled, you need to set Statistics::stats_level_ to kAll.
* DBOptions::delete_scheduler and NewDeleteScheduler() are removed, please use DBOptions::sst_file_manager and NewSstFileManager() instead
### New Features
* ldb tool now supports operations to non-default column families.
* Add kPersistedTier to ReadTier. This option allows Get and MultiGet to read only the persisted data and skip mem-tables if writes were done with disableWAL = true.
* Add DBOptions::sst_file_manager. Use NewSstFileManager() in include/rocksdb/sst_file_manager.h to create a SstFileManager that can be used to track the total size of SST files and control the SST files deletion rate.
## Unreleased
## 4.4.0 (1/14/2016)
### Public API Changes
* Change names in CompactionPri and add a new one.
* Deprecate options.soft_rate_limit and add options.soft_pending_compaction_bytes_limit.

@ -21,7 +21,7 @@ depend on gflags. You will need to have gflags installed to run `make all`. This
use binaries compiled by `make all` in production.
* By default the binary we produce is optimized for the platform you're compiling on
(-march=native). If you want to build a portable binary, add 'PORTABLE=1' before
(-march=native or the equivalent). If you want to build a portable binary, add 'PORTABLE=1' before
your make commands, like this: `PORTABLE=1 make static_lib`
## Dependencies

@ -2,7 +2,7 @@ BSD License
For rocksdb software
Copyright (c) 2014, Facebook, Inc.
Copyright (c) 2011-present, Facebook, Inc.
All rights reserved.
---------------------------------------------------------------------

@ -84,7 +84,8 @@ endif
# compile with -O2 if debug level is not 2
ifneq ($(DEBUG_LEVEL), 2)
OPT += -O2 -fno-omit-frame-pointer
ifneq ($(MACHINE),ppc64) # ppc64 doesn't support -momit-leaf-frame-pointer
# Skip for archs that don't support -momit-leaf-frame-pointer
ifeq (,$(shell $(CXX) -fsyntax-only -momit-leaf-frame-pointer -xc /dev/null 2>&1))
OPT += -momit-leaf-frame-pointer
endif
endif
@ -143,6 +144,9 @@ else
OPT += -DNDEBUG
endif
ifeq ($(PLATFORM), OS_SOLARIS)
PLATFORM_CXXFLAGS += -D _GLIBCXX_USE_C99
endif
ifneq ($(filter -DROCKSDB_LITE,$(OPT)),)
# found
CFLAGS += -fno-exceptions
@ -237,8 +241,12 @@ VALGRIND_VER := $(join $(VALGRIND_VER),valgrind)
VALGRIND_OPTS = --error-exitcode=$(VALGRIND_ERROR) --leak-check=full
BENCHTOOLOBJECTS = $(BENCH_SOURCES:.cc=.o) $(LIBOBJECTS) $(TESTUTIL)
TESTS = \
db_test \
db_test2 \
db_block_cache_test \
db_iter_test \
db_log_iter_test \
db_compaction_filter_test \
@ -248,6 +256,7 @@ TESTS = \
db_tailing_iter_test \
db_universal_compaction_test \
db_wal_test \
db_properties_test \
db_table_properties_test \
block_hash_index_test \
autovector_test \
@ -332,7 +341,8 @@ TESTS = \
compact_on_deletion_collector_test \
compaction_job_stats_test \
transaction_test \
ldb_cmd_test
ldb_cmd_test \
iostats_context_test
SUBSET := $(shell echo $(TESTS) |sed s/^.*$(ROCKSDBTESTS_START)/$(ROCKSDBTESTS_START)/)
@ -460,7 +470,7 @@ test_names = \
-e '/^(\s*)(\S+)/; !$$1 and do {$$p=$$2; break};' \
-e 'print qq! $$p$$2!'
ifeq ($(MAKECMDGOALS),check)
# Only take this branch when "check" or "parallel_check" was requested on the
# command line. ifneq takes exactly two arguments; a trailing ",)" would become
# part of the second argument and make the comparison always true.
ifneq (,$(filter check parallel_check,$(MAKECMDGOALS)))
# Use /dev/shm if it has the sticky bit set (otherwise, /tmp),
# and create a randomly-named rocksdb.XXXX directory therein.
# We'll use that directory in the "make check" rules.
@ -616,6 +626,46 @@ valgrind_check: $(TESTS)
fi; \
done
# parloop: helper target, only defined when PAR_TEST is non-empty.
#
# For every entry of $(PAR_TEST) it feeds `seq $(J)` into GNU parallel, so the
# entry is launched once per sequence number, each run with its own
# TEST_TMPDIR of $(TMPD)/rdb-<n>:
#   db_test=1  -> the entry is passed as --gtest_filter to ./db_test
#                 (2 minute timeout);
#   otherwise  -> the entry is executed as ./<entry> (10 minute timeout).
# stdout and stderr of each run are appended to $(TMPD)/rdb-<n>/log-<n>.
# The recipe exits with the status of the last entry that failed, or 0.
ifneq ($(PAR_TEST),)
parloop:
	ret_bad=0; \
	for t in $(PAR_TEST); do \
		echo "===== Running $$t in parallel $(NUM_PAR)";\
		if [ $(db_test) -eq 1 ]; then \
			seq $(J) | v="$$t" parallel --gnu 's=$(TMPD)/rdb-{}; export TEST_TMPDIR=$$s;' \
				'timeout 2m ./db_test --gtest_filter=$$v >> $$s/log-{} 2>&1'; \
		else\
			seq $(J) | v="./$$t" parallel --gnu 's=$(TMPD)/rdb-{};' \
				'export TEST_TMPDIR=$$s; timeout 10m $$v >> $$s/log-{} 2>&1'; \
		fi; \
		ret_code=$$?; \
		if [ $$ret_code -ne 0 ]; then \
			ret_bad=$$ret_code; \
			echo $$t exited with $$ret_code; \
		fi; \
	done; \
	exit $$ret_bad;
endif
# parallel_check: build all $(TESTS), then run them through the parloop target.
#
# Requires J to be an integer greater than 1 and GNU parallel to be installed;
# otherwise the recipe prints a message and exits with status 1.
# It recreates one scratch directory $(TMPD)/rdb-<n> per sequence number, then
# invokes parloop twice: first with the names produced by $(test_names) and
# db_test=1, then with every test except db_test and db_test=0.
# The numeric check uses -gt: a bare ">" inside `test` is a shell redirection,
# not a comparison.
parallel_check: $(TESTS)
	$(AM_V_GEN)if test "$(J)" -gt 1 \
	    && (parallel --gnu --help 2>/dev/null) | \
	        grep -q 'GNU Parallel'; \
	then \
	    echo Running in parallel $(J); \
	else \
	    echo "Need to have GNU Parallel and J > 1"; exit 1; \
	fi; \
	ret_bad=0; \
	echo $(J);\
	echo Test Dir: $(TMPD); \
	seq $(J) | parallel --gnu 's=$(TMPD)/rdb-{}; rm -rf $$s; mkdir $$s'; \
	$(MAKE) PAR_TEST="$(shell $(test_names))" TMPD=$(TMPD) \
		J=$(J) db_test=1 parloop; \
	$(MAKE) PAR_TEST="$(filter-out db_test, $(TESTS))" \
		TMPD=$(TMPD) J=$(J) db_test=0 parloop;
analyze: clean
$(CLANG_SCAN_BUILD) --use-analyzer=$(CLANG_ANALYZER) \
--use-c++=$(CXX) --use-cc=$(CC) --status-bugs \
@ -652,7 +702,7 @@ clean:
tags:
ctags * -R
cscope -b `find . -name '*.cc'` `find . -name '*.h'`
cscope -b `find . -name '*.cc'` `find . -name '*.h'` `find . -name '*.c'`
format:
build_tools/format-diff.sh
@ -667,7 +717,7 @@ $(LIBRARY): $(LIBOBJECTS)
$(AM_V_AR)rm -f $@
$(AM_V_at)$(AR) $(ARFLAGS) $@ $(LIBOBJECTS)
db_bench: db/db_bench.o $(LIBOBJECTS) $(TESTUTIL)
db_bench: tools/db_bench.o $(BENCHTOOLOBJECTS)
$(AM_LINK)
cache_bench: util/cache_bench.o $(LIBOBJECTS) $(TESTUTIL)
@ -742,6 +792,12 @@ slice_transform_test: util/slice_transform_test.o $(LIBOBJECTS) $(TESTHARNESS)
db_test: db/db_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
db_test2: db/db_test2.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
db_block_cache_test: db/db_block_cache_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
db_log_iter_test: db/db_log_iter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
@ -769,6 +825,9 @@ db_universal_compaction_test: db/db_universal_compaction_test.o db/db_test_util.
db_wal_test: db/db_wal_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
db_properties_test: db/db_properties_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
db_table_properties_test: db/db_table_properties_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
@ -970,7 +1029,7 @@ manual_compaction_test: db/manual_compaction_test.o $(LIBOBJECTS) $(TESTHARNESS)
filelock_test: util/filelock_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
auto_roll_logger_test: util/auto_roll_logger_test.o $(LIBOBJECTS) $(TESTHARNESS)
auto_roll_logger_test: db/auto_roll_logger_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
memtable_list_test: db/memtable_list_test.o $(LIBOBJECTS) $(TESTHARNESS)
@ -994,6 +1053,9 @@ ldb_cmd_test: tools/ldb_cmd_test.o $(LIBOBJECTS) $(TESTHARNESS)
ldb: tools/ldb.o $(LIBOBJECTS)
$(AM_LINK)
iostats_context_test: util/iostats_context_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_V_CCLD)$(CXX) $^ $(EXEC_LDFLAGS) -o $@ $(LDFLAGS)
#-------------------------------------------------
# make install related stuff
INSTALL_PATH ?= /usr/local
@ -1036,7 +1098,11 @@ install: install-static
# ---------------------------------------------------------------------------
JAVA_INCLUDE = -I$(JAVA_HOME)/include/ -I$(JAVA_HOME)/include/linux
ifeq ($(PLATFORM), OS_SOLARIS)
ARCH := $(shell isainfo -b)
else
ARCH := $(shell getconf LONG_BIT)
endif
ROCKSDBJNILIB = librocksdbjni-linux$(ARCH).so
ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-linux$(ARCH).jar
ROCKSDB_JAR_ALL = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH).jar
@ -1052,6 +1118,11 @@ else
JAVA_INCLUDE = -I/System/Library/Frameworks/JavaVM.framework/Headers/
endif
endif
ifeq ($(PLATFORM), OS_SOLARIS)
ROCKSDBJNILIB = librocksdbjni-solaris$(ARCH).so
ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-solaris$(ARCH).jar
JAVA_INCLUDE = -I$(JAVA_HOME)/include/ -I$(JAVA_HOME)/include/solaris
endif
libz.a:
-rm -rf zlib-1.2.8
@ -1145,11 +1216,10 @@ jtest: rocksdbjava
jdb_bench:
cd java;$(MAKE) db_bench;
commit-prereq:
$(MAKE) clean && $(MAKE) all check;
commit_prereq: build_tools/rocksdb-lego-determinator \
build_tools/precommit_checker.py
J=$(J) build_tools/precommit_checker.py unit unit_481 clang_unit tsan asan lite
$(MAKE) clean && $(MAKE) jclean && $(MAKE) rocksdbjava;
$(MAKE) clean && USE_CLANG=1 $(MAKE) all;
$(MAKE) clean && OPT=-DROCKSDB_LITE $(MAKE) static_lib;
xfunc:
for xftest in $(XFUNC_TESTS); do \

@ -46,3 +46,13 @@ Pinterest's Object Retrieval System uses RocksDB for storage: https://www.youtub
## Smyte
[Smyte](https://www.smyte.com/) uses RocksDB as the storage layer for their core key-value storage, high-performance counters and time-windowed HyperLogLog services.
## Rakuten Marketing
[Rakuten Marketing](https://marketing.rakuten.com/) uses RocksDB as the disk cache layer for the real-time bidding service in their Performance DSP.
## VWO, Wingify
[VWO's](https://vwo.com/) Smart Code checker and URL helper uses RocksDB to store all the URLs where VWO's Smart Code is installed.
## quasardb
[quasardb](https://www.quasardb.net) is a high-performance, distributed, transactional key-value database that integrates well with in-memory analytics engines such as Apache Spark.
quasardb uses a heavily tuned RocksDB as its persistence layer.

@ -10,13 +10,184 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration {
ArcanistBaseWorkflow $workflow,
$error_code) {
if ($command == 'diff' && !$workflow->isRawDiffSource()) {
$this->maybePushToJenkins($workflow);
$this->startTestsInJenkins($workflow);
$this->startTestsInSandcastle($workflow);
}
}
//////////////////////////////////////////////////////////////////////
/* Run tests in sandcastle */
/**
 * Attach a "click here for sandcastle tests" link to a diff by piping a JSON
 * payload into `arc call-conduit differential.updateunitresults`.
 *
 * @param mixed  $diffID  Diff identifier; also used in the link label.
 * @param string $url     URL stored in the payload's "link" field.
 */
function postURL($diffID, $url) {
  // JSON document handed to the conduit call on stdin.
  $payload = sprintf(
    '{"diff_id": "%s", "name":"click here for sandcastle tests for D%s", "link":"%s"}',
    $diffID,
    $diffID,
    $url);

  // Conduit invocation, with the proxy variables set for that command only.
  $conduit = 'http_proxy=fwdproxy.any.facebook.com:8080 '
    . 'https_proxy=fwdproxy.any.facebook.com:8080 '
    . 'arc call-conduit differential.updateunitresults';

  shell_exec("echo '" . $payload . "' | " . $conduit);
}
/**
 * Build (but do not run) the shell command that reports a test status for a
 * diff through `arc call-conduit differential.updateunitresults`.
 *
 * @param mixed  $diffID  Diff identifier placed in the "diff_id" field.
 * @param string $test    Test name placed in the "name" field.
 * @param string $status  Value placed in the "result" field.
 * @return string The complete shell command line.
 */
function updateTestCommand($diffID, $test, $status) {
  // JSON document handed to the conduit call on stdin.
  $payload = sprintf(
    '{"diff_id": "%s", "name":"%s", "result":"%s"}',
    $diffID,
    $test,
    $status);

  // Conduit invocation, with the proxy variables set for that command only.
  $conduit = 'http_proxy=fwdproxy.any.facebook.com:8080 '
    . 'https_proxy=fwdproxy.any.facebook.com:8080 '
    . 'arc call-conduit differential.updateunitresults';

  return "echo '" . $payload . "' | " . $conduit;
}
/**
 * Report the given test on the diff with the status "waiting".
 *
 * @param mixed  $diffID  Diff identifier.
 * @param string $test    Test name.
 */
function updateTest($diffID, $test) {
  $mark_waiting = $this->updateTestCommand($diffID, $test, "waiting");
  shell_exec($mark_waiting);
}
/**
 * Build the ordered list of Sandcastle steps that run one test against a diff.
 *
 * The returned steps, in order: install the caller's ~/.arcrc, fix its
 * permissions, add fbcode to the git exclude list, apply the diff with
 * `arc patch`, run build_tools/precommit_checker.py for $test (reporting
 * running/pass/fail through updateTestCommand), and finally remove ~/.arcrc.
 *
 * Side effect: calls updateTest() immediately, i.e. while the steps are being
 * constructed, not when they are executed.
 *
 * @param mixed  $diffID    Diff identifier to patch and report against.
 * @param string $username  Accepted but not used inside this function.
 * @param string $test      Test name passed to precommit_checker.py.
 * @return array List of step definitions, each with "name", "shell", "user".
 */
function getSteps($diffID, $username, $test) {
// Snapshot of the local ~/.arcrc, gzip-compressed and base64-encoded so it can
// be embedded in a shell command.
$arcrc_content = exec("cat ~/.arcrc | gzip -f | base64 -w0");
// Sandcastle machines don't have arc setup. We copy the user certificate
// and authenticate using that in sandcastle
$setup = array(
"name" => "Setup arcrc",
"shell" => "echo " . $arcrc_content . " | base64 --decode"
. " | gzip -d > ~/.arcrc",
"user" => "root"
);
// arc demands certain permission on its config
$fix_permission = array(
"name" => "Fix environment",
"shell" => "chmod 600 ~/.arcrc",
"user" => "root"
);
// fbcode is a sub-repo. We cannot patch until we add it to ignore otherwise
// git thinks it is an uncommitted change
$fix_git_ignore = array(
"name" => "Fix git ignore",
"shell" => "echo fbcode >> .git/info/exclude",
"user" => "root"
);
// Patch the code (keep your fingers crossed)
$patch = array(
"name" => "Patch " . $diffID,
"shell" => "HTTPS_PROXY=fwdproxy:8080 arc --arcrc-file ~/.arcrc "
. "patch --diff " . $diffID,
"user" => "root"
);
// Clean up the user arc config we are using
$cleanup = array(
"name" => "Arc cleanup",
"shell" => "rm -f ~/.arcrc",
"user" => "root"
);
// Construct the steps in the order of execution
$steps[] = $setup;
$steps[] = $fix_permission;
$steps[] = $fix_git_ignore;
$steps[] = $patch;
// Run the actual command
// Mark the test as "waiting" right away; the command below then reports
// "running", and afterwards "pass" or "fail" depending on the checker's exit
// status. The precommit log and any t/log-* files are printed at the end.
$this->updateTest($diffID, $test);
$cmd = $this->updateTestCommand($diffID, $test, "running") . ";"
. "(./build_tools/precommit_checker.py " . $test
. "&& "
. $this->updateTestCommand($diffID, $test, "pass") . ")"
. "|| " . $this->updateTestCommand($diffID, $test, "fail")
. "; cat /tmp/precommit-check.log"
. "; for f in `ls t/log-*`; do echo \$f; cat \$f; done";
$run_test = array(
"name" => "Run " . $test,
"shell" => $cmd,
"user" => "root",
);
$steps[] = $run_test;
// Cleanup is appended last so it follows the test step.
$steps[] = $cleanup;
return $steps;
}
/**
 * Schedule the pre-commit tests for the current diff in Sandcastle and post
 * the resulting job URL back to the diff.
 *
 * The set of tests is every name found in
 * build_tools/rocksdb-lego-determinator when the environment variable
 * ROCKSDB_CHECK_ALL is 1, otherwise a fixed list. One job (see getSteps) is
 * built per test; the combined plan is compressed and wrapped in a small
 * "determinator" job that prints it for Sandcastle to parse.
 *
 * Returns without doing anything when the diff id or user name is missing,
 * and returns without posting a link when the response contains no URL.
 *
 * @param object $workflow  Workflow object providing getDiffId().
 */
function startTestsInSandcastle($workflow) {
  // extract information we need from workflow or CLI
  $diffID = $workflow->getDiffId();
  $username = exec("whoami");

  if ($diffID == null || $username == null) {
    // there is no diff and we can't extract username
    // we cannot schedule sandcastle job
    return;
  }

  if (strcmp(getenv("ROCKSDB_CHECK_ALL"), 1) == 0) {
    // extract all tests from the CI definition
    $output = file_get_contents("build_tools/rocksdb-lego-determinator");
    preg_match_all('/[ ]{2}([a-zA-Z0-9_]+)[\)]{1}/', $output, $matches);
    $tests = $matches[1];
  } else {
    // manually list of tests we want to run in sandcastle
    $tests = array(
      "unit", "unit_481", "clang_unit", "tsan", "asan", "lite", "valgrind"
    );
  }

  // construct a job definition for each test and add it to the master plan
  $arg = array();
  foreach ($tests as $test) {
    $arg[] = array(
      "name" => "RocksDB diff " . $diffID . " test " . $test,
      "steps" => $this->getSteps($diffID, $username, $test)
    );
  }

  // we cannot submit the parallel execution master plan to sandcastle
  // we need supply the job plan as a determinator
  // so we construct a small job that will spit out the master job plan
  // which sandcastle will parse and execute
  // Why compress ? Otherwise we run over the max string size.
  $cmd = "echo " . base64_encode(json_encode($arg))
    . " | gzip -f | base64 -w0";
  $arg_encoded = shell_exec($cmd);

  $command = array(
    "name" => "Run diff " . $diffID . " for user " . $username,
    "steps" => array()
  );

  $command["steps"][] = array(
    "name" => "Generate determinator",
    "shell" => "echo " . $arg_encoded . " | base64 --decode | gzip -d"
      . " | base64 --decode",
    "determinator" => true,
    "user" => "root"
  );

  // submit to sandcastle
  // NOTE(review): the app id and token below are credentials hard-coded in
  // source; consider loading them from configuration instead.
  $url = 'https://interngraph.intern.facebook.com/sandcastle/generate?'
    .'command=SandcastleUniversalCommand'
    .'&vcs=rocksdb-git&revision=origin%2Fmaster&type=lego'
    .'&user=krad&alias=rocksdb-precommit'
    .'&command-args=' . urlencode(json_encode($command));

  $cmd = 'https_proxy= HTTPS_PROXY= curl -s -k -F app=659387027470559 '
    . '-F token=AeO_3f2Ya3TujjnxGD4 "' . $url . '"';

  $output = shell_exec($cmd);

  // extract sandcastle URL from the response; without a match there is
  // nothing to display or to post back to the diff
  if (!preg_match('/url": "(.+)"/', $output, $sandcastle_url)) {
    echo "\nCould not find a Sandcastle URL in the response.\n";
    return;
  }

  echo "\nSandcastle URL: " . $sandcastle_url[1] . "\n";

  // Ask phabricator to display it on the diff UI
  $this->postURL($diffID, $sandcastle_url[1]);
}
//////////////////////////////////////////////////////////////////////
/* Send off builds to jenkins */
function maybePushToJenkins($workflow) {
function startTestsInJenkins($workflow) {
$diffID = $workflow->getDiffID();
if ($diffID === null) {
return;
@ -31,5 +202,4 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration {
."buildWithParameters?token=AUTH&DIFF_ID=$diffID";
system("curl --noproxy '*' \"$url\" > /dev/null 2>&1");
}
}

@ -1,5 +1,5 @@
#!/usr/bin/python
# Copyright (c) 2013, Facebook, Inc. All rights reserved.
# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.

@ -189,6 +189,7 @@ if [ "$CROSS_COMPILE" = "true" -o "$FBCODE_BUILD" = "true" ]; then
# Also don't need any compilation tests if compiling on fbcode
true
else
if ! test $ROCKSDB_DISABLE_FALLOCATE; then
# Test whether fallocate is available
$CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <fcntl.h>
@ -201,6 +202,7 @@ EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_FALLOCATE_PRESENT"
fi
fi
# Test whether Snappy library is installed
# http://code.google.com/p/snappy/
@ -359,8 +361,16 @@ if test "$USE_SSE"; then
# if Intel SSE instruction set is supported, set USE_SSE=1
COMMON_FLAGS="$COMMON_FLAGS -msse -msse4.2 "
elif test -z "$PORTABLE"; then
if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then
# Tune for this POWER processor, treating '+' models as base models
POWER=`LD_SHOW_AUXV=1 /bin/true | grep AT_PLATFORM | grep -E -o power[0-9]+`
COMMON_FLAGS="$COMMON_FLAGS -mcpu=$POWER -mtune=$POWER "
elif test -n "`echo $TARGET_ARCHITECTURE | grep ^s390x`"; then
COMMON_FLAGS="$COMMON_FLAGS -march=z10 "
else
COMMON_FLAGS="$COMMON_FLAGS -march=native "
fi
fi
PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS"
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS"

@ -0,0 +1,16 @@
# Base directories of the toolchain and third-party libraries used by the
# fbcode build (gcc 4.9.x / glibc 2.20 variant). The fbcode configuration
# script sources this file and derives its include and library paths from
# these *_BASE variables.
GCC_BASE=/mnt/vol/engshare/fbcode/third-party2/gcc/4.9.x/centos6-native/1317bc4/
CLANG_BASE=/mnt/gvfs/third-party2/clang/fc904e50a9266b9d7b98cae1993afa0c5aae1440/3.7.1/centos6-native/9d9ecb9/
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/ea2fd1278810d3af2ea52218d2767e09d786dbd0/4.9.x/gcc-4.9-glibc-2.20/024dbc3
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/f5484f168c0e4d19823d41df052c5870c6e575a4/2.20/gcc-4.9-glibc-2.20/500e281
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/cbf6f1f209e5bd160bdc5d971744e039f36b1566/1.1.3/gcc-4.9-glibc-2.20/e9936bf
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/6d39cb54708049f527e713ad19f2aadb9d3667e8/1.2.8/gcc-4.9-glibc-2.20/e9936bf
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/2ddd45f0853bfc8bb1c27f0f447236a1a26c338a/1.0.6/gcc-4.9-glibc-2.20/e9936bf
LZ4_BASE=/mnt/gvfs/third-party2/lz4/6858fac689e0f92e584224d91bdb0e39f6c8320d/r131/gcc-4.9-glibc-2.20/e9936bf
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/cb6c4880fcb4fee471574ba6af63a3882155a16a/0.5.1/gcc-4.9-glibc-2.20/e9936bf
# NOTE(review): unlike the other libraries in this file, this path points at
# the gcc-4.8.1-glibc-2.17 build -- confirm that is intended.
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/c7275a4ceae0aca0929e56964a31dafc53c1ee96/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/40791a3fef9206a77f2c4bc51f8169e5bf10d68e/master/gcc-4.9-glibc-2.20/a6c5e1e
NUMA_BASE=/mnt/gvfs/third-party2/numa/ae54a5ed22cdabb1c6446dce4e8ffae5b4446d73/2.0.8/gcc-4.9-glibc-2.20/e9936bf
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/303048f72efc92ae079e62dfc84823401aecfd94/trunk/gcc-4.9-glibc-2.20/12266b1
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/1a48835975c66d30e47770ec419758ed3b9ba010/3.10.62-62_fbk17_03959_ge29cc63/gcc-4.9-glibc-2.20/da39a3e
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/a5b8152b2a15ce8a98808cf954fbccec825a97bc/2.25/centos6-native/da39a3e
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/af85c56f424cd5edfc2c97588299b44ecdec96bb/3.10.0/gcc-4.9-glibc-2.20/e9936bf

@ -0,0 +1,16 @@
# Base directories of the toolchain and third-party libraries for the
# gcc 4.8.1 / glibc 2.17 variant of the fbcode build.
# NOTE(review): presumably sourced by build_tools/fbcode_config4.8.1.sh --
# verify against that script.
GCC_BASE=/mnt/vol/engshare/fbcode/third-party2/gcc/4.8.1/centos6-native/cc6c9dc/
CLANG_BASE=/mnt/gvfs/third-party2/clang/fc904e50a9266b9d7b98cae1993afa0c5aae1440/3.7.1/centos6-native/9d9ecb9/
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/ea2fd1278810d3af2ea52218d2767e09d786dbd0/4.8.1/gcc-4.8.1-glibc-2.17/8aac7fc
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/f5484f168c0e4d19823d41df052c5870c6e575a4/2.17/gcc-4.8.1-glibc-2.17/99df8fc
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/cbf6f1f209e5bd160bdc5d971744e039f36b1566/1.1.3/gcc-4.8.1-glibc-2.17/c3f970a
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/6d39cb54708049f527e713ad19f2aadb9d3667e8/1.2.8/gcc-4.8.1-glibc-2.17/c3f970a
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/2ddd45f0853bfc8bb1c27f0f447236a1a26c338a/1.0.6/gcc-4.8.1-glibc-2.17/c3f970a
LZ4_BASE=/mnt/gvfs/third-party2/lz4/6858fac689e0f92e584224d91bdb0e39f6c8320d/r131/gcc-4.8.1-glibc-2.17/c3f970a
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/cb6c4880fcb4fee471574ba6af63a3882155a16a/0.5.1/gcc-4.8.1-glibc-2.17/c3f970a
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/c7275a4ceae0aca0929e56964a31dafc53c1ee96/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/40791a3fef9206a77f2c4bc51f8169e5bf10d68e/master/gcc-4.8.1-glibc-2.17/8d31e51
NUMA_BASE=/mnt/gvfs/third-party2/numa/ae54a5ed22cdabb1c6446dce4e8ffae5b4446d73/2.0.8/gcc-4.8.1-glibc-2.17/c3f970a
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/303048f72efc92ae079e62dfc84823401aecfd94/trunk/gcc-4.8.1-glibc-2.17/675d945
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/1a48835975c66d30e47770ec419758ed3b9ba010/3.10.62-62_fbk17_03959_ge29cc63/gcc-4.8.1-glibc-2.17/da39a3e
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/a5b8152b2a15ce8a98808cf954fbccec825a97bc/2.25/centos6-native/da39a3e
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/af85c56f424cd5edfc2c97588299b44ecdec96bb/3.8.1/gcc-4.8.1-glibc-2.17/c3f970a

@ -6,87 +6,88 @@
# Environment variables that change the behavior of this script:
# PIC_BUILD -- if true, it will only take pic versions of libraries from fbcode. libraries that don't have pic variant will not be included
BASEDIR=`dirname $BASH_SOURCE`
source "$BASEDIR/dependencies.sh"
CFLAGS=""
# location of libgcc
LIBGCC_BASE="/mnt/gvfs/third-party2/libgcc/0473c80518a10d6efcbe24c5eeca3fb4ec9b519c/4.9.x/gcc-4.9-glibc-2.20/e1a7e4e"
# libgcc
LIBGCC_INCLUDE="$LIBGCC_BASE/include"
LIBGCC_LIBS=" -L $LIBGCC_BASE/libs"
# location of glibc
GLIBC_REV=7397bed99280af5d9543439cdb7d018af7542720
GLIBC_INCLUDE="/mnt/gvfs/third-party2/glibc/$GLIBC_REV/2.20/gcc-4.9-glibc-2.20/99df8fc/include"
GLIBC_LIBS=" -L /mnt/gvfs/third-party2/glibc/$GLIBC_REV/2.20/gcc-4.9-glibc-2.20/99df8fc/lib"
SNAPPY_INCLUDE=" -I /mnt/gvfs/third-party2/snappy/b0f269b3ca47770121aa159b99e1d8d2ab260e1f/1.0.3/gcc-4.9-glibc-2.20/c32916f/include/"
# glibc: header and library search paths, both rooted at GLIBC_BASE
# (GLIBC_BASE is expected to come from the sourced dependencies.sh).
GLIBC_INCLUDE="$GLIBC_BASE/include"
GLIBC_LIBS=" -L $GLIBC_BASE/lib"
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
if test -z $PIC_BUILD; then
SNAPPY_LIBS=" /mnt/gvfs/third-party2/snappy/b0f269b3ca47770121aa159b99e1d8d2ab260e1f/1.0.3/gcc-4.9-glibc-2.20/c32916f/lib/libsnappy.a"
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a"
else
SNAPPY_LIBS=" /mnt/gvfs/third-party2/snappy/b0f269b3ca47770121aa159b99e1d8d2ab260e1f/1.0.3/gcc-4.9-glibc-2.20/c32916f/lib/libsnappy_pic.a"
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a"
fi
CFLAGS+=" -DSNAPPY"
if test -z $PIC_BUILD; then
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I /mnt/gvfs/third-party2/zlib/feb983d9667f4cf5e9da07ce75abc824764b67a1/1.2.8/gcc-4.9-glibc-2.20/4230243/include/"
ZLIB_LIBS=" /mnt/gvfs/third-party2/zlib/feb983d9667f4cf5e9da07ce75abc824764b67a1/1.2.8/gcc-4.9-glibc-2.20/4230243/lib/libz.a"
ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a"
CFLAGS+=" -DZLIB"
# location of bzip headers and libraries
BZIP_INCLUDE=" -I /mnt/gvfs/third-party2/bzip2/af004cceebb2dfd173ca29933ea5915e727aad2f/1.0.6/gcc-4.9-glibc-2.20/4230243/include/"
BZIP_LIBS=" /mnt/gvfs/third-party2/bzip2/af004cceebb2dfd173ca29933ea5915e727aad2f/1.0.6/gcc-4.9-glibc-2.20/4230243/lib/libbz2.a"
BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a"
CFLAGS+=" -DBZIP2"
LZ4_INCLUDE=" -I /mnt/gvfs/third-party2/lz4/6858fac689e0f92e584224d91bdb0e39f6c8320d/r131/gcc-4.9-glibc-2.20/e9936bf/include/"
LZ4_LIBS=" /mnt/gvfs/third-party2/lz4/6858fac689e0f92e584224d91bdb0e39f6c8320d/r131/gcc-4.9-glibc-2.20/e9936bf/lib/liblz4.a"
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
CFLAGS+=" -DLZ4"
ZSTD_REV=810b81b4705def5243e998b54701f3c504e4009e
ZSTD_INCLUDE=" -I /mnt/gvfs/third-party2/zstd/$ZSTD_REV/0.4.2/gcc-4.8.1-glibc-2.17/c3f970a/include"
ZSTD_LIBS=" /mnt/gvfs/third-party2/zstd/$ZSTD_REV/0.4.2/gcc-4.8.1-glibc-2.17/c3f970a/lib/libzstd.a"
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
CFLAGS+=" -DZSTD"
fi
# location of gflags headers and libraries
GFLAGS_INCLUDE=" -I /mnt/gvfs/third-party2/gflags/0fa60e2b88de3e469db6c482d6e6dac72f5d65f9/1.6/gcc-4.9-glibc-2.20/4230243/include/"
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
if test -z $PIC_BUILD; then
GFLAGS_LIBS=" /mnt/gvfs/third-party2/gflags/0fa60e2b88de3e469db6c482d6e6dac72f5d65f9/1.6/gcc-4.9-glibc-2.20/4230243/lib/libgflags.a"
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags.a"
else
GFLAGS_LIBS=" /mnt/gvfs/third-party2/gflags/0fa60e2b88de3e469db6c482d6e6dac72f5d65f9/1.6/gcc-4.9-glibc-2.20/4230243/lib/libgflags_pic.a"
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags_pic.a"
fi
CFLAGS+=" -DGFLAGS=google"
# location of jemalloc
JEMALLOC_INCLUDE=" -I /mnt/gvfs/third-party2/jemalloc/bcd68e5e419efa4e61b9486d6854564d6d75a0b5/3.6.0/gcc-4.9-glibc-2.20/2aafc78/include/"
JEMALLOC_LIB=" /mnt/gvfs/third-party2/jemalloc/bcd68e5e419efa4e61b9486d6854564d6d75a0b5/3.6.0/gcc-4.9-glibc-2.20/2aafc78/lib/libjemalloc.a"
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc.a"
if test -z $PIC_BUILD; then
# location of numa
NUMA_INCLUDE=" -I /mnt/gvfs/third-party2/numa/bbefc39ecbf31d0ca184168eb613ef8d397790ee/2.0.8/gcc-4.9-glibc-2.20/4230243/include/"
NUMA_LIB=" /mnt/gvfs/third-party2/numa/bbefc39ecbf31d0ca184168eb613ef8d397790ee/2.0.8/gcc-4.9-glibc-2.20/4230243/lib/libnuma.a"
NUMA_INCLUDE=" -I $NUMA_BASE/include/"
NUMA_LIB=" $NUMA_BASE/lib/libnuma.a"
CFLAGS+=" -DNUMA"
# location of libunwind
LIBUNWIND="/mnt/gvfs/third-party2/libunwind/1de3b75e0afedfe5585b231bbb340ec7a1542335/1.1/gcc-4.9-glibc-2.20/34235e8/lib/libunwind.a"
LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a"
fi
# use Intel SSE support for checksum calculations
export USE_SSE=1
BINUTILS="/mnt/gvfs/third-party2/binutils/0b6ad0c88ddd903333a48ae8bff134efac468e4a/2.25/centos6-native/da39a3e/bin"
BINUTILS="$BINUTILS_BASE/bin"
AR="$BINUTILS/ar"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE"
GCC_BASE="/mnt/gvfs/third-party2/gcc/1c67a0b88f64d4d9ced0382d141c76aaa7d62fba/4.9.x/centos6-native/1317bc4"
STDLIBS="-L $GCC_BASE/lib64"
CLANG_BASE="/mnt/gvfs/third-party2/clang/d81444dd214df3d2466734de45bb264a0486acc3/dev"
CLANG_BIN="$CLANG_BASE/centos6-native/af4b1a0/bin"
CLANG_BIN="$CLANG_BASE/bin"
CLANG_LIB="$CLANG_BASE/lib"
CLANG_SRC="$CLANG_BASE/../../src"
CLANG_ANALYZER="$CLANG_BIN/clang++"
CLANG_SCAN_BUILD="$CLANG_BASE/src/clang/tools/scan-build/scan-build"
CLANG_SCAN_BUILD="$CLANG_SRC/clang/tools/scan-build/scan-build"
if [ -z "$USE_CLANG" ]; then
# gcc
@ -98,11 +99,11 @@ if [ -z "$USE_CLANG" ]; then
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
else
# clang
CLANG_INCLUDE="$CLANG_BASE/gcc-4.9-glibc-2.20/74c386f/lib/clang/dev/include/"
CLANG_INCLUDE="$CLANG_LIB/clang/*/include"
CC="$CLANG_BIN/clang"
CXX="$CLANG_BIN/clang++"
KERNEL_HEADERS_INCLUDE="/mnt/gvfs/third-party2/kernel-headers/ffd14f660a43c4b92717986b1bba66722ef089d0/3.2.18_70_fbk11_00129_gc8882d0/gcc-4.9-glibc-2.20/da39a3e/include"
KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include"
CFLAGS+=" -B$BINUTILS/gold -nostdinc -nostdlib"
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.9.x "
@ -128,6 +129,6 @@ PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS"
VALGRIND_VER="/mnt/gvfs/third-party2/valgrind/6c45ef049cbf11c2df593addb712cd891049e737/3.10.0/gcc-4.9-glibc-2.20/4230243/bin/"
VALGRIND_VER="$VALGRIND_BASE/bin/"
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD

@ -4,67 +4,58 @@
# fbcode settings. It uses the latest g++ compiler and also
# uses jemalloc
BASEDIR=`dirname $BASH_SOURCE`
source "$BASEDIR/dependencies_4.8.1.sh"
# location of libgcc
LIBGCC_BASE="/mnt/gvfs/third-party2/libgcc/d00277f4559e261ed0a81f30f23c0ce5564e359e/4.8.1/gcc-4.8.1-glibc-2.17/8aac7fc"
LIBGCC_INCLUDE="$LIBGCC_BASE/include"
LIBGCC_LIBS=" -L $LIBGCC_BASE/libs"
# location of glibc
GLIBC_REV=0600c95b31226b5e535614c590677d87c62d8016
GLIBC_INCLUDE="/mnt/gvfs/third-party2/glibc/$GLIBC_REV/2.17/gcc-4.8.1-glibc-2.17/99df8fc/include"
GLIBC_LIBS=" -L /mnt/gvfs/third-party2/glibc/$GLIBC_REV/2.17/gcc-4.8.1-glibc-2.17/99df8fc/lib"
GLIBC_INCLUDE="$GLIBC_BASE/include"
GLIBC_LIBS=" -L $GLIBC_BASE/lib"
# location of snappy headers and libraries
SNAPPY_REV=cbf6f1f209e5bd160bdc5d971744e039f36b1566
SNAPPY_INCLUDE=" -I /mnt/gvfs/third-party2/snappy/$SNAPPY_REV/1.1.3/gcc-4.8.1-glibc-2.17/c3f970a/include"
SNAPPY_LIBS=" /mnt/gvfs/third-party2/snappy/$SNAPPY_REV/1.1.3/gcc-4.8.1-glibc-2.17/c3f970a/lib/libsnappy.a"
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include"
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a"
# location of zlib headers and libraries
ZLIB_REV=6d39cb54708049f527e713ad19f2aadb9d3667e8
ZLIB_INCLUDE=" -I /mnt/gvfs/third-party2/zlib/$ZLIB_REV/1.2.8/gcc-4.8.1-glibc-2.17/c3f970a/include"
ZLIB_LIBS=" /mnt/gvfs/third-party2/zlib/$ZLIB_REV/1.2.8/gcc-4.8.1-glibc-2.17/c3f970a/lib/libz.a"
ZLIB_INCLUDE=" -I $ZLIB_BASE/include"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a"
# location of bzip headers and libraries
BZIP_REV=d6c789bfc2ec4c51a63d66df2878926b8158cde8
BZIP_INCLUDE=" -I /mnt/gvfs/third-party2/bzip2/$BZIP_REV/1.0.6/gcc-4.8.1-glibc-2.17/c3f970a/include/"
BZIP_LIBS=" /mnt/gvfs/third-party2/bzip2/$BZIP_REV/1.0.6/gcc-4.8.1-glibc-2.17/c3f970a/lib/libbz2.a"
BZIP2_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP2_LIBS=" $BZIP2_BASE/lib/libbz2.a"
LZ4_REV=6858fac689e0f92e584224d91bdb0e39f6c8320d
LZ4_INCLUDE=" -I /mnt/gvfs/third-party2/lz4/$LZ4_REV/r131/gcc-4.8.1-glibc-2.17/c3f970a/include"
LZ4_LIBS=" /mnt/gvfs/third-party2/lz4/$LZ4_REV/r131/gcc-4.8.1-glibc-2.17/c3f970a/lib/liblz4.a"
LZ4_INCLUDE=" -I $LZ4_BASE/include"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
ZSTD_REV=810b81b4705def5243e998b54701f3c504e4009e
ZSTD_INCLUDE=" -I /mnt/gvfs/third-party2/zstd/$ZSTD_REV/0.4.2/gcc-4.8.1-glibc-2.17/c3f970a/include"
ZSTD_LIBS=" /mnt/gvfs/third-party2/zstd/$ZSTD_REV/0.4.2/gcc-4.8.1-glibc-2.17/c3f970a/lib/libzstd.a"
ZSTD_INCLUDE=" -I $ZSTD_BASE/include"
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
# location of gflags headers and libraries
GFLAGS_REV=c7275a4ceae0aca0929e56964a31dafc53c1ee96
GFLAGS_INCLUDE=" -I /mnt/gvfs/third-party2/gflags/$GFLAGS_REV/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a/include/"
GFLAGS_LIBS=" /mnt/gvfs/third-party2/gflags/$GFLAGS_REV/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a/lib/libgflags.a"
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags.a"
# location of jemalloc
JEMALLOC_REV=c370265e58c4b6602e798df23335a1e9913dae52
JEMALLOC_INCLUDE=" -I /mnt/gvfs/third-party2/jemalloc/$JEMALLOC_REV/4.0.3/gcc-4.8.1-glibc-2.17/8d31e51/include"
JEMALLOC_LIB="/mnt/gvfs/third-party2/jemalloc/$JEMALLOC_REV/4.0.3/gcc-4.8.1-glibc-2.17/8d31e51/lib/libjemalloc.a"
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include"
JEMALLOC_LIB="$JEMALLOC_BASE/lib/libjemalloc.a"
# location of numa
NUMA_REV=ae54a5ed22cdabb1c6446dce4e8ffae5b4446d73
NUMA_INCLUDE=" -I /mnt/gvfs/third-party2/numa/$NUMA_REV/2.0.8/gcc-4.8.1-glibc-2.17/c3f970a/include/"
NUMA_LIB=" /mnt/gvfs/third-party2/numa/$NUMA_REV/2.0.8/gcc-4.8.1-glibc-2.17/c3f970a/lib/libnuma.a"
NUMA_INCLUDE=" -I $NUMA_BASE/include/"
NUMA_LIB=" $NUMA_BASE/lib/libnuma.a"
# location of libunwind
LIBUNWIND_REV=121f1a75c4414683aea8c70b761bfaf187f7c1a3
LIBUNWIND="/mnt/gvfs/third-party2/libunwind/$LIBUNWIND_REV/trunk/gcc-4.8.1-glibc-2.17/675d945/lib/libunwind.a"
LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a"
# use Intel SSE support for checksum calculations
export USE_SSE=1
BINUTILS="/mnt/gvfs/third-party2/binutils/75670d0d8ef4891fd1ec2a7513ef01cd002c823b/2.25/centos6-native/da39a3e/bin"
BINUTILS="$BINUTILS_BASE/bin"
AR="$BINUTILS/ar"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP2_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE"
GCC_BASE="/mnt/gvfs/third-party2/gcc/c0064002d2609ab649603f769f0bd110bbe48029/4.8.1/centos6-native/cc6c9dc"
STDLIBS="-L $GCC_BASE/lib64"
if [ -z "$USE_CLANG" ]; then
@ -77,12 +68,13 @@ if [ -z "$USE_CLANG" ]; then
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
else
# clang
CLANG_BASE="/mnt/gvfs/third-party2/clang/ab054e9a490a8fd4537c0b6ec56e5c91c0f81c91/3.7"
CLANG_INCLUDE="$CLANG_BASE/gcc-4.8.1-glibc-2.17/ee9b060/lib/clang/3.7/include"
CC="$CLANG_BASE/centos6-native/b2feaee/bin/clang"
CXX="$CLANG_BASE/centos6-native/b2feaee/bin/clang++"
CLANG_BIN="$CLANG_BASE/bin"
CLANG_LIB="$CLANG_BASE/lib"
CLANG_INCLUDE="$CLANG_LIB/clang/*/include"
CC="$CLANG_BIN/clang"
CXX="$CLANG_BIN/clang++"
KERNEL_HEADERS_INCLUDE="/mnt/gvfs/third-party2/kernel-headers/1a48835975c66d30e47770ec419758ed3b9ba010/3.10.62-62_fbk17_03959_ge29cc63/gcc-4.8.1-glibc-2.17/da39a3e/include/"
KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include/"
CFLAGS="-B$BINUTILS/gold -nostdinc -nostdlib"
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.8.1 "
@ -100,16 +92,15 @@ CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PR
CFLAGS+=" -DSNAPPY -DGFLAGS=google -DZLIB -DBZIP2 -DLZ4 -DZSTD -DNUMA"
CXXFLAGS+=" $CFLAGS"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP2_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/gcc-4.8.1-glibc-2.17/lib/ld.so"
EXEC_LDFLAGS+=" $LIBUNWIND"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/gcc-4.8.1-glibc-2.17/lib"
PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP2_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS"
VALGRIND_REV=af85c56f424cd5edfc2c97588299b44ecdec96bb
VALGRIND_VER="/mnt/gvfs/third-party2/valgrind/$VALGRIND_REV/3.8.1/gcc-4.8.1-glibc-2.17/c3f970a/bin/"
VALGRIND_VER="$VALGRIND_BASE/bin/"
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE

@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2013, Facebook, Inc. All rights reserved.
# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.

@ -0,0 +1,198 @@
#!/usr/local/fbcode/gcc-4.8.1-glibc-2.17-fb/bin/python2.7
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import commands
import subprocess
import sys
import re
import os
import time
#
# Simple logger
#
class Log:
    """Minimal logger that writes every message to a fixed log file.

    When no file object is available (``self.f`` is falsy) messages are
    printed to stdout instead.
    """

    LOG_FILE = "/tmp/precommit-check.log"

    def __init__(self):
        self.filename = Log.LOG_FILE
        # Third argument 0 asks for an unbuffered file object.
        self.f = open(self.filename, 'w+', 0)

    def _emit(self, text):
        # Single sink shared by caption/error/log: file if present, else stdout.
        if not self.f:
            print(text)
            return
        self.f.write("%s \n" % text)

    def caption(self, str):
        """Write a section heading."""
        self._emit("\n##### %s #####\n" % str)

    def error(self, str):
        """Write an error entry."""
        self._emit("\n\n##### ERROR ##### %s" % str)

    def log(self, str):
        """Write a plain message."""
        self._emit(str)
#
# Shell Environment
#
class Env(object):
    """Shell environment: runs commands and records them in the log."""

    def __init__(self, tests):
        self.tests = tests
        self.log = Log()

    def _begin_session(self, cmd, path):
        # Switch to the requested directory (if any) and log the command line.
        if path:
            os.chdir(path)
        self.log.log("==== shell session ===========================")
        self.log.log("%s> %s" % (path, cmd))

    def _end_session(self):
        self.log.log("============================================== \n\n")

    # Note: the default for `path` is computed once, when the class body is
    # executed, not on every call.
    def shell(self, cmd, path=os.getcwd()):
        """Run `cmd` through the shell from `path`.

        stdout and stderr of the command go to the log file. Returns the
        status reported by subprocess.call.
        """
        self._begin_session(cmd, path)
        full_cmd = "cd %s; %s" % (path, cmd)
        sink = self.log.f
        status = subprocess.call(full_cmd, shell=True, stdout=sink, stderr=sink)
        self.log.log("status = %s" % status)
        self._end_session()
        return status

    def GetOutput(self, cmd, path=os.getcwd()):
        """Run `cmd` and return a (status, output) pair, logging both."""
        self._begin_session(cmd, path)
        status, out = commands.getstatusoutput(cmd)
        self.log.log("status = %s" % status)
        self.log.log("out = %s" % out)
        self._end_session()
        return status, out
#
# Pre-commit checker
#
class PreCommitChecker(Env):
    """Runs CI jobs locally and prints a TEST / RESULT table to stdout."""

    def __init__(self, tests):
        Env.__init__(self, tests)

    def get_commands(self, test):
        """Fetch the commands for one job from the determinator script.

        Returns the (status, output) pair of the determinator invocation.
        """
        rc, text = self.GetOutput(
            "build_tools/rocksdb-lego-determinator %s" % test, ".")
        return rc, text

    def run_test(self, test):
        """Run a single CI job; return True only if every command succeeds."""
        self.log.caption("Running test %s locally" % test)

        # Ask the determinator which commands make up this job.
        status, cmds = self.get_commands(test)
        if status != 0:
            self.log.error("Error getting commands for test %s" % test)
            return False

        # Pull the shell commands out of the determinator output.
        cmds = re.findall("'shell':'([^\']*)'", cmds)
        if not cmds:
            self.log.log("No commands found")
            return False

        for cmd in cmds:
            # When J is set locally, substitute it for J=1 and pass it to make.
            jobs = os.environ.get("J")
            if jobs is not None:
                cmd = cmd.replace("J=1", "J=%s" % jobs)
                cmd = cmd.replace("make ", "make -j%s " % jobs)

            if self.shell(cmd, ".") != 0:
                self.log.error("Error running command %s for test %s"
                               % (cmd, test))
                return False

        return True

    def run_tests(self):
        """Run all requested jobs in order, stopping at the first failure."""
        if not self.tests:
            self.log.error("Invalid args. Please provide tests")
            return False

        self.print_separator()
        self.print_row("TEST", "RESULT")
        self.print_separator()

        for name in self.tests:
            began = time.time()
            self.print_test(name)
            passed = self.run_test(name)
            minutes = (time.time() - began) / 60
            if passed:
                self.print_result("PASS (%dm)" % minutes)
                continue
            self.log.error("Error running test %s" % name)
            self.print_result("FAIL (%dm)" % minutes)
            return False

        self.print_separator()
        return True

    def print_separator(self):
        """Print a horizontal rule 60 characters wide."""
        print("-" * 60)

    def print_row(self, c0, c1):
        """Print two columns, padded to 40 and 20 characters."""
        print(c0.ljust(40) + c1.ljust(20))

    def print_test(self, test):
        """Print the job name without a newline so the result can follow."""
        print(test.ljust(40), end="")
        sys.stdout.flush()

    def print_result(self, result):
        """Print the result column and end the row."""
        print(result.ljust(20))
#
# Main
#
# Command-line entry point: parse the job names, run them, and exit with
# status 0 when every job passes or 1 otherwise.
parser = argparse.ArgumentParser(description='RocksDB pre-commit checker.')
parser.add_argument(
    'test',
    nargs='+',
    help='CI test(s) to run. e.g: unit punit asan tsan')

# The log location is announced before the arguments are parsed.
print("Please follow log %s" % Log.LOG_FILE)

args = parser.parse_args()
checker = PreCommitChecker(args.test)

if checker.run_tests():
    sys.exit(0)

print("Error running tests. Please check log file %s" % Log.LOG_FILE)
sys.exit(1)

@ -0,0 +1,127 @@
#!/bin/bash
#
# Update dependencies.sh file with the latest available versions
#
# This script relies on bash-only features (indirect expansion ${!name},
# case modification ${name^^} and the `function` keyword), so it must be
# run by bash rather than a plain POSIX sh.

# Directory containing this script; the generated files are written next to it.
BASEDIR=$(dirname "$0")
# Path of the file currently being generated; set before each section.
OUTPUT=""
# log_variable NAME
# Appends a "NAME=<current value of the variable called NAME>" line to $OUTPUT.
log_variable() {
  printf '%s=%s\n' "$1" "${!1}" >> "$OUTPUT"
}
# Root of the third-party2 tree that library locations are resolved against.
TP2_LATEST="/mnt/vol/engshare/fbcode/third-party2"

# get_lib_base NAME [VERSION] [PLATFORM]
#   NAME      library directory name under $TP2_LATEST
#   VERSION   version directory; empty or "LATEST" picks the first entry of a
#             reverse version sort
#   PLATFORM  platform directory; empty picks the first gcc-* entry of a
#             reverse version sort
#
# Stores the resolved location in a global variable named <NAME>_BASE (NAME
# upper-cased, '-' replaced by '_') and records it through log_variable.
get_lib_base()
{
  local name=$1
  local version=$2
  local platform=$3
  local location="$TP2_LATEST/$name/"

  # Version directory
  case "$version" in
    ""|LATEST) location=$(ls -dr1v $location/*/ | head -n1) ;;
    *)         location="$location/$version/" ;;
  esac

  # Platform directory; the [^fb] class skips names ending in 'f' or 'b'.
  if [ -n "$platform" ]; then
    location="$location/$platform/"
  else
    location=$(ls -dr1v $location/gcc-*[^fb]/ | head -n1)
  fi

  # First entry below the platform directory.
  location=$(ls -1d $location/*/ | head -n1)

  # name => NAME_BASE
  local base_var="${name^^}_BASE"
  base_var=${base_var//-/_}

  # NAME_BASE=<canonical location>
  eval $base_var=$(readlink -f $location)
  log_variable $base_var
}
###########################################################
#                   4.9.x dependencies                    #
###########################################################

# Start from an empty output file.
OUTPUT="$BASEDIR/dependencies.sh"
rm -f "$OUTPUT"
touch "$OUTPUT"

echo "Writing dependencies to $OUTPUT"

# Compilers locations
GCC_BASE=$(ls -d1 $TP2_LATEST/gcc/4.9.x/centos6-native/*/ | head -n1)
CLANG_BASE=$(ls -d1 /mnt/gvfs/third-party2/clang/fc904e50a9266b9d7b98cae1993afa0c5aae1440/3.7.1/centos6-native/*/ | head -n1)

for compiler_var in GCC_BASE CLANG_BASE; do
  log_variable $compiler_var
done

# Libraries locations: pinned versions first, then the latest of the rest.
get_lib_base libgcc 4.9.x
get_lib_base glibc 2.20
for dep in snappy zlib bzip2 lz4 zstd gflags jemalloc numa libunwind kernel-headers; do
  get_lib_base $dep LATEST
done
get_lib_base binutils LATEST centos6-native
get_lib_base valgrind LATEST

# Show what changed in the generated file.
git diff $OUTPUT
###########################################################
#                   4.8.1 dependencies                    #
###########################################################

# Start from an empty output file.
OUTPUT="$BASEDIR/dependencies_4.8.1.sh"
rm -f "$OUTPUT"
touch "$OUTPUT"

echo "Writing 4.8.1 dependencies to $OUTPUT"

# Compilers locations
GCC_BASE=$(ls -d1 $TP2_LATEST/gcc/4.8.1/centos6-native/*/ | head -n1)
CLANG_BASE=$(ls -d1 /mnt/gvfs/third-party2/clang/fc904e50a9266b9d7b98cae1993afa0c5aae1440/3.7.1/centos6-native/*/ | head -n1)

for compiler_var in GCC_BASE CLANG_BASE; do
  log_variable $compiler_var
done

# Libraries locations: everything except binutils uses the 4.8.1 platform.
PLATFORM_4_8_1="gcc-4.8.1-glibc-2.17"
get_lib_base libgcc 4.8.1 $PLATFORM_4_8_1
get_lib_base glibc 2.17 $PLATFORM_4_8_1
for dep in snappy zlib bzip2 lz4 zstd gflags jemalloc numa libunwind kernel-headers; do
  get_lib_base $dep LATEST $PLATFORM_4_8_1
done
get_lib_base binutils LATEST centos6-native
get_lib_base valgrind 3.8.1 $PLATFORM_4_8_1

# Show what changed in the generated file.
git diff $OUTPUT

@ -1,9 +1,9 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
#include "util/auto_roll_logger.h"
#include "db/auto_roll_logger.h"
#include "util/mutexlock.h"
using namespace std;
@ -12,7 +12,9 @@ namespace rocksdb {
// -- AutoRollLogger
Status AutoRollLogger::ResetLogger() {
TEST_SYNC_POINT("AutoRollLogger::ResetLogger:BeforeNewLogger");
status_ = env_->NewLogger(log_fname_, &logger_);
TEST_SYNC_POINT("AutoRollLogger::ResetLogger:AfterNewLogger");
if (!status_.ok()) {
return status_;
@ -32,8 +34,16 @@ Status AutoRollLogger::ResetLogger() {
}
void AutoRollLogger::RollLogFile() {
std::string old_fname = OldInfoLogFileName(
dbname_, env_->NowMicros(), db_absolute_path_, db_log_dir_);
// This function is called when log is rotating. Two rotations
// can happen quickly (NowMicro returns same value). To not overwrite
// previous log file we increment by one micro second and try again.
uint64_t now = env_->NowMicros();
std::string old_fname;
do {
old_fname = OldInfoLogFileName(
dbname_, now, db_absolute_path_, db_log_dir_);
now++;
} while (env_->FileExists(old_fname).ok());
env_->RenameFile(log_fname_, old_fname);
}

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -13,6 +13,8 @@
#include "db/filename.h"
#include "port/port.h"
#include "port/util_logger.h"
#include "util/sync_point.h"
#include "util/mutexlock.h"
namespace rocksdb {
@ -53,11 +55,26 @@ class AutoRollLogger : public Logger {
return status_;
}
size_t GetLogFileSize() const override { return logger_->GetLogFileSize(); }
// Returns the log file size reported by the current underlying logger.
// The logger is copied into a local shared_ptr while holding mutex_, and the
// size query itself runs on that copy after the mutex has been released.
size_t GetLogFileSize() const override {
std::shared_ptr<Logger> logger;
{
MutexLock l(&mutex_);
// pin down the current logger_ instance before releasing the mutex.
logger = logger_;
}
// NOTE(review): unlike Flush(), the pinned logger is dereferenced without a
// null check -- confirm logger_ can never be empty here.
return logger->GetLogFileSize();
}
void Flush() override {
if (logger_) {
logger_->Flush();
std::shared_ptr<Logger> logger;
{
MutexLock l(&mutex_);
// pin down the current logger_ instance before releasing the mutex.
logger = logger_;
}
TEST_SYNC_POINT("AutoRollLogger::Flush:PinnedLogger");
if (logger) {
logger->Flush();
}
}
@ -101,7 +118,7 @@ class AutoRollLogger : public Logger {
uint64_t ctime_;
uint64_t cached_now_access_count;
uint64_t call_NowMicros_every_N_records_;
port::Mutex mutex_;
mutable port::Mutex mutex_;
};
// Facade to create logger automatically

@ -1,17 +1,20 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
#include <string>
#include <thread>
#include <vector>
#include <cmath>
#include <iostream>
#include <fstream>
#include <iterator>
#include <algorithm>
#include "db/auto_roll_logger.h"
#include "port/port.h"
#include "util/sync_point.h"
#include "util/testharness.h"
#include "util/auto_roll_logger.h"
#include "rocksdb/db.h"
#include <sys/stat.h>
#include <errno.h>
@ -260,7 +263,60 @@ TEST_F(AutoRollLoggerTest, CreateLoggerFromOptions) {
auto_roll_logger, options.log_file_time_to_roll,
kSampleMessage + ":CreateLoggerFromOptions - both");
}
#endif
// Exercises Flush() racing with a log roll. A separate thread calls Flush()
// on the auto-roll logger while the main thread triggers a size-based roll.
// The sync-point dependencies below order the two threads so that the flush
// thread pins the current logger first, the roll then replaces it, and the
// flush on the pinned (old) logger continues only after the new logger has
// been created.
TEST_F(AutoRollLoggerTest, LogFlushWhileRolling) {
DBOptions options;
shared_ptr<Logger> logger;
InitTestDb();
options.max_log_file_size = 1024 * 5;
ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger));
AutoRollLogger* auto_roll_logger =
dynamic_cast<AutoRollLogger*>(logger.get());
ASSERT_TRUE(auto_roll_logger);
std::thread flush_thread;
rocksdb::SyncPoint::GetInstance()->LoadDependency({
// Need to pin the old logger before beginning the roll, as rolling grabs
// the mutex, which would prevent us from accessing the old logger.
{"AutoRollLogger::Flush:PinnedLogger",
"AutoRollLoggerTest::LogFlushWhileRolling:PreRollAndPostThreadInit"},
// Need to finish the flush thread init before this callback because the
// callback accesses flush_thread.get_id() in order to apply certain sync
// points only to the flush thread.
{"AutoRollLoggerTest::LogFlushWhileRolling:PreRollAndPostThreadInit",
"AutoRollLoggerTest::LogFlushWhileRolling:FlushCallbackBegin"},
// Need to reset logger at this point in Flush() to exercise a race
// condition case, which is executing the flush with the pinned (old)
// logger after the roll has cut over to a new logger.
{"AutoRollLoggerTest::LogFlushWhileRolling:FlushCallback1",
"AutoRollLogger::ResetLogger:BeforeNewLogger"},
{"AutoRollLogger::ResetLogger:AfterNewLogger",
"AutoRollLoggerTest::LogFlushWhileRolling:FlushCallback2"},
});
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"PosixLogger::Flush:BeginCallback", [&](void* arg) {
TEST_SYNC_POINT(
"AutoRollLoggerTest::LogFlushWhileRolling:FlushCallbackBegin");
if (std::this_thread::get_id() == flush_thread.get_id()) {
TEST_SYNC_POINT(
"AutoRollLoggerTest::LogFlushWhileRolling:FlushCallback1");
TEST_SYNC_POINT(
"AutoRollLoggerTest::LogFlushWhileRolling:FlushCallback2");
}
});
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
flush_thread = std::thread([&]() { auto_roll_logger->Flush(); });
TEST_SYNC_POINT(
"AutoRollLoggerTest::LogFlushWhileRolling:PreRollAndPostThreadInit");
RollLogFileBySizeTest(auto_roll_logger, options.max_log_file_size,
kSampleMessage + ":LogFlushWhileRolling");
flush_thread.join();
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
}
#endif // OS_WIN
TEST_F(AutoRollLoggerTest, InfoLogLevel) {
InitTestDb();

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -63,7 +63,7 @@ Status BuildTable(
const CompressionType compression,
const CompressionOptions& compression_opts, bool paranoid_file_checks,
InternalStats* internal_stats, const Env::IOPriority io_priority,
TableProperties* table_properties) {
TableProperties* table_properties, int level) {
// Reports the IOStats for flush for every following bytes.
const size_t kReportFlushIOStatsEvery = 1048576;
Status s;
@ -149,7 +149,8 @@ Status BuildTable(
ReadOptions(), env_options, internal_comparator, meta->fd, nullptr,
(internal_stats == nullptr) ? nullptr
: internal_stats->GetFileReadHist(0),
false));
false /* for_compaction */, nullptr /* arena */,
false /* skip_filter */, level));
s = it->status();
if (s.ok() && paranoid_file_checks) {
for (it->SeekToFirst(); it->Valid(); it->Next()) {

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -61,6 +61,6 @@ extern Status BuildTable(
const CompressionOptions& compression_opts, bool paranoid_file_checks,
InternalStats* internal_stats,
const Env::IOPriority io_priority = Env::IO_HIGH,
TableProperties* table_properties = nullptr);
TableProperties* table_properties = nullptr, int level = -1);
} // namespace rocksdb

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -1288,6 +1288,11 @@ void rocksdb_block_based_options_set_cache_index_and_filter_blocks(
options->rep.cache_index_and_filter_blocks = v;
}
// C API setter: stores v into pin_l0_filter_and_index_blocks_in_cache on the
// block-based table options wrapped by `options`.
void rocksdb_block_based_options_set_pin_l0_filter_and_index_blocks_in_cache(
rocksdb_block_based_table_options_t* options, unsigned char v) {
options->rep.pin_l0_filter_and_index_blocks_in_cache = v;
}
void rocksdb_block_based_options_set_skip_table_builder_flush(
rocksdb_block_based_table_options_t* options, unsigned char v) {
options->rep.skip_table_builder_flush = v;

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -135,6 +135,10 @@ Status CheckConcurrentWritesSupported(const ColumnFamilyOptions& cf_options) {
"Delete filtering (filter_deletes) is not compatible with concurrent "
"memtable writes (allow_concurrent_memtable_writes)");
}
if (!cf_options.memtable_factory->IsInsertConcurrentlySupported()) {
return Status::InvalidArgument(
"Memtable doesn't concurrent writes (allow_concurrent_memtable_write)");
}
return Status::OK();
}
@ -143,13 +147,10 @@ ColumnFamilyOptions SanitizeOptions(const DBOptions& db_options,
const ColumnFamilyOptions& src) {
ColumnFamilyOptions result = src;
result.comparator = icmp;
#ifdef OS_MACOSX
// TODO(icanadi) make write_buffer_size uint64_t instead of size_t
ClipToRange(&result.write_buffer_size, ((size_t)64) << 10, ((size_t)1) << 30);
#else
ClipToRange(&result.write_buffer_size,
((size_t)64) << 10, ((size_t)64) << 30);
#endif
size_t clamp_max = std::conditional<
sizeof(size_t) == 4, std::integral_constant<size_t, 0xffffffff>,
std::integral_constant<size_t, 64ull << 30>>::type::value;
ClipToRange(&result.write_buffer_size, ((size_t)64) << 10, clamp_max);
// if user sets arena_block_size, we trust user to use this value. Otherwise,
// calculate a proper value from writer_buffer_size;
if (result.arena_block_size <= 0) {
@ -239,6 +240,17 @@ ColumnFamilyOptions SanitizeOptions(const DBOptions& db_options,
result.level0_slowdown_writes_trigger,
result.level0_file_num_compaction_trigger);
}
if (result.soft_pending_compaction_bytes_limit == 0) {
result.soft_pending_compaction_bytes_limit =
result.hard_pending_compaction_bytes_limit;
} else if (result.hard_pending_compaction_bytes_limit > 0 &&
result.soft_pending_compaction_bytes_limit >
result.hard_pending_compaction_bytes_limit) {
result.soft_pending_compaction_bytes_limit =
result.hard_pending_compaction_bytes_limit;
}
if (result.level_compaction_dynamic_level_bytes) {
if (result.compaction_style != kCompactionStyleLevel ||
db_options.db_paths.size() > 1U) {
@ -513,6 +525,21 @@ std::unique_ptr<WriteControllerToken> SetupDelay(
}
return write_controller->GetDelayToken(write_rate);
}
// Returns the number of level-0 files at which compaction should be sped up.
//
// The threshold is the smaller of:
//   * twice the L0 compaction trigger, and
//   * the point 1/4 of the way between the L0 compaction trigger and the L0
//     slowdown trigger.
//
// level0_file_num_compaction_trigger: L0 file count that triggers compaction.
// level0_slowdown_writes_trigger: L0 file count at which writes are slowed
//     down; must not be smaller than the compaction trigger.
//
// The doubled trigger is only computed when it is known to be the smaller of
// the two candidates, so that a large (but valid) compaction trigger cannot
// overflow `int` in `trigger * 2`.
int GetL0ThresholdSpeedupCompaction(int level0_file_num_compaction_trigger,
                                    int level0_slowdown_writes_trigger) {
  // SanitizeOptions() ensures it.
  assert(level0_file_num_compaction_trigger <= level0_slowdown_writes_trigger);

  // A quarter of the distance between the compaction trigger and the slowdown
  // trigger.
  const int quarter_gap = (level0_slowdown_writes_trigger -
                           level0_file_num_compaction_trigger) /
                          4;

  // trigger * 2 <= trigger + quarter_gap  <=>  trigger <= quarter_gap.
  // In that case the doubled value is bounded by the quarter-way point, which
  // itself never exceeds the slowdown trigger, so the multiplication is safe
  // for non-negative triggers.
  if (level0_file_num_compaction_trigger <= quarter_gap) {
    return level0_file_num_compaction_trigger * 2;
  }

  // Otherwise the quarter-way point is the smaller candidate.
  return level0_file_num_compaction_trigger + quarter_gap;
}
} // namespace
void ColumnFamilyData::RecalculateWriteStallConditions(
@ -531,21 +558,6 @@ void ColumnFamilyData::RecalculateWriteStallConditions(
"(waiting for flush), max_write_buffer_number is set to %d",
name_.c_str(), imm()->NumNotFlushed(),
mutable_cf_options.max_write_buffer_number);
} else if (mutable_cf_options.max_write_buffer_number > 3 &&
imm()->NumNotFlushed() >=
mutable_cf_options.max_write_buffer_number - 1) {
write_controller_token_ =
SetupDelay(ioptions_.delayed_write_rate, write_controller,
compaction_needed_bytes, prev_compaction_needed_bytes_,
mutable_cf_options.disable_auto_compactions);
internal_stats_->AddCFStats(InternalStats::MEMTABLE_SLOWDOWN, 1);
Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log,
"[%s] Stalling writes because we have %d immutable memtables "
"(waiting for flush), max_write_buffer_number is set to %d "
"rate %" PRIu64,
name_.c_str(), imm()->NumNotFlushed(),
mutable_cf_options.max_write_buffer_number,
write_controller->delayed_write_rate());
} else if (vstorage->l0_delay_trigger_count() >=
mutable_cf_options.level0_stop_writes_trigger) {
write_controller_token_ = write_controller->GetStopToken();
@ -567,6 +579,21 @@ void ColumnFamilyData::RecalculateWriteStallConditions(
"[%s] Stopping writes because of estimated pending compaction "
"bytes %" PRIu64,
name_.c_str(), compaction_needed_bytes);
} else if (mutable_cf_options.max_write_buffer_number > 3 &&
imm()->NumNotFlushed() >=
mutable_cf_options.max_write_buffer_number - 1) {
write_controller_token_ =
SetupDelay(ioptions_.delayed_write_rate, write_controller,
compaction_needed_bytes, prev_compaction_needed_bytes_,
mutable_cf_options.disable_auto_compactions);
internal_stats_->AddCFStats(InternalStats::MEMTABLE_SLOWDOWN, 1);
Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log,
"[%s] Stalling writes because we have %d immutable memtables "
"(waiting for flush), max_write_buffer_number is set to %d "
"rate %" PRIu64,
name_.c_str(), imm()->NumNotFlushed(),
mutable_cf_options.max_write_buffer_number,
write_controller->delayed_write_rate());
} else if (mutable_cf_options.level0_slowdown_writes_trigger >= 0 &&
vstorage->l0_delay_trigger_count() >=
mutable_cf_options.level0_slowdown_writes_trigger) {
@ -598,6 +625,29 @@ void ColumnFamilyData::RecalculateWriteStallConditions(
"bytes %" PRIu64 " rate %" PRIu64,
name_.c_str(), vstorage->estimated_compaction_needed_bytes(),
write_controller->delayed_write_rate());
} else if (vstorage->l0_delay_trigger_count() >=
GetL0ThresholdSpeedupCompaction(
mutable_cf_options.level0_file_num_compaction_trigger,
mutable_cf_options.level0_slowdown_writes_trigger)) {
write_controller_token_ = write_controller->GetCompactionPressureToken();
Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log,
"[%s] Increasing compaction threads because we have %d level-0 "
"files ",
name_.c_str(), vstorage->l0_delay_trigger_count());
} else if (vstorage->estimated_compaction_needed_bytes() >=
mutable_cf_options.soft_pending_compaction_bytes_limit / 4) {
// Increase compaction threads if bytes needed for compaction exceeds
// 1/4 of threshold for slowing down.
// If soft pending compaction byte limit is not set, always speed up
// compaction.
write_controller_token_ = write_controller->GetCompactionPressureToken();
if (mutable_cf_options.soft_pending_compaction_bytes_limit > 0) {
Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log,
"[%s] Increasing compaction threads because of estimated pending "
"compaction "
"bytes %" PRIu64,
name_.c_str(), vstorage->estimated_compaction_needed_bytes());
}
} else {
write_controller_token_.reset();
}

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -465,6 +465,8 @@ class ColumnFamilySet {
// Don't call while iterating over ColumnFamilySet
void FreeDeadColumnFamilies();
Cache* get_table_cache() { return table_cache_; }
private:
friend class ColumnFamilyData;
// helper function that gets called from cfd destructor

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -65,6 +65,7 @@ class ColumnFamilyTest : public testing::Test {
~ColumnFamilyTest() {
Close();
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
Destroy();
delete env_;
}
@ -1951,6 +1952,11 @@ TEST_F(ColumnFamilyTest, ReadDroppedColumnFamily) {
PutRandomData(1, kKeysNum, 100);
PutRandomData(2, kKeysNum, 100);
{
std::unique_ptr<Iterator> iterator(
db_->NewIterator(ReadOptions(), handles_[2]));
iterator->SeekToFirst();
if (iter == 0) {
// Drop CF two
ASSERT_OK(db_->DropColumnFamily(handles_[2]));
@ -1959,6 +1965,15 @@ TEST_F(ColumnFamilyTest, ReadDroppedColumnFamily) {
delete handles_[2];
handles_[2] = nullptr;
}
// Make sure iterator created can still be used.
int count = 0;
for (; iterator->Valid(); iterator->Next()) {
ASSERT_OK(iterator->status());
++count;
}
ASSERT_OK(iterator->status());
ASSERT_EQ(count, kKeysNum);
}
// Add bunch more data to other CFs
PutRandomData(0, kKeysNum, 100);
@ -1999,7 +2014,9 @@ TEST_F(ColumnFamilyTest, FlushAndDropRaceCondition) {
Reopen({options, options});
rocksdb::SyncPoint::GetInstance()->LoadDependency(
{{"VersionSet::LogAndApply::ColumnFamilyDrop:1",
{{"VersionSet::LogAndApply::ColumnFamilyDrop:0",
"FlushJob::WriteLevel0Table"},
{"VersionSet::LogAndApply::ColumnFamilyDrop:1",
"FlushJob::InstallResults"},
{"FlushJob::InstallResults",
"VersionSet::LogAndApply::ColumnFamilyDrop:2"}});
@ -2045,7 +2062,6 @@ TEST_F(ColumnFamilyTest, FlushAndDropRaceCondition) {
Close();
Destroy();
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
}
#ifndef ROCKSDB_LITE
@ -2123,7 +2139,6 @@ TEST_F(ColumnFamilyTest, CreateAndDropRace) {
drop_cf_thread.join();
Close();
Destroy();
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
for (auto* comparator : comparators) {
if (comparator) {
delete comparator;
@ -2135,6 +2150,9 @@ TEST_F(ColumnFamilyTest, CreateAndDropRace) {
TEST_F(ColumnFamilyTest, WriteStallSingleColumnFamily) {
const uint64_t kBaseRate = 810000u;
db_options_.delayed_write_rate = kBaseRate;
db_options_.base_background_compactions = 2;
db_options_.max_background_compactions = 6;
Open({"default"});
ColumnFamilyData* cfd =
static_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())->cfd();
@ -2160,6 +2178,7 @@ TEST_F(ColumnFamilyTest, WriteStallSingleColumnFamily) {
ASSERT_TRUE(!dbfull()->TEST_write_controler().IsStopped());
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
ASSERT_EQ(kBaseRate, dbfull()->TEST_write_controler().delayed_write_rate());
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
vstorage->TEST_set_estimated_compaction_needed_bytes(400);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
@ -2167,6 +2186,7 @@ TEST_F(ColumnFamilyTest, WriteStallSingleColumnFamily) {
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
ASSERT_EQ(kBaseRate / 1.2,
dbfull()->TEST_write_controler().delayed_write_rate());
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
vstorage->TEST_set_estimated_compaction_needed_bytes(500);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
@ -2222,6 +2242,7 @@ TEST_F(ColumnFamilyTest, WriteStallSingleColumnFamily) {
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_TRUE(dbfull()->TEST_write_controler().IsStopped());
ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
vstorage->TEST_set_estimated_compaction_needed_bytes(3001);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
@ -2246,6 +2267,7 @@ TEST_F(ColumnFamilyTest, WriteStallSingleColumnFamily) {
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
ASSERT_EQ(kBaseRate / 1.2,
dbfull()->TEST_write_controler().delayed_write_rate());
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
vstorage->set_l0_delay_trigger_count(101);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
@ -2318,6 +2340,73 @@ TEST_F(ColumnFamilyTest, WriteStallSingleColumnFamily) {
dbfull()->TEST_write_controler().delayed_write_rate());
}
// Checks, for a single column family, that the value reported by
// BGCompactionsAllowed() moves between base_background_compactions (2) and
// max_background_compactions (6) as RecalculateWriteStallConditions() is
// re-run with different estimated pending compaction bytes and L0 file counts.
TEST_F(ColumnFamilyTest, CompactionSpeedupSingleColumnFamily) {
db_options_.base_background_compactions = 2;
db_options_.max_background_compactions = 6;
Open({"default"});
ColumnFamilyData* cfd =
static_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())->cfd();
VersionStorageInfo* vstorage = cfd->current()->storage_info();
MutableCFOptions mutable_cf_options(
Options(db_options_, column_family_options_),
ImmutableCFOptions(Options(db_options_, column_family_options_)));
// Speed up threshold = min(4 * 2, 4 + (36 - 4)/4) = 8
mutable_cf_options.level0_file_num_compaction_trigger = 4;
mutable_cf_options.level0_slowdown_writes_trigger = 36;
mutable_cf_options.level0_stop_writes_trigger = 50;
// Speedup threshold = 200 / 4 = 50
mutable_cf_options.soft_pending_compaction_bytes_limit = 200;
mutable_cf_options.hard_pending_compaction_bytes_limit = 2000;
// Pending bytes below the threshold (40 < 50): base value expected.
vstorage->TEST_set_estimated_compaction_needed_bytes(40);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(2, dbfull()->BGCompactionsAllowed());
// Pending bytes exactly at the threshold (50): max value expected.
vstorage->TEST_set_estimated_compaction_needed_bytes(50);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
// 300 is above the soft limit (200) itself; max value is still expected.
vstorage->TEST_set_estimated_compaction_needed_bytes(300);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
// Dropping back below the threshold (45 < 50) returns to the base value.
vstorage->TEST_set_estimated_compaction_needed_bytes(45);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(2, dbfull()->BGCompactionsAllowed());
// L0 file count below the threshold (7 < 8): base value expected.
vstorage->set_l0_delay_trigger_count(7);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(2, dbfull()->BGCompactionsAllowed());
// L0 file count above the threshold (9 >= 8): max value expected.
vstorage->set_l0_delay_trigger_count(9);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
// Back below the threshold (6 < 8): base value expected.
vstorage->set_l0_delay_trigger_count(6);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(2, dbfull()->BGCompactionsAllowed());
// Speed up threshold = min(4 * 2, 4 + (16 - 4)/4) = 7
mutable_cf_options.level0_file_num_compaction_trigger = 4;
mutable_cf_options.level0_slowdown_writes_trigger = 16;
mutable_cf_options.level0_stop_writes_trigger = 30;
// Below the new threshold (5 < 7): base value expected.
vstorage->set_l0_delay_trigger_count(5);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(2, dbfull()->BGCompactionsAllowed());
// Exactly at the new threshold (7): max value expected.
vstorage->set_l0_delay_trigger_count(7);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
// Below the threshold again (3 < 7): base value expected.
vstorage->set_l0_delay_trigger_count(3);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(2, dbfull()->BGCompactionsAllowed());
}
TEST_F(ColumnFamilyTest, WriteStallTwoColumnFamilies) {
const uint64_t kBaseRate = 810000u;
db_options_.delayed_write_rate = kBaseRate;
@ -2399,6 +2488,104 @@ TEST_F(ColumnFamilyTest, WriteStallTwoColumnFamilies) {
ASSERT_EQ(kBaseRate / 1.2,
dbfull()->TEST_write_controler().delayed_write_rate());
}
// Checks, with two column families ("default" and "one"), that
// BGCompactionsAllowed() reports max_background_compactions (6) while at least
// one of them is at or above a speed-up threshold, and falls back to
// base_background_compactions (2) only once both are below it.
TEST_F(ColumnFamilyTest, CompactionSpeedupTwoColumnFamilies) {
db_options_.base_background_compactions = 2;
db_options_.max_background_compactions = 6;
column_family_options_.soft_pending_compaction_bytes_limit = 200;
column_family_options_.hard_pending_compaction_bytes_limit = 2000;
Open();
CreateColumnFamilies({"one"});
// cfd / vstorage refer to the default column family.
ColumnFamilyData* cfd =
static_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())->cfd();
VersionStorageInfo* vstorage = cfd->current()->storage_info();
// cfd1 / vstorage1 refer to column family "one" (handles_[1]).
ColumnFamilyData* cfd1 =
static_cast<ColumnFamilyHandleImpl*>(handles_[1])->cfd();
VersionStorageInfo* vstorage1 = cfd1->current()->storage_info();
MutableCFOptions mutable_cf_options(
Options(db_options_, column_family_options_),
ImmutableCFOptions(Options(db_options_, column_family_options_)));
// Speed up threshold = min(4 * 2, 4 + (36 - 4)/4) = 8
mutable_cf_options.level0_file_num_compaction_trigger = 4;
mutable_cf_options.level0_slowdown_writes_trigger = 36;
// NOTE(review): the stop trigger (30) is lower than the slowdown trigger (36)
// set just above - confirm this is intended.
mutable_cf_options.level0_stop_writes_trigger = 30;
// Speedup threshold = 200 / 4 = 50
mutable_cf_options.soft_pending_compaction_bytes_limit = 200;
mutable_cf_options.hard_pending_compaction_bytes_limit = 2000;
// NOTE(review): mutable_cf_options1 is built here but never passed to any
// call below; every RecalculateWriteStallConditions() call, including those on
// cfd1, uses mutable_cf_options - confirm whether cfd1 was meant to use it.
MutableCFOptions mutable_cf_options1 = mutable_cf_options;
mutable_cf_options1.level0_slowdown_writes_trigger = 16;
// Default CF below the bytes threshold (40 < 50): base value expected.
vstorage->TEST_set_estimated_compaction_needed_bytes(40);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(2, dbfull()->BGCompactionsAllowed());
// The default CF's bytes are raised to 60, but only cfd1 is recalculated
// first, so the base value is still expected ...
vstorage->TEST_set_estimated_compaction_needed_bytes(60);
cfd1->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(2, dbfull()->BGCompactionsAllowed());
// ... until the default CF itself is recalculated (60 >= 50).
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
// CF "one" below the threshold (30 < 50) while the default CF is still at 60:
// max value still expected.
vstorage1->TEST_set_estimated_compaction_needed_bytes(30);
cfd1->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
// Both CFs above the threshold (60 and 70).
vstorage1->TEST_set_estimated_compaction_needed_bytes(70);
cfd1->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
// Default CF drops to 20 while CF "one" is still at 70: max value expected.
vstorage->TEST_set_estimated_compaction_needed_bytes(20);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
// CF "one" drops to 3, so both are below the threshold: base value expected.
vstorage1->TEST_set_estimated_compaction_needed_bytes(3);
cfd1->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(2, dbfull()->BGCompactionsAllowed());
// Default CF's L0 file count above the threshold (9 >= 8): max value expected.
vstorage->set_l0_delay_trigger_count(9);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
// CF "one" has few L0 files (2) while the default CF is still at 9.
vstorage1->set_l0_delay_trigger_count(2);
cfd1->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
// Default CF's L0 file count drops to 0: base value expected again.
vstorage->set_l0_delay_trigger_count(0);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(2, dbfull()->BGCompactionsAllowed());
}
// Runs SyncWAL() on a background thread while the main thread writes to and
// flushes column family 1 twice, using sync points to interleave the two.
TEST_F(ColumnFamilyTest, LogSyncConflictFlush) {
Open();
CreateColumnFamiliesAndReopen({"one", "two"});
Put(0, "", "");
Put(1, "foo", "bar");
// NOTE(review): presumably each pair is {predecessor, successor}, i.e. the
// flushes below run after SyncWAL reaches BeforeMarkLogsSynced:1 and before it
// passes BeforeMarkLogsSynced:2 - confirm against SyncPoint::LoadDependency.
rocksdb::SyncPoint::GetInstance()->LoadDependency(
{{"DBImpl::SyncWAL:BeforeMarkLogsSynced:1",
"ColumnFamilyTest::LogSyncConflictFlush:1"},
{"ColumnFamilyTest::LogSyncConflictFlush:2",
"DBImpl::SyncWAL:BeforeMarkLogsSynced:2"}});
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
// SyncWAL() runs concurrently with the flushes below.
std::thread thread([&] { db_->SyncWAL(); });
TEST_SYNC_POINT("ColumnFamilyTest::LogSyncConflictFlush:1");
Flush(1);
Put(1, "foo", "bar");
Flush(1);
TEST_SYNC_POINT("ColumnFamilyTest::LogSyncConflictFlush:2");
// Wait for SyncWAL() to finish before turning sync points off and closing.
thread.join();
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
Close();
}
} // namespace rocksdb
int main(int argc, char** argv) {

@ -1,4 +1,4 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -7,6 +7,7 @@
#include <mutex>
#include <string>
#include <thread>
#include <vector>
#include "rocksdb/db.h"
@ -107,6 +108,7 @@ TEST_F(CompactFilesTest, L0ConflictsFiles) {
break;
}
}
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
delete db;
}
@ -141,9 +143,6 @@ TEST_F(CompactFilesTest, ObsoleteFiles) {
}
auto l0_files = collector->GetFlushedFiles();
CompactionOptions compact_opt;
compact_opt.compression = kNoCompression;
compact_opt.output_file_size_limit = kWriteBufferSize * 5;
ASSERT_OK(db->CompactFiles(CompactionOptions(), l0_files, 1));
// verify all compaction input files are deleted
@ -153,6 +152,62 @@ TEST_F(CompactFilesTest, ObsoleteFiles) {
delete db;
}
// Flushes an extra L0 file while a CompactFiles() call on five earlier L0
// files is in flight (interleaved via the CompactFilesImpl:2 / :3 sync
// points), then checks that the DB can be reopened afterwards.
//
// Setup failures are reported through gtest status macros rather than
// assert(), which is compiled out under NDEBUG and would let the test go on to
// dereference a null DB pointer.
TEST_F(CompactFilesTest, CapturingPendingFiles) {
  Options options;
  options.create_if_missing = true;
  // Disable RocksDB background compaction.
  options.compaction_style = kCompactionStyleNone;
  // Always do full scans for obsolete files (needed to reproduce the issue).
  options.delete_obsolete_files_period_micros = 0;

  // Add listener.
  FlushedFileCollector* collector = new FlushedFileCollector();
  options.listeners.emplace_back(collector);

  DB* db = nullptr;
  DestroyDB(db_name_, options);
  Status s = DB::Open(options, db_name_, &db);
  ASSERT_OK(s);
  ASSERT_TRUE(db != nullptr);

  // Create 5 files.
  for (int i = 0; i < 5; ++i) {
    ASSERT_OK(db->Put(WriteOptions(), "key" + ToString(i), "value"));
    ASSERT_OK(db->Flush(FlushOptions()));
  }

  auto l0_files = collector->GetFlushedFiles();
  EXPECT_EQ(5, l0_files.size());

  rocksdb::SyncPoint::GetInstance()->LoadDependency({
      {"CompactFilesImpl:2", "CompactFilesTest.CapturingPendingFiles:0"},
      {"CompactFilesTest.CapturingPendingFiles:1", "CompactFilesImpl:3"},
  });
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

  // Start compacting files.
  std::thread compaction_thread(
      [&] { EXPECT_OK(db->CompactFiles(CompactionOptions(), l0_files, 1)); });

  // In the meantime flush another file.
  // Non-fatal EXPECT_OK is used while the compaction thread is running so
  // that a failure cannot return before the thread has been joined.
  TEST_SYNC_POINT("CompactFilesTest.CapturingPendingFiles:0");
  EXPECT_OK(db->Put(WriteOptions(), "key5", "value"));
  EXPECT_OK(db->Flush(FlushOptions()));
  TEST_SYNC_POINT("CompactFilesTest.CapturingPendingFiles:1");

  compaction_thread.join();

  rocksdb::SyncPoint::GetInstance()->DisableProcessing();

  delete db;
  db = nullptr;

  // Make sure we can reopen the DB.
  s = DB::Open(options, db_name_, &db);
  ASSERT_OK(s);
  ASSERT_TRUE(db != nullptr);
  delete db;
}
} // namespace rocksdb
int main(int argc, char** argv) {

@ -1,4 +1,4 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -138,6 +138,8 @@ class Compaction {
// Clear all files to indicate that they are not being compacted
// Delete this compaction from the list of running compactions.
//
// Requirement: DB mutex held
void ReleaseCompactionFiles(Status status);
// Returns the summary of the compaction in "output" with maximum "len"

@ -1,6 +1,6 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,6 +1,6 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -51,6 +51,7 @@
#include "util/iostats_context_imp.h"
#include "util/log_buffer.h"
#include "util/logging.h"
#include "util/sst_file_manager_impl.h"
#include "util/mutexlock.h"
#include "util/perf_context_imp.h"
#include "util/stop_watch.h"
@ -211,6 +212,7 @@ CompactionJob::CompactionJob(
const EnvOptions& env_options, VersionSet* versions,
std::atomic<bool>* shutting_down, LogBuffer* log_buffer,
Directory* db_directory, Directory* output_directory, Statistics* stats,
InstrumentedMutex* db_mutex, Status* db_bg_error,
std::vector<SequenceNumber> existing_snapshots,
SequenceNumber earliest_write_conflict_snapshot,
std::shared_ptr<Cache> table_cache, EventLogger* event_logger,
@ -230,6 +232,8 @@ CompactionJob::CompactionJob(
db_directory_(db_directory),
output_directory_(output_directory),
stats_(stats),
db_mutex_(db_mutex),
db_bg_error_(db_bg_error),
existing_snapshots_(std::move(existing_snapshots)),
earliest_write_conflict_snapshot_(earliest_write_conflict_snapshot),
table_cache_(std::move(table_cache)),
@ -237,7 +241,9 @@ CompactionJob::CompactionJob(
paranoid_file_checks_(paranoid_file_checks),
measure_io_stats_(measure_io_stats) {
assert(log_buffer_ != nullptr);
ThreadStatusUtil::SetColumnFamily(compact_->compaction->column_family_data());
const auto* cfd = compact_->compaction->column_family_data();
ThreadStatusUtil::SetColumnFamily(cfd, cfd->ioptions()->env,
cfd->options()->enable_thread_tracking);
ThreadStatusUtil::SetThreadOperation(ThreadStatus::OP_COMPACTION);
ReportStartedCompaction(compaction);
}
@ -249,8 +255,9 @@ CompactionJob::~CompactionJob() {
void CompactionJob::ReportStartedCompaction(
Compaction* compaction) {
ThreadStatusUtil::SetColumnFamily(
compact_->compaction->column_family_data());
const auto* cfd = compact_->compaction->column_family_data();
ThreadStatusUtil::SetColumnFamily(cfd, cfd->ioptions()->env,
cfd->options()->enable_thread_tracking);
ThreadStatusUtil::SetThreadOperationProperty(
ThreadStatus::COMPACTION_JOB_ID,
@ -356,7 +363,7 @@ void CompactionJob::GenSubcompactionBoundaries() {
size_t num_files = flevel->num_files;
if (num_files == 0) {
break;
continue;
}
if (lvl == 0) {
@ -415,12 +422,9 @@ void CompactionJob::GenSubcompactionBoundaries() {
// Group the ranges into subcompactions
const double min_file_fill_percent = 4.0 / 5;
uint64_t max_output_files =
static_cast<uint64_t>(
std::ceil(
uint64_t max_output_files = static_cast<uint64_t>(std::ceil(
sum / min_file_fill_percent /
cfd->GetCurrentMutableCFOptions()->MaxFileSizeForLevel(out_lvl))
);
cfd->GetCurrentMutableCFOptions()->MaxFileSizeForLevel(out_lvl)));
uint64_t subcompactions =
std::min({static_cast<uint64_t>(ranges.size()),
static_cast<uint64_t>(db_options_.max_subcompactions),
@ -518,18 +522,17 @@ Status CompactionJob::Run() {
return status;
}
Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options,
InstrumentedMutex* db_mutex) {
Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) {
AutoThreadOperationStageUpdater stage_updater(
ThreadStatus::STAGE_COMPACTION_INSTALL);
db_mutex->AssertHeld();
db_mutex_->AssertHeld();
Status status = compact_->status;
ColumnFamilyData* cfd = compact_->compaction->column_family_data();
cfd->internal_stats()->AddCompactionStats(
compact_->compaction->output_level(), compaction_stats_);
if (status.ok()) {
status = InstallCompactionResults(mutable_cf_options, db_mutex);
status = InstallCompactionResults(mutable_cf_options);
}
VersionStorageInfo::LevelSummaryStorage tmp;
auto vstorage = cfd->current()->storage_info();
@ -855,13 +858,33 @@ Status CompactionJob::FinishCompactionOutputFile(
event_logger_, cfd->ioptions()->listeners, meta->fd, info);
}
}
// Report new file to SstFileManagerImpl
auto sfm =
static_cast<SstFileManagerImpl*>(db_options_.sst_file_manager.get());
if (sfm && meta->fd.GetPathId() == 0) {
ColumnFamilyData* cfd = sub_compact->compaction->column_family_data();
auto fn = TableFileName(cfd->ioptions()->db_paths, meta->fd.GetNumber(),
meta->fd.GetPathId());
sfm->OnAddFile(fn);
if (sfm->IsMaxAllowedSpaceReached()) {
InstrumentedMutexLock l(db_mutex_);
if (db_bg_error_->ok()) {
s = Status::IOError("Max allowed space was reached");
*db_bg_error_ = s;
TEST_SYNC_POINT(
"CompactionJob::FinishCompactionOutputFile:MaxAllowedSpaceReached");
}
}
}
sub_compact->builder.reset();
return s;
}
Status CompactionJob::InstallCompactionResults(
const MutableCFOptions& mutable_cf_options, InstrumentedMutex* db_mutex) {
db_mutex->AssertHeld();
const MutableCFOptions& mutable_cf_options) {
db_mutex_->AssertHeld();
auto* compaction = compact_->compaction;
// paranoia: verify that the files that we started with
@ -896,7 +919,7 @@ Status CompactionJob::InstallCompactionResults(
}
return versions_->LogAndApply(compaction->column_family_data(),
mutable_cf_options, compaction->edit(),
db_mutex, db_directory_);
db_mutex_, db_directory_);
}
void CompactionJob::RecordCompactionIOStats() {

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -56,7 +56,8 @@ class CompactionJob {
const EnvOptions& env_options, VersionSet* versions,
std::atomic<bool>* shutting_down, LogBuffer* log_buffer,
Directory* db_directory, Directory* output_directory,
Statistics* stats,
Statistics* stats, InstrumentedMutex* db_mutex,
Status* db_bg_error,
std::vector<SequenceNumber> existing_snapshots,
SequenceNumber earliest_write_conflict_snapshot,
std::shared_ptr<Cache> table_cache, EventLogger* event_logger,
@ -77,8 +78,7 @@ class CompactionJob {
Status Run();
// REQUIRED: mutex held
Status Install(const MutableCFOptions& mutable_cf_options,
InstrumentedMutex* db_mutex);
Status Install(const MutableCFOptions& mutable_cf_options);
private:
struct SubcompactionState;
@ -95,8 +95,7 @@ class CompactionJob {
Status FinishCompactionOutputFile(const Status& input_status,
SubcompactionState* sub_compact);
Status InstallCompactionResults(const MutableCFOptions& mutable_cf_options,
InstrumentedMutex* db_mutex);
Status InstallCompactionResults(const MutableCFOptions& mutable_cf_options);
void RecordCompactionIOStats();
Status OpenCompactionOutputFile(SubcompactionState* sub_compact);
void CleanupCompaction();
@ -130,6 +129,8 @@ class CompactionJob {
Directory* db_directory_;
Directory* output_directory_;
Statistics* stats_;
InstrumentedMutex* db_mutex_;
Status* db_bg_error_;
// If there were two snapshots with seq numbers s1 and
// s2 and s1 < s2, and if we find two instances of a key k1 then lies
// entirely within s1 and s2, then the earlier version of k1 can be safely

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -250,9 +250,9 @@ class CompactionJobTest : public testing::Test {
EventLogger event_logger(db_options_.info_log.get());
CompactionJob compaction_job(
0, &compaction, db_options_, env_options_, versions_.get(),
&shutting_down_, &log_buffer, nullptr, nullptr, nullptr, snapshots,
earliest_write_conflict_snapshot, table_cache_, &event_logger, false,
false, dbname_, &compaction_job_stats_);
&shutting_down_, &log_buffer, nullptr, nullptr, nullptr, &mutex_,
&bg_error_, snapshots, earliest_write_conflict_snapshot, table_cache_,
&event_logger, false, false, dbname_, &compaction_job_stats_);
VerifyInitializationOfCompactionJobStats(compaction_job_stats_);
@ -262,8 +262,7 @@ class CompactionJobTest : public testing::Test {
s = compaction_job.Run();
ASSERT_OK(s);
mutex_.Lock();
ASSERT_OK(compaction_job.Install(*cfd->GetLatestMutableCFOptions(),
&mutex_));
ASSERT_OK(compaction_job.Install(*cfd->GetLatestMutableCFOptions()));
mutex_.Unlock();
if (expected_results.size() == 0) {
@ -295,6 +294,7 @@ class CompactionJobTest : public testing::Test {
ColumnFamilyData* cfd_;
std::unique_ptr<CompactionFilter> compaction_filter_;
std::shared_ptr<MergeOperator> merge_op_;
Status bg_error_;
};
TEST_F(CompactionJobTest, Simple) {

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -141,7 +141,8 @@ CompactionPicker::~CompactionPicker() {}
// Delete this compaction from the list of running compactions.
void CompactionPicker::ReleaseCompactionFiles(Compaction* c, Status status) {
if (c->start_level() == 0) {
if (c->start_level() == 0 ||
ioptions_.compaction_style == kCompactionStyleUniversal) {
level0_compactions_in_progress_.erase(c);
}
if (!status.ok()) {
@ -612,6 +613,17 @@ Compaction* CompactionPicker::CompactRange(
if (input_level == 0) {
level0_compactions_in_progress_.insert(compaction);
}
// Creating a compaction influences the compaction score because the score
// takes running compactions into account (by skipping files that are already
// being compacted). Since we just changed compaction score, we recalculate it
// here
{ // this piece of code recomputes compaction score
CompactionOptionsFIFO dummy_compaction_options_fifo;
vstorage->ComputeCompactionScore(mutable_cf_options,
dummy_compaction_options_fifo);
}
return compaction;
}

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -83,6 +83,8 @@ class CompactionPicker {
#endif // ROCKSDB_LITE
// Free up the files that participated in a compaction
//
// Requirement: DB mutex held
void ReleaseCompactionFiles(Compaction* c, Status status);
// Returns true if any one of the specified files are being compacted

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -487,6 +487,87 @@ TEST_F(CompactionPickerTest, NeedsCompactionFIFO) {
}
#endif // ROCKSDB_LITE
// Verifies that, with compaction_pri set to kMinOverlappingRatio, the level
// compaction picker selects the level-2 file whose key range overlaps no file
// on level 3.
TEST_F(CompactionPickerTest, CompactionPriMinOverlapping1) {
NewVersionStorage(6, kCompactionStyleLevel);
mutable_cf_options_.target_file_size_base = 10000000;
mutable_cf_options_.target_file_size_multiplier = 10;
mutable_cf_options_.compaction_pri = kMinOverlappingRatio;
// Level-2 candidates; all four are given the same size.
Add(2, 6U, "150", "179", 50000000U);
Add(2, 7U, "180", "220", 50000000U);
Add(2, 8U, "321", "400", 50000000U); // File not overlapping
Add(2, 9U, "721", "800", 50000000U);
// Level-3 files; none of their key ranges intersects "321".."400".
Add(3, 26U, "150", "170", 260000000U);
Add(3, 27U, "171", "179", 260000000U);
Add(3, 28U, "191", "220", 260000000U);
Add(3, 29U, "221", "300", 260000000U);
Add(3, 30U, "750", "900", 260000000U);
UpdateVersionStorageInfo();
std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
ASSERT_TRUE(compaction.get() != nullptr);
// Exactly one file is expected in the first input level of the compaction.
ASSERT_EQ(1U, compaction->num_input_files(0));
// Pick file 8 because it overlaps with 0 files on level 3.
ASSERT_EQ(8U, compaction->input(0, 0)->fd.GetNumber());
}
// Verifies that, with compaction_pri set to kMinOverlappingRatio and all
// level-2 candidates the same size, the picker selects the level-2 file with
// the least overlapping data on level 3.
TEST_F(CompactionPickerTest, CompactionPriMinOverlapping2) {
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.target_file_size_base = 10000000;
  mutable_cf_options_.target_file_size_multiplier = 10;
  mutable_cf_options_.compaction_pri = kMinOverlappingRatio;

  // Overlaps with files 26 and 27, total size 521M.
  Add(2, 6U, "150", "175", 60000000U);
  // Overlaps with files 27 and 28, total size 520M: the smallest overlap.
  Add(2, 7U, "176", "200", 60000000U);
  // Overlaps with files 28 and 29, total size 521M.
  Add(2, 8U, "201", "300", 60000000U);

  // Each level-3 file carries its own file number. The first one overlaps
  // none of the level-2 files above.
  Add(3, 25U, "100", "110", 261000000U);
  Add(3, 26U, "150", "170", 261000000U);
  Add(3, 27U, "171", "179", 260000000U);
  Add(3, 28U, "191", "220", 260000000U);
  Add(3, 29U, "221", "300", 261000000U);
  Add(3, 30U, "321", "400", 261000000U);
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(1U, compaction->num_input_files(0));
  // Picking file 7 because its overlapping ratio (520M / 60M) is the smallest.
  ASSERT_EQ(7U, compaction->input(0, 0)->fd.GetNumber());
}
// Verifies that, with compaction_pri set to kMinOverlappingRatio, a larger
// level-2 file wins over a smaller one when both overlap the same level-3
// data, because its overlapping ratio is lower.
TEST_F(CompactionPickerTest, CompactionPriMinOverlapping3) {
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.target_file_size_base = 10000000;
  mutable_cf_options_.target_file_size_multiplier = 10;
  mutable_cf_options_.compaction_pri = kMinOverlappingRatio;

  // Files 7 and 8 overlap with the same level-3 file (27), but file 8 is
  // larger, so its overlapping ratio is smaller and it will be picked.
  Add(2, 6U, "150", "175", 60000000U);  // Overlaps with files 25 and 26
  Add(2, 7U, "176", "200", 60000000U);  // Overlaps with file 27
  Add(2, 8U, "201", "300", 61000000U);  // Overlaps with file 27

  // Each level-3 file carries its own file number.
  Add(3, 25U, "160", "165", 260000000U);
  Add(3, 26U, "166", "170", 260000000U);
  Add(3, 27U, "180", "400", 260000000U);
  Add(3, 28U, "401", "500", 260000000U);
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(1U, compaction->num_input_files(0));
  // Picking file 8 because its overlapping ratio (260M / 61M) is the smallest.
  ASSERT_EQ(8U, compaction->input(0, 0)->fd.GetNumber());
}
// This test exhibits the bug where we don't properly reset parent_index in
// PickCompaction()
TEST_F(CompactionPickerTest, ParentIndexResetBug) {

@ -1,6 +1,6 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -0,0 +1,240 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <cstdlib>
#include "db/db_test_util.h"
#include "port/stack_trace.h"
namespace rocksdb {
// Returns the current value of `ticker_type` as reported by the statistics
// object attached to `options`.
static uint64_t TestGetTickerCount(const Options& options,
                                   Tickers ticker_type) {
  const uint64_t ticker_value =
      options.statistics->getTickerCount(ticker_type);
  return ticker_value;
}
// Test fixture for block cache behavior. It keeps a baseline of the block
// cache and compressed block cache tickers so that tests can assert on the
// change in each ticker caused by a single operation.
class DBBlockCacheTest : public DBTestBase {
 private:
  // Baseline ticker values for the uncompressed block cache.
  size_t miss_count_ = 0;
  size_t hit_count_ = 0;
  size_t insert_count_ = 0;
  size_t failure_count_ = 0;
  // Baseline ticker values for the compressed block cache.
  size_t compressed_miss_count_ = 0;
  size_t compressed_hit_count_ = 0;
  size_t compressed_insert_count_ = 0;
  size_t compressed_failure_count_ = 0;

 public:
  // Number of keys written by InitTable().
  const size_t kNumBlocks = 10;
  // Length in bytes of every value written by InitTable().
  const size_t kValueSize = 100;

  DBBlockCacheTest() : DBTestBase("/db_block_cache_test") {}

  // Returns table options with a block size of 1.
  BlockBasedTableOptions GetTableOptions() {
    BlockBasedTableOptions table_options;
    // Set a small enough block size so that each key-value gets its own block.
    table_options.block_size = 1;
    return table_options;
  }

  // Returns DB options derived from CurrentOptions() with statistics enabled
  // and a block-based table factory built from `table_options`. Compression
  // is not overridden here; callers that need a specific type set it on the
  // returned Options.
  Options GetOptions(const BlockBasedTableOptions& table_options) {
    Options options = CurrentOptions();
    options.create_if_missing = true;
    options.statistics = rocksdb::CreateDBStatistics();
    options.table_factory.reset(new BlockBasedTableFactory(table_options));
    return options;
  }

  // Writes kNumBlocks keys ("0", "1", ...), each with a value of kValueSize
  // 'a' characters. The options argument is accepted for call-site symmetry
  // but is not used, so it is left unnamed to avoid unused-parameter warnings.
  void InitTable(const Options& /*options*/) {
    std::string value(kValueSize, 'a');
    for (size_t i = 0; i < kNumBlocks; i++) {
      ASSERT_OK(Put(ToString(i), value.c_str()));
    }
  }

  // Snapshots all eight cache tickers into the baseline members.
  void RecordCacheCounters(const Options& options) {
    miss_count_ = TestGetTickerCount(options, BLOCK_CACHE_MISS);
    hit_count_ = TestGetTickerCount(options, BLOCK_CACHE_HIT);
    insert_count_ = TestGetTickerCount(options, BLOCK_CACHE_ADD);
    failure_count_ = TestGetTickerCount(options, BLOCK_CACHE_ADD_FAILURES);
    compressed_miss_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS);
    compressed_hit_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT);
    compressed_insert_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD);
    compressed_failure_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD_FAILURES);
  }

  // Asserts that each uncompressed block cache ticker advanced by exactly the
  // expected amount since the last baseline, then moves the baseline to the
  // current values. A failing ASSERT_* returns from this helper before the
  // baseline is updated; it does not by itself abort the calling test body.
  void CheckCacheCounters(const Options& options, size_t expected_misses,
                          size_t expected_hits, size_t expected_inserts,
                          size_t expected_failures) {
    size_t new_miss_count = TestGetTickerCount(options, BLOCK_CACHE_MISS);
    size_t new_hit_count = TestGetTickerCount(options, BLOCK_CACHE_HIT);
    size_t new_insert_count = TestGetTickerCount(options, BLOCK_CACHE_ADD);
    size_t new_failure_count =
        TestGetTickerCount(options, BLOCK_CACHE_ADD_FAILURES);
    ASSERT_EQ(miss_count_ + expected_misses, new_miss_count);
    ASSERT_EQ(hit_count_ + expected_hits, new_hit_count);
    ASSERT_EQ(insert_count_ + expected_inserts, new_insert_count);
    ASSERT_EQ(failure_count_ + expected_failures, new_failure_count);
    miss_count_ = new_miss_count;
    hit_count_ = new_hit_count;
    insert_count_ = new_insert_count;
    failure_count_ = new_failure_count;
  }

  // Same contract as CheckCacheCounters(), applied to the compressed block
  // cache tickers and their baselines.
  void CheckCompressedCacheCounters(const Options& options,
                                    size_t expected_misses,
                                    size_t expected_hits,
                                    size_t expected_inserts,
                                    size_t expected_failures) {
    size_t new_miss_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS);
    size_t new_hit_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT);
    size_t new_insert_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD);
    size_t new_failure_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD_FAILURES);
    ASSERT_EQ(compressed_miss_count_ + expected_misses, new_miss_count);
    ASSERT_EQ(compressed_hit_count_ + expected_hits, new_hit_count);
    ASSERT_EQ(compressed_insert_count_ + expected_inserts, new_insert_count);
    ASSERT_EQ(compressed_failure_count_ + expected_failures, new_failure_count);
    compressed_miss_count_ = new_miss_count;
    compressed_hit_count_ = new_hit_count;
    compressed_insert_count_ = new_insert_count;
    compressed_failure_count_ = new_failure_count;
  }
};
// Exercises a DB that has only an uncompressed block cache: blocks are loaded
// through iterators that are kept alive, the cache capacity is then set to its
// current usage with the strict capacity limit enabled, and finally the
// iterators are released and the same keys are read again.
TEST_F(DBBlockCacheTest, TestWithoutCompressedBlockCache) {
  ReadOptions read_options;
  auto table_options = GetTableOptions();
  auto options = GetOptions(table_options);
  InitTable(options);

  std::shared_ptr<Cache> cache = NewLRUCache(0, 0, false);
  table_options.block_cache = cache;
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  Reopen(options);
  RecordCacheCounters(options);

  std::vector<std::unique_ptr<Iterator>> iterators(kNumBlocks - 1);

  // Load blocks into cache.
  for (size_t i = 0; i < kNumBlocks - 1; i++) {
    // Hand the iterator to its owner before asserting so that a failing
    // assertion cannot leak it.
    iterators[i].reset(db_->NewIterator(read_options));
    iterators[i]->Seek(ToString(i));
    ASSERT_OK(iterators[i]->status());
    // Each seek is expected to produce one miss and one insert.
    CheckCacheCounters(options, 1, 0, 1, 0);
  }
  size_t usage = cache->GetUsage();
  ASSERT_LT(0, usage);
  cache->SetCapacity(usage);
  ASSERT_EQ(usage, cache->GetPinnedUsage());

  // Test with strict capacity limit.
  cache->SetStrictCapacityLimit(true);
  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  iter->Seek(ToString(kNumBlocks - 1));
  ASSERT_TRUE(iter->status().IsIncomplete());
  // One miss and one failed insert are expected; nothing is added.
  CheckCacheCounters(options, 1, 0, 0, 1);
  iter.reset();

  // Release iterators and access cache again.
  for (size_t i = 0; i < kNumBlocks - 1; i++) {
    iterators[i].reset();
    CheckCacheCounters(options, 0, 0, 0, 0);
  }
  ASSERT_EQ(0, cache->GetPinnedUsage());
  for (size_t i = 0; i < kNumBlocks - 1; i++) {
    iterators[i].reset(db_->NewIterator(read_options));
    iterators[i]->Seek(ToString(i));
    ASSERT_OK(iterators[i]->status());
    // Every seek is now expected to be a cache hit.
    CheckCacheCounters(options, 0, 1, 0, 0);
  }
}
#ifdef SNAPPY
// Exercises a DB that has both an uncompressed and a compressed block cache
// with Snappy compression: blocks are loaded through iterators that are kept
// alive, then one more block is read with the uncompressed cache's strict
// capacity limit first enabled and then disabled.
TEST_F(DBBlockCacheTest, TestWithCompressedBlockCache) {
  ReadOptions read_options;
  auto table_options = GetTableOptions();
  auto options = GetOptions(table_options);
  options.compression = CompressionType::kSnappyCompression;
  InitTable(options);

  std::shared_ptr<Cache> cache = NewLRUCache(0, 0, false);
  std::shared_ptr<Cache> compressed_cache = NewLRUCache(0, 0, false);
  table_options.block_cache = cache;
  table_options.block_cache_compressed = compressed_cache;
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  Reopen(options);
  RecordCacheCounters(options);

  std::vector<std::unique_ptr<Iterator>> iterators(kNumBlocks - 1);

  // Load blocks into cache.
  for (size_t i = 0; i < kNumBlocks - 1; i++) {
    // Hand the iterator to its owner before asserting so that a failing
    // assertion cannot leak it.
    iterators[i].reset(db_->NewIterator(read_options));
    iterators[i]->Seek(ToString(i));
    ASSERT_OK(iterators[i]->status());
    // Each seek is expected to miss and insert in both caches.
    CheckCacheCounters(options, 1, 0, 1, 0);
    CheckCompressedCacheCounters(options, 1, 0, 1, 0);
  }
  size_t usage = cache->GetUsage();
  ASSERT_LT(0, usage);
  ASSERT_EQ(usage, cache->GetPinnedUsage());
  size_t compressed_usage = compressed_cache->GetUsage();
  ASSERT_LT(0, compressed_usage);
  // Compressed block cache cannot be pinned.
  ASSERT_EQ(0, compressed_cache->GetPinnedUsage());

  // Set strict capacity limit flag. Now block will only load into compressed
  // block cache.
  cache->SetCapacity(usage);
  cache->SetStrictCapacityLimit(true);
  ASSERT_EQ(usage, cache->GetPinnedUsage());
  // The compressed cache's capacity is set to 0 and its strict capacity limit
  // is left untouched.
  compressed_cache->SetCapacity(0);
  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  iter->Seek(ToString(kNumBlocks - 1));
  ASSERT_TRUE(iter->status().IsIncomplete());
  CheckCacheCounters(options, 1, 0, 0, 1);
  CheckCompressedCacheCounters(options, 1, 0, 1, 0);
  iter.reset();

  // Clear strict capacity limit flag. This time we shall hit compressed block
  // cache.
  cache->SetStrictCapacityLimit(false);
  iter.reset(db_->NewIterator(read_options));
  iter->Seek(ToString(kNumBlocks - 1));
  ASSERT_OK(iter->status());
  CheckCacheCounters(options, 1, 0, 1, 0);
  CheckCompressedCacheCounters(options, 0, 1, 0, 0);
  iter.reset();
}
#endif
} // namespace rocksdb
// Test binary entry point: installs the RocksDB stack trace handler,
// initializes GoogleTest with the command-line arguments, and returns the
// result of running all registered tests.
int main(int argc, char** argv) {
rocksdb::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -533,6 +533,104 @@ TEST_P(DBCompactionTestWithParam, CompactionTrigger) {
ASSERT_EQ(NumTableFilesAtLevel(1, 1), 1);
}
// Checks how many background compactions get scheduled when several column
// families need compaction at once. All LOW-priority pool threads are blocked
// by sleeping tasks, so scheduled compactions stay in the queue and the queue
// length can be compared against the expected number of compactions.
TEST_F(DBCompactionTest, BGCompactionsAllowed) {
  // Create several column families. Make compaction triggers in all of them
  // and see number of compactions scheduled to be less than allowed.
  const int kNumKeysPerFile = 100;

  Options options;
  options.write_buffer_size = 110 << 10;  // 110KB
  options.arena_block_size = 4 << 10;
  options.num_levels = 3;
  // Should speed up compaction when there are 4 files.
  options.level0_file_num_compaction_trigger = 2;
  options.level0_slowdown_writes_trigger = 20;
  options.soft_pending_compaction_bytes_limit = 1 << 30;  // Infinitely large
  options.base_background_compactions = 1;
  options.max_background_compactions = 3;
  options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile));
  options = CurrentOptions(options);

  // Block all threads in thread pool.
  const size_t kTotalTasks = 4;
  env_->SetBackgroundThreads(4, Env::LOW);
  test::SleepingBackgroundTask sleeping_tasks[kTotalTasks];
  for (size_t i = 0; i < kTotalTasks; i++) {
    env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
                   &sleeping_tasks[i], Env::Priority::LOW);
    sleeping_tasks[i].WaitUntilSleeping();
  }

  CreateAndReopenWithCF({"one", "two", "three"}, options);

  // Write enough level-0 files into every column family to reach the
  // compaction trigger.
  for (int cf = 0; cf < 4; cf++) {
    for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) {
      for (int i = 0; i < kNumKeysPerFile; i++) {
        ASSERT_OK(Put(cf, Key(i), ""));
      }
      // put extra key to trigger flush
      ASSERT_OK(Put(cf, "", ""));
      dbfull()->TEST_WaitForFlushMemTable(handles_[cf]);
      ASSERT_EQ(NumTableFilesAtLevel(0, cf), num + 1);
    }
  }

  // Now all column families qualify compaction but only one should be
  // scheduled, because no column family hits speed up condition.
  ASSERT_EQ(1, env_->GetThreadPoolQueueLen(Env::Priority::LOW));

  // Create two more files for one column family, which triggers speed up
  // condition, three compactions will be scheduled.
  for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) {
    for (int i = 0; i < kNumKeysPerFile; i++) {
      ASSERT_OK(Put(2, Key(i), ""));
    }
    // put extra key to trigger flush
    ASSERT_OK(Put(2, "", ""));
    dbfull()->TEST_WaitForFlushMemTable(handles_[2]);
    ASSERT_EQ(options.level0_file_num_compaction_trigger + num + 1,
              NumTableFilesAtLevel(0, 2));
  }
  ASSERT_EQ(3, env_->GetThreadPoolQueueLen(Env::Priority::LOW));

  // Unblock all threads to unblock all compactions.
  for (size_t i = 0; i < kTotalTasks; i++) {
    sleeping_tasks[i].WakeUp();
    sleeping_tasks[i].WaitUntilDone();
  }
  dbfull()->TEST_WaitForCompact();

  // Verify number of compactions allowed will come back to 1.
  for (size_t i = 0; i < kTotalTasks; i++) {
    sleeping_tasks[i].Reset();
    env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
                   &sleeping_tasks[i], Env::Priority::LOW);
    sleeping_tasks[i].WaitUntilSleeping();
  }
  for (int cf = 0; cf < 4; cf++) {
    for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) {
      for (int i = 0; i < kNumKeysPerFile; i++) {
        ASSERT_OK(Put(cf, Key(i), ""));
      }
      // put extra key to trigger flush
      ASSERT_OK(Put(cf, "", ""));
      dbfull()->TEST_WaitForFlushMemTable(handles_[cf]);
      ASSERT_EQ(NumTableFilesAtLevel(0, cf), num + 1);
    }
  }

  // Now all column families qualify compaction but only one should be
  // scheduled, because no column family hits speed up condition.
  ASSERT_EQ(1, env_->GetThreadPoolQueueLen(Env::Priority::LOW));

  // Wake the sleeping tasks so the blocked pool threads are released before
  // the test ends.
  for (size_t i = 0; i < kTotalTasks; i++) {
    sleeping_tasks[i].WakeUp();
    sleeping_tasks[i].WaitUntilDone();
  }
}
TEST_P(DBCompactionTestWithParam, CompactionsGenerateMultipleFiles) {
Options options;
options.write_buffer_size = 100000000; // Large write buffer
@ -1898,7 +1996,7 @@ TEST_P(DBCompactionTestWithParam, DISABLED_CompactFilesOnLevelCompaction) {
std::set<std::string> overlapping_file_names;
std::vector<std::string> compaction_input_file_names;
for (int f = 0; f < file_picked; ++f) {
int level;
int level = 0;
auto file_meta = PickFileRandomly(cf_meta, &rnd, &level);
compaction_input_file_names.push_back(file_meta->name);
GetOverlappingFileNumbersForLevelCompaction(
@ -2198,6 +2296,25 @@ TEST_P(DBCompactionTestWithParam, CompressLevelCompaction) {
Destroy(options);
}
// Checks the values reported by DB::GetOptions() for
// base_background_compactions and soft_pending_compaction_bytes_limit after
// the DB is opened with settings that are expected to be adjusted on open.
TEST_F(DBCompactionTest, SanitizeCompactionOptionsTest) {
Options options = CurrentOptions();
// Case 1: base_background_compactions is not set explicitly and the soft
// pending-bytes limit is 0.
options.max_background_compactions = 5;
options.soft_pending_compaction_bytes_limit = 0;
options.hard_pending_compaction_bytes_limit = 100;
options.create_if_missing = true;
DestroyAndReopen(options);
// Both values are expected to come back equal to their max/hard counterparts.
ASSERT_EQ(5, db_->GetOptions().base_background_compactions);
ASSERT_EQ(100, db_->GetOptions().soft_pending_compaction_bytes_limit);
// Case 2: the base/soft settings are larger than the max/hard settings.
options.base_background_compactions = 4;
options.max_background_compactions = 3;
options.soft_pending_compaction_bytes_limit = 200;
options.hard_pending_compaction_bytes_limit = 150;
DestroyAndReopen(options);
// Both values are expected to be clamped down to the max/hard settings.
ASSERT_EQ(3, db_->GetOptions().base_background_compactions);
ASSERT_EQ(150, db_->GetOptions().soft_pending_compaction_bytes_limit);
}
// This tests for a bug that could cause two level0 compactions running
// concurrently
// TODO(aekmekji): Make sure that the reason this fails when run with
@ -2390,8 +2507,12 @@ TEST_P(CompactionPriTest, Test) {
}
}
INSTANTIATE_TEST_CASE_P(CompactionPriTest, CompactionPriTest,
::testing::Values(0, 1, 2));
INSTANTIATE_TEST_CASE_P(
CompactionPriTest, CompactionPriTest,
::testing::Values(CompactionPri::kByCompensatedSize,
CompactionPri::kOldestLargestSeqFirst,
CompactionPri::kOldestSmallestSeqFirst,
CompactionPri::kMinOverlappingRatio));
#endif // !defined(ROCKSDB_LITE)
} // namespace rocksdb

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -15,6 +15,9 @@
#include <inttypes.h>
#include <stdint.h>
#ifdef OS_SOLARIS
#include <alloca.h>
#endif
#include <algorithm>
#include <climits>
@ -28,8 +31,10 @@
#include <utility>
#include <vector>
#include "db/auto_roll_logger.h"
#include "db/builder.h"
#include "db/compaction_job.h"
#include "db/db_info_dumper.h"
#include "db/db_iter.h"
#include "db/dbformat.h"
#include "db/event_helpers.h"
@ -51,6 +56,7 @@
#include "db/write_batch_internal.h"
#include "db/write_callback.h"
#include "db/writebuffer.h"
#include "db/xfunc_test_points.h"
#include "memtable/hash_linklist_rep.h"
#include "memtable/hash_skiplist_rep.h"
#include "port/likely.h"
@ -58,7 +64,6 @@
#include "rocksdb/cache.h"
#include "rocksdb/compaction_filter.h"
#include "rocksdb/db.h"
#include "rocksdb/delete_scheduler.h"
#include "rocksdb/env.h"
#include "rocksdb/merge_operator.h"
#include "rocksdb/sst_file_writer.h"
@ -72,19 +77,18 @@
#include "table/merger.h"
#include "table/table_builder.h"
#include "table/two_level_iterator.h"
#include "util/auto_roll_logger.h"
#include "util/autovector.h"
#include "util/build_version.h"
#include "util/coding.h"
#include "util/compression.h"
#include "util/crc32c.h"
#include "util/db_info_dumper.h"
#include "util/file_reader_writer.h"
#include "util/file_util.h"
#include "util/iostats_context_imp.h"
#include "util/log_buffer.h"
#include "util/logging.h"
#include "util/mutexlock.h"
#include "util/sst_file_manager_impl.h"
#include "util/options_helper.h"
#include "util/options_parser.h"
#include "util/perf_context_imp.h"
@ -142,6 +146,12 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) {
result.info_log = nullptr;
}
}
if (result.base_background_compactions == -1) {
result.base_background_compactions = result.max_background_compactions;
}
if (result.base_background_compactions > result.max_background_compactions) {
result.base_background_compactions = result.max_background_compactions;
}
result.env->IncBackgroundThreadsIfNeeded(src.max_background_compactions,
Env::Priority::LOW);
result.env->IncBackgroundThreadsIfNeeded(src.max_background_flushes,
@ -265,13 +275,14 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname)
db_options_.delete_obsolete_files_period_micros),
last_stats_dump_time_microsec_(0),
next_job_id_(1),
flush_on_destroy_(false),
has_unpersisted_data_(false),
env_options_(db_options_),
#ifndef ROCKSDB_LITE
wal_manager_(db_options_, env_options_),
#endif // ROCKSDB_LITE
event_logger_(db_options_.info_log.get()),
bg_work_paused_(0),
bg_compaction_paused_(0),
refitting_level_(false),
opened_successfully_(false) {
env_->GetAbsolutePath(dbname, &db_absolute_path_);
@ -312,7 +323,8 @@ void DBImpl::CancelAllBackgroundWork(bool wait) {
DBImpl::~DBImpl() {
mutex_.Lock();
if (!shutting_down_.load(std::memory_order_acquire) && flush_on_destroy_) {
if (!shutting_down_.load(std::memory_order_acquire) &&
has_unpersisted_data_) {
for (auto cfd : *versions_->GetColumnFamilySet()) {
if (!cfd->IsDropped() && !cfd->mem()->IsEmpty()) {
cfd->Ref();
@ -484,23 +496,22 @@ void DBImpl::MaybeDumpStats() {
last_stats_dump_time_microsec_ = now_micros;
#ifndef ROCKSDB_LITE
bool tmp1 = false;
bool tmp2 = false;
DBPropertyType cf_property_type =
GetPropertyType(DB::Properties::kCFStats, &tmp1, &tmp2);
DBPropertyType db_property_type =
GetPropertyType(DB::Properties::kDBStats, &tmp1, &tmp2);
const DBPropertyInfo* cf_property_info =
GetPropertyInfo(DB::Properties::kCFStats);
assert(cf_property_info != nullptr);
const DBPropertyInfo* db_property_info =
GetPropertyInfo(DB::Properties::kDBStats);
assert(db_property_info != nullptr);
std::string stats;
{
InstrumentedMutexLock l(&mutex_);
for (auto cfd : *versions_->GetColumnFamilySet()) {
cfd->internal_stats()->GetStringProperty(cf_property_type,
DB::Properties::kCFStats,
&stats);
cfd->internal_stats()->GetStringProperty(
*cf_property_info, DB::Properties::kCFStats, &stats);
}
default_cf_internal_stats_->GetStringProperty(db_property_type,
DB::Properties::kDBStats,
&stats);
default_cf_internal_stats_->GetStringProperty(
*db_property_info, DB::Properties::kDBStats, &stats);
}
Log(InfoLogLevel::WARN_LEVEL,
db_options_.info_log, "------- DUMPING STATS -------");
@ -561,6 +572,7 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
// Get obsolete files. This function will also update the list of
// pending files in VersionSet().
versions_->GetObsoleteFiles(&job_context->sst_delete_files,
&job_context->manifest_delete_files,
job_context->min_pending_output);
// store the current filenum, lognum, etc
@ -678,9 +690,9 @@ void DBImpl::PurgeObsoleteFiles(const JobContext& state) {
}
auto candidate_files = state.full_scan_candidate_files;
candidate_files.reserve(candidate_files.size() +
state.sst_delete_files.size() +
state.log_delete_files.size());
candidate_files.reserve(
candidate_files.size() + state.sst_delete_files.size() +
state.log_delete_files.size() + state.manifest_delete_files.size());
// We may ignore the dbname when generating the file names.
const char* kDumbDbName = "";
for (auto file : state.sst_delete_files) {
@ -696,6 +708,9 @@ void DBImpl::PurgeObsoleteFiles(const JobContext& state) {
0);
}
}
for (const auto& filename : state.manifest_delete_files) {
candidate_files.emplace_back(filename, 0);
}
// dedup state.candidate_files so we don't try to delete the same
// file twice
@ -782,8 +797,8 @@ void DBImpl::PurgeObsoleteFiles(const JobContext& state) {
}
#endif // !ROCKSDB_LITE
Status file_deletion_status;
if (type == kTableFile && path_id == 0) {
file_deletion_status = DeleteOrMoveToTrash(&db_options_, fname);
if (type == kTableFile) {
file_deletion_status = DeleteSSTFile(&db_options_, fname, path_id);
} else {
file_deletion_status = env_->DeleteFile(fname);
}
@ -814,7 +829,8 @@ void DBImpl::PurgeObsoleteFiles(const JobContext& state) {
// Delete old info log files.
size_t old_info_log_file_count = old_info_log_files.size();
if (old_info_log_file_count >= db_options_.keep_log_file_num) {
if (old_info_log_file_count != 0 &&
old_info_log_file_count >= db_options_.keep_log_file_num) {
std::sort(old_info_log_files.begin(), old_info_log_files.end());
size_t end = old_info_log_file_count - db_options_.keep_log_file_num;
for (unsigned int i = 0; i <= end; i++) {
@ -1393,9 +1409,9 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
mutex_.AssertHeld();
const uint64_t start_micros = env_->NowMicros();
FileMetaData meta;
meta.fd = FileDescriptor(versions_->NewFileNumber(), 0, 0);
auto pending_outputs_inserted_elem =
CaptureCurrentFileNumberInPendingOutputs();
meta.fd = FileDescriptor(versions_->NewFileNumber(), 0, 0);
ReadOptions ro;
ro.total_order_seek = true;
Arena arena;
@ -1517,13 +1533,26 @@ Status DBImpl::FlushMemTableToOutputFile(
bg_error_ = s;
}
RecordFlushIOStats();
#ifndef ROCKSDB_LITE
if (s.ok()) {
#ifndef ROCKSDB_LITE
// may temporarily unlock and lock the mutex.
NotifyOnFlushCompleted(cfd, &file_meta, mutable_cf_options,
job_context->job_id, flush_job.GetTableProperties());
}
#endif // ROCKSDB_LITE
auto sfm =
static_cast<SstFileManagerImpl*>(db_options_.sst_file_manager.get());
if (sfm) {
// Notify sst_file_manager that a new file was added
std::string file_path = MakeTableFileName(db_options_.db_paths[0].path,
file_meta.fd.GetNumber());
sfm->OnAddFile(file_path);
if (sfm->IsMaxAllowedSpaceReached() && bg_error_.ok()) {
bg_error_ = Status::IOError("Max allowed space was reached");
TEST_SYNC_POINT(
"DBImpl::FlushMemTableToOutputFile:MaxAllowedSpaceReached");
}
}
}
return s;
}
@ -1813,13 +1842,16 @@ Status DBImpl::CompactFilesImpl(
std::vector<SequenceNumber> snapshot_seqs =
snapshots_.GetAll(&earliest_write_conflict_snapshot);
auto pending_outputs_inserted_elem =
CaptureCurrentFileNumberInPendingOutputs();
assert(is_snapshot_supported_ || snapshots_.empty());
CompactionJob compaction_job(
job_context->job_id, c.get(), db_options_, env_options_, versions_.get(),
&shutting_down_, log_buffer, directories_.GetDbDir(),
directories_.GetDataDir(c->output_path_id()), stats_, snapshot_seqs,
earliest_write_conflict_snapshot, table_cache_, &event_logger_,
c->mutable_cf_options()->paranoid_file_checks,
directories_.GetDataDir(c->output_path_id()), stats_, &mutex_, &bg_error_,
snapshot_seqs, earliest_write_conflict_snapshot, table_cache_,
&event_logger_, c->mutable_cf_options()->paranoid_file_checks,
c->mutable_cf_options()->compaction_measure_io_stats, dbname_,
nullptr); // Here we pass a nullptr for CompactionJobStats because
// CompactFiles does not trigger OnCompactionCompleted(),
@ -1834,21 +1866,35 @@ Status DBImpl::CompactFilesImpl(
// support for CompactFiles, we should have CompactFiles API
// pass a pointer of CompactionJobStats as the out-value
// instead of using EventListener.
// Creating a compaction influences the compaction score because the score
// takes running compactions into account (by skipping files that are already
// being compacted). Since we just changed compaction score, we recalculate it
// here.
{
CompactionOptionsFIFO dummy_compaction_options_fifo;
version->storage_info()->ComputeCompactionScore(
*c->mutable_cf_options(), dummy_compaction_options_fifo);
}
compaction_job.Prepare();
mutex_.Unlock();
TEST_SYNC_POINT("CompactFilesImpl:0");
TEST_SYNC_POINT("CompactFilesImpl:1");
compaction_job.Run();
TEST_SYNC_POINT("CompactFilesImpl:2");
TEST_SYNC_POINT("CompactFilesImpl:3");
mutex_.Lock();
Status status = compaction_job.Install(*c->mutable_cf_options(), &mutex_);
Status status = compaction_job.Install(*c->mutable_cf_options());
if (status.ok()) {
InstallSuperVersionAndScheduleWorkWrapper(
c->column_family_data(), job_context, *c->mutable_cf_options());
}
c->ReleaseCompactionFiles(s);
c.reset();
ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem);
if (status.ok()) {
// Done
@ -1864,6 +1910,8 @@ Status DBImpl::CompactFilesImpl(
}
}
c.reset();
bg_compaction_scheduled_--;
if (bg_compaction_scheduled_ == 0) {
bg_cv_.SignalAll();
@ -1875,10 +1923,11 @@ Status DBImpl::CompactFilesImpl(
Status DBImpl::PauseBackgroundWork() {
InstrumentedMutexLock guard_lock(&mutex_);
bg_work_paused_++;
bg_compaction_paused_++;
while (bg_compaction_scheduled_ > 0 || bg_flush_scheduled_ > 0) {
bg_cv_.Wait();
}
bg_work_paused_++;
return Status::OK();
}
@ -1888,7 +1937,11 @@ Status DBImpl::ContinueBackgroundWork() {
return Status::InvalidArgument();
}
assert(bg_work_paused_ > 0);
assert(bg_compaction_paused_ > 0);
bg_compaction_paused_--;
bg_work_paused_--;
// It's sufficient to check just bg_work_paused_ here since
// bg_work_paused_ is always no greater than bg_compaction_paused_
if (bg_work_paused_ == 0) {
MaybeScheduleFlushOrCompaction();
}
@ -2188,6 +2241,9 @@ Status DBImpl::SyncWAL() {
status = directories_.GetWalDir()->Fsync();
}
TEST_SYNC_POINT("DBImpl::SyncWAL:BeforeMarkLogsSynced:1");
TEST_SYNC_POINT("DBImpl::SyncWAL:BeforeMarkLogsSynced:2");
{
InstrumentedMutexLock l(&mutex_);
MarkLogsSynced(current_log_number, need_log_dir_sync, status);
@ -2215,7 +2271,8 @@ void DBImpl::MarkLogsSynced(
++it;
}
}
assert(logs_.empty() || (logs_.size() == 1 && !logs_[0].getting_synced));
assert(logs_.empty() || logs_[0].number > up_to ||
(logs_.size() == 1 && !logs_[0].getting_synced));
log_sync_cv_.SignalAll();
}
@ -2453,25 +2510,32 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
env_->Schedule(&DBImpl::BGWorkFlush, this, Env::Priority::HIGH, this);
}
auto bg_compactions_allowed = BGCompactionsAllowed();
// special case -- if max_background_flushes == 0, then schedule flush on a
// compaction thread
if (db_options_.max_background_flushes == 0) {
while (unscheduled_flushes_ > 0 &&
bg_flush_scheduled_ + bg_compaction_scheduled_ <
db_options_.max_background_compactions) {
bg_compactions_allowed) {
unscheduled_flushes_--;
bg_flush_scheduled_++;
env_->Schedule(&DBImpl::BGWorkFlush, this, Env::Priority::LOW, this);
}
}
if (bg_compaction_paused_ > 0) {
// we paused the background compaction
return;
}
if (HasExclusiveManualCompaction()) {
// only manual compactions are allowed to run. don't schedule automatic
// compactions
return;
}
while (bg_compaction_scheduled_ < db_options_.max_background_compactions &&
while (bg_compaction_scheduled_ < bg_compactions_allowed &&
unscheduled_compactions_ > 0) {
CompactionArg* ca = new CompactionArg;
ca->db = this;
@ -2483,6 +2547,14 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
}
}
// Returns the number of background compactions that may be scheduled right
// now: max_background_compactions when the write controller reports that
// compaction needs to be sped up, base_background_compactions otherwise.
int DBImpl::BGCompactionsAllowed() const {
  const bool speedup_needed = write_controller_.NeedSpeedupCompaction();
  return speedup_needed ? db_options_.max_background_compactions
                        : db_options_.base_background_compactions;
}
void DBImpl::AddToCompactionQueue(ColumnFamilyData* cfd) {
assert(!cfd->pending_compaction());
cfd->Ref();
@ -2595,10 +2667,10 @@ Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context,
LogToBuffer(
log_buffer,
"Calling FlushMemTableToOutputFile with column "
"family [%s], flush slots available %d, compaction slots available %d",
cfd->GetName().c_str(),
db_options_.max_background_flushes - bg_flush_scheduled_,
db_options_.max_background_compactions - bg_compaction_scheduled_);
"family [%s], flush slots available %d, compaction slots allowed %d, "
"compaction slots scheduled %d",
cfd->GetName().c_str(), db_options_.max_background_flushes,
bg_flush_scheduled_, BGCompactionsAllowed() - bg_compaction_scheduled_);
status = FlushMemTableToOutputFile(cfd, mutable_cf_options, made_progress,
job_context, log_buffer);
if (cfd->Unref()) {
@ -2911,7 +2983,9 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
TEST_SYNC_POINT("DBImpl::BackgroundCompaction:TrivialMove");
// Instrument for event update
// TODO(yhchiang): add op details for showing trivial-move.
ThreadStatusUtil::SetColumnFamily(c->column_family_data());
ThreadStatusUtil::SetColumnFamily(
c->column_family_data(), c->column_family_data()->ioptions()->env,
c->column_family_data()->options()->enable_thread_tracking);
ThreadStatusUtil::SetThreadOperation(ThreadStatus::OP_COMPACTION);
compaction_job_stats.num_input_files = c->num_input_files(0);
@ -2980,8 +3054,9 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
CompactionJob compaction_job(
job_context->job_id, c.get(), db_options_, env_options_,
versions_.get(), &shutting_down_, log_buffer, directories_.GetDbDir(),
directories_.GetDataDir(c->output_path_id()), stats_, snapshot_seqs,
earliest_write_conflict_snapshot, table_cache_, &event_logger_,
directories_.GetDataDir(c->output_path_id()), stats_, &mutex_,
&bg_error_, snapshot_seqs, earliest_write_conflict_snapshot,
table_cache_, &event_logger_,
c->mutable_cf_options()->paranoid_file_checks,
c->mutable_cf_options()->compaction_measure_io_stats, dbname_,
&compaction_job_stats);
@ -2992,7 +3067,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
TEST_SYNC_POINT("DBImpl::BackgroundCompaction:NonTrivial:AfterRun");
mutex_.Lock();
status = compaction_job.Install(*c->mutable_cf_options(), &mutex_);
status = compaction_job.Install(*c->mutable_cf_options());
if (status.ok()) {
InstallSuperVersionAndScheduleWorkWrapper(
c->column_family_data(), job_context, *c->mutable_cf_options());
@ -3294,13 +3369,19 @@ Status DBImpl::GetImpl(const ReadOptions& read_options,
LookupKey lkey(key, snapshot);
PERF_TIMER_STOP(get_snapshot_time);
bool skip_memtable =
(read_options.read_tier == kPersistedTier && has_unpersisted_data_);
bool done = false;
if (!skip_memtable) {
if (sv->mem->Get(lkey, value, &s, &merge_context)) {
// Done
done = true;
RecordTick(stats_, MEMTABLE_HIT);
} else if (sv->imm->Get(lkey, value, &s, &merge_context)) {
// Done
done = true;
RecordTick(stats_, MEMTABLE_HIT);
} else {
}
}
if (!done) {
PERF_TIMER_GUARD(get_from_output_files_time);
sv->current->Get(read_options, lkey, value, &s, &merge_context,
value_found);
@ -3314,6 +3395,7 @@ Status DBImpl::GetImpl(const ReadOptions& read_options,
RecordTick(stats_, NUMBER_KEYS_READ);
RecordTick(stats_, BYTES_READ, value->size());
MeasureTime(stats_, BYTES_PER_READ, value->size());
}
return s;
}
@ -3384,14 +3466,23 @@ std::vector<Status> DBImpl::MultiGet(
assert(mgd_iter != multiget_cf_data.end());
auto mgd = mgd_iter->second;
auto super_version = mgd->super_version;
bool skip_memtable =
(read_options.read_tier == kPersistedTier && has_unpersisted_data_);
bool done = false;
if (!skip_memtable) {
if (super_version->mem->Get(lkey, value, &s, &merge_context)) {
// Done
done = true;
// TODO(?): RecordTick(stats_, MEMTABLE_HIT)?
} else if (super_version->imm->Get(lkey, value, &s, &merge_context)) {
// Done
} else {
done = true;
// TODO(?): RecordTick(stats_, MEMTABLE_HIT)?
}
}
if (!done) {
PERF_TIMER_GUARD(get_from_output_files_time);
super_version->current->Get(read_options, lkey, value, &s,
&merge_context);
// TODO(?): RecordTick(stats_, MEMTABLE_MISS)?
}
if (s.ok()) {
@ -3424,6 +3515,7 @@ std::vector<Status> DBImpl::MultiGet(
RecordTick(stats_, NUMBER_MULTIGET_CALLS);
RecordTick(stats_, NUMBER_MULTIGET_KEYS_READ, num_keys);
RecordTick(stats_, NUMBER_MULTIGET_BYTES_READ, bytes_read);
MeasureTime(stats_, BYTES_PER_MULTIGET, bytes_read);
PERF_TIMER_STOP(get_post_process_time);
return stat_list;
@ -3516,6 +3608,9 @@ Status DBImpl::AddFile(ColumnFamilyHandle* column_family,
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
ColumnFamilyData* cfd = cfh->cfd();
if (file_info->num_entries == 0) {
return Status::InvalidArgument("File contain no entries");
}
if (file_info->version != 1) {
return Status::InvalidArgument("Generated table version is not supported");
}
@ -3536,8 +3631,16 @@ Status DBImpl::AddFile(ColumnFamilyHandle* column_family,
return Status::InvalidArgument(
"Non zero sequence numbers are not supported");
}
// Generate a location for the new table
meta.fd = FileDescriptor(versions_->NewFileNumber(), 0, file_info->file_size);
std::list<uint64_t>::iterator pending_outputs_inserted_elem;
{
InstrumentedMutexLock l(&mutex_);
pending_outputs_inserted_elem = CaptureCurrentFileNumberInPendingOutputs();
meta.fd =
FileDescriptor(versions_->NewFileNumber(), 0, file_info->file_size);
}
std::string db_fname = TableFileName(
db_options_.db_paths, meta.fd.GetNumber(), meta.fd.GetPathId());
@ -3550,6 +3653,7 @@ Status DBImpl::AddFile(ColumnFamilyHandle* column_family,
} else {
status = CopyFile(env_, file_info->file_path, db_fname, 0);
}
TEST_SYNC_POINT("DBImpl::AddFile:FileCopied");
if (!status.ok()) {
return status;
}
@ -3613,6 +3717,7 @@ Status DBImpl::AddFile(ColumnFamilyHandle* column_family,
delete InstallSuperVersionAndScheduleWork(cfd, nullptr,
mutable_cf_options);
}
ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem);
}
if (!status.ok()) {
@ -3826,6 +3931,10 @@ bool DBImpl::KeyMayExist(const ReadOptions& read_options,
Iterator* DBImpl::NewIterator(const ReadOptions& read_options,
ColumnFamilyHandle* column_family) {
if (read_options.read_tier == kPersistedTier) {
return NewErrorIterator(Status::NotSupported(
"ReadTier::kPersistedData is not yet supported in iterators."));
}
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
auto cfd = cfh->cfd();
@ -3857,8 +3966,8 @@ Iterator* DBImpl::NewIterator(const ReadOptions& read_options,
env_, *cfd->ioptions(), cfd->user_comparator(), iter,
kMaxSequenceNumber,
sv->mutable_cf_options.max_sequential_skip_in_iterations,
read_options.iterate_upper_bound, read_options.prefix_same_as_start,
read_options.pin_data);
sv->version_number, read_options.iterate_upper_bound,
read_options.prefix_same_as_start, read_options.pin_data);
#endif
} else {
SequenceNumber latest_snapshot = versions_->LastSequence();
@ -3915,8 +4024,8 @@ Iterator* DBImpl::NewIterator(const ReadOptions& read_options,
ArenaWrappedDBIter* db_iter = NewArenaWrappedDbIterator(
env_, *cfd->ioptions(), cfd->user_comparator(), snapshot,
sv->mutable_cf_options.max_sequential_skip_in_iterations,
read_options.iterate_upper_bound, read_options.prefix_same_as_start,
read_options.pin_data);
sv->version_number, read_options.iterate_upper_bound,
read_options.prefix_same_as_start, read_options.pin_data);
InternalIterator* internal_iter =
NewInternalIterator(read_options, cfd, sv, db_iter->GetArena());
@ -3932,6 +4041,10 @@ Status DBImpl::NewIterators(
const ReadOptions& read_options,
const std::vector<ColumnFamilyHandle*>& column_families,
std::vector<Iterator*>* iterators) {
if (read_options.read_tier == kPersistedTier) {
return Status::NotSupported(
"ReadTier::kPersistedData is not yet supported in iterators.");
}
iterators->clear();
iterators->reserve(column_families.size());
XFUNC_TEST("", "managed_new", managed_new1, xf_manage_new,
@ -3965,8 +4078,8 @@ Status DBImpl::NewIterators(
iterators->push_back(NewDBIterator(
env_, *cfd->ioptions(), cfd->user_comparator(), iter,
kMaxSequenceNumber,
sv->mutable_cf_options.max_sequential_skip_in_iterations, nullptr,
false, read_options.pin_data));
sv->mutable_cf_options.max_sequential_skip_in_iterations,
sv->version_number, nullptr, false, read_options.pin_data));
}
#endif
} else {
@ -3985,8 +4098,8 @@ Status DBImpl::NewIterators(
ArenaWrappedDBIter* db_iter = NewArenaWrappedDbIterator(
env_, *cfd->ioptions(), cfd->user_comparator(), snapshot,
sv->mutable_cf_options.max_sequential_skip_in_iterations, nullptr,
false, read_options.pin_data);
sv->mutable_cf_options.max_sequential_skip_in_iterations,
sv->version_number, nullptr, false, read_options.pin_data);
InternalIterator* internal_iter =
NewInternalIterator(read_options, cfd, sv, db_iter->GetArena());
db_iter->SetIterUnderDBIter(internal_iter);
@ -4078,7 +4191,6 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
}
Status status;
bool callback_failed = false;
bool xfunc_attempted_write = false;
XFUNC_TEST("transaction", "transaction_xftest_write_impl",
@ -4096,7 +4208,7 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
w.sync = write_options.sync;
w.disableWAL = write_options.disableWAL;
w.in_batch_group = false;
w.has_callback = (callback != nullptr) ? true : false;
w.callback = callback;
if (!write_options.disableWAL) {
RecordTick(stats_, WRITE_WITH_WAL);
@ -4109,6 +4221,7 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
// we are a non-leader in a parallel group
PERF_TIMER_GUARD(write_memtable_time);
if (!w.CallbackFailed()) {
ColumnFamilyMemTablesImpl column_family_memtables(
versions_->GetColumnFamilySet());
WriteBatchInternal::SetSequence(w.batch, w.sequence);
@ -4116,21 +4229,24 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
w.batch, &column_family_memtables, &flush_scheduler_,
write_options.ignore_missing_column_families, 0 /*log_number*/, this,
true /*dont_filter_deletes*/, true /*concurrent_memtable_writes*/);
}
if (write_thread_.CompleteParallelWorker(&w)) {
// we're responsible for early exit
auto last_sequence = w.parallel_group->last_writer->sequence;
auto last_sequence = w.parallel_group->last_sequence;
SetTickerCount(stats_, SEQUENCE_NUMBER, last_sequence);
versions_->SetLastSequence(last_sequence);
write_thread_.EarlyExitParallelGroup(&w);
}
assert(w.state == WriteThread::STATE_COMPLETED);
// STATE_COMPLETED conditional below handles exit
status = w.FinalStatus();
}
if (w.state == WriteThread::STATE_COMPLETED) {
// write is complete and leader has updated sequence
RecordTick(stats_, WRITE_DONE_BY_OTHER);
return w.status;
return w.FinalStatus();
}
// else we are the leader of the write batch group
assert(w.state == WriteThread::STATE_GROUP_LEADER);
@ -4236,7 +4352,7 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
uint64_t last_sequence = versions_->LastSequence();
WriteThread::Writer* last_writer = &w;
autovector<WriteBatch*> write_batch_group;
autovector<WriteThread::Writer*> write_group;
bool need_log_sync = !write_options.disableWAL && write_options.sync;
bool need_log_dir_sync = need_log_sync && !log_dir_synced_;
@ -4255,24 +4371,15 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
// during this phase since &w is currently responsible for logging
// and protects against concurrent loggers and concurrent writes
// into memtables
mutex_.Unlock();
if (callback != nullptr) {
// If this write has a validation callback, check to see if this write
// is able to be written. Must be called on the write thread.
status = callback->Callback(this);
callback_failed = true;
}
} else {
mutex_.Unlock();
}
// At this point the mutex is unlocked
bool exit_completed_early = false;
last_batch_group_size_ = write_thread_.EnterAsBatchGroupLeader(
&w, &last_writer, &write_batch_group);
last_batch_group_size_ =
write_thread_.EnterAsBatchGroupLeader(&w, &last_writer, &write_group);
if (status.ok()) {
// Rules for when we can update the memtable concurrently
@ -4288,15 +4395,17 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
// assumed to be true. Rule 4 is checked for each batch. We could
// relax rules 2 and 3 if we could prevent write batches from referring
// more than once to a particular key.
bool parallel = db_options_.allow_concurrent_memtable_write &&
write_batch_group.size() > 1;
bool parallel =
db_options_.allow_concurrent_memtable_write && write_group.size() > 1;
int total_count = 0;
uint64_t total_byte_size = 0;
for (auto b : write_batch_group) {
total_count += WriteBatchInternal::Count(b);
for (auto writer : write_group) {
if (writer->CheckCallback(this)) {
total_count += WriteBatchInternal::Count(writer->batch);
total_byte_size = WriteBatchInternal::AppendedByteSize(
total_byte_size, WriteBatchInternal::ByteSize(b));
parallel = parallel && !b->HasMerge();
total_byte_size, WriteBatchInternal::ByteSize(writer->batch));
parallel = parallel && !writer->batch->HasMerge();
}
}
const SequenceNumber current_sequence = last_sequence + 1;
@ -4305,10 +4414,11 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
// Record statistics
RecordTick(stats_, NUMBER_KEYS_WRITTEN, total_count);
RecordTick(stats_, BYTES_WRITTEN, total_byte_size);
MeasureTime(stats_, BYTES_PER_WRITE, total_byte_size);
PERF_TIMER_STOP(write_pre_and_post_process_time);
if (write_options.disableWAL) {
flush_on_destroy_ = true;
has_unpersisted_data_ = true;
}
uint64_t log_size = 0;
@ -4316,21 +4426,22 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
PERF_TIMER_GUARD(write_wal_time);
WriteBatch* merged_batch = nullptr;
if (write_batch_group.size() == 1) {
merged_batch = write_batch_group[0];
if (write_group.size() == 1 && !write_group[0]->CallbackFailed()) {
merged_batch = write_group[0]->batch;
} else {
// WAL needs all of the batches flattened into a single batch.
// We could avoid copying here with an iov-like AddRecord
// interface
merged_batch = &tmp_batch_;
for (auto b : write_batch_group) {
WriteBatchInternal::Append(merged_batch, b);
for (auto writer : write_group) {
if (!writer->CallbackFailed()) {
WriteBatchInternal::Append(merged_batch, writer->batch);
}
}
}
WriteBatchInternal::SetSequence(merged_batch, current_sequence);
assert(WriteBatchInternal::Count(merged_batch) == total_count);
assert(WriteBatchInternal::ByteSize(merged_batch) == total_byte_size);
Slice log_entry = WriteBatchInternal::Contents(merged_batch);
status = logs_.back().writer->AddRecord(log_entry);
@ -4385,7 +4496,7 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
}
stats->AddDBStats(InternalStats::WAL_FILE_BYTES, log_size);
}
uint64_t for_other = write_batch_group.size() - 1;
uint64_t for_other = write_group.size() - 1;
if (for_other > 0) {
stats->AddDBStats(InternalStats::WRITE_DONE_BY_OTHER, for_other);
if (!write_options.disableWAL) {
@ -4396,18 +4507,28 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
if (!parallel) {
status = WriteBatchInternal::InsertInto(
write_batch_group, current_sequence, column_family_memtables_.get(),
write_group, current_sequence, column_family_memtables_.get(),
&flush_scheduler_, write_options.ignore_missing_column_families,
0 /*log_number*/, this, false /*dont_filter_deletes*/);
if (status.ok()) {
// There were no write failures. Set leader's status
// in case the write callback returned a non-ok status.
status = w.FinalStatus();
}
} else {
WriteThread::ParallelGroup pg;
pg.leader = &w;
pg.last_writer = last_writer;
pg.last_sequence = last_sequence;
pg.early_exit_allowed = !need_log_sync;
pg.running.store(static_cast<uint32_t>(write_batch_group.size()),
pg.running.store(static_cast<uint32_t>(write_group.size()),
std::memory_order_relaxed);
write_thread_.LaunchParallelFollowers(&pg, current_sequence);
if (!w.CallbackFailed()) {
// do leader write
ColumnFamilyMemTablesImpl column_family_memtables(
versions_->GetColumnFamilySet());
assert(w.sequence == current_sequence);
@ -4417,20 +4538,19 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
write_options.ignore_missing_column_families, 0 /*log_number*/,
this, true /*dont_filter_deletes*/,
true /*concurrent_memtable_writes*/);
}
assert(last_writer->sequence == last_sequence);
// CompleteParallelWorker returns true if this thread should
// handle exit, false means somebody else did
exit_completed_early = !write_thread_.CompleteParallelWorker(&w);
status = w.status;
assert(status.ok() || !exit_completed_early);
status = w.FinalStatus();
}
if (status.ok() && !exit_completed_early) {
if (!exit_completed_early && w.status.ok()) {
SetTickerCount(stats_, SEQUENCE_NUMBER, last_sequence);
versions_->SetLastSequence(last_sequence);
if (!need_log_sync) {
write_thread_.ExitAsBatchGroupLeader(&w, last_writer, status);
write_thread_.ExitAsBatchGroupLeader(&w, last_writer, w.status);
exit_completed_early = true;
}
}
@ -4443,14 +4563,14 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
//
// Is setting bg_error_ enough here? This will at least stop
// compaction and fail any further writes.
if (!status.ok() && bg_error_.ok()) {
if (!status.ok() && bg_error_.ok() && !w.CallbackFailed()) {
bg_error_ = status;
}
}
}
PERF_TIMER_START(write_pre_and_post_process_time);
if (db_options_.paranoid_checks && !status.ok() && !callback_failed &&
if (db_options_.paranoid_checks && !status.ok() && !w.CallbackFailed() &&
!status.IsBusy()) {
mutex_.Lock();
if (bg_error_.ok()) {
@ -4466,7 +4586,7 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
}
if (!exit_completed_early) {
write_thread_.ExitAsBatchGroupLeader(&w, last_writer, status);
write_thread_.ExitAsBatchGroupLeader(&w, last_writer, w.status);
}
return status;
@ -4678,53 +4798,51 @@ const DBOptions& DBImpl::GetDBOptions() const { return db_options_; }
bool DBImpl::GetProperty(ColumnFamilyHandle* column_family,
const Slice& property, std::string* value) {
bool is_int_property = false;
bool need_out_of_mutex = false;
DBPropertyType property_type =
GetPropertyType(property, &is_int_property, &need_out_of_mutex);
const DBPropertyInfo* property_info = GetPropertyInfo(property);
value->clear();
auto cfd = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family)->cfd();
if (is_int_property) {
if (property_info == nullptr) {
return false;
} else if (property_info->handle_int) {
uint64_t int_value;
bool ret_value = GetIntPropertyInternal(
cfd, property_type, need_out_of_mutex, false, &int_value);
bool ret_value =
GetIntPropertyInternal(cfd, *property_info, false, &int_value);
if (ret_value) {
*value = ToString(int_value);
}
return ret_value;
} else {
} else if (property_info->handle_string) {
InstrumentedMutexLock l(&mutex_);
return cfd->internal_stats()->GetStringProperty(property_type, property,
return cfd->internal_stats()->GetStringProperty(*property_info, property,
value);
}
// Shouldn't reach here since exactly one of handle_string and handle_int
// should be non-nullptr.
assert(false);
return false;
}
bool DBImpl::GetIntProperty(ColumnFamilyHandle* column_family,
const Slice& property, uint64_t* value) {
bool is_int_property = false;
bool need_out_of_mutex = false;
DBPropertyType property_type =
GetPropertyType(property, &is_int_property, &need_out_of_mutex);
if (!is_int_property) {
const DBPropertyInfo* property_info = GetPropertyInfo(property);
if (property_info == nullptr || property_info->handle_int == nullptr) {
return false;
}
auto cfd = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family)->cfd();
return GetIntPropertyInternal(cfd, property_type, need_out_of_mutex, false,
value);
return GetIntPropertyInternal(cfd, *property_info, false, value);
}
bool DBImpl::GetIntPropertyInternal(ColumnFamilyData* cfd,
DBPropertyType property_type,
bool need_out_of_mutex, bool is_locked,
uint64_t* value) {
if (!need_out_of_mutex) {
const DBPropertyInfo& property_info,
bool is_locked, uint64_t* value) {
assert(property_info.handle_int != nullptr);
if (!property_info.need_out_of_mutex) {
if (is_locked) {
mutex_.AssertHeld();
return cfd->internal_stats()->GetIntProperty(property_type, value, this);
return cfd->internal_stats()->GetIntProperty(property_info, value, this);
} else {
InstrumentedMutexLock l(&mutex_);
return cfd->internal_stats()->GetIntProperty(property_type, value, this);
return cfd->internal_stats()->GetIntProperty(property_info, value, this);
}
} else {
SuperVersion* sv = nullptr;
@ -4735,7 +4853,7 @@ bool DBImpl::GetIntPropertyInternal(ColumnFamilyData* cfd,
}
bool ret = cfd->internal_stats()->GetIntPropertyOutOfMutex(
property_type, sv->current, value);
property_info, sv->current, value);
if (!is_locked) {
ReturnAndCleanupSuperVersion(cfd, sv);
@ -4747,11 +4865,8 @@ bool DBImpl::GetIntPropertyInternal(ColumnFamilyData* cfd,
bool DBImpl::GetAggregatedIntProperty(const Slice& property,
uint64_t* aggregated_value) {
bool need_out_of_mutex;
bool is_int_property;
DBPropertyType property_type =
GetPropertyType(property, &is_int_property, &need_out_of_mutex);
if (!is_int_property) {
const DBPropertyInfo* property_info = GetPropertyInfo(property);
if (property_info == nullptr || property_info->handle_int == nullptr) {
return false;
}
@ -4761,8 +4876,7 @@ bool DBImpl::GetAggregatedIntProperty(const Slice& property,
InstrumentedMutexLock l(&mutex_);
uint64_t value;
for (auto* cfd : *versions_->GetColumnFamilySet()) {
if (GetIntPropertyInternal(cfd, property_type, need_out_of_mutex, true,
&value)) {
if (GetIntPropertyInternal(cfd, *property_info, true, &value)) {
sum += value;
} else {
return false;
@ -5414,6 +5528,25 @@ Status DB::Open(const DBOptions& db_options, const std::string& dbname,
}
impl->mutex_.Unlock();
auto sfm = static_cast<SstFileManagerImpl*>(
impl->db_options_.sst_file_manager.get());
if (s.ok() && sfm) {
// Notify SstFileManager about all sst files that already exist in
// db_paths[0] when the DB is opened.
auto& db_path = impl->db_options_.db_paths[0];
std::vector<std::string> existing_files;
impl->db_options_.env->GetChildren(db_path.path, &existing_files);
for (auto& file_name : existing_files) {
uint64_t file_number;
FileType file_type;
std::string file_path = db_path.path + "/" + file_name;
if (ParseFileName(file_name, &file_number, &file_type) &&
file_type == kTableFile) {
sfm->OnAddFile(file_path);
}
}
}
if (s.ok()) {
Log(InfoLogLevel::INFO_LEVEL, impl->db_options_.info_log, "DB pointer %p",
impl);
@ -5473,7 +5606,7 @@ Status DestroyDB(const std::string& dbname, const Options& options) {
if (type == kMetaDatabase) {
del = DestroyDB(path_to_delete, options);
} else if (type == kTableFile) {
del = DeleteOrMoveToTrash(&options, path_to_delete);
del = DeleteSSTFile(&options, path_to_delete, 0);
} else {
del = env->DeleteFile(path_to_delete);
}
@ -5489,13 +5622,9 @@ Status DestroyDB(const std::string& dbname, const Options& options) {
for (size_t i = 0; i < filenames.size(); i++) {
if (ParseFileName(filenames[i], &number, &type) &&
type == kTableFile) { // Lock file will be deleted at end
Status del;
std::string table_path = db_path.path + "/" + filenames[i];
if (path_id == 0) {
del = DeleteOrMoveToTrash(&options, table_path);
} else {
del = env->DeleteFile(table_path);
}
Status del = DeleteSSTFile(&options, table_path,
static_cast<uint32_t>(path_id));
if (result.ok() && !del.ok()) {
result = del;
}
@ -5650,7 +5779,8 @@ Status DBImpl::RenameTempFileToOptionsFile(const std::string& file_name) {
void DBImpl::NewThreadStatusCfInfo(
ColumnFamilyData* cfd) const {
if (db_options_.enable_thread_tracking) {
ThreadStatusUtil::NewColumnFamilyInfo(this, cfd);
ThreadStatusUtil::NewColumnFamilyInfo(this, cfd, cfd->GetName(),
cfd->ioptions()->env);
}
}

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -347,6 +347,10 @@ class DBImpl : public DB {
#endif // NDEBUG
// Return the maximum number of background compactions allowed to be
// scheduled, based on compaction status.
int BGCompactionsAllowed() const;
// Returns the list of live files in 'live' and the list
// of all files in the filesystem in 'candidate_files'.
// If force == false and the last call was less than
@ -490,10 +494,11 @@ class DBImpl : public DB {
// Background process needs to call
// auto x = CaptureCurrentFileNumberInPendingOutputs()
// auto file_num = versions_->NewFileNumber();
// <do something>
// ReleaseFileNumberFromPendingOutputs(x)
// This will protect any temporary files created while <do something> is
// executing from being deleted.
// This will protect any file with number `file_num` or greater from being
// deleted while <do something> is running.
// -----------
// This function will capture current file number and append it to
// pending_outputs_. This will prevent any background process to delete any
@ -818,7 +823,10 @@ class DBImpl : public DB {
// they're unique
std::atomic<int> next_job_id_;
bool flush_on_destroy_; // Used when disableWAL is true.
// A flag indicating whether the current rocksdb database has any
// data that is not yet persisted into either WAL or SST file.
// Used when disableWAL is true.
bool has_unpersisted_data_;
static const int KEEP_LOG_FILE_NUM = 1000;
// MSVC version 1800 still does not have constexpr for ::max()
@ -839,6 +847,9 @@ class DBImpl : public DB {
// A value of > 0 temporarily disables scheduling of background work
int bg_work_paused_;
// A value of > 0 temporarily disables scheduling of background compaction
int bg_compaction_paused_;
// Guard against multiple concurrent refitting
bool refitting_level_;
@ -889,9 +900,8 @@ class DBImpl : public DB {
bool* value_found = nullptr);
bool GetIntPropertyInternal(ColumnFamilyData* cfd,
DBPropertyType property_type,
bool need_out_of_mutex, bool is_locked,
uint64_t* value);
const DBPropertyInfo& property_info,
bool is_locked, uint64_t* value);
bool HasPendingManualCompaction();
bool HasExclusiveManualCompaction();

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -54,10 +54,11 @@ Iterator* DBImplReadOnly::NewIterator(const ReadOptions& read_options,
auto db_iter = NewArenaWrappedDbIterator(
env_, *cfd->ioptions(), cfd->user_comparator(),
(read_options.snapshot != nullptr
? reinterpret_cast<const SnapshotImpl*>(
read_options.snapshot)->number_
? reinterpret_cast<const SnapshotImpl*>(read_options.snapshot)
->number_
: latest_snapshot),
super_version->mutable_cf_options.max_sequential_skip_in_iterations);
super_version->mutable_cf_options.max_sequential_skip_in_iterations,
super_version->version_number);
auto internal_iter = NewInternalIterator(
read_options, cfd, super_version, db_iter->GetArena());
db_iter->SetIterUnderDBIter(internal_iter);
@ -81,10 +82,11 @@ Status DBImplReadOnly::NewIterators(
auto* db_iter = NewArenaWrappedDbIterator(
env_, *cfd->ioptions(), cfd->user_comparator(),
(read_options.snapshot != nullptr
? reinterpret_cast<const SnapshotImpl*>(
read_options.snapshot)->number_
? reinterpret_cast<const SnapshotImpl*>(read_options.snapshot)
->number_
: latest_snapshot),
sv->mutable_cf_options.max_sequential_skip_in_iterations);
sv->mutable_cf_options.max_sequential_skip_in_iterations,
sv->version_number);
auto* internal_iter = NewInternalIterator(
read_options, cfd, sv, db_iter->GetArena());
db_iter->SetIterUnderDBIter(internal_iter);

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -7,6 +7,8 @@
#define __STDC_FORMAT_MACROS
#endif
#include "db/db_info_dumper.h"
#include <inttypes.h>
#include <stdio.h>
#include <string>
@ -16,7 +18,6 @@
#include "db/filename.h"
#include "rocksdb/options.h"
#include "rocksdb/env.h"
#include "util/db_info_dumper.h"
namespace rocksdb {

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -13,18 +13,19 @@
#include <string>
#include <limits>
#include "db/filename.h"
#include "db/dbformat.h"
#include "db/filename.h"
#include "port/port.h"
#include "rocksdb/env.h"
#include "rocksdb/options.h"
#include "rocksdb/iterator.h"
#include "rocksdb/merge_operator.h"
#include "rocksdb/options.h"
#include "table/internal_iterator.h"
#include "util/arena.h"
#include "util/logging.h"
#include "util/mutexlock.h"
#include "util/perf_context_imp.h"
#include "util/string_util.h"
namespace rocksdb {
@ -59,9 +60,47 @@ class DBIter: public Iterator {
kReverse
};
// LocalStatistics contains Statistics counters that are aggregated per
// iterator instance and then sent to the global statistics when the
// iterator is destroyed.
//
// The purpose of this approach is to avoid perf regression happening
// when multiple threads bump the atomic counters from a DBIter::Next().
struct LocalStatistics {
  // Each counter below shadows one global ticker; the ticker it feeds is
  // named in the comment directly above the field.

  // Map to Tickers::NUMBER_DB_NEXT
  uint64_t next_count_;
  // Map to Tickers::NUMBER_DB_NEXT_FOUND
  uint64_t next_found_count_;
  // Map to Tickers::NUMBER_DB_PREV
  uint64_t prev_count_;
  // Map to Tickers::NUMBER_DB_PREV_FOUND
  uint64_t prev_found_count_;
  // Map to Tickers::ITER_BYTES_READ
  uint64_t bytes_read_;

  // Starts with every counter at zero.
  explicit LocalStatistics() { ResetCounters(); }

  // Sets all five counters back to zero.
  void ResetCounters() {
    next_count_ = next_found_count_ = prev_count_ = prev_found_count_ =
        bytes_read_ = 0;
  }

  // Adds the locally accumulated counts to the matching tickers of
  // global_statistics via RecordTick, then clears the local counters so the
  // same counts are not reported twice.
  void BumpGlobalStatistics(Statistics* global_statistics) {
    RecordTick(global_statistics, NUMBER_DB_NEXT, next_count_);
    RecordTick(global_statistics, NUMBER_DB_NEXT_FOUND, next_found_count_);
    RecordTick(global_statistics, NUMBER_DB_PREV, prev_count_);
    RecordTick(global_statistics, NUMBER_DB_PREV_FOUND, prev_found_count_);
    RecordTick(global_statistics, ITER_BYTES_READ, bytes_read_);
    ResetCounters();
  }
};
DBIter(Env* env, const ImmutableCFOptions& ioptions, const Comparator* cmp,
InternalIterator* iter, SequenceNumber s, bool arena_mode,
uint64_t max_sequential_skip_in_iterations,
uint64_t max_sequential_skip_in_iterations, uint64_t version_number,
const Slice* iterate_upper_bound = nullptr,
bool prefix_same_as_start = false)
: arena_mode_(arena_mode),
@ -75,6 +114,7 @@ class DBIter: public Iterator {
valid_(false),
current_entry_is_merged_(false),
statistics_(ioptions.statistics),
version_number_(version_number),
iterate_upper_bound_(iterate_upper_bound),
prefix_same_as_start_(prefix_same_as_start),
iter_pinned_(false) {
@ -84,6 +124,7 @@ class DBIter: public Iterator {
}
virtual ~DBIter() {
RecordTick(statistics_, NO_ITERATORS, -1);
local_stats_.BumpGlobalStatistics(statistics_);
if (!arena_mode_) {
delete iter_;
} else {
@ -136,9 +177,27 @@ class DBIter: public Iterator {
}
return s;
}
virtual bool IsKeyPinned() const override {
assert(valid_);
return iter_pinned_ && saved_key_.IsKeyPinned();
// Looks up an iterator property by name and stores its textual value in
// *prop.
//
// Properties handled here:
//   "rocksdb.iterator.super-version-number": the value reported by the inner
//     iterator if its GetProperty() returns OK for this name; otherwise the
//     version_number_ this DBIter was constructed with.
//   "rocksdb.iterator.is-key-pinned": "1" or "0" while the iterator is
//     valid; the text "Iterator is not valid." (still with an OK status)
//     when it is not.
//
// Returns InvalidArgument when prop is nullptr or when prop_name is not one
// of the names above; returns OK in every other case.
virtual Status GetProperty(std::string prop_name,
                           std::string* prop) override {
  if (prop == nullptr) {
    return Status::InvalidArgument("prop is nullptr");
  }
  if (prop_name == "rocksdb.iterator.super-version-number") {
    // First try to pass the value returned from inner iterator.
    if (!iter_->GetProperty(prop_name, prop).ok()) {
      *prop = ToString(version_number_);
    }
    return Status::OK();
  } else if (prop_name == "rocksdb.iterator.is-key-pinned") {
    if (valid_) {
      *prop = (iter_pinned_ && saved_key_.IsKeyPinned()) ? "1" : "0";
    } else {
      // An invalid iterator is reported through the property text, not
      // through the returned status.
      *prop = "Iterator is not valid.";
    }
    return Status::OK();
  }
  return Status::InvalidArgument("Unidentified property.");
}
virtual void Next() override;
@ -186,12 +245,14 @@ class DBIter: public Iterator {
bool current_entry_is_merged_;
Statistics* statistics_;
uint64_t max_skip_;
uint64_t version_number_;
const Slice* iterate_upper_bound_;
IterKey prefix_start_;
bool prefix_same_as_start_;
bool iter_pinned_;
// List of operands for merge operator.
std::deque<std::string> merge_operands_;
LocalStatistics local_stats_;
// No copying allowed
DBIter(const DBIter&);
@ -229,6 +290,9 @@ void DBIter::Next() {
PERF_COUNTER_ADD(internal_key_skipped_count, 1);
}
if (statistics_ != nullptr) {
local_stats_.next_count_++;
}
// Now we point to the next internal position, for both of merge and
// not merge cases.
if (!iter_->Valid()) {
@ -236,18 +300,15 @@ void DBIter::Next() {
return;
}
FindNextUserEntry(true /* skipping the current user key */);
if (statistics_ != nullptr) {
RecordTick(statistics_, NUMBER_DB_NEXT);
if (valid_) {
RecordTick(statistics_, NUMBER_DB_NEXT_FOUND);
RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size());
}
}
if (valid_ && prefix_extractor_ && prefix_same_as_start_ &&
prefix_extractor_->Transform(saved_key_.GetKey())
.compare(prefix_start_.GetKey()) != 0) {
valid_ = false;
}
if (statistics_ != nullptr && valid_) {
local_stats_.next_found_count_++;
local_stats_.bytes_read_ += (key().size() + value().size());
}
}
// PRE: saved_key_ has the current user key if skipping
@ -275,7 +336,7 @@ void DBIter::FindNextUserEntryInternal(bool skipping) {
if (ParseKey(&ikey)) {
if (iterate_upper_bound_ != nullptr &&
ikey.user_key.compare(*iterate_upper_bound_) >= 0) {
user_comparator_->Compare(ikey.user_key, *iterate_upper_bound_) >= 0) {
break;
}
@ -415,10 +476,10 @@ void DBIter::Prev() {
}
PrevInternal();
if (statistics_ != nullptr) {
RecordTick(statistics_, NUMBER_DB_PREV);
local_stats_.prev_count_++;
if (valid_) {
RecordTick(statistics_, NUMBER_DB_PREV_FOUND);
RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size());
local_stats_.prev_found_count_++;
local_stats_.bytes_read_ += (key().size() + value().size());
}
}
if (valid_ && prefix_extractor_ && prefix_same_as_start_ &&
@ -818,12 +879,13 @@ Iterator* NewDBIterator(Env* env, const ImmutableCFOptions& ioptions,
InternalIterator* internal_iter,
const SequenceNumber& sequence,
uint64_t max_sequential_skip_in_iterations,
uint64_t version_number,
const Slice* iterate_upper_bound,
bool prefix_same_as_start, bool pin_data) {
DBIter* db_iter =
new DBIter(env, ioptions, user_key_comparator, internal_iter, sequence,
false, max_sequential_skip_in_iterations, iterate_upper_bound,
prefix_same_as_start);
false, max_sequential_skip_in_iterations, version_number,
iterate_upper_bound, prefix_same_as_start);
if (pin_data) {
db_iter->PinData();
}
@ -850,12 +912,13 @@ inline Slice ArenaWrappedDBIter::key() const { return db_iter_->key(); }
inline Slice ArenaWrappedDBIter::value() const { return db_iter_->value(); }
inline Status ArenaWrappedDBIter::status() const { return db_iter_->status(); }
inline Status ArenaWrappedDBIter::PinData() { return db_iter_->PinData(); }
// Forwards the property lookup unchanged to the wrapped DBIter and returns
// whatever status it reports.
inline Status ArenaWrappedDBIter::GetProperty(std::string prop_name,
                                              std::string* prop) {
  Status lookup_status = db_iter_->GetProperty(prop_name, prop);
  return lookup_status;
}
inline Status ArenaWrappedDBIter::ReleasePinnedData() {
return db_iter_->ReleasePinnedData();
}
inline bool ArenaWrappedDBIter::IsKeyPinned() const {
return db_iter_->IsKeyPinned();
}
void ArenaWrappedDBIter::RegisterCleanup(CleanupFunction function, void* arg1,
void* arg2) {
db_iter_->RegisterCleanup(function, arg1, arg2);
@ -864,7 +927,7 @@ void ArenaWrappedDBIter::RegisterCleanup(CleanupFunction function, void* arg1,
ArenaWrappedDBIter* NewArenaWrappedDbIterator(
Env* env, const ImmutableCFOptions& ioptions,
const Comparator* user_key_comparator, const SequenceNumber& sequence,
uint64_t max_sequential_skip_in_iterations,
uint64_t max_sequential_skip_in_iterations, uint64_t version_number,
const Slice* iterate_upper_bound, bool prefix_same_as_start,
bool pin_data) {
ArenaWrappedDBIter* iter = new ArenaWrappedDBIter();
@ -872,7 +935,7 @@ ArenaWrappedDBIter* NewArenaWrappedDbIterator(
auto mem = arena->AllocateAligned(sizeof(DBIter));
DBIter* db_iter =
new (mem) DBIter(env, ioptions, user_key_comparator, nullptr, sequence,
true, max_sequential_skip_in_iterations,
true, max_sequential_skip_in_iterations, version_number,
iterate_upper_bound, prefix_same_as_start);
iter->SetDBIter(db_iter);

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -9,6 +9,7 @@
#pragma once
#include <stdint.h>
#include <string>
#include "rocksdb/db.h"
#include "rocksdb/iterator.h"
#include "db/dbformat.h"
@ -24,14 +25,12 @@ class InternalIterator;
// Return a new iterator that converts internal keys (yielded by
// "*internal_iter") that were live at the specified "sequence" number
// into appropriate user keys.
extern Iterator* NewDBIterator(Env* env, const ImmutableCFOptions& options,
const Comparator* user_key_comparator,
InternalIterator* internal_iter,
const SequenceNumber& sequence,
uint64_t max_sequential_skip_in_iterations,
const Slice* iterate_upper_bound = nullptr,
bool prefix_same_as_start = false,
bool pin_data = false);
extern Iterator* NewDBIterator(
Env* env, const ImmutableCFOptions& options,
const Comparator* user_key_comparator, InternalIterator* internal_iter,
const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations,
uint64_t version_number, const Slice* iterate_upper_bound = nullptr,
bool prefix_same_as_start = false, bool pin_data = false);
// A wrapper iterator which wraps DB Iterator and the arena, with which the DB
// iterator is supposed to be allocated. This class is used as an entry point of
@ -66,7 +65,7 @@ class ArenaWrappedDBIter : public Iterator {
void RegisterCleanup(CleanupFunction function, void* arg1, void* arg2);
virtual Status PinData();
virtual Status ReleasePinnedData();
virtual bool IsKeyPinned() const override;
virtual Status GetProperty(std::string prop_name, std::string* prop) override;
private:
DBIter* db_iter_;
@ -77,7 +76,7 @@ class ArenaWrappedDBIter : public Iterator {
extern ArenaWrappedDBIter* NewArenaWrappedDbIterator(
Env* env, const ImmutableCFOptions& options,
const Comparator* user_key_comparator, const SequenceNumber& sequence,
uint64_t max_sequential_skip_in_iterations,
uint64_t max_sequential_skip_in_iterations, uint64_t version_number,
const Slice* iterate_upper_bound = nullptr,
bool prefix_same_as_start = false, bool pin_data = false);

@ -1,4 +1,4 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -181,10 +181,9 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) {
internal_iter->AddPut("b", "val_b");
internal_iter->Finish();
std::unique_ptr<Iterator> db_iter(
NewDBIterator(env_, ImmutableCFOptions(options),
BytewiseComparator(), internal_iter, 10,
options.max_sequential_skip_in_iterations));
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
10, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
@ -215,7 +214,7 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
10, options.max_sequential_skip_in_iterations));
10, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
@ -241,7 +240,8 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
10, options.max_sequential_skip_in_iterations, ro.iterate_upper_bound));
10, options.max_sequential_skip_in_iterations, 0,
ro.iterate_upper_bound));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
@ -273,7 +273,8 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
10, options.max_sequential_skip_in_iterations, ro.iterate_upper_bound));
10, options.max_sequential_skip_in_iterations, 0,
ro.iterate_upper_bound));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
@ -308,7 +309,8 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
10, options.max_sequential_skip_in_iterations, ro.iterate_upper_bound));
10, options.max_sequential_skip_in_iterations, 0,
ro.iterate_upper_bound));
db_iter->SeekToLast();
ASSERT_TRUE(!db_iter->Valid());
@ -337,7 +339,8 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
7, options.max_sequential_skip_in_iterations, ro.iterate_upper_bound));
7, options.max_sequential_skip_in_iterations, 0,
ro.iterate_upper_bound));
SetPerfLevel(kEnableCount);
ASSERT_TRUE(GetPerfLevel() == kEnableCount);
@ -374,7 +377,8 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
4, options.max_sequential_skip_in_iterations, ro.iterate_upper_bound));
4, options.max_sequential_skip_in_iterations, 0,
ro.iterate_upper_bound));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
@ -399,7 +403,8 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
10, options.max_sequential_skip_in_iterations, ro.iterate_upper_bound));
10, options.max_sequential_skip_in_iterations, 0,
ro.iterate_upper_bound));
db_iter->SeekToLast();
ASSERT_TRUE(!db_iter->Valid());
@ -421,7 +426,8 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
10, options.max_sequential_skip_in_iterations, ro.iterate_upper_bound));
10, options.max_sequential_skip_in_iterations, 0,
ro.iterate_upper_bound));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
@ -456,7 +462,8 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
7, options.max_sequential_skip_in_iterations, ro.iterate_upper_bound));
7, options.max_sequential_skip_in_iterations, 0,
ro.iterate_upper_bound));
SetPerfLevel(kEnableCount);
ASSERT_TRUE(GetPerfLevel() == kEnableCount);
@ -482,10 +489,9 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) {
internal_iter->AddPut("b", "val_b");
internal_iter->Finish();
std::unique_ptr<Iterator> db_iter(
NewDBIterator(env_, ImmutableCFOptions(options),
BytewiseComparator(), internal_iter, 10,
options.max_sequential_skip_in_iterations));
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
10, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToFirst();
ASSERT_TRUE(db_iter->Valid());
@ -524,10 +530,9 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) {
internal_iter->AddPut("b", "val_b");
internal_iter->Finish();
std::unique_ptr<Iterator> db_iter(
NewDBIterator(env_, ImmutableCFOptions(options),
BytewiseComparator(), internal_iter, 2,
options.max_sequential_skip_in_iterations));
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
2, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "b");
@ -555,10 +560,9 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) {
internal_iter->AddPut("c", "val_c");
internal_iter->Finish();
std::unique_ptr<Iterator> db_iter(
NewDBIterator(env_, ImmutableCFOptions(options),
BytewiseComparator(), internal_iter, 10,
options.max_sequential_skip_in_iterations));
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
10, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "c");
@ -583,10 +587,9 @@ TEST_F(DBIteratorTest, DBIteratorEmpty) {
TestIterator* internal_iter = new TestIterator(BytewiseComparator());
internal_iter->Finish();
std::unique_ptr<Iterator> db_iter(
NewDBIterator(env_, ImmutableCFOptions(options),
BytewiseComparator(), internal_iter, 0,
options.max_sequential_skip_in_iterations));
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
0, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(!db_iter->Valid());
}
@ -595,10 +598,9 @@ TEST_F(DBIteratorTest, DBIteratorEmpty) {
TestIterator* internal_iter = new TestIterator(BytewiseComparator());
internal_iter->Finish();
std::unique_ptr<Iterator> db_iter(
NewDBIterator(env_, ImmutableCFOptions(options),
BytewiseComparator(), internal_iter, 0,
options.max_sequential_skip_in_iterations));
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
0, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToFirst();
ASSERT_TRUE(!db_iter->Valid());
}
@ -617,10 +619,9 @@ TEST_F(DBIteratorTest, DBIteratorUseSkipCountSkips) {
}
internal_iter->Finish();
std::unique_ptr<Iterator> db_iter(
NewDBIterator(env_, ImmutableCFOptions(options),
BytewiseComparator(), internal_iter, 2,
options.max_sequential_skip_in_iterations));
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, 2,
options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "c");
@ -659,9 +660,8 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip) {
options.statistics = rocksdb::CreateDBStatistics();
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options),
BytewiseComparator(), internal_iter, i + 2,
options.max_sequential_skip_in_iterations));
env_, ImmutableCFOptions(options), BytewiseComparator(),
internal_iter, i + 2, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
@ -695,9 +695,8 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip) {
internal_iter->Finish();
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options),
BytewiseComparator(), internal_iter, i + 2,
options.max_sequential_skip_in_iterations));
env_, ImmutableCFOptions(options), BytewiseComparator(),
internal_iter, i + 2, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
@ -724,9 +723,8 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip) {
internal_iter->Finish();
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options),
BytewiseComparator(), internal_iter, 202,
options.max_sequential_skip_in_iterations));
env_, ImmutableCFOptions(options), BytewiseComparator(),
internal_iter, 202, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
@ -756,10 +754,9 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip) {
}
internal_iter->AddPut("c", "200");
internal_iter->Finish();
std::unique_ptr<Iterator> db_iter(
NewDBIterator(env_, ImmutableCFOptions(options),
BytewiseComparator(), internal_iter, i,
options.max_sequential_skip_in_iterations));
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(),
internal_iter, i, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(!db_iter->Valid());
@ -773,10 +770,9 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip) {
}
internal_iter->AddPut("c", "200");
internal_iter->Finish();
std::unique_ptr<Iterator> db_iter(
NewDBIterator(env_, ImmutableCFOptions(options),
BytewiseComparator(), internal_iter, 200,
options.max_sequential_skip_in_iterations));
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
200, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "c");
@ -809,9 +805,8 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip) {
internal_iter->Finish();
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options),
BytewiseComparator(), internal_iter, i + 2,
options.max_sequential_skip_in_iterations));
env_, ImmutableCFOptions(options), BytewiseComparator(),
internal_iter, i + 2, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
@ -844,9 +839,8 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip) {
internal_iter->Finish();
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options),
BytewiseComparator(), internal_iter, i + 2,
options.max_sequential_skip_in_iterations));
env_, ImmutableCFOptions(options), BytewiseComparator(),
internal_iter, i + 2, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
@ -887,7 +881,7 @@ TEST_F(DBIteratorTest, DBIterator1) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, 1,
options.max_sequential_skip_in_iterations));
options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToFirst();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "a");
@ -913,7 +907,7 @@ TEST_F(DBIteratorTest, DBIterator2) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, 0,
options.max_sequential_skip_in_iterations));
options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToFirst();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "a");
@ -936,7 +930,7 @@ TEST_F(DBIteratorTest, DBIterator3) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, 2,
options.max_sequential_skip_in_iterations));
options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToFirst();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "a");
@ -958,7 +952,7 @@ TEST_F(DBIteratorTest, DBIterator4) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, 4,
options.max_sequential_skip_in_iterations));
options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToFirst();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "a");
@ -987,7 +981,7 @@ TEST_F(DBIteratorTest, DBIterator5) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
0, options.max_sequential_skip_in_iterations));
0, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "a");
@ -1009,7 +1003,7 @@ TEST_F(DBIteratorTest, DBIterator5) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
1, options.max_sequential_skip_in_iterations));
1, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "a");
@ -1031,7 +1025,7 @@ TEST_F(DBIteratorTest, DBIterator5) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
2, options.max_sequential_skip_in_iterations));
2, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "a");
@ -1053,7 +1047,7 @@ TEST_F(DBIteratorTest, DBIterator5) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
3, options.max_sequential_skip_in_iterations));
3, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "a");
@ -1075,7 +1069,7 @@ TEST_F(DBIteratorTest, DBIterator5) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
4, options.max_sequential_skip_in_iterations));
4, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "a");
@ -1097,7 +1091,7 @@ TEST_F(DBIteratorTest, DBIterator5) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
5, options.max_sequential_skip_in_iterations));
5, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "a");
@ -1119,7 +1113,7 @@ TEST_F(DBIteratorTest, DBIterator5) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
6, options.max_sequential_skip_in_iterations));
6, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "a");
@ -1145,7 +1139,7 @@ TEST_F(DBIteratorTest, DBIterator6) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
0, options.max_sequential_skip_in_iterations));
0, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "a");
@ -1167,7 +1161,7 @@ TEST_F(DBIteratorTest, DBIterator6) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
1, options.max_sequential_skip_in_iterations));
1, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "a");
@ -1189,7 +1183,7 @@ TEST_F(DBIteratorTest, DBIterator6) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
2, options.max_sequential_skip_in_iterations));
2, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "a");
@ -1211,7 +1205,7 @@ TEST_F(DBIteratorTest, DBIterator6) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
3, options.max_sequential_skip_in_iterations));
3, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(!db_iter->Valid());
}
@ -1229,7 +1223,7 @@ TEST_F(DBIteratorTest, DBIterator6) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
4, options.max_sequential_skip_in_iterations));
4, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "a");
@ -1251,7 +1245,7 @@ TEST_F(DBIteratorTest, DBIterator6) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
5, options.max_sequential_skip_in_iterations));
5, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "a");
@ -1273,7 +1267,7 @@ TEST_F(DBIteratorTest, DBIterator6) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
6, options.max_sequential_skip_in_iterations));
6, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "a");
@ -1311,7 +1305,7 @@ TEST_F(DBIteratorTest, DBIterator7) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
0, options.max_sequential_skip_in_iterations));
0, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "a");
@ -1345,7 +1339,7 @@ TEST_F(DBIteratorTest, DBIterator7) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
2, options.max_sequential_skip_in_iterations));
2, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
@ -1385,7 +1379,7 @@ TEST_F(DBIteratorTest, DBIterator7) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
4, options.max_sequential_skip_in_iterations));
4, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
@ -1425,7 +1419,7 @@ TEST_F(DBIteratorTest, DBIterator7) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
5, options.max_sequential_skip_in_iterations));
5, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
@ -1470,7 +1464,7 @@ TEST_F(DBIteratorTest, DBIterator7) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
6, options.max_sequential_skip_in_iterations));
6, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
@ -1516,7 +1510,7 @@ TEST_F(DBIteratorTest, DBIterator7) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
7, options.max_sequential_skip_in_iterations));
7, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
@ -1556,7 +1550,7 @@ TEST_F(DBIteratorTest, DBIterator7) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
9, options.max_sequential_skip_in_iterations));
9, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
@ -1602,7 +1596,7 @@ TEST_F(DBIteratorTest, DBIterator7) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
13, options.max_sequential_skip_in_iterations));
13, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
@ -1649,7 +1643,7 @@ TEST_F(DBIteratorTest, DBIterator7) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
14, options.max_sequential_skip_in_iterations));
14, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
@ -1678,7 +1672,7 @@ TEST_F(DBIteratorTest, DBIterator8) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
10, options.max_sequential_skip_in_iterations));
10, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "b");
@ -1707,7 +1701,7 @@ TEST_F(DBIteratorTest, DBIterator9) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
10, options.max_sequential_skip_in_iterations));
10, options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
@ -1754,7 +1748,7 @@ TEST_F(DBIteratorTest, DBIterator10) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
10, options.max_sequential_skip_in_iterations));
10, options.max_sequential_skip_in_iterations, 0));
db_iter->Seek("c");
ASSERT_TRUE(db_iter->Valid());
@ -1778,9 +1772,9 @@ TEST_F(DBIteratorTest, SeekToLastOccurrenceSeq0) {
internal_iter->AddPut("b", "2");
internal_iter->Finish();
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
10, 0 /* force seek */));
std::unique_ptr<Iterator> db_iter(
NewDBIterator(env_, ImmutableCFOptions(options), BytewiseComparator(),
internal_iter, 10, 0 /* force seek */, 0));
db_iter->SeekToFirst();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "a");
@ -1807,7 +1801,7 @@ TEST_F(DBIteratorTest, DBIterator11) {
std::unique_ptr<Iterator> db_iter(NewDBIterator(
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, 1,
options.max_sequential_skip_in_iterations));
options.max_sequential_skip_in_iterations, 0));
db_iter->SeekToFirst();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "a");
@ -1832,7 +1826,7 @@ TEST_F(DBIteratorTest, DBIterator12) {
std::unique_ptr<Iterator> db_iter(
NewDBIterator(env_, ImmutableCFOptions(options), BytewiseComparator(),
internal_iter, 10, 0));
internal_iter, 10, 0, 0));
db_iter->SeekToLast();
ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(db_iter->key().ToString(), "c");
@ -1874,7 +1868,7 @@ class DBIterWithMergeIterTest : public testing::Test {
db_iter_.reset(NewDBIterator(env_, ImmutableCFOptions(options_),
BytewiseComparator(), merge_iter,
8 /* read data earlier than seqId 8 */,
3 /* max iterators before reseek */));
3 /* max iterators before reseek */, 0));
}
Env* env_;

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

File diff suppressed because it is too large Load Diff

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -654,6 +654,51 @@ TEST_F(DBTestTailingIterator, ManagedTailingIteratorSeekToSame) {
ASSERT_EQ(found, iter->key().ToString());
}
// Checks the "rocksdb.iterator.super-version-number" property on tailing
// iterators. The test expects that:
//   * a flush alone does not change the number reported by an existing
//     iterator (v1 == v2),
//   * re-seeking the iterator after the flush makes it report a larger
//     number (v3 > v2), and
//   * a freshly created iterator reports the same number as the re-sought
//     one (v3 == v4).
TEST_F(DBTestTailingIterator, ForwardIteratorVersionProperty) {
  // NOTE(review): `options` is configured here but never applied to the DB in
  // this test (there is no reopen). Kept to preserve existing behavior;
  // confirm whether a reopen with these options was intended.
  Options options = CurrentOptions();
  options.write_buffer_size = 1000;

  ReadOptions read_options;
  read_options.tailing = true;

  ASSERT_OK(Put("foo", "bar"));

  const std::string kVersionProp = "rocksdb.iterator.super-version-number";
  uint64_t v1 = 0;
  uint64_t v2 = 0;
  uint64_t v3 = 0;
  uint64_t v4 = 0;

  {
    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
    iter->Seek("foo");
    std::string prop_value;

    // Version number right after the first Seek().
    ASSERT_OK(iter->GetProperty(kVersionProp, &prop_value));
    // Parse as unsigned 64-bit; std::atoi is limited to the range of int.
    v1 = static_cast<uint64_t>(std::strtoull(prop_value.c_str(), nullptr, 10));

    ASSERT_OK(Put("foo1", "bar1"));
    ASSERT_OK(Flush());

    // Version number after a flush, without repositioning the iterator.
    ASSERT_OK(iter->GetProperty(kVersionProp, &prop_value));
    v2 = static_cast<uint64_t>(std::strtoull(prop_value.c_str(), nullptr, 10));

    // Version number after repositioning the iterator.
    iter->Seek("f");
    ASSERT_OK(iter->GetProperty(kVersionProp, &prop_value));
    v3 = static_cast<uint64_t>(std::strtoull(prop_value.c_str(), nullptr, 10));

    ASSERT_EQ(v1, v2);
    ASSERT_GT(v3, v2);
  }

  {
    // A brand-new iterator should report the same number as the re-sought one.
    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
    iter->Seek("foo");
    std::string prop_value;
    ASSERT_OK(iter->GetProperty(kVersionProp, &prop_value));
    v4 = static_cast<uint64_t>(std::strtoull(prop_value.c_str(), nullptr, 10));
  }
  ASSERT_EQ(v3, v4);
}
} // namespace rocksdb
#endif // !defined(ROCKSDB_LITE)

File diff suppressed because it is too large Load Diff

@ -0,0 +1,86 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <cstdlib>
#include <memory>
#include "db/db_test_util.h"
#include "port/stack_trace.h"
namespace rocksdb {
// Fixture for the tests in this file. It only forwards the database path
// "/db_test2" to the DBTestBase constructor; all helpers come from the base.
class DBTest2 : public DBTestBase {
 public:
  DBTest2()
      : DBTestBase("/db_test2") {
  }
};
// Checks the "rocksdb.iterator.super-version-number" property on regular
// iterators. The test expects that:
//   * an iterator created after a Put + Flush reports a larger number than
//     one created before it,
//   * a Put without a flush does not change the number seen by a new
//     iterator, and
//   * an existing iterator keeps reporting its original number even after
//     SeekToFirst().
TEST_F(DBTest2, IteratorPropertyVersionNumber) {
  const std::string kVersionProp = "rocksdb.iterator.super-version-number";

  ASSERT_OK(Put("", ""));
  // Iterators are owned by unique_ptr so they are released even when an
  // ASSERT_* macro returns from the test body early.
  std::unique_ptr<Iterator> iter1(db_->NewIterator(ReadOptions()));
  std::string prop_value;
  ASSERT_OK(iter1->GetProperty(kVersionProp, &prop_value));
  // Parse as unsigned 64-bit; std::atoi is limited to the range of int.
  const uint64_t version_number1 =
      static_cast<uint64_t>(std::strtoull(prop_value.c_str(), nullptr, 10));

  ASSERT_OK(Put("", ""));
  ASSERT_OK(Flush());

  std::unique_ptr<Iterator> iter2(db_->NewIterator(ReadOptions()));
  ASSERT_OK(iter2->GetProperty(kVersionProp, &prop_value));
  const uint64_t version_number2 =
      static_cast<uint64_t>(std::strtoull(prop_value.c_str(), nullptr, 10));

  ASSERT_GT(version_number2, version_number1);

  // No flush this time: a new iterator is expected to see the same number.
  ASSERT_OK(Put("", ""));

  std::unique_ptr<Iterator> iter3(db_->NewIterator(ReadOptions()));
  ASSERT_OK(iter3->GetProperty(kVersionProp, &prop_value));
  const uint64_t version_number3 =
      static_cast<uint64_t>(std::strtoull(prop_value.c_str(), nullptr, 10));

  ASSERT_EQ(version_number2, version_number3);

  // Repositioning the oldest iterator must not change what it reports.
  iter1->SeekToFirst();
  ASSERT_OK(iter1->GetProperty(kVersionProp, &prop_value));
  const uint64_t version_number1_new =
      static_cast<uint64_t>(std::strtoull(prop_value.c_str(), nullptr, 10));
  ASSERT_EQ(version_number1, version_number1_new);
}
// Writes two keys to a second column family with
// cache_index_and_filter_blocks enabled and a bloom filter configured,
// flushes, reopens the DB, and reads one key back. Every step is asserted so
// the test fails on an error status or a wrong value, not only on a crash.
TEST_F(DBTest2, CacheIndexAndFilterWithDBRestart) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = rocksdb::CreateDBStatistics();

  BlockBasedTableOptions table_options;
  table_options.cache_index_and_filter_blocks = true;
  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "a", "begin"));
  ASSERT_OK(Put(1, "z", "end"));
  ASSERT_OK(Flush(1));

  // Restart the DB with both column families.
  ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));

  // The value written before the restart must still be readable.
  ASSERT_EQ("begin", Get(1, "a"));
}
} // namespace rocksdb
// Test-binary entry point: installs the stack-trace handler, passes the
// command line to Google Test, then runs every registered test and returns
// the result of RUN_ALL_TESTS() as the process exit code.
int main(int argc, char** argv) {
  rocksdb::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -73,63 +73,69 @@ DBTestBase::~DBTestBase() {
delete env_;
}
// Switch to a fresh database with the next option configuration to
// test. Return false if there are no more configurations to test.
bool DBTestBase::ChangeOptions(int skip_mask) {
for (option_config_++; option_config_ < kEnd; option_config_++) {
bool DBTestBase::ShouldSkipOptions(int option_config, int skip_mask) {
#ifdef ROCKSDB_LITE
// These options are not supported in ROCKSDB_LITE
if (option_config_ == kHashSkipList ||
option_config_ == kPlainTableFirstBytePrefix ||
option_config_ == kPlainTableCappedPrefix ||
option_config_ == kPlainTableCappedPrefixNonMmap ||
option_config_ == kPlainTableAllBytesPrefix ||
option_config_ == kVectorRep || option_config_ == kHashLinkList ||
option_config_ == kHashCuckoo ||
option_config_ == kUniversalCompaction ||
option_config_ == kUniversalCompactionMultiLevel ||
option_config_ == kUniversalSubcompactions ||
option_config_ == kFIFOCompaction) {
continue;
if (option_config == kHashSkipList ||
option_config == kPlainTableFirstBytePrefix ||
option_config == kPlainTableCappedPrefix ||
option_config == kPlainTableCappedPrefixNonMmap ||
option_config == kPlainTableAllBytesPrefix ||
option_config == kVectorRep || option_config == kHashLinkList ||
option_config == kHashCuckoo || option_config == kUniversalCompaction ||
option_config == kUniversalCompactionMultiLevel ||
option_config == kUniversalSubcompactions ||
option_config == kFIFOCompaction ||
option_config == kConcurrentSkipList) {
return true;
}
#endif
if ((skip_mask & kSkipDeletesFilterFirst) &&
option_config_ == kDeletesFilterFirst) {
continue;
option_config == kDeletesFilterFirst) {
return true;
}
if ((skip_mask & kSkipUniversalCompaction) &&
(option_config_ == kUniversalCompaction ||
option_config_ == kUniversalCompactionMultiLevel)) {
continue;
(option_config == kUniversalCompaction ||
option_config == kUniversalCompactionMultiLevel)) {
return true;
}
if ((skip_mask & kSkipMergePut) && option_config_ == kMergePut) {
continue;
if ((skip_mask & kSkipMergePut) && option_config == kMergePut) {
return true;
}
if ((skip_mask & kSkipNoSeekToLast) &&
(option_config_ == kHashLinkList || option_config_ == kHashSkipList)) {
continue;
(option_config == kHashLinkList || option_config == kHashSkipList)) {
return true;
}
if ((skip_mask & kSkipPlainTable) &&
(option_config_ == kPlainTableAllBytesPrefix ||
option_config_ == kPlainTableFirstBytePrefix ||
option_config_ == kPlainTableCappedPrefix ||
option_config_ == kPlainTableCappedPrefixNonMmap)) {
continue;
(option_config == kPlainTableAllBytesPrefix ||
option_config == kPlainTableFirstBytePrefix ||
option_config == kPlainTableCappedPrefix ||
option_config == kPlainTableCappedPrefixNonMmap)) {
return true;
}
if ((skip_mask & kSkipHashIndex) &&
(option_config_ == kBlockBasedTableWithPrefixHashIndex ||
option_config_ == kBlockBasedTableWithWholeKeyHashIndex)) {
continue;
(option_config == kBlockBasedTableWithPrefixHashIndex ||
option_config == kBlockBasedTableWithWholeKeyHashIndex)) {
return true;
}
if ((skip_mask & kSkipHashCuckoo) && (option_config_ == kHashCuckoo)) {
continue;
if ((skip_mask & kSkipHashCuckoo) && (option_config == kHashCuckoo)) {
return true;
}
if ((skip_mask & kSkipFIFOCompaction) &&
option_config_ == kFIFOCompaction) {
continue;
if ((skip_mask & kSkipFIFOCompaction) && option_config == kFIFOCompaction) {
return true;
}
if ((skip_mask & kSkipMmapReads) && option_config == kWalDirAndMmapReads) {
return true;
}
return false;
}
if ((skip_mask & kSkipMmapReads) && option_config_ == kWalDirAndMmapReads) {
// Switch to a fresh database with the next option configuration to
// test. Return false if there are no more configurations to test.
bool DBTestBase::ChangeOptions(int skip_mask) {
for (option_config_++; option_config_ < kEnd; option_config_++) {
if (ShouldSkipOptions(option_config_, skip_mask)) {
continue;
}
break;
@ -333,6 +339,10 @@ Options DBTestBase::CurrentOptions(
options.prefix_extractor.reset(NewNoopTransform());
break;
}
case kBlockBasedTableWithIndexRestartInterval: {
table_options.index_block_restart_interval = 8;
break;
}
case kOptimizeFiltersForHits: {
options.optimize_filters_for_hits = true;
set_block_based_table_factory = true;
@ -356,6 +366,11 @@ Options DBTestBase::CurrentOptions(
options.max_subcompactions = 4;
break;
}
case kConcurrentSkipList: {
options.allow_concurrent_memtable_write = true;
options.enable_write_thread_adaptive_yield = true;
break;
}
default:
break;
@ -1000,4 +1015,29 @@ void DBTestBase::CopyFile(const std::string& source,
ASSERT_OK(destfile->Close());
}
std::unordered_map<std::string, uint64_t> DBTestBase::GetAllSSTFiles(
uint64_t* total_size) {
std::unordered_map<std::string, uint64_t> res;
if (total_size) {
*total_size = 0;
}
std::vector<std::string> files;
env_->GetChildren(dbname_, &files);
for (auto& file_name : files) {
uint64_t number;
FileType type;
std::string file_path = dbname_ + "/" + file_name;
if (ParseFileName(file_name, &number, &type) && type == kTableFile) {
uint64_t file_size = 0;
env_->GetFileSize(file_path, &file_size);
res[file_path] = file_size;
if (total_size) {
*total_size += file_size;
}
}
}
return res;
}
} // namespace rocksdb

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -19,6 +19,7 @@
#endif
#include <algorithm>
#include <map>
#include <set>
#include <string>
#include <thread>
@ -27,7 +28,6 @@
#include <vector>
#include "db/db_impl.h"
#include "db/db_test_util.h"
#include "db/dbformat.h"
#include "db/filename.h"
#include "memtable/hash_linklist_rep.h"
@ -525,9 +525,11 @@ class DBTestBase : public testing::Test {
kOptimizeFiltersForHits = 27,
kRowCache = 28,
kRecycleLogFiles = 29,
kLevelSubcompactions = 30,
kUniversalSubcompactions = 31,
kEnd = 30
kConcurrentSkipList = 30,
kEnd = 31,
kLevelSubcompactions = 31,
kUniversalSubcompactions = 32,
kBlockBasedTableWithIndexRestartInterval = 33,
};
int option_config_;
@ -573,6 +575,8 @@ class DBTestBase : public testing::Test {
return std::string(buf);
}
static bool ShouldSkipOptions(int option_config, int skip_mask = kNoSkip);
// Switch to a fresh database with the next option configuration to
// test. Return false if there are no more configurations to test.
bool ChangeOptions(int skip_mask = kNoSkip);
@ -749,6 +753,9 @@ class DBTestBase : public testing::Test {
void CopyFile(const std::string& source, const std::string& destination,
uint64_t size = 0);
std::unordered_map<std::string, uint64_t> GetAllSSTFiles(
uint64_t* total_size = nullptr);
};
} // namespace rocksdb

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -14,6 +14,11 @@
namespace rocksdb {
// Reads the current count of `ticker_type` from the statistics object held by
// `options`. `options.statistics` is dereferenced without a null check.
static uint64_t TestGetTickerCount(const Options& options,
                                   Tickers ticker_type) {
  const auto& stats = options.statistics;
  return stats->getTickerCount(ticker_type);
}
static std::string CompressibleString(Random* rnd, int len) {
std::string r;
test::CompressibleString(rnd, 0.8, len, &r);
@ -154,6 +159,72 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionSingleSortedRun) {
}
}
// Exercises optimize_filters_for_hits under universal compaction.
//
// While compaction is held back, lookups of missing keys are expected to bump
// BLOOM_FILTER_USEFUL. Once compaction has been allowed to run, repeating the
// same lookups is expected to leave that ticker unchanged.
TEST_P(DBTestUniversalCompaction, OptimizeFiltersForHits) {
Options options;
options = CurrentOptions(options);
options.compaction_style = kCompactionStyleUniversal;
options.compaction_options_universal.size_ratio = 5;
options.num_levels = num_levels_;
options.write_buffer_size = 105 << 10; // 105KB
options.arena_block_size = 4 << 10;
options.target_file_size_base = 32 << 10; // 32KB
// trigger compaction if there are >= 4 files
options.level0_file_num_compaction_trigger = 4;
// Block-based tables with a bloom filter whose index/filter blocks go
// through the block cache.
BlockBasedTableOptions bbto;
bbto.cache_index_and_filter_blocks = true;
bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
bbto.whole_key_filtering = true;
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
options.optimize_filters_for_hits = true;
// Statistics are required so the ticker can be read back below.
options.statistics = rocksdb::CreateDBStatistics();
// NOTE(review): presumably makes the memtable report itself full after 3
// entries so every third Put triggers a flush -- confirm against
// SpecialSkipListFactory.
options.memtable_factory.reset(new SpecialSkipListFactory(3));
DestroyAndReopen(options);
// Block compaction from happening: limit the LOW pool to one thread and
// occupy it with a sleeping task until WakeUp() is called below.
env_->SetBackgroundThreads(1, Env::LOW);
test::SleepingBackgroundTask sleeping_task_low;
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
Env::Priority::LOW);
// Write three keys per iteration; from the second iteration on, wait for the
// pending memtable flush after the first Put of the iteration.
for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) {
Put(Key(num * 10), "val");
if (num) {
dbfull()->TEST_WaitForFlushMemTable();
}
Put(Key(30 + num * 10), "val");
Put(Key(60 + num * 10), "val");
}
// One more write, then wait for the flush to finish.
Put("", "");
dbfull()->TEST_WaitForFlushMemTable();
// Query set of non existing keys
for (int i = 5; i < 90; i += 10) {
ASSERT_EQ(Get(Key(i)), "NOT_FOUND");
}
// Make sure bloom filter is used at least once.
ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
auto prev_counter = TestGetTickerCount(options, BLOOM_FILTER_USEFUL);
// Make sure bloom filter is used for all but the last L0 file when looking
// up a non-existent key that's in the range of all L0 files.
ASSERT_EQ(Get(Key(35)), "NOT_FOUND");
ASSERT_EQ(prev_counter + NumTableFilesAtLevel(0) - 1,
TestGetTickerCount(options, BLOOM_FILTER_USEFUL));
prev_counter = TestGetTickerCount(options, BLOOM_FILTER_USEFUL);
// Unblock compaction and wait for it to happen.
sleeping_task_low.WakeUp();
dbfull()->TEST_WaitForCompact();
// The same queries will not trigger bloom filter
for (int i = 5; i < 90; i += 10) {
ASSERT_EQ(Get(Key(i)), "NOT_FOUND");
}
ASSERT_EQ(prev_counter, TestGetTickerCount(options, BLOOM_FILTER_USEFUL));
}
// TODO(kailiu) The tests on UniversalCompaction has some issues:
// 1. A lot of magic numbers ("11" or "12").
// 2. Made assumption on the memtable flush conditions, which may change from
@ -1032,16 +1103,11 @@ TEST_P(DBTestUniversalCompaction, IncreaseUniversalCompactionNumLevels) {
for (int i = 0; i <= max_key1; i++) {
// each value is 10K
ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000)));
dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
}
ASSERT_OK(Flush(1));
dbfull()->TEST_WaitForCompact();
int non_level0_num_files = 0;
for (int i = 1; i < options.num_levels; i++) {
non_level0_num_files += NumTableFilesAtLevel(i, 1);
}
ASSERT_EQ(non_level0_num_files, 0);
// Stage 2: reopen with universal compaction, num_levels=4
options.compaction_style = kCompactionStyleUniversal;
options.num_levels = 4;
@ -1054,6 +1120,7 @@ TEST_P(DBTestUniversalCompaction, IncreaseUniversalCompactionNumLevels) {
for (int i = max_key1 + 1; i <= max_key2; i++) {
// each value is 10K
ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000)));
dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
}
ASSERT_OK(Flush(1));
dbfull()->TEST_WaitForCompact();
@ -1084,6 +1151,7 @@ TEST_P(DBTestUniversalCompaction, IncreaseUniversalCompactionNumLevels) {
for (int i = max_key2 + 1; i <= max_key3; i++) {
// each value is 10K
ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000)));
dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
}
ASSERT_OK(Flush(1));
dbfull()->TEST_WaitForCompact();

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -37,6 +37,7 @@ class DeleteFileTest : public testing::Test {
DeleteFileTest() {
db_ = nullptr;
env_ = Env::Default();
options_.delete_obsolete_files_period_micros = 0; // always do full purge
options_.enable_thread_tracking = true;
options_.write_buffer_size = 1024*1024*1000;
options_.target_file_size_base = 1024*1024*1000;

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2015, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -94,7 +94,8 @@ FlushJob::~FlushJob() {
}
void FlushJob::ReportStartedFlush() {
ThreadStatusUtil::SetColumnFamily(cfd_);
ThreadStatusUtil::SetColumnFamily(cfd_, cfd_->ioptions()->env,
cfd_->options()->enable_thread_tracking);
ThreadStatusUtil::SetThreadOperation(ThreadStatus::OP_FLUSH);
ThreadStatusUtil::SetThreadOperationProperty(
ThreadStatus::COMPACTION_JOB_ID,
@ -233,14 +234,14 @@ Status FlushJob::WriteLevel0Table(const autovector<MemTable*>& mems,
TEST_SYNC_POINT_CALLBACK("FlushJob::WriteLevel0Table:output_compression",
&output_compression_);
s = BuildTable(dbname_, db_options_.env, *cfd_->ioptions(), env_options_,
cfd_->table_cache(), iter.get(), meta,
cfd_->internal_comparator(),
s = BuildTable(
dbname_, db_options_.env, *cfd_->ioptions(), env_options_,
cfd_->table_cache(), iter.get(), meta, cfd_->internal_comparator(),
cfd_->int_tbl_prop_collector_factories(), cfd_->GetID(),
existing_snapshots_, earliest_write_conflict_snapshot_,
output_compression_, cfd_->ioptions()->compression_opts,
mutable_cf_options_.paranoid_file_checks,
cfd_->internal_stats(), Env::IO_HIGH, &table_properties_);
mutable_cf_options_.paranoid_file_checks, cfd_->internal_stats(),
Env::IO_HIGH, &table_properties_, 0 /* level */);
info.table_properties = table_properties_;
LogFlush(db_options_.info_log);
}
@ -270,6 +271,7 @@ Status FlushJob::WriteLevel0Table(const autovector<MemTable*>& mems,
if (!db_options_.disableDataSync && output_file_directory_ != nullptr) {
output_file_directory_->Fsync();
}
TEST_SYNC_POINT("FlushJob::WriteLevel0Table");
db_mutex_->Lock();
}
base->Unref();

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -10,15 +10,16 @@
#include <string>
#include <utility>
#include "db/job_context.h"
#include "db/column_family.h"
#include "db/db_impl.h"
#include "db/db_iter.h"
#include "db/column_family.h"
#include "db/dbformat.h"
#include "db/job_context.h"
#include "rocksdb/env.h"
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
#include "table/merger.h"
#include "db/dbformat.h"
#include "util/string_util.h"
#include "util/sync_point.h"
namespace rocksdb {
@ -471,6 +472,15 @@ Status ForwardIterator::status() const {
return immutable_status_;
}
// Looks up an iterator property by name and writes its value to `*prop`.
//
// The only property handled here is "rocksdb.iterator.super-version-number",
// which yields sv_->version_number converted to a string. Any other name
// returns Status::InvalidArgument() and leaves `*prop` untouched.
// `prop` must not be null (checked by assert).
Status ForwardIterator::GetProperty(std::string prop_name, std::string* prop) {
  assert(prop != nullptr);

  const bool is_version_query =
      (prop_name == "rocksdb.iterator.super-version-number");
  if (!is_version_query) {
    return Status::InvalidArgument();
  }

  *prop = ToString(sv_->version_number);
  return Status::OK();
}
void ForwardIterator::RebuildIterators(bool refresh_sv) {
// Clean up
Cleanup(refresh_sv);

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -71,6 +71,8 @@ class ForwardIterator : public InternalIterator {
virtual Slice key() const override;
virtual Slice value() const override;
virtual Status status() const override;
virtual Status GetProperty(std::string prop_name, std::string* prop) override;
bool TEST_CheckDeletedIters(int* deleted_iters, int* num_iters);
private:

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional
// grant of patent rights can be found in the PATENTS file in the same
@ -147,8 +147,9 @@ class InlineSkipList {
// values are ok.
std::atomic<int> max_height_; // Height of the entire list
// Used for optimizing sequential insert patterns. Tricky. prev_[i] for
// i up to max_height_ - 1 (inclusive) is the predecessor of prev_[0].
// Used for optimizing sequential insert patterns. Tricky. prev_height_
// of zero means prev_ is undefined. Otherwise: prev_[i] for i up
// to max_height_ - 1 (inclusive) is the predecessor of prev_[0], and
// prev_height_ is the height of prev_[0]. prev_[0] can only be equal
// to head when max_height_ and prev_height_ are both 1.
Node** prev_;
@ -510,11 +511,10 @@ InlineSkipList<Comparator>::AllocateNode(size_t key_size, int height) {
template <class Comparator>
void InlineSkipList<Comparator>::Insert(const char* key) {
// InsertConcurrently can't maintain the prev_ invariants when it needs
// to increase max_height_. In that case it sets prev_height_ to zero,
// letting us know that we should ignore it. A relaxed load suffices
// here because write thread synchronization separates Insert calls
// from InsertConcurrently calls.
// InsertConcurrently often can't maintain the prev_ invariants, so
// it just sets prev_height_ to zero, letting us know that we should
// ignore it. A relaxed load suffices here because write thread
// synchronization separates Insert calls from InsertConcurrently calls.
auto prev_height = prev_height_.load(std::memory_order_relaxed);
// fast path for sequential insertion
@ -595,15 +595,24 @@ void InlineSkipList<Comparator>::InsertConcurrently(const char* key) {
int height = x->UnstashHeight();
assert(height >= 1 && height <= kMaxHeight_);
// We don't have a lock-free algorithm for updating prev_, but we do have
// the option of invalidating the entire sequential-insertion cache.
// prev_'s invariant is that prev_[i] (i > 0) is the predecessor of
// prev_[0] at that level. We're only going to violate that if height
// > 1 and key lands after prev_[height - 1] but before prev_[0].
// Comparisons are pretty expensive, so an easier version is to just
// clear the cache if height > 1. We only write to prev_height_ if
// nobody else has, to avoid invalidating the root of the skip list in
// all of the other CPU caches.
if (height > 1 && prev_height_.load(std::memory_order_relaxed) != 0) {
prev_height_.store(0, std::memory_order_relaxed);
}
int max_height = max_height_.load(std::memory_order_relaxed);
while (height > max_height) {
if (max_height_.compare_exchange_strong(max_height, height)) {
// successfully updated it
max_height = height;
// we dont have a lock-free algorithm for fixing up prev_, so just
// mark it invalid
prev_height_.store(0, std::memory_order_relaxed);
break;
}
// else retry, possibly exiting the loop because somebody else

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

@ -15,6 +15,7 @@
#include <inttypes.h>
#include <string>
#include <algorithm>
#include <utility>
#include <vector>
#include "db/column_family.h"
@ -81,7 +82,21 @@ void PrintLevelStats(char* buf, size_t len, const std::string& name,
stats.count == 0 ? 0 : stats.micros / kMicrosInSec / stats.count,
num_input_records.c_str(), num_dropped_records.c_str());
}
// Splits a property string into its name and an optional numeric argument.
// Trailing decimal digits are treated as the argument, e.g.
// "rocksdb.num-files-at-level5" -> {"rocksdb.num-files-at-level", "5"}.
// This requires property names themselves to not end with digits. When the
// property has no trailing digits, the returned argument is empty.
std::pair<Slice, Slice> GetPropertyNameAndArg(const Slice& property) {
  Slice name = property, arg = property;
  size_t sfx_len = 0;
  // Cast to unsigned char before calling isdigit(): passing a negative char
  // value (possible for non-ASCII bytes where char is signed) is undefined
  // behavior.
  while (sfx_len < property.size() &&
         isdigit(static_cast<unsigned char>(
             property[property.size() - sfx_len - 1]))) {
    ++sfx_len;
  }
  name.remove_suffix(sfx_len);
  arg.remove_prefix(property.size() - sfx_len);
  return {name, arg};
}
} // anonymous namespace
static const std::string rocksdb_prefix = "rocksdb.";
@ -99,9 +114,8 @@ static const std::string compaction_pending = "compaction-pending";
static const std::string background_errors = "background-errors";
static const std::string cur_size_active_mem_table =
"cur-size-active-mem-table";
static const std::string cur_size_unflushed_mem_tables =
"cur-size-all-mem-tables";
static const std::string cur_size_all_mem_tables = "size-all-mem-tables";
static const std::string cur_size_all_mem_tables = "cur-size-all-mem-tables";
static const std::string size_all_mem_tables = "size-all-mem-tables";
static const std::string num_entries_active_mem_table =
"num-entries-active-mem-table";
static const std::string num_entries_imm_mem_tables =
@ -118,6 +132,8 @@ static const std::string is_file_deletions_enabled =
static const std::string num_snapshots = "num-snapshots";
static const std::string oldest_snapshot_time = "oldest-snapshot-time";
static const std::string num_live_versions = "num-live-versions";
static const std::string current_version_number =
"current-super-version-number";
static const std::string estimate_live_data_size = "estimate-live-data-size";
static const std::string base_level = "base-level";
static const std::string total_sst_files_size = "total-sst-files-size";
@ -136,8 +152,11 @@ const std::string DB::Properties::kStats = rocksdb_prefix + allstats;
const std::string DB::Properties::kSSTables = rocksdb_prefix + sstables;
const std::string DB::Properties::kCFStats = rocksdb_prefix + cfstats;
const std::string DB::Properties::kDBStats = rocksdb_prefix + dbstats;
const std::string DB::Properties::kLevelStats = rocksdb_prefix + levelstats;
const std::string DB::Properties::kNumImmutableMemTable =
rocksdb_prefix + num_immutable_mem_table;
const std::string DB::Properties::kNumImmutableMemTableFlushed =
rocksdb_prefix + num_immutable_mem_table_flushed;
const std::string DB::Properties::kMemTableFlushPending =
rocksdb_prefix + mem_table_flush_pending;
const std::string DB::Properties::kCompactionPending =
@ -151,9 +170,9 @@ const std::string DB::Properties::kBackgroundErrors =
const std::string DB::Properties::kCurSizeActiveMemTable =
rocksdb_prefix + cur_size_active_mem_table;
const std::string DB::Properties::kCurSizeAllMemTables =
rocksdb_prefix + cur_size_unflushed_mem_tables;
const std::string DB::Properties::kSizeAllMemTables =
rocksdb_prefix + cur_size_all_mem_tables;
const std::string DB::Properties::kSizeAllMemTables =
rocksdb_prefix + size_all_mem_tables;
const std::string DB::Properties::kNumEntriesActiveMemTable =
rocksdb_prefix + num_entries_active_mem_table;
const std::string DB::Properties::kNumEntriesImmMemTables =
@ -174,10 +193,13 @@ const std::string DB::Properties::kOldestSnapshotTime =
rocksdb_prefix + oldest_snapshot_time;
const std::string DB::Properties::kNumLiveVersions =
rocksdb_prefix + num_live_versions;
const std::string DB::Properties::kCurrentSuperVersionNumber =
rocksdb_prefix + current_version_number;
const std::string DB::Properties::kEstimateLiveDataSize =
rocksdb_prefix + estimate_live_data_size;
const std::string DB::Properties::kTotalSstFilesSize =
rocksdb_prefix + total_sst_files_size;
const std::string DB::Properties::kBaseLevel = rocksdb_prefix + base_level;
const std::string DB::Properties::kEstimatePendingCompactionBytes =
rocksdb_prefix + estimate_pending_comp_bytes;
const std::string DB::Properties::kAggregatedTableProperties =
@ -185,125 +207,113 @@ const std::string DB::Properties::kAggregatedTableProperties =
const std::string DB::Properties::kAggregatedTablePropertiesAtLevel =
rocksdb_prefix + aggregated_table_properties_at_level;
DBPropertyType GetPropertyType(const Slice& property, bool* is_int_property,
bool* need_out_of_mutex) {
assert(is_int_property != nullptr);
assert(need_out_of_mutex != nullptr);
Slice in = property;
Slice prefix(rocksdb_prefix);
*need_out_of_mutex = false;
*is_int_property = false;
if (!in.starts_with(prefix)) {
return kUnknown;
}
in.remove_prefix(prefix.size());
if (in.starts_with(num_files_at_level_prefix)) {
return kNumFilesAtLevel;
} else if (in == levelstats) {
return kLevelStats;
} else if (in == allstats) {
return kStats;
} else if (in == cfstats) {
return kCFStats;
} else if (in == dbstats) {
return kDBStats;
} else if (in == sstables) {
return kSsTables;
} else if (in == aggregated_table_properties) {
return kAggregatedTableProperties;
} else if (in.starts_with(aggregated_table_properties_at_level)) {
return kAggregatedTablePropertiesAtLevel;
}
*is_int_property = true;
if (in == num_immutable_mem_table) {
return kNumImmutableMemTable;
} else if (in == num_immutable_mem_table_flushed) {
return kNumImmutableMemTableFlushed;
} else if (in == mem_table_flush_pending) {
return kMemtableFlushPending;
} else if (in == compaction_pending) {
return kCompactionPending;
} else if (in == background_errors) {
return kBackgroundErrors;
} else if (in == cur_size_active_mem_table) {
return kCurSizeActiveMemTable;
} else if (in == cur_size_unflushed_mem_tables) {
return kCurSizeAllMemTables;
} else if (in == cur_size_all_mem_tables) {
return kSizeAllMemTables;
} else if (in == num_entries_active_mem_table) {
return kNumEntriesInMutableMemtable;
} else if (in == num_entries_imm_mem_tables) {
return kNumEntriesInImmutableMemtable;
} else if (in == num_deletes_active_mem_table) {
return kNumDeletesInMutableMemtable;
} else if (in == num_deletes_imm_mem_tables) {
return kNumDeletesInImmutableMemtable;
} else if (in == estimate_num_keys) {
return kEstimatedNumKeys;
} else if (in == estimate_table_readers_mem) {
*need_out_of_mutex = true;
return kEstimatedUsageByTableReaders;
} else if (in == is_file_deletions_enabled) {
return kIsFileDeletionEnabled;
} else if (in == num_snapshots) {
return kNumSnapshots;
} else if (in == oldest_snapshot_time) {
return kOldestSnapshotTime;
} else if (in == num_live_versions) {
return kNumLiveVersions;
} else if (in == estimate_live_data_size) {
*need_out_of_mutex = true;
return kEstimateLiveDataSize;
} else if (in == base_level) {
return kBaseLevel;
} else if (in == total_sst_files_size) {
return kTotalSstFilesSize;
} else if (in == estimate_pending_comp_bytes) {
return kEstimatePendingCompactionBytes;
} else if (in == num_running_flushes) {
return kNumRunningFlushes;
} else if (in == num_running_compactions) {
return kNumRunningCompactions;
}
return kUnknown;
}
bool InternalStats::GetIntPropertyOutOfMutex(DBPropertyType property_type,
Version* version,
uint64_t* value) const {
// Maps each user-facing DB::Properties name to the DBPropertyInfo used to
// retrieve it. Each entry is {need_out_of_mutex, handle_string, handle_int};
// exactly one of the two handlers is non-null. GetPropertyInfo() strips
// trailing digits from the requested property before looking it up here, so
// properties that take a numeric argument (e.g. kNumFilesAtLevelPrefix) are
// keyed by their name without the argument.
const std::unordered_map<std::string,
DBPropertyInfo> InternalStats::ppt_name_to_info = {
{DB::Properties::kNumFilesAtLevelPrefix,
{false, &InternalStats::HandleNumFilesAtLevel, nullptr}},
{DB::Properties::kLevelStats,
{false, &InternalStats::HandleLevelStats, nullptr}},
{DB::Properties::kStats, {false, &InternalStats::HandleStats, nullptr}},
{DB::Properties::kCFStats, {false, &InternalStats::HandleCFStats, nullptr}},
{DB::Properties::kDBStats, {false, &InternalStats::HandleDBStats, nullptr}},
{DB::Properties::kSSTables,
{false, &InternalStats::HandleSsTables, nullptr}},
{DB::Properties::kAggregatedTableProperties,
{false, &InternalStats::HandleAggregatedTableProperties, nullptr}},
{DB::Properties::kAggregatedTablePropertiesAtLevel,
{false, &InternalStats::HandleAggregatedTablePropertiesAtLevel, nullptr}},
{DB::Properties::kNumImmutableMemTable,
{false, nullptr, &InternalStats::HandleNumImmutableMemTable}},
{DB::Properties::kNumImmutableMemTableFlushed,
{false, nullptr, &InternalStats::HandleNumImmutableMemTableFlushed}},
{DB::Properties::kMemTableFlushPending,
{false, nullptr, &InternalStats::HandleMemTableFlushPending}},
{DB::Properties::kCompactionPending,
{false, nullptr, &InternalStats::HandleCompactionPending}},
{DB::Properties::kBackgroundErrors,
{false, nullptr, &InternalStats::HandleBackgroundErrors}},
{DB::Properties::kCurSizeActiveMemTable,
{false, nullptr, &InternalStats::HandleCurSizeActiveMemTable}},
{DB::Properties::kCurSizeAllMemTables,
{false, nullptr, &InternalStats::HandleCurSizeAllMemTables}},
{DB::Properties::kSizeAllMemTables,
{false, nullptr, &InternalStats::HandleSizeAllMemTables}},
{DB::Properties::kNumEntriesActiveMemTable,
{false, nullptr, &InternalStats::HandleNumEntriesActiveMemTable}},
{DB::Properties::kNumEntriesImmMemTables,
{false, nullptr, &InternalStats::HandleNumEntriesImmMemTables}},
{DB::Properties::kNumDeletesActiveMemTable,
{false, nullptr, &InternalStats::HandleNumDeletesActiveMemTable}},
{DB::Properties::kNumDeletesImmMemTables,
{false, nullptr, &InternalStats::HandleNumDeletesImmMemTables}},
{DB::Properties::kEstimateNumKeys,
{false, nullptr, &InternalStats::HandleEstimateNumKeys}},
{DB::Properties::kEstimateTableReadersMem,
{true, nullptr, &InternalStats::HandleEstimateTableReadersMem}},
{DB::Properties::kIsFileDeletionsEnabled,
{false, nullptr, &InternalStats::HandleIsFileDeletionsEnabled}},
{DB::Properties::kNumSnapshots,
{false, nullptr, &InternalStats::HandleNumSnapshots}},
{DB::Properties::kOldestSnapshotTime,
{false, nullptr, &InternalStats::HandleOldestSnapshotTime}},
{DB::Properties::kNumLiveVersions,
{false, nullptr, &InternalStats::HandleNumLiveVersions}},
{DB::Properties::kCurrentSuperVersionNumber,
{false, nullptr, &InternalStats::HandleCurrentSuperVersionNumber}},
{DB::Properties::kEstimateLiveDataSize,
{true, nullptr, &InternalStats::HandleEstimateLiveDataSize}},
{DB::Properties::kBaseLevel,
{false, nullptr, &InternalStats::HandleBaseLevel}},
{DB::Properties::kTotalSstFilesSize,
{false, nullptr, &InternalStats::HandleTotalSstFilesSize}},
{DB::Properties::kEstimatePendingCompactionBytes,
{false, nullptr, &InternalStats::HandleEstimatePendingCompactionBytes}},
{DB::Properties::kNumRunningFlushes,
{false, nullptr, &InternalStats::HandleNumRunningFlushes}},
{DB::Properties::kNumRunningCompactions,
{false, nullptr, &InternalStats::HandleNumRunningCompactions}},
};
// Returns the DBPropertyInfo registered for "property", or nullptr when the
// name (with any trailing numeric argument removed) is not present in
// InternalStats::ppt_name_to_info.
const DBPropertyInfo* GetPropertyInfo(const Slice& property) {
  const auto& info_map = InternalStats::ppt_name_to_info;
  const std::string lookup_key =
      GetPropertyNameAndArg(property).first.ToString();
  const auto pos = info_map.find(lookup_key);
  return pos != info_map.end() ? &pos->second : nullptr;
}
bool InternalStats::GetStringProperty(const DBPropertyInfo& property_info,
const Slice& property,
std::string* value) {
assert(value != nullptr);
const auto* vstorage = cfd_->current()->storage_info();
switch (property_type) {
case kEstimatedUsageByTableReaders:
*value = (version == nullptr) ?
0 : version->GetMemoryUsageByTableReaders();
return true;
case kEstimateLiveDataSize:
*value = vstorage->EstimateLiveDataSize();
return true;
default:
return false;
assert(property_info.handle_string != nullptr);
Slice arg = GetPropertyNameAndArg(property).second;
return (this->*(property_info.handle_string))(value, arg);
}
// Retrieves an integer property that is read while the DB mutex is held.
// Dispatches to the int handler stored in "property_info", passing "db" and a
// null Version, and returns whatever that handler returns.
bool InternalStats::GetIntProperty(const DBPropertyInfo& property_info,
                                   uint64_t* value, DBImpl* db) {
  assert(value != nullptr);
  assert(property_info.handle_int != nullptr &&
         !property_info.need_out_of_mutex);
  db->mutex_.AssertHeld();
  const auto int_handler = property_info.handle_int;
  return (this->*int_handler)(value, db, nullptr /* version */);
}
bool InternalStats::GetStringProperty(DBPropertyType property_type,
const Slice& property,
std::string* value) {
bool InternalStats::GetIntPropertyOutOfMutex(
const DBPropertyInfo& property_info, Version* version, uint64_t* value) {
assert(value != nullptr);
auto* current = cfd_->current();
const auto* vstorage = current->storage_info();
Slice in = property;
assert(property_info.handle_int != nullptr &&
property_info.need_out_of_mutex);
return (this->*(property_info.handle_int))(value, nullptr /* db */, version);
}
switch (property_type) {
case kNumFilesAtLevel: {
in.remove_prefix(strlen("rocksdb.num-files-at-level"));
bool InternalStats::HandleNumFilesAtLevel(std::string* value, Slice suffix) {
uint64_t level;
bool ok = ConsumeDecimalNumber(&in, &level) && in.empty();
if (!ok || (int)level >= number_levels_) {
const auto* vstorage = cfd_->current()->storage_info();
bool ok = ConsumeDecimalNumber(&suffix, &level) && suffix.empty();
if (!ok || static_cast<int>(level) >= number_levels_) {
return false;
} else {
char buf[100];
@ -313,8 +323,10 @@ bool InternalStats::GetStringProperty(DBPropertyType property_type,
return true;
}
}
case kLevelStats: {
bool InternalStats::HandleLevelStats(std::string* value, Slice suffix) {
char buf[1000];
const auto* vstorage = cfd_->current()->storage_info();
snprintf(buf, sizeof(buf),
"Level Files Size(MB)\n"
"--------------------\n");
@ -328,27 +340,35 @@ bool InternalStats::GetStringProperty(DBPropertyType property_type,
}
return true;
}
case kStats: {
if (!GetStringProperty(kCFStats, DB::Properties::kCFStats, value)) {
bool InternalStats::HandleStats(std::string* value, Slice suffix) {
if (!HandleCFStats(value, suffix)) {
return false;
}
if (!GetStringProperty(kDBStats, DB::Properties::kDBStats, value)) {
if (!HandleDBStats(value, suffix)) {
return false;
}
return true;
}
case kCFStats: {
bool InternalStats::HandleCFStats(std::string* value, Slice suffix) {
DumpCFStats(value);
return true;
}
case kDBStats: {
bool InternalStats::HandleDBStats(std::string* value, Slice suffix) {
DumpDBStats(value);
return true;
}
case kSsTables:
bool InternalStats::HandleSsTables(std::string* value, Slice suffix) {
auto* current = cfd_->current();
*value = current->DebugString();
return true;
case kAggregatedTableProperties: {
}
bool InternalStats::HandleAggregatedTableProperties(std::string* value,
Slice suffix) {
std::shared_ptr<const TableProperties> tp;
auto s = cfd_->current()->GetAggregatedTableProperties(&tp);
if (!s.ok()) {
@ -357,11 +377,11 @@ bool InternalStats::GetStringProperty(DBPropertyType property_type,
*value = tp->ToString();
return true;
}
case kAggregatedTablePropertiesAtLevel: {
in.remove_prefix(
DB::Properties::kAggregatedTablePropertiesAtLevel.length());
bool InternalStats::HandleAggregatedTablePropertiesAtLevel(std::string* value,
Slice suffix) {
uint64_t level;
bool ok = ConsumeDecimalNumber(&in, &level) && in.empty();
bool ok = ConsumeDecimalNumber(&suffix, &level) && suffix.empty();
if (!ok || static_cast<int>(level) >= number_levels_) {
return false;
}
@ -374,74 +394,110 @@ bool InternalStats::GetStringProperty(DBPropertyType property_type,
*value = tp->ToString();
return true;
}
default:
return false;
}
}
bool InternalStats::GetIntProperty(DBPropertyType property_type,
uint64_t* value, DBImpl* db) const {
db->mutex_.AssertHeld();
const auto* vstorage = cfd_->current()->storage_info();
switch (property_type) {
case kNumImmutableMemTable:
bool InternalStats::HandleNumImmutableMemTable(uint64_t* value, DBImpl* db,
Version* version) {
*value = cfd_->imm()->NumNotFlushed();
return true;
case kNumImmutableMemTableFlushed:
}
bool InternalStats::HandleNumImmutableMemTableFlushed(uint64_t* value,
DBImpl* db,
Version* version) {
*value = cfd_->imm()->NumFlushed();
return true;
case kMemtableFlushPending:
}
bool InternalStats::HandleMemTableFlushPending(uint64_t* value, DBImpl* db,
Version* version) {
// Return number of mem tables that are ready to flush (made immutable)
*value = (cfd_->imm()->IsFlushPending() ? 1 : 0);
return true;
case kNumRunningFlushes:
}
bool InternalStats::HandleNumRunningFlushes(uint64_t* value, DBImpl* db,
Version* version) {
*value = db->num_running_flushes();
return true;
case kCompactionPending:
}
bool InternalStats::HandleCompactionPending(uint64_t* value, DBImpl* db,
Version* version) {
// 1 if the system has already determined that at least one compaction is
// needed, 0 otherwise.
const auto* vstorage = cfd_->current()->storage_info();
*value = (cfd_->compaction_picker()->NeedsCompaction(vstorage) ? 1 : 0);
return true;
case kNumRunningCompactions:
}
bool InternalStats::HandleNumRunningCompactions(uint64_t* value, DBImpl* db,
Version* version) {
*value = db->num_running_compactions_;
return true;
case kBackgroundErrors:
}
bool InternalStats::HandleBackgroundErrors(uint64_t* value, DBImpl* db,
Version* version) {
// Accumulated number of errors in background flushes or compactions.
*value = GetBackgroundErrorCount();
return true;
case kCurSizeActiveMemTable:
}
bool InternalStats::HandleCurSizeActiveMemTable(uint64_t* value, DBImpl* db,
Version* version) {
// Current size of the active memtable
*value = cfd_->mem()->ApproximateMemoryUsage();
return true;
case kCurSizeAllMemTables:
}
bool InternalStats::HandleCurSizeAllMemTables(uint64_t* value, DBImpl* db,
Version* version) {
// Current size of the active memtable + immutable memtables
*value = cfd_->mem()->ApproximateMemoryUsage() +
cfd_->imm()->ApproximateUnflushedMemTablesMemoryUsage();
return true;
case kSizeAllMemTables:
}
bool InternalStats::HandleSizeAllMemTables(uint64_t* value, DBImpl* db,
Version* version) {
*value = cfd_->mem()->ApproximateMemoryUsage() +
cfd_->imm()->ApproximateMemoryUsage();
return true;
case kNumEntriesInMutableMemtable:
}
bool InternalStats::HandleNumEntriesActiveMemTable(uint64_t* value, DBImpl* db,
Version* version) {
// Current number of entries in the active memtable
*value = cfd_->mem()->num_entries();
return true;
case kNumEntriesInImmutableMemtable:
}
bool InternalStats::HandleNumEntriesImmMemTables(uint64_t* value, DBImpl* db,
Version* version) {
// Current number of entries in the immutable memtables
*value = cfd_->imm()->current()->GetTotalNumEntries();
return true;
case kNumDeletesInMutableMemtable:
}
bool InternalStats::HandleNumDeletesActiveMemTable(uint64_t* value, DBImpl* db,
Version* version) {
// Current number of deletion entries in the active memtable
*value = cfd_->mem()->num_deletes();
return true;
case kNumDeletesInImmutableMemtable:
}
bool InternalStats::HandleNumDeletesImmMemTables(uint64_t* value, DBImpl* db,
Version* version) {
// Current number of deletion entries in the immutable memtables
*value = cfd_->imm()->current()->GetTotalNumDeletes();
return true;
case kEstimatedNumKeys:
}
bool InternalStats::HandleEstimateNumKeys(uint64_t* value, DBImpl* db,
Version* version) {
// Estimate number of entries in the column family:
// Use estimated entries in tables + total entries in memtables.
const auto* vstorage = cfd_->current()->storage_info();
*value = cfd_->mem()->num_entries() +
cfd_->imm()->current()->GetTotalNumEntries() -
(cfd_->mem()->num_deletes() +
@ -449,30 +505,70 @@ bool InternalStats::GetIntProperty(DBPropertyType property_type,
2 +
vstorage->GetEstimatedActiveKeys();
return true;
case kNumSnapshots:
}
bool InternalStats::HandleNumSnapshots(uint64_t* value, DBImpl* db,
Version* version) {
*value = db->snapshots().count();
return true;
case kOldestSnapshotTime:
}
bool InternalStats::HandleOldestSnapshotTime(uint64_t* value, DBImpl* db,
Version* version) {
*value = static_cast<uint64_t>(db->snapshots().GetOldestSnapshotTime());
return true;
case kNumLiveVersions:
}
bool InternalStats::HandleNumLiveVersions(uint64_t* value, DBImpl* db,
Version* version) {
*value = cfd_->GetNumLiveVersions();
return true;
case kIsFileDeletionEnabled:
}
bool InternalStats::HandleCurrentSuperVersionNumber(uint64_t* value, DBImpl* db,
Version* version) {
*value = cfd_->GetSuperVersionNumber();
return true;
}
bool InternalStats::HandleIsFileDeletionsEnabled(uint64_t* value, DBImpl* db,
Version* version) {
*value = db->IsFileDeletionsEnabled();
return true;
case kBaseLevel:
}
bool InternalStats::HandleBaseLevel(uint64_t* value, DBImpl* db,
Version* version) {
const auto* vstorage = cfd_->current()->storage_info();
*value = vstorage->base_level();
return true;
case kTotalSstFilesSize:
}
bool InternalStats::HandleTotalSstFilesSize(uint64_t* value, DBImpl* db,
Version* version) {
*value = cfd_->GetTotalSstFilesSize();
return true;
case kEstimatePendingCompactionBytes:
}
bool InternalStats::HandleEstimatePendingCompactionBytes(uint64_t* value,
DBImpl* db,
Version* version) {
const auto* vstorage = cfd_->current()->storage_info();
*value = vstorage->estimated_compaction_needed_bytes();
return true;
default:
return false;
}
bool InternalStats::HandleEstimateTableReadersMem(uint64_t* value, DBImpl* db,
Version* version) {
*value = (version == nullptr) ? 0 : version->GetMemoryUsageByTableReaders();
return true;
}
bool InternalStats::HandleEstimateLiveDataSize(uint64_t* value, DBImpl* db,
Version* version) {
const auto* vstorage = cfd_->current()->storage_info();
*value = vstorage->EstimateLiveDataSize();
return true;
}
void InternalStats::DumpDBStats(std::string* value) {
@ -757,10 +853,7 @@ void InternalStats::DumpCFStats(std::string* value) {
#else
DBPropertyType GetPropertyType(const Slice& property, bool* is_int_property,
bool* need_out_of_mutex) {
return kUnknown;
}
// Stub for the build in which InternalStats property handling is compiled
// out: no property is ever recognized, so the lookup always yields nullptr.
const DBPropertyInfo* GetPropertyInfo(const Slice& property) {
  return nullptr;
}
#endif // !ROCKSDB_LITE

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -21,63 +21,29 @@ namespace rocksdb {
class MemTableList;
class DBImpl;
// IMPORTANT: If you add a new property here, also add it to the list in
// include/rocksdb/db.h
enum DBPropertyType : uint32_t {
kUnknown,
kNumFilesAtLevel, // Number of files at a specific level
kLevelStats, // Return number of files and total sizes of each level
kCFStats, // Return general statitistics of CF
kDBStats, // Return general statitistics of DB
kStats, // Return general statitistics of both DB and CF
kSsTables, // Return a human readable string of current SST files
kStartIntTypes, // ---- Dummy value to indicate the start of integer values
kNumImmutableMemTable, // Return number of immutable mem tables that
// have not been flushed.
kNumImmutableMemTableFlushed, // Return number of immutable mem tables
// in memory that have already been flushed
kMemtableFlushPending, // Return 1 if mem table flushing is pending,
// otherwise 0.
kNumRunningFlushes, // Return the number of currently running flushes.
kCompactionPending, // Return 1 if a compaction is pending. Otherwise 0.
kNumRunningCompactions, // Return the number of currently running
// compactions.
kBackgroundErrors, // Return accumulated background errors encountered.
kCurSizeActiveMemTable, // Return current size of the active memtable
kCurSizeAllMemTables, // Return current size of unflushed
// (active + immutable) memtables
kSizeAllMemTables, // Return current size of all (active + immutable
// + pinned) memtables
kNumEntriesInMutableMemtable, // Return number of deletes in the mutable
// memtable.
kNumEntriesInImmutableMemtable, // Return sum of number of entries in all
// the immutable mem tables.
kNumDeletesInMutableMemtable, // Return number of deletion entries in the
// mutable memtable.
kNumDeletesInImmutableMemtable, // Return the total number of deletion
// entries in all the immutable mem tables.
kEstimatedNumKeys, // Estimated total number of keys in the database.
kEstimatedUsageByTableReaders, // Estimated memory by table readers.
kIsFileDeletionEnabled, // Equals disable_delete_obsolete_files_,
// 0 means file deletions enabled
kNumSnapshots, // Number of snapshots in the system
kOldestSnapshotTime, // Unix timestamp of the first snapshot
kNumLiveVersions,
kEstimateLiveDataSize, // Estimated amount of live data in bytes
kTotalSstFilesSize, // Total size of all sst files.
kBaseLevel, // The level that L0 data is compacted to
kEstimatePendingCompactionBytes, // Estimated bytes to compaction
kAggregatedTableProperties, // Return a string that contains the aggregated
// table properties.
kAggregatedTablePropertiesAtLevel, // Return a string that contains the
// aggregated
// table properties at the specified level.
// Config for retrieving a property's value: says which handler to call and
// whether that call is made with or without the DB mutex held.
struct DBPropertyInfo {
// True when the int handler is meant to be invoked through
// InternalStats::GetIntPropertyOutOfMutex(); false when it is invoked through
// InternalStats::GetIntProperty(), which asserts that the DB mutex is held.
bool need_out_of_mutex;
// gcc had an internal error for initializing union of pointer-to-member-
// functions. Workaround is to populate exactly one of the following function
// pointers with a non-nullptr value.
// Handler for string-valued properties.
// @param value Value-result argument for storing the property's string value
// @param suffix Argument portion of the property. For example, suffix would
// be "5" for the property "rocksdb.num-files-at-level5". So far, only
// certain string properties take an argument.
bool (InternalStats::*handle_string)(std::string* value, Slice suffix);
// Handler for integer-valued properties.
// @param value Value-result argument for storing the property's uint64 value
// @param db Many of the int properties rely on DBImpl methods.
// @param version Version is needed in case the property is retrieved without
// holding db mutex, which is only supported for int properties.
bool (InternalStats::*handle_int)(uint64_t* value, DBImpl* db,
Version* version);
};
extern DBPropertyType GetPropertyType(const Slice& property,
bool* is_int_property,
bool* need_out_of_mutex);
extern const DBPropertyInfo* GetPropertyInfo(const Slice& property);
#ifndef ROCKSDB_LITE
class InternalStats {
@ -248,14 +214,18 @@ class InternalStats {
uint64_t BumpAndGetBackgroundErrorCount() { return ++bg_error_count_; }
bool GetStringProperty(DBPropertyType property_type, const Slice& property,
std::string* value);
bool GetStringProperty(const DBPropertyInfo& property_info,
const Slice& property, std::string* value);
bool GetIntProperty(const DBPropertyInfo& property_info, uint64_t* value,
DBImpl* db);
bool GetIntProperty(DBPropertyType property_type, uint64_t* value,
DBImpl* db) const;
bool GetIntPropertyOutOfMutex(const DBPropertyInfo& property_info,
Version* version, uint64_t* value);
bool GetIntPropertyOutOfMutex(DBPropertyType property_type, Version* version,
uint64_t* value) const;
// Store a mapping from the user-facing DB::Properties string to our
// DBPropertyInfo struct used internally for retrieving properties.
static const std::unordered_map<std::string, DBPropertyInfo> ppt_name_to_info;
private:
void DumpDBStats(std::string* value);
@ -321,6 +291,56 @@ class InternalStats {
seconds_up(0) {}
} db_stats_snapshot_;
// Handler functions for getting property values. They use "value" as a value-
// result argument, and return true upon successfully setting "value".
// Handlers that produce a string result. Each receives the output string
// and a Slice "suffix" (presumably the part of the property name that
// follows the matched prefix -- confirm against the caller).
bool HandleNumFilesAtLevel(std::string* value, Slice suffix);
bool HandleLevelStats(std::string* value, Slice suffix);
bool HandleStats(std::string* value, Slice suffix);
bool HandleCFStats(std::string* value, Slice suffix);
bool HandleDBStats(std::string* value, Slice suffix);
bool HandleSsTables(std::string* value, Slice suffix);
bool HandleAggregatedTableProperties(std::string* value, Slice suffix);
bool HandleAggregatedTablePropertiesAtLevel(std::string* value, Slice suffix);
// Handlers that produce an integer result. Each receives the uint64_t output
// pointer together with a DBImpl* and a Version*, and returns a bool status.
// NOTE(review): whether a given handler actually reads "db" or "version" is
// not visible from these declarations -- check the definitions.
bool HandleNumImmutableMemTable(uint64_t* value, DBImpl* db,
Version* version);
bool HandleNumImmutableMemTableFlushed(uint64_t* value, DBImpl* db,
Version* version);
bool HandleMemTableFlushPending(uint64_t* value, DBImpl* db,
Version* version);
bool HandleNumRunningFlushes(uint64_t* value, DBImpl* db, Version* version);
bool HandleCompactionPending(uint64_t* value, DBImpl* db, Version* version);
bool HandleNumRunningCompactions(uint64_t* value, DBImpl* db,
Version* version);
bool HandleBackgroundErrors(uint64_t* value, DBImpl* db, Version* version);
bool HandleCurSizeActiveMemTable(uint64_t* value, DBImpl* db,
Version* version);
bool HandleCurSizeAllMemTables(uint64_t* value, DBImpl* db, Version* version);
bool HandleSizeAllMemTables(uint64_t* value, DBImpl* db, Version* version);
bool HandleNumEntriesActiveMemTable(uint64_t* value, DBImpl* db,
Version* version);
bool HandleNumEntriesImmMemTables(uint64_t* value, DBImpl* db,
Version* version);
bool HandleNumDeletesActiveMemTable(uint64_t* value, DBImpl* db,
Version* version);
bool HandleNumDeletesImmMemTables(uint64_t* value, DBImpl* db,
Version* version);
bool HandleEstimateNumKeys(uint64_t* value, DBImpl* db, Version* version);
bool HandleNumSnapshots(uint64_t* value, DBImpl* db, Version* version);
bool HandleOldestSnapshotTime(uint64_t* value, DBImpl* db, Version* version);
bool HandleNumLiveVersions(uint64_t* value, DBImpl* db, Version* version);
bool HandleCurrentSuperVersionNumber(uint64_t* value, DBImpl* db,
Version* version);
bool HandleIsFileDeletionsEnabled(uint64_t* value, DBImpl* db,
Version* version);
bool HandleBaseLevel(uint64_t* value, DBImpl* db, Version* version);
bool HandleTotalSstFilesSize(uint64_t* value, DBImpl* db, Version* version);
bool HandleEstimatePendingCompactionBytes(uint64_t* value, DBImpl* db,
Version* version);
bool HandleEstimateTableReadersMem(uint64_t* value, DBImpl* db,
Version* version);
bool HandleEstimateLiveDataSize(uint64_t* value, DBImpl* db,
Version* version);
// Total number of background errors encountered. Every time a flush task
// or compaction task fails, this counter is incremented. The failure can
// be caused by any possible reason, including file system errors, out of
@ -402,14 +422,20 @@ class InternalStats {
// Stub implementations: the error counter always reads 0, and every property
// query below returns false without writing to its output argument.
// NOTE(review): the "#endif  // !ROCKSDB_LITE" that follows suggests these are
// the ROCKSDB_LITE variants -- confirm.
// NOTE(review): this listing looks like a diff rendered without +/- markers,
// so the DBPropertyType overloads and the DBPropertyInfo overloads may be the
// removed and added sides of the same hunk rather than coexisting overloads.
uint64_t BumpAndGetBackgroundErrorCount() { return 0; }
bool GetStringProperty(DBPropertyType property_type, const Slice& property,
std::string* value) { return false; }
bool GetStringProperty(const DBPropertyInfo& property_info,
const Slice& property, std::string* value) {
return false;
}
bool GetIntProperty(DBPropertyType property_type, uint64_t* value,
DBImpl* db) const { return false; }
bool GetIntProperty(const DBPropertyInfo& property_info, uint64_t* value,
DBImpl* db) const {
return false;
}
bool GetIntPropertyOutOfMutex(DBPropertyType property_type, Version* version,
uint64_t* value) const { return false; }
bool GetIntPropertyOutOfMutex(const DBPropertyInfo& property_info,
Version* version, uint64_t* value) const {
return false;
}
};
#endif // !ROCKSDB_LITE

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -22,9 +22,9 @@ class MemTable;
struct JobContext {
// Returns true when any of the file/object collections named below is
// non-empty or new_superversion is set.
// NOTE(review): two versions of the return expression appear back to back.
// They look like the before/after sides of a diff hunk; the second one adds
// manifest_delete_files to the check. Confirm which one is live before
// treating this text as compilable code.
inline bool HaveSomethingToDelete() const {
return full_scan_candidate_files.size() || sst_delete_files.size() ||
log_delete_files.size() || new_superversion != nullptr ||
superversions_to_free.size() > 0 || memtables_to_free.size() > 0 ||
logs_to_free.size() > 0;
log_delete_files.size() || manifest_delete_files.size() ||
new_superversion != nullptr || superversions_to_free.size() > 0 ||
memtables_to_free.size() > 0 || logs_to_free.size() > 0;
}
// Structure to store information for candidate files to delete.
@ -56,6 +56,9 @@ struct JobContext {
// a list of log files that we need to delete
std::vector<uint64_t> log_delete_files;
// a list of manifest files that we need to delete
std::vector<std::string> manifest_delete_files;
// a list of memtables to be freed
autovector<MemTable*> memtables_to_free;

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save