Makes directory smaller by removing files not used by Oxigraph

oxigraph-8.1.1
Tpt 2 years ago
parent 81d5273b06
commit 443333d8c0
  1. 892
      .circleci/config.yml
  2. 6
      .circleci/ubsan_suppression_list.txt
  3. 47
      .github/workflows/sanity_check.yml
  4. 168
      build_tools/amalgamate.py
  5. 238
      build_tools/benchmark_log_tool.py
  6. 900
      build_tools/build_detect_platform
  7. 48
      build_tools/check-sources.sh
  8. 22
      build_tools/dependencies_platform010.sh
  9. 3
      build_tools/dockerbuild.sh
  10. 181
      build_tools/error_filter.py
  11. 55
      build_tools/fb_compile_mongo.sh
  12. 175
      build_tools/fbcode_config.sh
  13. 175
      build_tools/fbcode_config_platform010.sh
  14. 203
      build_tools/format-diff.sh
  15. 7971
      build_tools/gnu_parallel
  16. 129
      build_tools/make_package.sh
  17. 38
      build_tools/ps_with_stack
  18. 396
      build_tools/regression_build_test.sh
  19. 493
      build_tools/run_ci_db_test.ps1
  20. 45
      build_tools/setup_centos7.sh
  21. 57
      build_tools/ubuntu20_image/Dockerfile
  22. 106
      build_tools/update_dependencies.sh
  23. 23
      build_tools/version.sh
  24. 469
      cache/cache_reservation_manager_test.cc
  25. 969
      cache/cache_test.cc
  26. 980
      cache/compressed_secondary_cache_test.cc
  27. 2558
      cache/lru_cache_test.cc
  28. 82
      coverage/coverage_test.sh
  29. 128
      coverage/parse_gcov_output.py
  30. 3382
      db/column_family_test.cc
  31. 491
      db/compact_files_test.cc
  32. 678
      db/comparator_db_test.cc
  33. 1669
      db/corruption_test.cc
  34. 351
      db/cuckoo_table_db_test.cc
  35. 4777
      db/db_basic_test.cc
  36. 1969
      db/db_block_cache_test.cc
  37. 3473
      db/db_bloom_filter_test.cc
  38. 1030
      db/db_compaction_filter_test.cc
  39. 9118
      db/db_compaction_test.cc
  40. 499
      db/db_dynamic_level_test.cc
  41. 126
      db/db_encryption_test.cc
  42. 3202
      db/db_flush_test.cc
  43. 262
      db/db_inplace_update_test.cc
  44. 589
      db/db_io_failure_test.cc
  45. 658
      db/db_iter_stress_test.cc
  46. 3195
      db/db_iter_test.cc
  47. 3253
      db/db_iterator_test.cc
  48. 885
      db/db_kv_checksum_test.cc
  49. 297
      db/db_log_iter_test.cc
  50. 505
      db/db_logical_block_size_cache_test.cc
  51. 344
      db/db_memtable_test.cc
  52. 488
      db/db_merge_operand_test.cc
  53. 824
      db/db_merge_operator_test.cc
  54. 1215
      db/db_options_test.cc
  55. 2376
      db/db_properties_test.cc
  56. 3414
      db/db_range_del_test.cc
  57. 436
      db/db_rate_limiter_test.cc
  58. 956
      db/db_readonly_with_timestamp_test.cc
  59. 1691
      db/db_secondary_test.cc
  60. 1864
      db/db_sst_test.cc
  61. 213
      db/db_statistics_test.cc
  62. 623
      db/db_table_properties_test.cc
  63. 595
      db/db_tailing_iter_test.cc
  64. 7338
      db/db_test.cc
  65. 2227
      db/db_universal_compaction_test.cc
  66. 2408
      db/db_wal_test.cc
  67. 3928
      db/db_with_timestamp_basic_test.cc
  68. 353
      db/db_with_timestamp_compaction_test.cc
  69. 860
      db/db_write_buffer_manager_test.cc
  70. 790
      db/db_write_test.cc
  71. 214
      db/dbformat_test.cc
  72. 603
      db/deletefile_test.cc
  73. 2862
      db/error_handler_fs_test.cc
  74. 1999
      db/external_sst_file_basic_test.cc
  75. 2860
      db/external_sst_file_test.cc
  76. 637
      db/fault_injection_test.cc
  77. 352
      db/file_indexer_test.cc
  78. 241
      db/filename_test.cc
  79. 743
      db/flush_job_test.cc
  80. 746
      db/import_column_family_test.cc
  81. 1598
      db/listener_test.cc
  82. 1095
      db/log_test.cc
  83. 308
      db/manual_compaction_test.cc
  84. 1037
      db/memtable_list_test.cc
  85. 298
      db/merge_helper_test.cc
  86. 621
      db/merge_test.cc
  87. 317
      db/obsolete_files_test.cc
  88. 110
      db/options_file_test.cc
  89. 1157
      db/perf_context_test.cc
  90. 229
      db/periodic_task_scheduler_test.cc
  91. 1347
      db/plain_table_db_test.cc
  92. 894
      db/prefix_test.cc
  93. 713
      db/range_del_aggregator_test.cc
  94. 555
      db/range_tombstone_fragmenter_test.cc
  95. 484
      db/repair_test.cc
  96. 994
      db/seqno_time_test.cc
  97. 509
      db/table_properties_collector_test.cc
  98. 1820
      db/version_builder_test.cc
  99. 732
      db/version_edit_test.cc
  100. 3619
      db/version_set_test.cc
  101. Some files were not shown because too many files have changed in this diff Show More

@ -1,892 +0,0 @@
version: 2.1
orbs:
win: circleci/windows@5.0.0
commands:
install-cmake-on-macos:
steps:
- run:
name: Install cmake on macos
command: |
HOMEBREW_NO_AUTO_UPDATE=1 brew install cmake
install-jdk8-on-macos:
steps:
- run:
name: Install JDK 8 on macos
command: |
brew install --cask adoptopenjdk/openjdk/adoptopenjdk8
increase-max-open-files-on-macos:
steps:
- run:
name: Increase max open files
command: |
sudo sysctl -w kern.maxfiles=1048576
sudo sysctl -w kern.maxfilesperproc=1048576
sudo launchctl limit maxfiles 1048576
pre-steps:
steps:
- checkout
- run:
name: Setup Environment Variables
command: |
echo "export GTEST_THROW_ON_FAILURE=0" >> $BASH_ENV
echo "export GTEST_OUTPUT=\"xml:/tmp/test-results/\"" >> $BASH_ENV
echo "export SKIP_FORMAT_BUCK_CHECKS=1" >> $BASH_ENV
echo "export GTEST_COLOR=1" >> $BASH_ENV
echo "export CTEST_OUTPUT_ON_FAILURE=1" >> $BASH_ENV
echo "export CTEST_TEST_TIMEOUT=300" >> $BASH_ENV
echo "export ZLIB_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/zlib" >> $BASH_ENV
echo "export BZIP2_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/bzip2" >> $BASH_ENV
echo "export SNAPPY_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/snappy" >> $BASH_ENV
echo "export LZ4_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/lz4" >> $BASH_ENV
echo "export ZSTD_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/zstd" >> $BASH_ENV
windows-build-steps:
steps:
- checkout
- run:
name: "Install thirdparty dependencies"
command: |
echo "Installing CMake..."
choco install cmake --installargs 'ADD_CMAKE_TO_PATH=System' -y
mkdir $Env:THIRDPARTY_HOME
cd $Env:THIRDPARTY_HOME
echo "Building Snappy dependency..."
curl https://github.com/google/snappy/archive/refs/tags/1.1.8.zip -O snappy-1.1.8.zip
unzip -q snappy-1.1.8.zip
cd snappy-1.1.8
mkdir build
cd build
& $Env:CMAKE_BIN -G "$Env:CMAKE_GENERATOR" ..
msbuild.exe Snappy.sln -maxCpuCount -property:Configuration=Debug -property:Platform=x64
- run:
name: "Build RocksDB"
command: |
mkdir build
cd build
& $Env:CMAKE_BIN -G "$Env:CMAKE_GENERATOR" -DCMAKE_BUILD_TYPE=Debug -DOPTDBG=1 -DPORTABLE=1 -DSNAPPY=1 -DJNI=1 ..
cd ..
echo "Building with VS version: $Env:CMAKE_GENERATOR"
msbuild.exe build/rocksdb.sln -maxCpuCount -property:Configuration=Debug -property:Platform=x64
- run:
name: "Test RocksDB"
shell: powershell.exe
command: |
build_tools\run_ci_db_test.ps1 -SuiteRun arena_test,db_basic_test,db_test,db_test2,db_merge_operand_test,bloom_test,c_test,coding_test,crc32c_test,dynamic_bloom_test,env_basic_test,env_test,hash_test,random_test -Concurrency 16
pre-steps-macos:
steps:
- pre-steps
post-steps:
steps:
- store_test_results: # store test result if there's any
path: /tmp/test-results
- store_artifacts: # store LOG for debugging if there's any
path: LOG
- run: # on fail, compress Test Logs for diagnosing the issue
name: Compress Test Logs
command: tar -cvzf t.tar.gz t
when: on_fail
- store_artifacts: # on fail, store Test Logs for diagnosing the issue
path: t.tar.gz
destination: test_logs
when: on_fail
- run: # store core dumps if there's any
command: |
mkdir -p /tmp/core_dumps
cp core.* /tmp/core_dumps
when: on_fail
- store_artifacts:
path: /tmp/core_dumps
when: on_fail
upgrade-cmake:
steps:
- run:
name: Upgrade cmake
command: |
sudo apt remove --purge cmake
sudo snap install cmake --classic
install-gflags:
steps:
- run:
name: Install gflags
command: |
sudo apt-get update -y && sudo apt-get install -y libgflags-dev
install-gflags-on-macos:
steps:
- run:
name: Install gflags on macos
command: |
HOMEBREW_NO_AUTO_UPDATE=1 brew install gflags
setup-folly:
steps:
- run:
name: Checkout folly sources
command: |
make checkout_folly
build-folly:
steps:
- run:
name: Build folly and dependencies
command: |
make build_folly
build-for-benchmarks:
steps:
- pre-steps
- run:
name: "Linux build for benchmarks"
command: #sized for the resource-class rocksdb-benchmark-sys1
make V=1 J=8 -j8 release
perform-benchmarks:
steps:
- run:
name: "Test low-variance benchmarks"
command: ./tools/benchmark_ci.py --db_dir /tmp/rocksdb-benchmark-datadir --output_dir /tmp/benchmark-results --num_keys 20000000
environment:
LD_LIBRARY_PATH: /usr/local/lib
# How long to run parts of the test(s)
DURATION_RO: 300
DURATION_RW: 500
# Keep threads within physical capacity of server (much lower than default)
NUM_THREADS: 1
MAX_BACKGROUND_JOBS: 4
# Don't run a couple of "optional" initial tests
CI_TESTS_ONLY: "true"
# Reduce configured size of levels to ensure more levels in the leveled compaction LSM tree
WRITE_BUFFER_SIZE_MB: 16
TARGET_FILE_SIZE_BASE_MB: 16
MAX_BYTES_FOR_LEVEL_BASE_MB: 64
# The benchmark host has 32GB memory
# The following values are tailored to work with that
# Note, tests may not exercise the targeted issues if the memory is increased on new test hosts.
COMPRESSION_TYPE: "none"
CACHE_INDEX_AND_FILTER_BLOCKS: 1
MIN_LEVEL_TO_COMPRESS: 3
CACHE_SIZE_MB: 10240
MB_WRITE_PER_SEC: 2
post-benchmarks:
steps:
- store_artifacts: # store the benchmark output
path: /tmp/benchmark-results
destination: test_logs
- run:
name: Send benchmark report to visualisation
command: |
set +e
set +o pipefail
./build_tools/benchmark_log_tool.py --tsvfile /tmp/benchmark-results/report.tsv --esdocument https://search-rocksdb-bench-k2izhptfeap2hjfxteolsgsynm.us-west-2.es.amazonaws.com/bench_test3_rix/_doc
true
executors:
linux-docker:
docker:
# The image configuration is build_tools/ubuntu20_image/Dockerfile
# To update and build the image:
# $ cd build_tools/ubuntu20_image
# $ docker build -t zjay437/rocksdb:0.5 .
# $ docker push zjay437/rocksdb:0.5
# `zjay437` is the account name for zjay@meta.com, whose read/write token is shared internally. To log in:
# $ docker login --username zjay437
# Or feel free to change it to your own Docker Hub account for hosting the image; Meta employees should already have an account and be able to log in with SSO.
# To avoid impacting the existing CI runs, please bump the version every time creating a new image
# to run the CI image environment locally:
# $ docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -it zjay437/rocksdb:0.5 bash
# option `--cap-add=SYS_PTRACE --security-opt seccomp=unconfined` is used to enable gdb to attach an existing process
- image: zjay437/rocksdb:0.6
jobs:
build-macos:
macos:
xcode: 12.5.1
resource_class: large
environment:
ROCKSDB_DISABLE_JEMALLOC: 1 # jemalloc causes env_test to hang, disable it for now
steps:
- increase-max-open-files-on-macos
- install-gflags-on-macos
- pre-steps-macos
- run: ulimit -S -n `ulimit -H -n` && OPT=-DCIRCLECI make V=1 J=32 -j32 all
- post-steps
build-macos-cmake:
macos:
xcode: 12.5.1
resource_class: large
parameters:
run_even_tests:
description: run even or odd tests, used to split tests to 2 groups
type: boolean
default: true
steps:
- increase-max-open-files-on-macos
- install-cmake-on-macos
- install-gflags-on-macos
- pre-steps-macos
- run:
name: "cmake generate project file"
command: ulimit -S -n `ulimit -H -n` && mkdir build && cd build && cmake -DWITH_GFLAGS=1 ..
- run:
name: "Build tests"
command: cd build && make V=1 -j32
- when:
condition: << parameters.run_even_tests >>
steps:
- run:
name: "Run even tests"
command: ulimit -S -n `ulimit -H -n` && cd build && ctest -j32 -I 0,,2
- when:
condition:
not: << parameters.run_even_tests >>
steps:
- run:
name: "Run odd tests"
command: ulimit -S -n `ulimit -H -n` && cd build && ctest -j32 -I 1,,2
- post-steps
build-linux:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- run: make V=1 J=32 -j32 check
- post-steps
build-linux-encrypted_env-no_compression:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- run: ENCRYPTED_ENV=1 ROCKSDB_DISABLE_SNAPPY=1 ROCKSDB_DISABLE_ZLIB=1 ROCKSDB_DISABLE_BZIP=1 ROCKSDB_DISABLE_LZ4=1 ROCKSDB_DISABLE_ZSTD=1 make V=1 J=32 -j32 check
- run: |
./sst_dump --help | grep -E -q 'Supported compression types: kNoCompression$' # Verify no compiled in compression
- post-steps
build-linux-static_lib-alt_namespace-status_checked:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- run: ASSERT_STATUS_CHECKED=1 TEST_UINT128_COMPAT=1 ROCKSDB_MODIFY_NPHASH=1 LIB_MODE=static OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" make V=1 -j24 check
- post-steps
build-linux-release:
executor: linux-docker
resource_class: 2xlarge
steps:
- checkout # check out the code in the project directory
- run: make V=1 -j32 LIB_MODE=shared release
- run: ls librocksdb.so # ensure shared lib built
- run: ./db_stress --version # ensure with gflags
- run: make clean
- run: make V=1 -j32 release
- run: ls librocksdb.a # ensure static lib built
- run: ./db_stress --version # ensure with gflags
- run: make clean
- run: apt-get remove -y libgflags-dev
- run: make V=1 -j32 LIB_MODE=shared release
- run: ls librocksdb.so # ensure shared lib built
- run: if ./db_stress --version; then false; else true; fi # ensure without gflags
- run: make clean
- run: make V=1 -j32 release
- run: ls librocksdb.a # ensure static lib built
- run: if ./db_stress --version; then false; else true; fi # ensure without gflags
- post-steps
build-linux-release-rtti:
executor: linux-docker
resource_class: xlarge
steps:
- checkout # check out the code in the project directory
- run: USE_RTTI=1 DEBUG_LEVEL=0 make V=1 -j16 static_lib tools db_bench
- run: ./db_stress --version # ensure with gflags
- run: make clean
- run: apt-get remove -y libgflags-dev
- run: USE_RTTI=1 DEBUG_LEVEL=0 make V=1 -j16 static_lib tools db_bench
- run: if ./db_stress --version; then false; else true; fi # ensure without gflags
build-linux-clang-no_test_run:
executor: linux-docker
resource_class: xlarge
steps:
- checkout # check out the code in the project directory
- run: CC=clang CXX=clang++ USE_CLANG=1 PORTABLE=1 make V=1 -j16 all
- post-steps
build-linux-clang10-asan:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- run: COMPILE_WITH_ASAN=1 CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check # aligned new doesn't work for reason we haven't figured out
- post-steps
build-linux-clang10-mini-tsan:
executor: linux-docker
resource_class: 2xlarge+
steps:
- pre-steps
- run: COMPILE_WITH_TSAN=1 CC=clang-13 CXX=clang++-13 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check
- post-steps
build-linux-clang10-ubsan:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- run: COMPILE_WITH_UBSAN=1 OPT="-fsanitize-blacklist=.circleci/ubsan_suppression_list.txt" CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 ubsan_check # aligned new doesn't work for reason we haven't figured out
- post-steps
build-linux-valgrind:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- run: PORTABLE=1 make V=1 -j32 valgrind_test
- post-steps
build-linux-clang10-clang-analyze:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- run: CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 CLANG_ANALYZER="/usr/bin/clang++-10" CLANG_SCAN_BUILD=scan-build-10 USE_CLANG=1 make V=1 -j32 analyze # aligned new doesn't work for reason we haven't figured out. For unknown, reason passing "clang++-10" as CLANG_ANALYZER doesn't work, and we need a full path.
- post-steps
- run:
name: "compress test report"
command: tar -cvzf scan_build_report.tar.gz scan_build_report
when: on_fail
- store_artifacts:
path: scan_build_report.tar.gz
destination: scan_build_report
when: on_fail
build-linux-runner:
machine: true
resource_class: facebook/rocksdb-benchmark-sys1
steps:
- pre-steps
- run:
name: "Checked Linux build (Runner)"
command: make V=1 J=8 -j8 check
environment:
LD_LIBRARY_PATH: /usr/local/lib
- post-steps
build-linux-cmake-with-folly:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- setup-folly
- build-folly
- run: (mkdir build && cd build && cmake -DUSE_FOLLY=1 -DWITH_GFLAGS=1 -DROCKSDB_BUILD_SHARED=0 .. && make V=1 -j20 && ctest -j20)
- post-steps
build-linux-cmake-with-folly-lite-no-test:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- setup-folly
- run: (mkdir build && cd build && cmake -DUSE_FOLLY_LITE=1 -DWITH_GFLAGS=1 .. && make V=1 -j20)
- post-steps
build-linux-cmake-with-benchmark:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- run: mkdir build && cd build && cmake -DWITH_GFLAGS=1 -DWITH_BENCHMARK=1 .. && make V=1 -j20 && ctest -j20
- post-steps
build-linux-unity-and-headers:
docker: # executor type
- image: gcc:latest
environment:
EXTRA_CXXFLAGS: -mno-avx512f # Warnings-as-error in avx512fintrin.h, would be used on newer hardware
resource_class: large
steps:
- checkout # check out the code in the project directory
- run: apt-get update -y && apt-get install -y libgflags-dev
- run:
name: "Unity build"
command: make V=1 -j8 unity_test
no_output_timeout: 20m
- run: make V=1 -j8 -k check-headers # could be moved to a different build
- post-steps
build-linux-gcc-7-with-folly:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- setup-folly
- build-folly
- run: USE_FOLLY=1 LIB_MODE=static CC=gcc-7 CXX=g++-7 V=1 make -j32 check # TODO: LIB_MODE only to work around unresolved linker failures
- post-steps
build-linux-gcc-7-with-folly-lite-no-test:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- setup-folly
- run: USE_FOLLY_LITE=1 CC=gcc-7 CXX=g++-7 V=1 make -j32 all
- post-steps
build-linux-gcc-8-no_test_run:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- run: CC=gcc-8 CXX=g++-8 V=1 make -j32 all
- post-steps
build-linux-cmake-with-folly-coroutines:
executor: linux-docker
resource_class: 2xlarge
environment:
CC: gcc-10
CXX: g++-10
steps:
- pre-steps
- setup-folly
- build-folly
- run: (mkdir build && cd build && cmake -DUSE_COROUTINES=1 -DWITH_GFLAGS=1 -DROCKSDB_BUILD_SHARED=0 .. && make V=1 -j20 && ctest -j20)
- post-steps
build-linux-gcc-10-cxx20-no_test_run:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- run: CC=gcc-10 CXX=g++-10 V=1 ROCKSDB_CXX_STANDARD=c++20 make -j32 all
- post-steps
build-linux-gcc-11-no_test_run:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- run: LIB_MODE=static CC=gcc-11 CXX=g++-11 V=1 make -j32 all microbench # TODO: LIB_MODE only to work around unresolved linker failures
- post-steps
build-linux-clang-13-no_test_run:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- run: CC=clang-13 CXX=clang++-13 USE_CLANG=1 make -j32 all microbench
- post-steps
# Ensure ASAN+UBSAN with folly, and full testsuite with clang 13
build-linux-clang-13-asan-ubsan-with-folly:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- setup-folly
- build-folly
- run: CC=clang-13 CXX=clang++-13 LIB_MODE=static USE_CLANG=1 USE_FOLLY=1 COMPILE_WITH_UBSAN=1 COMPILE_WITH_ASAN=1 make -j32 check # TODO: LIB_MODE only to work around unresolved linker failures
- post-steps
# This job is only to make sure the microbench tests are able to run, the benchmark result is not meaningful as the CI host is changing.
build-linux-run-microbench:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- run: DEBUG_LEVEL=0 make -j32 run_microbench
- post-steps
build-linux-mini-crashtest:
executor: linux-docker
resource_class: large
steps:
- pre-steps
- run: ulimit -S -n `ulimit -H -n` && make V=1 -j8 CRASH_TEST_EXT_ARGS='--duration=960 --max_key=2500000 --use_io_uring=0' blackbox_crash_test_with_atomic_flush
- post-steps
build-linux-crashtest-tiered-storage-bb:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- run:
name: "run crashtest"
command: ulimit -S -n `ulimit -H -n` && make V=1 -j32 CRASH_TEST_EXT_ARGS='--duration=10800 --use_io_uring=0' blackbox_crash_test_with_tiered_storage
no_output_timeout: 100m
- post-steps
build-linux-crashtest-tiered-storage-wb:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- run:
name: "run crashtest"
command: ulimit -S -n `ulimit -H -n` && make V=1 -j32 CRASH_TEST_EXT_ARGS='--duration=10800 --use_io_uring=0' whitebox_crash_test_with_tiered_storage
no_output_timeout: 100m
- post-steps
build-windows-vs2022:
executor:
name: win/server-2022
size: 2xlarge
environment:
THIRDPARTY_HOME: C:/Users/circleci/thirdparty
CMAKE_HOME: C:/Program Files/CMake
CMAKE_BIN: C:/Program Files/CMake/bin/cmake.exe
SNAPPY_HOME: C:/Users/circleci/thirdparty/snappy-1.1.8
SNAPPY_INCLUDE: C:/Users/circleci/thirdparty/snappy-1.1.8;C:/Users/circleci/thirdparty/snappy-1.1.8/build
SNAPPY_LIB_DEBUG: C:/Users/circleci/thirdparty/snappy-1.1.8/build/Debug/snappy.lib
CMAKE_GENERATOR: Visual Studio 17 2022
steps:
- windows-build-steps
build-windows-vs2019:
executor:
name: win/server-2019
size: 2xlarge
environment:
THIRDPARTY_HOME: C:/Users/circleci/thirdparty
CMAKE_HOME: C:/Program Files/CMake
CMAKE_BIN: C:/Program Files/CMake/bin/cmake.exe
SNAPPY_HOME: C:/Users/circleci/thirdparty/snappy-1.1.8
SNAPPY_INCLUDE: C:/Users/circleci/thirdparty/snappy-1.1.8;C:/Users/circleci/thirdparty/snappy-1.1.8/build
SNAPPY_LIB_DEBUG: C:/Users/circleci/thirdparty/snappy-1.1.8/build/Debug/snappy.lib
CMAKE_GENERATOR: Visual Studio 16 2019
steps:
- windows-build-steps
build-linux-java:
executor: linux-docker
resource_class: large
steps:
- pre-steps
- run:
name: "Set Java Environment"
command: |
echo "JAVA_HOME=${JAVA_HOME}"
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Test RocksDBJava"
command: make V=1 J=8 -j8 jtest
- post-steps
build-linux-java-static:
executor: linux-docker
resource_class: large
steps:
- pre-steps
- run:
name: "Set Java Environment"
command: |
echo "JAVA_HOME=${JAVA_HOME}"
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Build RocksDBJava Static Library"
command: make V=1 J=8 -j8 rocksdbjavastatic
- post-steps
build-macos-java:
macos:
xcode: 12.5.1
resource_class: large
environment:
JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home
ROCKSDB_DISABLE_JEMALLOC: 1 # jemalloc causes java 8 crash
steps:
- increase-max-open-files-on-macos
- install-gflags-on-macos
- install-jdk8-on-macos
- pre-steps-macos
- run:
name: "Set Java Environment"
command: |
echo "JAVA_HOME=${JAVA_HOME}"
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Test RocksDBJava"
command: make V=1 J=16 -j16 jtest
no_output_timeout: 20m
- post-steps
build-macos-java-static:
macos:
xcode: 12.5.1
resource_class: large
environment:
JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home
steps:
- increase-max-open-files-on-macos
- install-gflags-on-macos
- install-cmake-on-macos
- install-jdk8-on-macos
- pre-steps-macos
- run:
name: "Set Java Environment"
command: |
echo "JAVA_HOME=${JAVA_HOME}"
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Build RocksDBJava x86 and ARM Static Libraries"
command: make V=1 J=16 -j16 rocksdbjavastaticosx
no_output_timeout: 20m
- post-steps
build-macos-java-static-universal:
macos:
xcode: 12.5.1
resource_class: large
environment:
JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home
steps:
- increase-max-open-files-on-macos
- install-gflags-on-macos
- install-cmake-on-macos
- install-jdk8-on-macos
- pre-steps-macos
- run:
name: "Set Java Environment"
command: |
echo "JAVA_HOME=${JAVA_HOME}"
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Build RocksDBJava Universal Binary Static Library"
command: make V=1 J=16 -j16 rocksdbjavastaticosx_ub
no_output_timeout: 20m
- post-steps
build-examples:
executor: linux-docker
resource_class: large
steps:
- pre-steps
- run:
name: "Build examples"
command: |
make V=1 -j4 static_lib && cd examples && make V=1 -j4
- post-steps
build-cmake-mingw:
executor: linux-docker
resource_class: large
steps:
- pre-steps
- run: update-alternatives --set x86_64-w64-mingw32-g++ /usr/bin/x86_64-w64-mingw32-g++-posix
- run:
name: "Build cmake-mingw"
command: |
export PATH=$JAVA_HOME/bin:$PATH
echo "JAVA_HOME=${JAVA_HOME}"
which java && java -version
which javac && javac -version
mkdir build && cd build && cmake -DJNI=1 -DWITH_GFLAGS=OFF .. -DCMAKE_C_COMPILER=x86_64-w64-mingw32-gcc -DCMAKE_CXX_COMPILER=x86_64-w64-mingw32-g++ -DCMAKE_SYSTEM_NAME=Windows && make -j4 rocksdb rocksdbjni
- post-steps
build-linux-non-shm:
executor: linux-docker
resource_class: 2xlarge
environment:
TEST_TMPDIR: /tmp/rocksdb_test_tmp
steps:
- pre-steps
- run: make V=1 -j32 check
- post-steps
build-linux-arm-test-full:
machine:
image: ubuntu-2004:202111-02
resource_class: arm.large
steps:
- pre-steps
- install-gflags
- run: make V=1 J=4 -j4 check
- post-steps
build-linux-arm:
machine:
image: ubuntu-2004:202111-02
resource_class: arm.large
steps:
- pre-steps
- install-gflags
- run: ROCKSDBTESTS_PLATFORM_DEPENDENT=only make V=1 J=4 -j4 all_but_some_tests check_some
- post-steps
build-linux-arm-cmake-no_test_run:
machine:
image: ubuntu-2004:202111-02
resource_class: arm.large
environment:
JAVA_HOME: /usr/lib/jvm/java-8-openjdk-arm64
steps:
- pre-steps
- install-gflags
- run:
name: "Set Java Environment"
command: |
echo "JAVA_HOME=${JAVA_HOME}"
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Build with cmake"
command: |
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=Release -DWITH_TESTS=0 -DWITH_GFLAGS=1 -DWITH_BENCHMARK_TOOLS=0 -DWITH_TOOLS=0 -DWITH_CORE_TOOLS=1 ..
make -j4
- run:
name: "Build Java with cmake"
command: |
rm -rf build
mkdir build
cd build
cmake -DJNI=1 -DCMAKE_BUILD_TYPE=Release -DWITH_GFLAGS=1 ..
make -j4 rocksdb rocksdbjni
- post-steps
build-format-compatible:
executor: linux-docker
resource_class: 2xlarge
steps:
- pre-steps
- run:
name: "test"
command: |
export TEST_TMPDIR=/dev/shm/rocksdb
rm -rf /dev/shm/rocksdb
mkdir /dev/shm/rocksdb
tools/check_format_compatible.sh
- post-steps
build-fuzzers:
executor: linux-docker
resource_class: large
steps:
- pre-steps
- run:
name: "Build rocksdb lib"
command: CC=clang-13 CXX=clang++-13 USE_CLANG=1 make -j4 static_lib
- run:
name: "Build fuzzers"
command: cd fuzz && make sst_file_writer_fuzzer db_fuzzer db_map_fuzzer
- post-steps
benchmark-linux: #use a private Circle CI runner (resource_class) to run the job
machine: true
resource_class: facebook/rocksdb-benchmark-sys1
steps:
- build-for-benchmarks
- perform-benchmarks
- post-benchmarks
workflows:
version: 2
jobs-linux-run-tests:
jobs:
- build-linux
- build-linux-cmake-with-folly
- build-linux-cmake-with-folly-lite-no-test
- build-linux-gcc-7-with-folly
- build-linux-gcc-7-with-folly-lite-no-test
- build-linux-cmake-with-folly-coroutines
- build-linux-cmake-with-benchmark
- build-linux-encrypted_env-no_compression
jobs-linux-run-tests-san:
jobs:
- build-linux-clang10-asan
- build-linux-clang10-ubsan
- build-linux-clang10-mini-tsan
- build-linux-static_lib-alt_namespace-status_checked
jobs-linux-no-test-run:
jobs:
- build-linux-release
- build-linux-release-rtti
- build-examples
- build-fuzzers
- build-linux-clang-no_test_run
- build-linux-clang-13-no_test_run
- build-linux-gcc-8-no_test_run
- build-linux-gcc-10-cxx20-no_test_run
- build-linux-gcc-11-no_test_run
- build-linux-arm-cmake-no_test_run
jobs-linux-other-checks:
jobs:
- build-linux-clang10-clang-analyze
- build-linux-unity-and-headers
- build-linux-mini-crashtest
jobs-windows:
jobs:
- build-windows-vs2022
- build-windows-vs2019
- build-cmake-mingw
jobs-java:
jobs:
- build-linux-java
- build-linux-java-static
- build-macos-java
- build-macos-java-static
- build-macos-java-static-universal
jobs-macos:
jobs:
- build-macos
- build-macos-cmake:
run_even_tests: true
- build-macos-cmake:
run_even_tests: false
jobs-linux-arm:
jobs:
- build-linux-arm
build-fuzzers:
jobs:
- build-fuzzers
benchmark-linux:
triggers:
- schedule:
cron: "0 * * * *"
filters:
branches:
only:
- main
jobs:
- benchmark-linux
nightly:
triggers:
- schedule:
cron: "0 9 * * *"
filters:
branches:
only:
- main
jobs:
- build-format-compatible
- build-linux-arm-test-full
- build-linux-run-microbench
- build-linux-non-shm
- build-linux-clang-13-asan-ubsan-with-folly
- build-linux-valgrind

@ -1,6 +0,0 @@
# Suppress UBSAN warnings related to stl_tree.h, e.g.
# UndefinedBehaviorSanitizer: undefined-behavior /usr/bin/../lib/gcc/x86_64-linux-gnu/5.4.0/../../../../include/c++/5.4.0/bits/stl_tree.h:1505:43 in
# /usr/bin/../lib/gcc/x86_64-linux-gnu/5.4.0/../../../../include/c++/5.4.0/bits/stl_tree.h:1505:43:
# runtime error: upcast of address 0x000001fa8820 with insufficient space for an object of type
# 'std::_Rb_tree_node<std::pair<const std::__cxx11::basic_string<char>, rocksdb::(anonymous namespace)::LockHoldingInfo> >'
src:*bits/stl_tree.h

@ -1,47 +0,0 @@
name: Check buck targets and code format
on: [push, pull_request]
permissions:
contents: read
jobs:
check:
name: Check TARGETS file and code format
runs-on: ubuntu-latest
steps:
- name: Checkout feature branch
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Fetch from upstream
run: |
git remote add upstream https://github.com/facebook/rocksdb.git && git fetch upstream
- name: Where am I
run: |
echo git status && git status
echo "git remote -v" && git remote -v
echo git branch && git branch
- name: Setup Python
uses: actions/setup-python@v1
- name: Install Dependencies
run: python -m pip install --upgrade pip
- name: Install argparse
run: pip install argparse
- name: Download clang-format-diff.py
uses: wei/wget@v1
with:
args: https://raw.githubusercontent.com/llvm/llvm-project/release/12.x/clang/tools/clang-format/clang-format-diff.py
- name: Check format
run: VERBOSE_CHECK=1 make check-format
- name: Compare buckify output
run: make check-buck-targets
- name: Simple source code checks
run: make check-sources

@ -1,168 +0,0 @@
#!/usr/bin/python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# amalgamate.py creates an amalgamation from a unity build.
# It can be run with either Python 2 or 3.
# An amalgamation consists of a header that includes the contents of all public
# headers and a source file that includes the contents of all source files and
# private headers.
#
# This script works by starting with the unity build file and recursively expanding
# #include directives. If the #include is found in a public include directory,
# that header is expanded into the amalgamation header.
#
# A particular header is only expanded once, so this script will
# break if there are multiple inclusions of the same header that are expected to
# expand differently. Similarly, this type of code causes issues:
#
# #ifdef FOO
# #include "bar.h"
# // code here
# #else
# #include "bar.h" // oops, doesn't get expanded
# // different code here
# #endif
#
# The solution is to move the include out of the #ifdef.
from __future__ import print_function
import argparse
import re
import sys
from os import path
include_re = re.compile('^[ \t]*#include[ \t]+"(.*)"[ \t]*$')
included = set()
excluded = set()
def find_header(name, abs_path, include_paths):
    """Return the path of the header ``name``, or None if it cannot be found.

    The directory that contains ``abs_path`` (the file doing the including)
    is probed first. If the header is not there, each entry of
    ``include_paths`` is probed in order and the first existing candidate
    wins.
    """
    sibling = path.join(path.dirname(abs_path), name)
    if path.exists(sibling):
        return sibling
    # Build candidates lazily so each directory is joined and probed one at a
    # time, stopping at the first hit.
    candidates = (path.join(directory, name) for directory in include_paths)
    return next((found for found in candidates if path.exists(found)), None)
def expand_include(
    include_path,
    f,
    abs_path,
    source_out,
    header_out,
    include_paths,
    public_include_paths,
):
    """Expand one header into source_out, at most once per header path.

    include_path is recorded in the module-level `included` set; a header
    that is already recorded is not expanded again. Otherwise the header is
    opened, a '#line 1' marker naming it is written to source_out, and its
    contents are run through process_file().

    The f and abs_path arguments are accepted but not read here.

    Returns True if the header was expanded by this call, False if it had
    been expanded before.
    """
    already_seen = include_path in included
    if not already_seen:
        included.add(include_path)
        with open(include_path) as header_file:
            source_out.write('#line 1 "{}"\n'.format(include_path))
            process_file(
                header_file,
                include_path,
                source_out,
                header_out,
                include_paths,
                public_include_paths,
            )
    return not already_seen
def process_file(
    f, abs_path, source_out, header_out, include_paths, public_include_paths
):
    """Copy the lines of f to source_out, expanding quoted #include directives.

    f                     iterable of text lines (an open file)
    abs_path              path of the file being read; used to resolve
                          includes relative to it and in diagnostics
    source_out            stream receiving the expanded source
    header_out            stream receiving expanded public headers
    include_paths         directories searched for private headers
    public_include_paths  directories searched for public headers

    A private header is expanded in place into source_out. A public header
    is expanded into header_out instead and its #include line is dropped
    from source_out. Headers listed in `excluded` keep their #include line
    verbatim. Lines equal to "#pragma once\\n" are dropped. The process
    exits through sys.exit() when an include cannot be resolved.
    """
    # Count lines from 1 so the diagnostic below and the #line directive use
    # the same numbering as editors and compilers.
    for (line, text) in enumerate(f, start=1):
        m = include_re.match(text)
        if m:
            filename = m.groups()[0]
            # first check private headers
            include_path = find_header(filename, abs_path, include_paths)
            if include_path:
                if include_path in excluded:
                    source_out.write(text)
                    expanded = False
                else:
                    expanded = expand_include(
                        include_path,
                        f,
                        abs_path,
                        source_out,
                        header_out,
                        include_paths,
                        public_include_paths,
                    )
            else:
                # now try public headers
                include_path = find_header(filename, abs_path, public_include_paths)
                if include_path:
                    # found public header
                    expanded = False
                    if include_path in excluded:
                        source_out.write(text)
                    else:
                        # The public header goes to header_out; inside it only
                        # the public include directories are searched.
                        expand_include(
                            include_path,
                            f,
                            abs_path,
                            header_out,
                            None,
                            public_include_paths,
                            [],
                        )
                else:
                    sys.exit(
                        "unable to find {}, included in {} on line {}".format(
                            filename, abs_path, line
                        )
                    )
            if expanded:
                # A #line directive sets the number of the line that FOLLOWS
                # it, i.e. the line after the include that was just expanded.
                print('#line {} "{}"'.format(line + 1, abs_path), file=source_out)
        elif text != "#pragma once\n":
            source_out.write(text)
def main():
    """Command-line entry point: turn a unity build file into an amalgamation.

    Parses the options, resolves every include directory and excluded header
    to an absolute path, then expands the source file into the requested
    C++ source (-o) and header (-H) outputs.
    """
    cli = argparse.ArgumentParser(
        description="Transform a unity build into an amalgamation"
    )
    cli.add_argument("source", help="source file")
    # Each of these options may be repeated; the values are collected in a list.
    repeatable_options = (
        ("-I", "include_paths", "include paths for private headers"),
        ("-i", "public_include_paths", "include paths for public headers"),
        ("-x", "excluded", "excluded header files"),
    )
    for flag, dest, help_text in repeatable_options:
        cli.add_argument(flag, action="append", dest=dest, help=help_text)
    cli.add_argument("-o", dest="source_out", help="output C++ file", required=True)
    cli.add_argument(
        "-H", dest="header_out", help="output C++ header file", required=True
    )
    options = cli.parse_args()

    def absolute(paths):
        # An option that was never given is None; treat it as an empty list.
        return [path.abspath(entry) for entry in paths or []]

    private_dirs = absolute(options.include_paths)
    public_dirs = absolute(options.public_include_paths)
    excluded.update(absolute(options.excluded))

    unity_source = options.source
    unity_abs_path = path.abspath(unity_source)
    with open(unity_source) as unity_file:
        with open(options.source_out, "w") as source_out:
            with open(options.header_out, "w") as header_out:
                # The amalgamated source starts by including the amalgamated header.
                source_out.write('#line 1 "{}"\n'.format(unity_source))
                source_out.write('#include "{}"\n'.format(header_out.name))
                process_file(
                    unity_file,
                    unity_abs_path,
                    source_out,
                    header_out,
                    private_dirs,
                    public_dirs,
                )


if __name__ == "__main__":
    main()

@ -1,238 +0,0 @@
#!/usr/bin/env python3
# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
# This source code is licensed under both the GPLv2 (found in the
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).
"""Access the results of benchmark runs
Send these results on to OpenSearch graphing service
"""
import argparse
import itertools
import logging
import os
import re
import sys
import requests
from dateutil import parser
logging.basicConfig(level=logging.DEBUG)
class Configuration:
    """OpenSearch credentials read from the environment at import time.

    Each attribute is None when its variable is not set, so importing this
    module no longer fails when ES_USER / ES_PASS are absent.
    """

    opensearch_user = os.environ.get("ES_USER")
    opensearch_pass = os.environ.get("ES_PASS")
class BenchmarkResultException(Exception):
    """Error raised for benchmark input that cannot be parsed.

    message is the human-readable description (becomes str(exc));
    content carries the offending input and is stored on the instance.
    """

    def __init__(self, message, content):
        # Pass only the message: forwarding `self` as well would put the
        # exception object into its own args and garble str(exc).
        super().__init__(message)
        self.content = content
class BenchmarkUtils:
    """Helpers that validate benchmark rows and reshape them for upload."""

    # Column names associated with a benchmark report row.
    expected_keys = [
        "ops_sec",
        "mb_sec",
        "lsm_sz",
        "blob_sz",
        "c_wgb",
        "w_amp",
        "c_mbps",
        "c_wsecs",
        "c_csecs",
        "b_rgb",
        "b_wgb",
        "usec_op",
        "p50",
        "p99",
        "p99.9",
        "p99.99",
        "pmax",
        "uptime",
        "stall%",
        "Nstall",
        "u_cpu",
        "s_cpu",
        "rss",
        "test",
        "date",
        "version",
        "job_id",
    ]

    @staticmethod
    def sanity_check(row):
        """Return True if row (a dict) is fit to be uploaded.

        A row is accepted when it has a non-empty "test", a "date" that
        dateutil can parse, and an "ops_sec" convertible to int. Every
        rejection is logged and yields False.
        """
        if "test" not in row:
            logging.debug(f"not 'test' in row: {row}")
            return False
        if row["test"] == "":
            logging.debug(f"row['test'] == '': {row}")
            return False
        if "date" not in row:
            logging.debug(f"not 'date' in row: {row}")
            return False
        if "ops_sec" not in row:
            logging.debug(f"not 'ops_sec' in row: {row}")
            return False
        try:
            _ = int(row["ops_sec"])
        except (ValueError, TypeError):
            logging.debug(f"int(row['ops_sec']): {row}")
            return False
        try:
            (_, _) = parser.parse(row["date"], fuzzy_with_tokens=True)
        except (parser.ParserError, TypeError, OverflowError):
            # TypeError covers a date of None, which is what a row shorter
            # than the header is padded with; OverflowError covers dates
            # outside the representable range.
            logging.error(
                f"parser.parse((row['date']): not a valid format for date in row: {row}"
            )
            return False
        return True

    @staticmethod
    def conform_opensearch(row):
        """Normalise a sanity-checked row for OpenSearch.

        Rewrites row["date"] and adds row["test_date"] in ISO format (row is
        modified in place), then returns a new dict whose keys have every
        "." replaced by "_".
        """
        (dt, _) = parser.parse(row["date"], fuzzy_with_tokens=True)
        # create a test_date field, which was previously what was expected
        # repair the date field, which has what can be a WRONG ISO FORMAT, (no leading 0 on single-digit day-of-month)
        # e.g. 2022-07-1T00:14:55 should be 2022-07-01T00:14:55
        row["test_date"] = dt.isoformat()
        row["date"] = dt.isoformat()
        return {key.replace(".", "_"): value for key, value in row.items()}
class ResultParser:
    """Parser for tab-separated benchmark reports.

    field      regex matching one field value
    intrafield regex matching the whitespace run between two fields
    separator  regex for the true separator inside such a run
    """

    # NOTE(review): inside the default field class, "+-:" is read by `re` as
    # the range '+'..':' (so ',', '/', and digits match too). The pattern is
    # kept as is; only the string literals were made raw.
    def __init__(self, field=r"(\w|[+-:.%])+", intrafield=r"(\s)+", separator="\t"):
        self.field = re.compile(field)
        self.intra = re.compile(intrafield)
        self.sep = re.compile(separator)

    def ignore(self, l_in: str):
        """Return True for an empty string or a line starting with '#'."""
        if len(l_in) == 0:
            return True
        if l_in[0:1] == "#":
            return True
        return False

    def line(self, line_in: str):
        """Parse a line into items
        Being clever about separators

        Returns the list of field strings. Raises BenchmarkResultException
        when the remaining text is neither a field nor whitespace.
        """
        line = line_in
        row = []
        while line != "":
            match_item = self.field.match(line)
            if match_item:
                item = match_item.group(0)
                row.append(item)
                line = line[len(item) :]
            else:
                match_intra = self.intra.match(line)
                if match_intra:
                    intra = match_intra.group(0)
                    # Count the separators
                    # If there are >1 then generate extra blank fields
                    # White space with no true separators fakes up a single separator
                    tabbed = self.sep.split(intra)
                    sep_count = len(tabbed) - 1
                    if sep_count == 0:
                        sep_count = 1
                    for _ in range(sep_count - 1):
                        row.append("")
                    line = line[len(intra) :]
                else:
                    raise BenchmarkResultException(
                        "Invalid TSV line", f"{line_in} at {line}"
                    )
        return row

    def parse(self, lines):
        """Parse something that iterates lines

        The first non-ignored line is the header. Each later line becomes a
        dict keyed by the header; fields beyond the header width are dropped
        and missing fields are None. Returns [] when there is no header.
        """
        rows = [self.line(line) for line in lines if not self.ignore(line)]
        if not rows:
            # Nothing but comments or no input at all: no header to key on.
            return []
        header = rows[0]
        width = len(header)
        records = [
            {k: v for (k, v) in itertools.zip_longest(header, row[:width])}
            for row in rows[1:]
        ]
        return records
def load_report_from_tsv(filename: str):
    """Read the TSV report at filename and return it as a list of row dicts.

    The rows are produced by ResultParser.parse() and logged at debug level.
    """
    # The context manager closes the file even if reading fails.
    with open(filename, "r") as tsv_file:
        contents = tsv_file.readlines()
    # Named so it does not shadow the module-level dateutil `parser` import.
    result_parser = ResultParser()
    report = result_parser.parse(contents)
    logging.debug(f"Loaded TSV Report: {report}")
    return report
def push_report_to_opensearch(report, esdocument):
    """POST every sane row of report to the document URL esdocument.

    Rows failing BenchmarkUtils.sanity_check() are dropped; the others are
    normalised with BenchmarkUtils.conform_opensearch() and sent one request
    per row, authenticated with ES_USER / ES_PASS from the environment.
    A non-success HTTP status raises through response.raise_for_status().
    """
    sanitized = []
    for row in report:
        if BenchmarkUtils.sanity_check(row):
            sanitized.append(BenchmarkUtils.conform_opensearch(row))
    logging.debug(
        f"upload {len(sanitized)} sane of {len(report)} benchmarks to opensearch"
    )
    for single_benchmark in sanitized:
        logging.debug(f"upload benchmark: {single_benchmark}")
        # Credentials are looked up per request, only once there is a row to send.
        credentials = (os.environ["ES_USER"], os.environ["ES_PASS"])
        response = requests.post(
            esdocument, json=single_benchmark, auth=credentials
        )
        logging.debug(
            f"Sent to OpenSearch, status: {response.status_code}, result: {response.text}"
        )
        response.raise_for_status()
def push_report_to_null(report):
    """Dry run: validate and normalise each row, logging instead of uploading."""
    for row in report:
        if not BenchmarkUtils.sanity_check(row):
            continue
        logging.debug(f"row {row}")
        conformed = BenchmarkUtils.conform_opensearch(row)
        logging.debug(f"conformed row {conformed}")
def main():
    """Tool for fetching, parsing and uploading benchmark results to OpenSearch / ElasticSearch
    This tool will
    (1) Open a local tsv benchmark report file
    (2) Upload to OpenSearch document, via https/JSON

    With --upload none the rows are only validated and logged.
    """
    # Named so it does not shadow the module-level dateutil `parser` import.
    arg_parser = argparse.ArgumentParser(description="CircleCI benchmark scraper.")

    # --tsvfile is the name of the file to read results from
    # --esdocument is the ElasticSearch document to push these results into
    #
    arg_parser.add_argument(
        "--tsvfile",
        default="build_tools/circle_api_scraper_input.txt",
        help="File from which to read tsv report",
    )
    arg_parser.add_argument(
        "--esdocument",
        help="ElasticSearch/OpenSearch document URL to upload report into",
    )
    arg_parser.add_argument(
        "--upload", choices=["opensearch", "none"], default="opensearch"
    )

    args = arg_parser.parse_args()
    logging.debug(f"Arguments: {args}")
    # Fail with a usage error up front instead of passing a missing URL to
    # requests.post() after the report has been loaded.
    if args.upload == "opensearch" and not args.esdocument:
        arg_parser.error("--esdocument is required when --upload is opensearch")

    reports = load_report_from_tsv(args.tsvfile)
    if args.upload == "opensearch":
        push_report_to_opensearch(reports, args.esdocument)
    else:
        push_report_to_null(reports)


if __name__ == "__main__":
    sys.exit(main())

@ -1,900 +0,0 @@
#!/usr/bin/env bash
#
# Detects OS we're compiling on and outputs a file specified by the first
# argument, which in turn gets read while processing Makefile.
#
# The output will set the following variables:
# CC C Compiler path
# CXX C++ Compiler path
# PLATFORM_LDFLAGS Linker flags
# JAVA_LDFLAGS Linker flags for RocksDBJava
# JAVA_STATIC_LDFLAGS Linker flags for RocksDBJava static build
# JAVAC_ARGS Arguments for javac
# PLATFORM_SHARED_EXT Extension for shared libraries
# PLATFORM_SHARED_LDFLAGS Flags for building shared library
# PLATFORM_SHARED_CFLAGS Flags for compiling objects for shared library
# PLATFORM_CCFLAGS C compiler flags
# PLATFORM_CXXFLAGS C++ compiler flags. Will contain:
# PLATFORM_SHARED_VERSIONED Set to 'true' if platform supports versioned
# shared libraries, empty otherwise.
# FIND Command for the find utility
# WATCH Command for the watch utility
#
# The PLATFORM_CCFLAGS and PLATFORM_CXXFLAGS might include the following:
#
# -DROCKSDB_PLATFORM_POSIX if posix-platform based
# -DSNAPPY if the Snappy library is present
# -DLZ4 if the LZ4 library is present
# -DZSTD if the ZSTD library is present
# -DNUMA if the NUMA library is present
# -DTBB if the TBB library is present
# -DMEMKIND if the memkind library is present
#
# Using gflags in rocksdb:
# Our project depends on gflags, which requires users to take some extra steps
# before they can compile the whole repository:
# 1. Install gflags. You may download it from here:
# https://gflags.github.io/gflags/ (Mac users can `brew install gflags`)
# 2. Once installed, add the include path for gflags to your CPATH env var and
# the lib path to LIBRARY_PATH. If installed with default settings, the lib
# will be /usr/local/lib and the include path will be /usr/local/include
OUTPUT=$1
if test -z "$OUTPUT"; then
echo "usage: $0 <output-filename>" >&2
exit 1
fi
# we depend on C++17, but should be compatible with newer standards
if [ "$ROCKSDB_CXX_STANDARD" ]; then
PLATFORM_CXXFLAGS="-std=$ROCKSDB_CXX_STANDARD"
else
PLATFORM_CXXFLAGS="-std=c++17"
fi
# we currently depend on POSIX platform
COMMON_FLAGS="-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX"
# Default to fbcode gcc on internal fb machines
if [ -z "$ROCKSDB_NO_FBCODE" -a -d /mnt/gvfs/third-party ]; then
FBCODE_BUILD="true"
# If we're compiling with TSAN or shared lib, we need pic build
PIC_BUILD=$COMPILE_WITH_TSAN
if [ "$LIB_MODE" == "shared" ]; then
PIC_BUILD=1
fi
source "$PWD/build_tools/fbcode_config_platform010.sh"
fi
# Delete existing output, if it exists
rm -f "$OUTPUT"
touch "$OUTPUT"
if test -z "$CC"; then
if [ -x "$(command -v cc)" ]; then
CC=cc
elif [ -x "$(command -v clang)" ]; then
CC=clang
else
CC=cc
fi
fi
if test -z "$CXX"; then
if [ -x "$(command -v g++)" ]; then
CXX=g++
elif [ -x "$(command -v clang++)" ]; then
CXX=clang++
else
CXX=g++
fi
fi
if test -z "$AR"; then
if [ -x "$(command -v gcc-ar)" ]; then
AR=gcc-ar
elif [ -x "$(command -v llvm-ar)" ]; then
AR=llvm-ar
else
AR=ar
fi
fi
# Detect OS
if test -z "$TARGET_OS"; then
TARGET_OS=`uname -s`
fi
if test -z "$TARGET_ARCHITECTURE"; then
TARGET_ARCHITECTURE=`uname -m`
fi
if test -z "$CLANG_SCAN_BUILD"; then
CLANG_SCAN_BUILD=scan-build
fi
if test -z "$CLANG_ANALYZER"; then
CLANG_ANALYZER=$(command -v clang++ 2> /dev/null)
fi
if test -z "$FIND"; then
FIND=find
fi
if test -z "$WATCH"; then
WATCH=watch
fi
COMMON_FLAGS="$COMMON_FLAGS ${CFLAGS}"
CROSS_COMPILE=
PLATFORM_CCFLAGS=
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS"
PLATFORM_SHARED_EXT="so"
PLATFORM_SHARED_LDFLAGS="-Wl,--no-as-needed -shared -Wl,-soname -Wl,"
PLATFORM_SHARED_CFLAGS="-fPIC"
PLATFORM_SHARED_VERSIONED=true
# generic port files (working on all platform by #ifdef) go directly in /port
GENERIC_PORT_FILES=`cd "$ROCKSDB_ROOT"; find port -name '*.cc' | tr "\n" " "`
# On GCC, we pick libc's memcmp over GCC's memcmp via -fno-builtin-memcmp
case "$TARGET_OS" in
Darwin)
PLATFORM=OS_MACOSX
COMMON_FLAGS="$COMMON_FLAGS -DOS_MACOSX"
PLATFORM_SHARED_EXT=dylib
PLATFORM_SHARED_LDFLAGS="-dynamiclib -install_name "
# PORT_FILES=port/darwin/darwin_specific.cc
;;
IOS)
PLATFORM=IOS
COMMON_FLAGS="$COMMON_FLAGS -DOS_MACOSX -DIOS_CROSS_COMPILE "
PLATFORM_SHARED_EXT=dylib
PLATFORM_SHARED_LDFLAGS="-dynamiclib -install_name "
CROSS_COMPILE=true
PLATFORM_SHARED_VERSIONED=
;;
Linux)
PLATFORM=OS_LINUX
COMMON_FLAGS="$COMMON_FLAGS -DOS_LINUX"
if [ -z "$USE_CLANG" ]; then
COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp"
else
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -latomic"
fi
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt -ldl"
if test -z "$ROCKSDB_USE_IO_URING"; then
ROCKSDB_USE_IO_URING=1
fi
if test "$ROCKSDB_USE_IO_URING" -ne 0; then
# check for liburing
$CXX $PLATFORM_CXXFLAGS -x c++ - -luring -o test.o 2>/dev/null <<EOF
#include <liburing.h>
int main() {
struct io_uring ring;
io_uring_queue_init(1, &ring, 0);
return 0;
}
EOF
if [ "$?" = 0 ]; then
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -luring"
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_IOURING_PRESENT"
fi
fi
# PORT_FILES=port/linux/linux_specific.cc
;;
SunOS)
PLATFORM=OS_SOLARIS
COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_SOLARIS -m64"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt -static-libstdc++ -static-libgcc -m64"
# PORT_FILES=port/sunos/sunos_specific.cc
;;
AIX)
PLATFORM=OS_AIX
CC=gcc
COMMON_FLAGS="$COMMON_FLAGS -maix64 -pthread -fno-builtin-memcmp -D_REENTRANT -DOS_AIX -D__STDC_FORMAT_MACROS"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -pthread -lpthread -lrt -maix64 -static-libstdc++ -static-libgcc"
# PORT_FILES=port/aix/aix_specific.cc
;;
FreeBSD)
PLATFORM=OS_FREEBSD
CXX=clang++
COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_FREEBSD"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread"
# PORT_FILES=port/freebsd/freebsd_specific.cc
;;
GNU/kFreeBSD)
PLATFORM=OS_GNU_KFREEBSD
COMMON_FLAGS="$COMMON_FLAGS -DOS_GNU_KFREEBSD"
if [ -z "$USE_CLANG" ]; then
COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp"
else
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -latomic"
fi
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt"
# PORT_FILES=port/gnu_kfreebsd/gnu_kfreebsd_specific.cc
;;
NetBSD)
PLATFORM=OS_NETBSD
COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_NETBSD"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lgcc_s"
# PORT_FILES=port/netbsd/netbsd_specific.cc
;;
OpenBSD)
PLATFORM=OS_OPENBSD
CXX=clang++
COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_OPENBSD"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -pthread"
# PORT_FILES=port/openbsd/openbsd_specific.cc
FIND=gfind
WATCH=gnuwatch
;;
DragonFly)
PLATFORM=OS_DRAGONFLYBSD
COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_DRAGONFLYBSD"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread"
# PORT_FILES=port/dragonfly/dragonfly_specific.cc
;;
Cygwin)
PLATFORM=CYGWIN
PLATFORM_SHARED_CFLAGS=""
PLATFORM_CXXFLAGS="-std=gnu++11"
COMMON_FLAGS="$COMMON_FLAGS -DCYGWIN"
if [ -z "$USE_CLANG" ]; then
COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp"
else
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -latomic"
fi
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt"
# PORT_FILES=port/linux/linux_specific.cc
;;
OS_ANDROID_CROSSCOMPILE)
PLATFORM=OS_ANDROID
COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_ANDROID -DROCKSDB_PLATFORM_POSIX"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS " # All pthread features are in the Android C library
# PORT_FILES=port/android/android.cc
CROSS_COMPILE=true
;;
*)
echo "Unknown platform!" >&2
exit 1
esac
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS ${CXXFLAGS}"
JAVA_LDFLAGS="$PLATFORM_LDFLAGS"
JAVA_STATIC_LDFLAGS="$PLATFORM_LDFLAGS"
JAVAC_ARGS="-source 8"
if [ "$CROSS_COMPILE" = "true" -o "$FBCODE_BUILD" = "true" ]; then
# Cross-compiling; do not try any compilation tests.
# Also don't need any compilation tests if compiling on fbcode
if [ "$FBCODE_BUILD" = "true" ]; then
# Enable backtrace on fbcode since the necessary libraries are present
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_BACKTRACE"
FOLLY_DIR="third-party/folly"
fi
true
else
if ! test $ROCKSDB_DISABLE_FALLOCATE; then
# Test whether fallocate is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
#include <fcntl.h>
#include <linux/falloc.h>
int main() {
int fd = open("/dev/null", 0);
fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1024);
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_FALLOCATE_PRESENT"
fi
fi
if ! test $ROCKSDB_DISABLE_SNAPPY; then
# Test whether Snappy library is installed
# http://code.google.com/p/snappy/
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
#include <snappy.h>
int main() {}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DSNAPPY"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lsnappy"
JAVA_LDFLAGS="$JAVA_LDFLAGS -lsnappy"
fi
fi
if ! test $ROCKSDB_DISABLE_GFLAGS; then
# Test whether gflags library is installed
# http://gflags.github.io/gflags/
# check whether the gflags header defines the GFLAGS_NAMESPACE macro
if $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null << EOF
#include <gflags/gflags.h>
using namespace GFLAGS_NAMESPACE;
int main() {}
EOF
then
COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=1"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags"
# check if namespace is gflags
elif $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null << EOF
#include <gflags/gflags.h>
using namespace gflags;
int main() {}
EOF
then
COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=1 -DGFLAGS_NAMESPACE=gflags"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags"
# check if namespace is google
elif $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null << EOF
#include <gflags/gflags.h>
using namespace google;
int main() {}
EOF
then
COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=1 -DGFLAGS_NAMESPACE=google"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags"
fi
fi
if ! test $ROCKSDB_DISABLE_ZLIB; then
# Test whether zlib library is installed
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null <<EOF
#include <zlib.h>
int main() {}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DZLIB"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lz"
JAVA_LDFLAGS="$JAVA_LDFLAGS -lz"
fi
fi
if ! test $ROCKSDB_DISABLE_BZIP; then
# Test whether bzip library is installed
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null <<EOF
#include <bzlib.h>
int main() {}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DBZIP2"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lbz2"
JAVA_LDFLAGS="$JAVA_LDFLAGS -lbz2"
fi
fi
if ! test $ROCKSDB_DISABLE_LZ4; then
# Test whether lz4 library is installed
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null <<EOF
#include <lz4.h>
#include <lz4hc.h>
int main() {}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DLZ4"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -llz4"
JAVA_LDFLAGS="$JAVA_LDFLAGS -llz4"
fi
fi
if ! test $ROCKSDB_DISABLE_ZSTD; then
# Test whether zstd library is installed
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <zstd.h>
int main() {}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DZSTD"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lzstd"
JAVA_LDFLAGS="$JAVA_LDFLAGS -lzstd"
fi
fi
if ! test $ROCKSDB_DISABLE_NUMA; then
# Test whether numa is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o -lnuma 2>/dev/null <<EOF
#include <numa.h>
#include <numaif.h>
int main() {}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DNUMA"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lnuma"
JAVA_LDFLAGS="$JAVA_LDFLAGS -lnuma"
fi
fi
if ! test $ROCKSDB_DISABLE_TBB; then
# Test whether tbb is available
$CXX $PLATFORM_CXXFLAGS $LDFLAGS -x c++ - -o test.o -ltbb 2>/dev/null <<EOF
#include <tbb/tbb.h>
int main() {}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DTBB"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -ltbb"
JAVA_LDFLAGS="$JAVA_LDFLAGS -ltbb"
fi
fi
if ! test $ROCKSDB_DISABLE_JEMALLOC; then
# Test whether jemalloc is available
if echo 'int main() {}' | $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o -ljemalloc \
2>/dev/null; then
# This will enable some preprocessor identifiers in the Makefile
JEMALLOC=1
# JEMALLOC can be enabled either using the flag (like here) or by
# providing direct link to the jemalloc library
WITH_JEMALLOC_FLAG=1
# check for JEMALLOC installed with HomeBrew
if [ "$PLATFORM" == "OS_MACOSX" ]; then
if hash brew 2>/dev/null && brew ls --versions jemalloc > /dev/null; then
JEMALLOC_VER=$(brew ls --versions jemalloc | tail -n 1 | cut -f 2 -d ' ')
JEMALLOC_INCLUDE="-I/usr/local/Cellar/jemalloc/${JEMALLOC_VER}/include"
JEMALLOC_LIB="/usr/local/Cellar/jemalloc/${JEMALLOC_VER}/lib/libjemalloc_pic.a"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS $JEMALLOC_LIB"
JAVA_STATIC_LDFLAGS="$JAVA_STATIC_LDFLAGS $JEMALLOC_LIB"
fi
fi
fi
fi
if ! test $JEMALLOC && ! test $ROCKSDB_DISABLE_TCMALLOC; then
# jemalloc is not available. Let's try tcmalloc
if echo 'int main() {}' | $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o \
-ltcmalloc 2>/dev/null; then
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -ltcmalloc"
JAVA_LDFLAGS="$JAVA_LDFLAGS -ltcmalloc"
fi
fi
if ! test $ROCKSDB_DISABLE_MALLOC_USABLE_SIZE; then
# Test whether malloc_usable_size is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
#include <malloc.h>
int main() {
size_t res = malloc_usable_size(0);
(void)res;
return 0;
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_MALLOC_USABLE_SIZE"
fi
fi
if ! test $ROCKSDB_DISABLE_MEMKIND; then
# Test whether memkind library is installed
$CXX $PLATFORM_CXXFLAGS $LDFLAGS -x c++ - -o test.o -lmemkind 2>/dev/null <<EOF
#include <memkind.h>
int main() {
memkind_malloc(MEMKIND_DAX_KMEM, 1024);
return 0;
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DMEMKIND"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lmemkind"
JAVA_LDFLAGS="$JAVA_LDFLAGS -lmemkind"
fi
fi
if ! test $ROCKSDB_DISABLE_PTHREAD_MUTEX_ADAPTIVE_NP; then
# Test whether PTHREAD_MUTEX_ADAPTIVE_NP mutex type is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
#include <pthread.h>
int main() {
int x = PTHREAD_MUTEX_ADAPTIVE_NP;
(void)x;
return 0;
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_PTHREAD_ADAPTIVE_MUTEX"
fi
fi
if ! test $ROCKSDB_DISABLE_BACKTRACE; then
# Test whether backtrace is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
#include <execinfo.h>
int main() {
void* frames[1];
backtrace_symbols(frames, backtrace(frames, 1));
return 0;
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_BACKTRACE"
else
# Test whether execinfo library is installed
$CXX $PLATFORM_CXXFLAGS -lexecinfo -x c++ - -o test.o 2>/dev/null <<EOF
#include <execinfo.h>
int main() {
void* frames[1];
backtrace_symbols(frames, backtrace(frames, 1));
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_BACKTRACE"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lexecinfo"
JAVA_LDFLAGS="$JAVA_LDFLAGS -lexecinfo"
fi
fi
fi
if ! test $ROCKSDB_DISABLE_PG; then
# Test if -pg is supported
$CXX $PLATFORM_CXXFLAGS -pg -x c++ - -o test.o 2>/dev/null <<EOF
int main() {
return 0;
}
EOF
if [ "$?" = 0 ]; then
PROFILING_FLAGS=-pg
fi
fi
if ! test $ROCKSDB_DISABLE_SYNC_FILE_RANGE; then
# Test whether sync_file_range is supported for compatibility with an old glibc
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
#include <fcntl.h>
int main() {
int fd = open("/dev/null", 0);
sync_file_range(fd, 0, 1024, SYNC_FILE_RANGE_WRITE);
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_RANGESYNC_PRESENT"
fi
fi
if ! test $ROCKSDB_DISABLE_SCHED_GETCPU; then
# Test whether sched_getcpu is supported
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
#include <sched.h>
int main() {
int cpuid = sched_getcpu();
(void)cpuid;
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_SCHED_GETCPU_PRESENT"
fi
fi
if ! test $ROCKSDB_DISABLE_AUXV_GETAUXVAL; then
# Test whether getauxval is supported
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
#include <sys/auxv.h>
int main() {
uint64_t auxv = getauxval(AT_HWCAP);
(void)auxv;
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_AUXV_GETAUXVAL_PRESENT"
fi
fi
if ! test $ROCKSDB_DISABLE_ALIGNED_NEW; then
# Test whether c++17 aligned-new is supported
$CXX $PLATFORM_CXXFLAGS -faligned-new -x c++ - -o test.o 2>/dev/null <<EOF
struct alignas(1024) t {int a;};
int main() {}
EOF
if [ "$?" = 0 ]; then
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS -faligned-new -DHAVE_ALIGNED_NEW"
fi
fi
if ! test $ROCKSDB_DISABLE_BENCHMARK; then
# Test whether google benchmark is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o /dev/null -lbenchmark -lpthread 2>/dev/null <<EOF
#include <benchmark/benchmark.h>
int main() {}
EOF
if [ "$?" = 0 ]; then
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lbenchmark"
fi
fi
if test $USE_FOLLY; then
# Test whether libfolly library is installed
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <folly/synchronization/DistributedMutex.h>
int main() {}
EOF
if [ "$?" != 0 ]; then
FOLLY_DIR="./third-party/folly"
fi
fi
fi
# TODO(tec): Fix -Wshorten-64-to-32 errors on FreeBSD and enable the warning.
# -Wshorten-64-to-32 breaks compilation on FreeBSD aarch64 and i386
if ! { [ "$TARGET_OS" = FreeBSD ] && [ "$TARGET_ARCHITECTURE" = arm64 -o "$TARGET_ARCHITECTURE" = i386 ]; }; then
# Test whether -Wshorten-64-to-32 is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o -Wshorten-64-to-32 2>/dev/null <<EOF
int main() {}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -Wshorten-64-to-32"
fi
fi
if test "0$PORTABLE" -eq 0; then
if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then
# Tune for this POWER processor, treating '+' models as base models
POWER=`LD_SHOW_AUXV=1 /bin/true | grep AT_PLATFORM | grep -E -o power[0-9]+`
COMMON_FLAGS="$COMMON_FLAGS -mcpu=$POWER -mtune=$POWER "
elif test -n "`echo $TARGET_ARCHITECTURE | grep -e^arm -e^aarch64`"; then
# TODO: Handle this with appropriate options.
COMMON_FLAGS="$COMMON_FLAGS"
elif test -n "`echo $TARGET_ARCHITECTURE | grep ^aarch64`"; then
COMMON_FLAGS="$COMMON_FLAGS"
elif test -n "`echo $TARGET_ARCHITECTURE | grep ^s390x`"; then
if echo 'int main() {}' | $CXX $PLATFORM_CXXFLAGS -x c++ \
-march=native - -o /dev/null 2>/dev/null; then
COMMON_FLAGS="$COMMON_FLAGS -march=native "
else
COMMON_FLAGS="$COMMON_FLAGS -march=z196 "
fi
COMMON_FLAGS="$COMMON_FLAGS"
elif test -n "`echo $TARGET_ARCHITECTURE | grep ^riscv64`"; then
RISC_ISA=$(cat /proc/cpuinfo | grep isa | head -1 | cut --delimiter=: -f 2 | cut -b 2-)
COMMON_FLAGS="$COMMON_FLAGS -march=${RISC_ISA}"
elif [ "$TARGET_OS" == "IOS" ]; then
COMMON_FLAGS="$COMMON_FLAGS"
elif [ "$TARGET_OS" == "AIX" ] || [ "$TARGET_OS" == "SunOS" ]; then
# TODO: Not sure why we don't use -march=native on these OSes
if test "$USE_SSE"; then
TRY_SSE_ETC="1"
fi
else
COMMON_FLAGS="$COMMON_FLAGS -march=native "
fi
else
# PORTABLE=1
if test "$USE_SSE"; then
TRY_SSE_ETC="1"
fi
if test -n "`echo $TARGET_ARCHITECTURE | grep ^s390x`"; then
COMMON_FLAGS="$COMMON_FLAGS -march=z196 "
fi
if test -n "`echo $TARGET_ARCHITECTURE | grep ^riscv64`"; then
RISC_ISA=$(cat /proc/cpuinfo | grep isa | head -1 | cut --delimiter=: -f 2 | cut -b 2-)
COMMON_FLAGS="$COMMON_FLAGS -march=${RISC_ISA}"
fi
if [[ "${PLATFORM}" == "OS_MACOSX" ]]; then
# For portability compile for macOS 10.13 (2017) or newer
COMMON_FLAGS="$COMMON_FLAGS -mmacosx-version-min=10.13"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -mmacosx-version-min=10.13"
# -mmacosx-version-min must come first here.
PLATFORM_SHARED_LDFLAGS="-mmacosx-version-min=10.13 $PLATFORM_SHARED_LDFLAGS"
PLATFORM_CMAKE_FLAGS="-DCMAKE_OSX_DEPLOYMENT_TARGET=10.13"
JAVA_STATIC_DEPS_COMMON_FLAGS="-mmacosx-version-min=10.13"
JAVA_STATIC_DEPS_LDFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS"
JAVA_STATIC_DEPS_CCFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS"
JAVA_STATIC_DEPS_CXXFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS"
fi
fi
if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then
# check for GNU libc on ppc64
$CXX -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <stdio.h>
#include <stdlib.h>
#include <gnu/libc-version.h>
int main(int argc, char *argv[]) {
printf("GNU libc version: %s\n", gnu_get_libc_version());
return 0;
}
EOF
if [ "$?" != 0 ]; then
PPC_LIBC_IS_GNU=0
fi
fi
if test "$TRY_SSE_ETC"; then
# The USE_SSE flag now means "attempt to compile with widely-available
# Intel architecture extensions utilized by specific optimizations in the
# source code." It's a qualifier on PORTABLE=1 that means "mostly portable."
# It doesn't even really check that your current CPU is compatible.
#
# SSE4.2 available since nehalem, ca. 2008-2010
# Includes POPCNT for BitsSetToOne, BitParity
TRY_SSE42="-msse4.2"
# PCLMUL available since westmere, ca. 2010-2011
TRY_PCLMUL="-mpclmul"
# AVX2 available since haswell, ca. 2013-2015
TRY_AVX2="-mavx2"
# BMI available since haswell, ca. 2013-2015
# Primarily for TZCNT for CountTrailingZeroBits
TRY_BMI="-mbmi"
# LZCNT available since haswell, ca. 2013-2015
# For FloorLog2
TRY_LZCNT="-mlzcnt"
fi
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_SSE42 -x c++ - -o test.o 2>/dev/null <<EOF
#include <cstdint>
#include <nmmintrin.h>
int main() {
volatile uint32_t x = _mm_crc32_u32(0, 0);
(void)x;
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS $TRY_SSE42 -DHAVE_SSE42"
elif test "$USE_SSE"; then
echo "warning: USE_SSE specified but compiler could not use SSE intrinsics, disabling" >&2
fi
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_PCLMUL -x c++ - -o test.o 2>/dev/null <<EOF
#include <cstdint>
#include <wmmintrin.h>
int main() {
const auto a = _mm_set_epi64x(0, 0);
const auto b = _mm_set_epi64x(0, 0);
const auto c = _mm_clmulepi64_si128(a, b, 0x00);
auto d = _mm_cvtsi128_si64(c);
(void)d;
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS $TRY_PCLMUL -DHAVE_PCLMUL"
elif test "$USE_SSE"; then
echo "warning: USE_SSE specified but compiler could not use PCLMUL intrinsics, disabling" >&2
fi
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_AVX2 -x c++ - -o test.o 2>/dev/null <<EOF
#include <cstdint>
#include <immintrin.h>
int main() {
const auto a = _mm256_setr_epi32(0, 1, 2, 3, 4, 7, 6, 5);
const auto b = _mm256_permutevar8x32_epi32(a, a);
(void)b;
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS $TRY_AVX2 -DHAVE_AVX2"
elif test "$USE_SSE"; then
echo "warning: USE_SSE specified but compiler could not use AVX2 intrinsics, disabling" >&2
fi
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_BMI -x c++ - -o test.o 2>/dev/null <<EOF
#include <cstdint>
#include <immintrin.h>
int main(int argc, char *argv[]) {
(void)argv;
return (int)_tzcnt_u64((uint64_t)argc);
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS $TRY_BMI -DHAVE_BMI"
elif test "$USE_SSE"; then
echo "warning: USE_SSE specified but compiler could not use BMI intrinsics, disabling" >&2
fi
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_LZCNT -x c++ - -o test.o 2>/dev/null <<EOF
#include <cstdint>
#include <immintrin.h>
int main(int argc, char *argv[]) {
(void)argv;
return (int)_lzcnt_u64((uint64_t)argc);
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS $TRY_LZCNT -DHAVE_LZCNT"
elif test "$USE_SSE"; then
echo "warning: USE_SSE specified but compiler could not use LZCNT intrinsics, disabling" >&2
fi
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null <<EOF
#include <cstdint>
int main() {
uint64_t a = 0xffffFFFFffffFFFF;
__uint128_t b = __uint128_t(a) * a;
a = static_cast<uint64_t>(b >> 64);
(void)a;
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DHAVE_UINT128_EXTENSION"
fi
# On non-fbcode Linux builds, compile a trivial translation unit with the
# shared-library compile flags and link it with the shared-library link
# flags; if both steps succeed, add -ldl to EXEC_LDFLAGS.
if [ "$FBCODE_BUILD" != "true" -a "$PLATFORM" = OS_LINUX ]; then
  $CXX $COMMON_FLAGS $PLATFORM_SHARED_CFLAGS -x c++ -c - -o test_dl.o 2>/dev/null <<EOF
void dummy_func() {}
EOF
  if [ "$?" = 0 ]; then
    $CXX $COMMON_FLAGS $PLATFORM_SHARED_LDFLAGS test_dl.o -o test.o 2>/dev/null
    if [ "$?" = 0 ]; then
      # Separate -ldl from any EXEC_LDFLAGS already set, so the two are not
      # fused into a single bogus token; no leading space when it was empty.
      EXEC_LDFLAGS="${EXEC_LDFLAGS:+$EXEC_LDFLAGS }-ldl"
      rm -f test_dl.o
    fi
  fi
fi
# check for F_FULLFSYNC
# The probe is compiled with PLATFORM_CXXFLAGS, like the other feature probes
# in this script (the variable name was previously misspelled here, so the
# probe ran with no flags at all).
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
#include <fcntl.h>
int main() {
fcntl(0, F_FULLFSYNC);
return 0;
}
EOF
if [ "$?" = 0 ]; then
  COMMON_FLAGS="$COMMON_FLAGS -DHAVE_FULLFSYNC"
fi
rm -f test.o test_dl.o
# Resolve the folly installation directory, but only when folly is requested
# (USE_FOLLY) and a checkout location (FOLLY_DIR) is known.
if [ -n "$USE_FOLLY" ] && [ -n "$FOLLY_DIR" ]; then
  FOLLY_PATH=$(cd $FOLLY_DIR && $PYTHON build/fbcode_builder/getdeps.py show-inst-dir folly)
fi

# Fold the flags accumulated by the probes into both the C and C++ flag sets.
PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS"
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS"

VALGRIND_VER="$VALGRIND_VER"

# Version components are reported by build_tools/version.sh.
ROCKSDB_MAJOR=$(build_tools/version.sh major)
ROCKSDB_MINOR=$(build_tools/version.sh minor)
ROCKSDB_PATCH=$(build_tools/version.sh patch)
# Append every detected setting to $OUTPUT as NAME=value lines. The whole
# group shares a single append redirection.
{
  echo "CC=$CC"
  echo "CXX=$CXX"
  echo "AR=$AR"
  echo "PLATFORM=$PLATFORM"
  echo "PLATFORM_LDFLAGS=$PLATFORM_LDFLAGS"
  echo "PLATFORM_CMAKE_FLAGS=$PLATFORM_CMAKE_FLAGS"
  echo "JAVA_LDFLAGS=$JAVA_LDFLAGS"
  echo "JAVA_STATIC_LDFLAGS=$JAVA_STATIC_LDFLAGS"
  echo "JAVA_STATIC_DEPS_CCFLAGS=$JAVA_STATIC_DEPS_CCFLAGS"
  echo "JAVA_STATIC_DEPS_CXXFLAGS=$JAVA_STATIC_DEPS_CXXFLAGS"
  echo "JAVA_STATIC_DEPS_LDFLAGS=$JAVA_STATIC_DEPS_LDFLAGS"
  echo "JAVAC_ARGS=$JAVAC_ARGS"
  echo "VALGRIND_VER=$VALGRIND_VER"
  echo "PLATFORM_CCFLAGS=$PLATFORM_CCFLAGS"
  echo "PLATFORM_CXXFLAGS=$PLATFORM_CXXFLAGS"
  echo "PLATFORM_SHARED_CFLAGS=$PLATFORM_SHARED_CFLAGS"
  echo "PLATFORM_SHARED_EXT=$PLATFORM_SHARED_EXT"
  echo "PLATFORM_SHARED_LDFLAGS=$PLATFORM_SHARED_LDFLAGS"
  echo "PLATFORM_SHARED_VERSIONED=$PLATFORM_SHARED_VERSIONED"
  echo "EXEC_LDFLAGS=$EXEC_LDFLAGS"
  echo "JEMALLOC_INCLUDE=$JEMALLOC_INCLUDE"
  echo "JEMALLOC_LIB=$JEMALLOC_LIB"
  echo "ROCKSDB_MAJOR=$ROCKSDB_MAJOR"
  echo "ROCKSDB_MINOR=$ROCKSDB_MINOR"
  echo "ROCKSDB_PATCH=$ROCKSDB_PATCH"
  echo "CLANG_SCAN_BUILD=$CLANG_SCAN_BUILD"
  echo "CLANG_ANALYZER=$CLANG_ANALYZER"
  echo "PROFILING_FLAGS=$PROFILING_FLAGS"
  echo "FIND=$FIND"
  echo "WATCH=$WATCH"
  echo "FOLLY_PATH=$FOLLY_PATH"

  # JEMALLOC=1 enables the jemalloc-related preprocessor identifiers.
  if [ -n "$JEMALLOC" ]; then
    echo "JEMALLOC=1"
  fi

  # WITH_JEMALLOC_FLAG means jemalloc is linked with the -ljemalloc flag.
  # The alternative is to provide a direct link to the library through
  # JEMALLOC_LIB and JEMALLOC_INCLUDE.
  if [ -n "$WITH_JEMALLOC_FLAG" ]; then
    echo "WITH_JEMALLOC_FLAG=$WITH_JEMALLOC_FLAG"
  fi

  echo "LUA_PATH=$LUA_PATH"

  # The remaining settings are written only when they are non-empty.
  if [ -n "$USE_FOLLY" ]; then
    echo "USE_FOLLY=$USE_FOLLY"
  fi
  if [ -n "$PPC_LIBC_IS_GNU" ]; then
    echo "PPC_LIBC_IS_GNU=$PPC_LIBC_IS_GNU"
  fi
} >> "$OUTPUT"

@ -1,48 +0,0 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# Check for some simple mistakes that should prevent commit or push
BAD=""

# Runs `git grep` with the given search arguments; git prints any hits itself.
# Every exit status other than 1 (the status git grep uses for "no match")
# is treated as a failure: the hint in $1 is printed and BAD is set.
report_matches() {
  local hint="$1"
  shift
  git grep "$@"
  if [ "$?" != "1" ]; then
    echo "$hint"
    BAD=1
  fi
}

report_matches "^^^^^ Do not hardcode namespace rocksdb. Use ROCKSDB_NAMESPACE" \
  -n 'namespace rocksdb' -- '*.[ch]*'

report_matches "^^^^^ Code was not intended to be committed" \
  -n -i 'nocommit' -- ':!build_tools/check-sources.sh'

report_matches '^^^^^ Use double-quotes as in #include "rocksdb/something.h"' \
  -n 'include <rocksdb/' -- ':!build_tools/check-sources.sh'

report_matches '^^^^^ Use #include "rocksdb/something.h" instead of #include "include/rocksdb/something.h"' \
  -n 'include "include/rocksdb/' -- ':!build_tools/check-sources.sh'

report_matches '^^^^ Do not use "using namespace"' \
  -n 'using namespace' -- ':!build_tools' ':!docs' \
  ':!third-party/folly/folly/lang/Align.h' \
  ':!third-party/gtest-1.8.1/fused-src/gtest/gtest.h'

report_matches '^^^^ Use only ASCII characters in source files' \
  -n -P "[\x80-\xFF]" -- ':!docs' ':!*.md'

# A single failed check is enough to fail the whole script.
if [ "$BAD" ]; then
  exit 1
fi

@ -1,22 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# The file is generated using update_dependencies.sh.
#
# Plain variable assignments only. Each *_BASE value is the root directory of
# one toolchain component or third-party library; fbcode_config_platform010.sh
# sources this file and derives include/lib/bin paths from these roots.
# Since the file is generated, regenerate it rather than editing by hand.
GCC_BASE=/mnt/gvfs/third-party2/gcc/e40bde78650fa91b8405a857e3f10bf336633fb0/11.x/centos7-native/886b5eb
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/2043340983c032915adbb6f78903dc855b65aee8/12/platform010/9520e0f
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/c00dcc6a3e4125c7e8b248e9a79c14b78ac9e0ca/11.x/platform010/5684a5a
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/0b9c8e4b060eda62f3bc1c6127bbe1256697569b/2.34/platform010/f259413
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/bc9647f7912b131315827d65cb6189c21f381d05/1.1.3/platform010/76ebdda
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/a6f5f3f1d063d2d00cd02fc12f0f05fc3ab3a994/1.2.11/platform010/76ebdda
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/09703139cfc376bd8a82642385a0e97726b28287/1.0.6/platform010/76ebdda
LZ4_BASE=/mnt/gvfs/third-party2/lz4/60220d6a5bf7722b9cc239a1368c596619b12060/1.9.1/platform010/76ebdda
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/50eace8143eaaea9473deae1f3283e0049e05633/1.4.x/platform010/64091f4
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/5d27e5919771603da06000a027b12f799e58a4f7/2.2.0/platform010/76ebdda
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/b62912d333ef33f9760efa6219dbe3fe6abb3b0e/master/platform010/f57cc4a
NUMA_BASE=/mnt/gvfs/third-party2/numa/6b412770957aa3c8a87e5e0dcd8cc2f45f393bc0/2.0.11/platform010/76ebdda
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/52f69816e936e147664ad717eb71a1a0e9dc973a/1.4/platform010/5074a48
TBB_BASE=/mnt/gvfs/third-party2/tbb/c9cc192099fa84c0dcd0ffeedd44a373ad6e4925/2018_U5/platform010/76ebdda
LIBURING_BASE=/mnt/gvfs/third-party2/liburing/a98e2d137007e3ebf7f33bd6f99c2c56bdaf8488/20210212/platform010/76ebdda
BENCHMARK_BASE=/mnt/gvfs/third-party2/benchmark/780c7a0f9cf0967961e69ad08e61cddd85d61821/trunk/platform010/76ebdda
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/02d9f76aaaba580611cf75e741753c800c7fdc12/fb/platform010/da39a3e
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/938dc3f064ef3a48c0446f5b11d788d50b3eb5ee/2.37/centos7-native/da39a3e
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/429a6b3203eb415f1599bd15183659153129188e/3.15.0/platform010/76ebdda
LUA_BASE=/mnt/gvfs/third-party2/lua/363787fa5cac2a8aa20638909210443278fa138e/5.3.4/platform010/9079c97

@ -1,3 +0,0 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# Run `make` inside a buildpack-deps container with the current directory
# mounted at /rocks. $PWD is quoted so a path containing whitespace is still
# passed to docker as a single -v argument.
docker run -v "$PWD":/rocks -w /rocks buildpack-deps make

@ -1,181 +0,0 @@
# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
# This source code is licensed under both the GPLv2 (found in the
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).
"""Filter for error messages in test output:
- Receives merged stdout/stderr from test on stdin
- Finds patterns of known error messages for test name (first argument)
- Prints those error messages to stdout
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import re
import sys
class ErrorParserBase(object):
    """Common interface of all error parsers."""

    def parse_error(self, line):
        """Parses a line of test output. If it contains an error, returns a
        formatted message describing the error; otherwise, returns None.
        Subclasses must override this method.
        """
        raise NotImplementedError


class GTestErrorParser(ErrorParserBase):
    """A parser that remembers the last test that began running so it can print
    that test's name upon detecting failure.
    """

    # GoogleTest pads the tag ("[ RUN      ] Name"), so accept one or more
    # spaces before the closing bracket instead of exactly one.
    _GTEST_NAME_PATTERN = re.compile(r"\[ RUN +\] (\S+)$")
    # format: '<filename or "unknown file">:<line #>: Failure'
    _GTEST_FAIL_PATTERN = re.compile(r"(unknown file|\S+:\d+): Failure$")

    def __init__(self):
        # Reported when a failure shows up before any RUN line was seen.
        self._last_gtest_name = "Unknown test"

    def parse_error(self, line):
        """Tracks RUN lines and reports failure lines.

        Returns "<test name> failed: <location>" for a failure line and None
        for everything else (including the RUN lines themselves).
        """
        gtest_name_match = self._GTEST_NAME_PATTERN.match(line)
        if gtest_name_match:
            self._last_gtest_name = gtest_name_match.group(1)
            return None
        gtest_fail_match = self._GTEST_FAIL_PATTERN.match(line)
        if gtest_fail_match:
            return "%s failed: %s" % (self._last_gtest_name, gtest_fail_match.group(1))
        return None


class MatchErrorParser(ErrorParserBase):
    """A simple parser that returns the whole line if it matches the pattern."""

    def __init__(self, pattern):
        self._pattern = re.compile(pattern)

    def parse_error(self, line):
        """Returns `line` unchanged when the pattern matches at its start,
        otherwise None."""
        if self._pattern.match(line):
            return line
        return None


class CompilerErrorParser(MatchErrorParser):
    """Matches compiler and linker error lines."""

    def __init__(self):
        # format (compile error):
        #   '<filename>:<line #>:<column #>: error: <error msg>'
        # format (link error):
        #   '<filename>:<line #>: error: <error msg>'
        # The below regex catches both
        super(CompilerErrorParser, self).__init__(r"\S+:\d+: error:")


class ScanBuildErrorParser(MatchErrorParser):
    """Matches the scan-build bug-count summary line."""

    def __init__(self):
        super(ScanBuildErrorParser, self).__init__(r"scan-build: \d+ bugs found.$")


class DbCrashErrorParser(MatchErrorParser):
    """Matches crash-test failure lines."""

    def __init__(self):
        super(DbCrashErrorParser, self).__init__(r"\*\*\*.*\^$|TEST FAILED.")


class WriteStressErrorParser(MatchErrorParser):
    """Matches the line reporting that write_stress died."""

    def __init__(self):
        super(WriteStressErrorParser, self).__init__(
            r"ERROR: write_stress died with exitcode=\d+"
        )


class AsanErrorParser(MatchErrorParser):
    """Matches AddressSanitizer error headers."""

    def __init__(self):
        super(AsanErrorParser, self).__init__(r"==\d+==ERROR: AddressSanitizer:")


class UbsanErrorParser(MatchErrorParser):
    """Matches UndefinedBehaviorSanitizer runtime errors."""

    def __init__(self):
        # format: '<filename>:<line #>:<column #>: runtime error: <error msg>'
        super(UbsanErrorParser, self).__init__(r"\S+:\d+:\d+: runtime error:")


class ValgrindErrorParser(MatchErrorParser):
    """Matches the valgrind error summary line."""

    def __init__(self):
        # just grab the summary, valgrind doesn't clearly distinguish errors
        # from other log messages.
        super(ValgrindErrorParser, self).__init__(r"==\d+== ERROR SUMMARY:")


class CompatErrorParser(MatchErrorParser):
    """Matches '==== ... error ... ====' banner lines."""

    def __init__(self):
        super(CompatErrorParser, self).__init__(r"==== .*[Ee]rror.* ====$")


class TsanErrorParser(MatchErrorParser):
    """Matches ThreadSanitizer warnings."""

    def __init__(self):
        super(TsanErrorParser, self).__init__(r"WARNING: ThreadSanitizer:")
# Maps each supported test name (the script's single command-line argument)
# to the parser classes that should inspect that test's output.
_TEST_NAME_TO_PARSERS = {
    # Unit-test style runs: compiler errors plus gtest failures.
    "punit": [CompilerErrorParser, GTestErrorParser],
    "unit": [CompilerErrorParser, GTestErrorParser],
    "release": [CompilerErrorParser, GTestErrorParser],
    "unit_481": [CompilerErrorParser, GTestErrorParser],
    "release_481": [CompilerErrorParser, GTestErrorParser],
    "clang_unit": [CompilerErrorParser, GTestErrorParser],
    "clang_release": [CompilerErrorParser, GTestErrorParser],
    "clang_analyze": [CompilerErrorParser, ScanBuildErrorParser],
    "code_cov": [CompilerErrorParser, GTestErrorParser],
    "unity": [CompilerErrorParser, GTestErrorParser],
    "lite": [CompilerErrorParser],
    "lite_test": [CompilerErrorParser, GTestErrorParser],
    # Stress / crash runs.
    "stress_crash": [CompilerErrorParser, DbCrashErrorParser],
    "stress_crash_with_atomic_flush": [CompilerErrorParser, DbCrashErrorParser],
    "stress_crash_with_txn": [CompilerErrorParser, DbCrashErrorParser],
    "write_stress": [CompilerErrorParser, WriteStressErrorParser],
    # Sanitizer runs.
    "asan": [CompilerErrorParser, GTestErrorParser, AsanErrorParser],
    "asan_crash": [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser],
    "asan_crash_with_atomic_flush": [
        CompilerErrorParser,
        AsanErrorParser,
        DbCrashErrorParser,
    ],
    "asan_crash_with_txn": [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser],
    "ubsan": [CompilerErrorParser, GTestErrorParser, UbsanErrorParser],
    "ubsan_crash": [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser],
    "ubsan_crash_with_atomic_flush": [
        CompilerErrorParser,
        UbsanErrorParser,
        DbCrashErrorParser,
    ],
    "ubsan_crash_with_txn": [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser],
    "valgrind": [CompilerErrorParser, GTestErrorParser, ValgrindErrorParser],
    "tsan": [CompilerErrorParser, GTestErrorParser, TsanErrorParser],
    # Compatibility, compression and regression runs.
    "format_compatible": [CompilerErrorParser, CompatErrorParser],
    "run_format_compatible": [CompilerErrorParser, CompatErrorParser],
    "no_compression": [CompilerErrorParser, GTestErrorParser],
    "run_no_compression": [CompilerErrorParser, GTestErrorParser],
    "regression": [CompilerErrorParser],
    "run_regression": [CompilerErrorParser],
}
def main():
    """Filters stdin down to the known error messages for one test name.

    Expects exactly one command-line argument, the test name. Returns an error
    string (which sys.exit prints before exiting non-zero) for bad usage or an
    unknown test name, and None once stdin has been fully processed.
    """
    if len(sys.argv) != 2:
        return "Usage: %s <test name>" % sys.argv[0]
    test_name = sys.argv[1]
    if test_name not in _TEST_NAME_TO_PARSERS:
        return "Unknown test name: %s" % test_name

    # One fresh parser instance per class; parsers may keep state across lines.
    error_parsers = [parser_cls() for parser_cls in _TEST_NAME_TO_PARSERS[test_name]]

    for line in sys.stdin:
        line = line.strip()
        # Every parser sees every line, so one line can yield several messages.
        for error_parser in error_parsers:
            error_msg = error_parser.parse_error(line)
            if error_msg is not None:
                print(error_msg)


if __name__ == "__main__":
    sys.exit(main())

@ -1,55 +0,0 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# Builds Mongo with scons against a RocksDB checkout, using the compiler and
# library settings exported by the fbcode config script. Extra command-line
# arguments are forwarded to scons unchanged.
#
# The script relies on bash features (`source`, `[[ ]]`, `+=`), so it must be
# run by bash rather than a plain POSIX sh.

# fail early
set -e

if test -z "$ROCKSDB_PATH"; then
  ROCKSDB_PATH=~/rocksdb
fi
source "$ROCKSDB_PATH/build_tools/fbcode_config4.8.1.sh"

EXTRA_LDFLAGS=""

if test -z "$ALLOC"; then
  # default
  ALLOC=tcmalloc
elif [[ $ALLOC == "jemalloc" ]]; then
  # scons is told to use the system allocator while jemalloc is linked in
  # explicitly as a whole archive.
  ALLOC=system
  EXTRA_LDFLAGS+=" -Wl,--whole-archive $JEMALLOC_LIB -Wl,--no-whole-archive"
fi

# we need to force mongo to use static library, not shared
# ($SNAPPY_LIBS / $LZ4_LIBS are intentionally left unquoted below.)
STATIC_LIB_DEP_DIR='build/static_library_dependencies'
test -d $STATIC_LIB_DEP_DIR || mkdir $STATIC_LIB_DEP_DIR
test -h $STATIC_LIB_DEP_DIR/`basename $SNAPPY_LIBS` || ln -s $SNAPPY_LIBS $STATIC_LIB_DEP_DIR
test -h $STATIC_LIB_DEP_DIR/`basename $LZ4_LIBS` || ln -s $LZ4_LIBS $STATIC_LIB_DEP_DIR

EXTRA_LDFLAGS+=" -L $STATIC_LIB_DEP_DIR"

set -x

EXTRA_CMD=""
if ! test -e version.json; then
  # this is Mongo 3.0
  EXTRA_CMD="--rocksdb \
--variant-dir=linux2/norm
--cxx=${CXX} \
--cc=${CC} \
--use-system-zlib" # add this line back to normal code path
  # when https://jira.mongodb.org/browse/SERVER-19123 is resolved
fi

# $EXTRA_CMD is deliberately unquoted so it splits into separate options;
# "$@" forwards the caller's arguments without re-splitting them.
scons \
  LINKFLAGS="$EXTRA_LDFLAGS $EXEC_LDFLAGS $PLATFORM_LDFLAGS" \
  CCFLAGS="$CXXFLAGS -L $STATIC_LIB_DEP_DIR" \
  LIBS="lz4 gcc stdc++" \
  LIBPATH="$ROCKSDB_PATH" \
  CPPPATH="$ROCKSDB_PATH/include" \
  -j32 \
  --allocator=$ALLOC \
  --nostrip \
  --opt=on \
  --disable-minimum-compiler-version-enforcement \
  --use-system-snappy \
  --disable-warnings-as-errors \
  $EXTRA_CMD "$@"

@ -1,175 +0,0 @@
#!/bin/sh
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# Set environment variables so that we can compile rocksdb using
# fbcode settings. It uses the latest g++ and clang compilers and also
# uses jemalloc
# Environment variables that change the behavior of this script:
# PIC_BUILD -- if true, it will only take pic versions of libraries from fbcode. libraries that don't have pic variant will not be included
# Load the dependency locations from the dependencies.sh that sits next to
# this script (presumably the *_BASE variables used below -- confirm against
# that file).
BASEDIR=`dirname $BASH_SOURCE`
source "$BASEDIR/dependencies.sh"
# CFLAGS is built up incrementally below with one -D define per enabled library.
CFLAGS=""
# libgcc
LIBGCC_INCLUDE="$LIBGCC_BASE/include"
LIBGCC_LIBS=" -L $LIBGCC_BASE/lib"
# glibc
GLIBC_INCLUDE="$GLIBC_BASE/include"
GLIBC_LIBS=" -L $GLIBC_BASE/lib"
# Each ROCKSDB_DISABLE_<LIB> variable, when non-empty, skips that library.
if ! test $ROCKSDB_DISABLE_SNAPPY; then
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
if test -z $PIC_BUILD; then
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a"
else
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a"
fi
CFLAGS+=" -DSNAPPY"
fi
# zlib, bzip2 and lz4 are only configured for non-PIC builds.
if test -z $PIC_BUILD; then
if ! test $ROCKSDB_DISABLE_ZLIB; then
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a"
CFLAGS+=" -DZLIB"
fi
if ! test $ROCKSDB_DISABLE_BZIP; then
# location of bzip headers and libraries
BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a"
CFLAGS+=" -DBZIP2"
fi
if ! test $ROCKSDB_DISABLE_LZ4; then
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
CFLAGS+=" -DLZ4"
fi
fi
if ! test $ROCKSDB_DISABLE_ZSTD; then
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
if test -z $PIC_BUILD; then
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
else
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a"
fi
CFLAGS+=" -DZSTD -DZSTD_STATIC_LINKING_ONLY"
fi
# location of gflags headers and libraries
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
if test -z $PIC_BUILD; then
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags.a"
else
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags_pic.a"
fi
CFLAGS+=" -DGFLAGS=gflags"
# location of jemalloc
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc.a"
# numa and libunwind are likewise only configured for non-PIC builds.
if test -z $PIC_BUILD; then
# location of numa
NUMA_INCLUDE=" -I $NUMA_BASE/include/"
NUMA_LIB=" $NUMA_BASE/lib/libnuma.a"
CFLAGS+=" -DNUMA"
# location of libunwind
LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a"
fi
# location of TBB
TBB_INCLUDE=" -isystem $TBB_BASE/include/"
if test -z $PIC_BUILD; then
TBB_LIBS="$TBB_BASE/lib/libtbb.a"
else
TBB_LIBS="$TBB_BASE/lib/libtbb_pic.a"
fi
CFLAGS+=" -DTBB"
# Default USE_SSE and PORTABLE to 1 unless the caller already set them.
test "$USE_SSE" || USE_SSE=1
export USE_SSE
test "$PORTABLE" || PORTABLE=1
export PORTABLE
BINUTILS="$BINUTILS_BASE/bin"
# Note: AR is reassigned in both compiler branches below.
AR="$BINUTILS/ar"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE"
STDLIBS="-L $GCC_BASE/lib64"
CLANG_BIN="$CLANG_BASE/bin"
CLANG_LIB="$CLANG_BASE/lib"
CLANG_SRC="$CLANG_BASE/../../src"
CLANG_ANALYZER="$CLANG_BIN/clang++"
CLANG_SCAN_BUILD="$CLANG_SRC/llvm/tools/clang/tools/scan-build/bin/scan-build"
# Compiler selection: gcc when USE_CLANG is empty, clang otherwise.
if [ -z "$USE_CLANG" ]; then
# gcc
CC="$GCC_BASE/bin/gcc"
CXX="$GCC_BASE/bin/g++"
AR="$GCC_BASE/bin/gcc-ar"
CFLAGS+=" -B$BINUTILS/gold"
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
# JEMALLOC is only set on the gcc path.
JEMALLOC=1
else
# clang
CLANG_INCLUDE="$CLANG_LIB/clang/stable/include"
CC="$CLANG_BIN/clang"
CXX="$CLANG_BIN/clang++"
AR="$CLANG_BIN/llvm-ar"
KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include"
# -nostdinc/-nostdlib: every header search path is listed explicitly below.
CFLAGS+=" -B$BINUTILS/gold -nostdinc -nostdlib"
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/5.x "
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/5.x/x86_64-facebook-linux "
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $CLANG_INCLUDE"
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux "
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE "
CFLAGS+=" -Wno-expansion-to-defined "
CXXFLAGS="-nostdinc++"
fi
CFLAGS+=" $DEPS_INCLUDE"
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DHAVE_SSE42"
# CXXFLAGS gets everything accumulated in CFLAGS appended to it.
CXXFLAGS+=" $CFLAGS"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS"
EXEC_LDFLAGS+=" -B$BINUTILS/gold"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/gcc-5-glibc-2.23/lib/ld.so"
EXEC_LDFLAGS+=" $LIBUNWIND"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/gcc-5-glibc-2.23/lib"
# required by libtbb
EXEC_LDFLAGS+=" -ldl"
PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
# The shared variant omits numa, libunwind and the dynamic-linker/rpath options.
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS"
VALGRIND_VER="$VALGRIND_BASE/bin/"
LUA_PATH="$LUA_BASE"
if test -z $PIC_BUILD; then
LUA_LIB=" $LUA_PATH/lib/liblua.a"
else
LUA_LIB=" $LUA_PATH/lib/liblua_pic.a"
fi
# Publish the computed settings to the environment of child processes.
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB

@ -1,175 +0,0 @@
#!/bin/sh
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# Set environment variables so that we can compile rocksdb using
# fbcode settings. It uses the latest g++ and clang compilers and also
# uses jemalloc
# Environment variables that change the behavior of this script:
# PIC_BUILD -- if true, it will only take pic versions of libraries from fbcode. libraries that don't have pic variant will not be included
# Load the platform010 dependency locations from the file next to this script.
BASEDIR=`dirname $BASH_SOURCE`
source "$BASEDIR/dependencies_platform010.sh"
# Disallow using libraries from default locations as they might not be compatible with platform010 libraries.
CFLAGS=" --sysroot=/DOES/NOT/EXIST"
# libgcc
LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/trunk"
LIBGCC_LIBS=" -L $LIBGCC_BASE/lib -B$LIBGCC_BASE/lib/gcc/x86_64-facebook-linux/trunk/"
# glibc
GLIBC_INCLUDE="$GLIBC_BASE/include"
GLIBC_LIBS=" -L $GLIBC_BASE/lib"
GLIBC_LIBS+=" -B$GLIBC_BASE/lib"
# MAYBE_PIC is the library-name suffix: empty for normal builds, "_pic" when
# PIC_BUILD is set. It is spliced into every static library path below.
if test -z $PIC_BUILD; then
MAYBE_PIC=
else
MAYBE_PIC=_pic
fi
# Each ROCKSDB_DISABLE_<LIB> variable, when non-empty, skips that library.
if ! test $ROCKSDB_DISABLE_SNAPPY; then
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy${MAYBE_PIC}.a"
CFLAGS+=" -DSNAPPY"
fi
if ! test $ROCKSDB_DISABLE_ZLIB; then
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz${MAYBE_PIC}.a"
CFLAGS+=" -DZLIB"
fi
if ! test $ROCKSDB_DISABLE_BZIP; then
# location of bzip headers and libraries
BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2${MAYBE_PIC}.a"
CFLAGS+=" -DBZIP2"
fi
if ! test $ROCKSDB_DISABLE_LZ4; then
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4${MAYBE_PIC}.a"
CFLAGS+=" -DLZ4"
fi
if ! test $ROCKSDB_DISABLE_ZSTD; then
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd${MAYBE_PIC}.a"
CFLAGS+=" -DZSTD"
fi
# location of gflags headers and libraries
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags${MAYBE_PIC}.a"
CFLAGS+=" -DGFLAGS=gflags"
BENCHMARK_INCLUDE=" -I $BENCHMARK_BASE/include/"
BENCHMARK_LIBS=" $BENCHMARK_BASE/lib/libbenchmark${MAYBE_PIC}.a"
# location of jemalloc
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc${MAYBE_PIC}.a"
# location of numa
NUMA_INCLUDE=" -I $NUMA_BASE/include/"
NUMA_LIB=" $NUMA_BASE/lib/libnuma${MAYBE_PIC}.a"
CFLAGS+=" -DNUMA"
# location of libunwind
LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind${MAYBE_PIC}.a"
# location of TBB
TBB_INCLUDE=" -isystem $TBB_BASE/include/"
TBB_LIBS="$TBB_BASE/lib/libtbb${MAYBE_PIC}.a"
CFLAGS+=" -DTBB"
# location of LIBURING
LIBURING_INCLUDE=" -isystem $LIBURING_BASE/include/"
LIBURING_LIBS="$LIBURING_BASE/lib/liburing${MAYBE_PIC}.a"
CFLAGS+=" -DLIBURING"
# Default USE_SSE and PORTABLE to 1 unless the caller already set them.
test "$USE_SSE" || USE_SSE=1
export USE_SSE
test "$PORTABLE" || PORTABLE=1
export PORTABLE
BINUTILS="$BINUTILS_BASE/bin"
# Note: AR is reassigned in both compiler branches below; AS is not.
AR="$BINUTILS/ar"
AS="$BINUTILS/as"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE $LIBURING_INCLUDE $BENCHMARK_INCLUDE"
STDLIBS="-L $GCC_BASE/lib64"
CLANG_BIN="$CLANG_BASE/bin"
CLANG_LIB="$CLANG_BASE/lib"
CLANG_SRC="$CLANG_BASE/../../src"
CLANG_ANALYZER="$CLANG_BIN/clang++"
CLANG_SCAN_BUILD="$CLANG_SRC/llvm/clang/tools/scan-build/bin/scan-build"
# Compiler selection: gcc when USE_CLANG is empty, clang otherwise. Both
# branches pass -nostdinc -nostdlib and list every include path explicitly.
if [ -z "$USE_CLANG" ]; then
# gcc
CC="$GCC_BASE/bin/gcc"
CXX="$GCC_BASE/bin/g++"
AR="$GCC_BASE/bin/gcc-ar"
CFLAGS+=" -B$BINUTILS -nostdinc -nostdlib"
CFLAGS+=" -I$GCC_BASE/include"
CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/include"
CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/install-tools/include"
CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/include-fixed/"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -I$GLIBC_INCLUDE"
CFLAGS+=" -I$LIBGCC_BASE/include"
CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/"
CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/x86_64-facebook-linux/"
CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/backward"
# NOTE(review): the glibc include directory is added again here although it
# was already added a few lines above -- confirm whether the order matters.
CFLAGS+=" -isystem $GLIBC_INCLUDE -I$GLIBC_INCLUDE"
# JEMALLOC is only set on the gcc path.
JEMALLOC=1
else
# clang
CLANG_INCLUDE="$CLANG_LIB/clang/stable/include"
CC="$CLANG_BIN/clang"
CXX="$CLANG_BIN/clang++"
AR="$CLANG_BIN/llvm-ar"
CFLAGS+=" -B$BINUTILS -nostdinc -nostdlib"
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/trunk "
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/trunk/x86_64-facebook-linux "
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $CLANG_INCLUDE"
CFLAGS+=" -Wno-expansion-to-defined "
CXXFLAGS="-nostdinc++"
fi
# Kernel headers and dependency include paths apply to both compilers.
KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include"
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux "
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE "
CFLAGS+=" $DEPS_INCLUDE"
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DHAVE_SSE42 -DROCKSDB_IOURING_PRESENT"
# CXXFLAGS gets everything accumulated in CFLAGS appended to it.
CXXFLAGS+=" $CFLAGS"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform010/lib/ld.so"
EXEC_LDFLAGS+=" $LIBUNWIND"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform010/lib"
EXEC_LDFLAGS+=" -Wl,-rpath=$GCC_BASE/lib64"
# required by libtbb
EXEC_LDFLAGS+=" -ldl"
PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
PLATFORM_LDFLAGS+=" -B$BINUTILS"
# The shared variant omits numa, libunwind and the dynamic-linker/rpath options.
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS"
VALGRIND_VER="$VALGRIND_BASE/bin/"
# NOTE(review): LUA_PATH and LUA_LIB are exported below but never assigned in
# this script -- confirm whether they are expected to come from the caller.
export CC CXX AR AS CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB

@ -1,203 +0,0 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# If the clang_format_diff.py command is not specified, we assume it can be
# invoked directly, without any path.
# Prints the command-line help text to stdout, one line per entry.
print_usage () {
  printf '%s\n' \
    "Usage:" \
    "format-diff.sh [OPTIONS]" \
    "-c: check only." \
    "-h: print this message."
}
# Option parsing: -c switches to check-only mode; -h and any unrecognised
# option both print the usage text and exit with status 1.
while getopts ':ch' OPTION; do
  case "$OPTION" in
    c)
      CHECK_ONLY=1
      ;;
    h|?)
      print_usage
      exit 1
      ;;
  esac
done
REPO_ROOT="$(git rev-parse --show-toplevel)"

# Work out how to invoke clang-format-diff and store the command line in
# CLANG_FORMAT_DIFF:
#   1. a caller-provided CLANG_FORMAT_DIFF is used after a --help dry run;
#   2. otherwise the directly executable candidates are tried in order;
#   3. otherwise clang-format-diff.py is located and run through
#      ${PYTHON:-python3}.
# Exit codes: 128 = tool missing or not runnable, 129 = argparse missing,
# 130 = Python 2 script paired with a Python 3 interpreter.
if [ "$CLANG_FORMAT_DIFF" ]; then
  echo "Note: CLANG_FORMAT_DIFF='$CLANG_FORMAT_DIFF'"
  # Dry run to confirm dependencies like argparse
  if $CLANG_FORMAT_DIFF --help >/dev/null < /dev/null; then
    true #Good
  else
    exit 128
  fi
else
  # First try directly executing the possibilities
  if clang-format-diff --help &> /dev/null < /dev/null; then
    CLANG_FORMAT_DIFF=clang-format-diff
  elif clang-format-diff.py --help &> /dev/null < /dev/null; then
    CLANG_FORMAT_DIFF=clang-format-diff.py
  elif $REPO_ROOT/clang-format-diff.py --help &> /dev/null < /dev/null; then
    CLANG_FORMAT_DIFF=$REPO_ROOT/clang-format-diff.py
  else
    # This probably means we need to directly invoke the interpreter.
    # But first find clang-format-diff.py
    if [ -f "$REPO_ROOT/clang-format-diff.py" ]; then
      CFD_PATH="$REPO_ROOT/clang-format-diff.py"
    elif which clang-format-diff.py &> /dev/null; then
      CFD_PATH="$(which clang-format-diff.py)"
    else
      echo "You didn't have clang-format-diff.py and/or clang-format available in your computer!"
      echo "You can download clang-format-diff.py by running: "
      echo " curl --location https://raw.githubusercontent.com/llvm/llvm-project/main/clang/tools/clang-format/clang-format-diff.py -o ${REPO_ROOT}/clang-format-diff.py"
      echo "You should make sure the downloaded script is not compromised."
      echo "You can download clang-format by running:"
      echo " brew install clang-format"
      echo " Or"
      echo " apt install clang-format"
      echo " This might work too:"
      echo " yum install git-clang-format"
      echo "Then make sure clang-format is available and executable from \$PATH:"
      echo " clang-format --version"
      exit 128
    fi
    # Check argparse pre-req on interpreter, or it will fail
    if echo import argparse | ${PYTHON:-python3}; then
      true # Good
    else
      # The inner quotes are escaped so they are actually printed; unescaped
      # they terminated the string and the quotes were lost from the message.
      echo "To run clang-format-diff.py, we'll need the library \"argparse\" to be"
      echo "installed. You can try either of the follow ways to install it:"
      echo " 1. Manually download argparse: https://pypi.python.org/pypi/argparse"
      echo " 2. easy_install argparse (if you have easy_install)"
      echo " 3. pip install argparse (if you have pip)"
      exit 129
    fi
    # Unfortunately, some machines have a Python2 clang-format-diff.py
    # installed but only a Python3 interpreter installed. Unfortunately,
    # automatic 2to3 migration is insufficient, so suggest downloading latest.
    if grep -q "print '" "$CFD_PATH" && \
       ${PYTHON:-python3} --version | grep -q 'ython 3'; then
      echo "You have clang-format-diff.py for Python 2 but are using a Python 3"
      echo "interpreter (${PYTHON:-python3})."
      echo "You can download clang-format-diff.py for Python 3 by running: "
      echo " curl --location https://raw.githubusercontent.com/llvm/llvm-project/main/clang/tools/clang-format/clang-format-diff.py -o ${REPO_ROOT}/clang-format-diff.py"
      echo "You should make sure the downloaded script is not compromised."
      exit 130
    fi
    CLANG_FORMAT_DIFF="${PYTHON:-python3} $CFD_PATH"
    # This had better work after all those checks
    if $CLANG_FORMAT_DIFF --help >/dev/null < /dev/null; then
      true #Good
    else
      exit 128
    fi
  fi
fi
# TODO(kailiu) following work is not complete since we still need to figure
# out how to add the modified files done pre-commit hook to git's commit index.
#
# Check if this script has already been added to pre-commit hook.
# Will suggest user to add this script to pre-commit hook if their pre-commit
# is empty.
# PRE_COMMIT_SCRIPT_PATH="`git rev-parse --show-toplevel`/.git/hooks/pre-commit"
# if ! ls $PRE_COMMIT_SCRIPT_PATH &> /dev/null
# then
# echo "Would you like to add this script to pre-commit hook, which will do "
# echo -n "the format check for all the affected lines before you check in (y/n):"
# read add_to_hook
# if [ "$add_to_hook" == "y" ]
# then
# ln -s `git rev-parse --show-toplevel`/build_tools/format-diff.sh $PRE_COMMIT_SCRIPT_PATH
# fi
# fi
set -e

uncommitted_code=$(git diff HEAD)

# Two modes:
#  * uncommitted changes present -> only those changes are checked;
#  * clean working tree -> assume a post-commit check and compare against the
#    merge base with the upstream (facebook/rocksdb.git) main branch.
if [ -n "$uncommitted_code" ]; then
  # Check the format of uncommitted lines,
  diffs=$(git diff -U0 HEAD | $CLANG_FORMAT_DIFF -p 1)
  echo "Checking format of uncommitted changes..."
else
  # Attempt to get name of facebook/rocksdb.git remote.
  if [ -z "$FORMAT_REMOTE" ]; then
    FORMAT_REMOTE="$(LC_ALL=POSIX LANG=POSIX git remote -v | grep 'facebook/rocksdb.git' | head -n 1 | cut -f 1)"
  fi
  # Fall back on 'origin' if that fails
  if [ -z "$FORMAT_REMOTE" ]; then
    FORMAT_REMOTE=origin
  fi
  # Use main branch from that remote
  if [ -z "$FORMAT_UPSTREAM" ]; then
    FORMAT_UPSTREAM="$FORMAT_REMOTE/$(LC_ALL=POSIX LANG=POSIX git remote show $FORMAT_REMOTE | sed -n '/HEAD branch/s/.*: //p')"
  fi
  # Everything after the common ancestor with that remote branch is treated
  # as the contents of a pull request, so it is what gets format-checked.
  FORMAT_UPSTREAM_MERGE_BASE="$(git merge-base "$FORMAT_UPSTREAM" HEAD)"
  # Get the differences
  diffs=$(git diff -U0 "$FORMAT_UPSTREAM_MERGE_BASE" | $CLANG_FORMAT_DIFF -p 1)
  echo "Checking format of changes not yet in $FORMAT_UPSTREAM..."
fi
# Nothing to reformat: report success and stop.
if [ -z "$diffs" ]; then
  echo "Nothing needs to be reformatted!"
  exit 0
fi

# Check-only mode: report the problem (with details when VERBOSE_CHECK is
# set) and fail without modifying any file.
if [ $CHECK_ONLY ]; then
  echo "Your change has unformatted code. Please run make format!"
  if [ $VERBOSE_CHECK ]; then
    clang-format --version
    echo "$diffs"
  fi
  exit 1
fi
# Highlight the insertion/deletion from the clang-format-diff.py's output
# The three values below are terminal colour escape sequences (reset, red,
# green); they are expanded by `echo -e` inside the sed replacements.
COLOR_END="\033[0m"
COLOR_RED="\033[0;31m"
COLOR_GREEN="\033[0;32m"
echo -e "Detect lines that doesn't follow the format rules:\r"
# Add the color to the diff. lines added will be green; lines removed will be red.
# Each sed wraps a whole line starting with '-' (resp. '+') in the colour codes.
echo "$diffs" |
sed -e "s/\(^-.*$\)/`echo -e \"$COLOR_RED\1$COLOR_END\"`/" |
sed -e "s/\(^+.*$\)/`echo -e \"$COLOR_GREEN\1$COLOR_END\"`/"
# The trailing \c keeps the cursor on the prompt line (no newline is printed).
echo -e "Would you like to fix the format automatically (y/n): \c"
# Make sure under any mode, we can read user input.
exec < /dev/tty
read to_fix
# Any answer other than exactly "y" aborts with a failure status.
if [ "$to_fix" != "y" ]
then
exit 1
fi
# Apply the formatting in place (-i) to the same diff that was checked above:
# the uncommitted changes if there are any, otherwise the changes since the
# upstream merge base.
if [ -n "$uncommitted_code" ]; then
  git diff -U0 HEAD | $CLANG_FORMAT_DIFF -i -p 1
else
  git diff -U0 "$FORMAT_UPSTREAM_MERGE_BASE" | $CLANG_FORMAT_DIFF -i -p 1
fi
echo "Files reformatted!"
# After a post-commit check (no uncommitted changes existed beforehand), offer
# to fold the formatting fixes into the last commit. Only the exact answer
# "y" triggers the amend.
if [ -z "$uncommitted_code" ]; then
  echo -e "Would you like to amend the changes to last commit ($(git log HEAD --oneline | head -1))? (y/n): \c"
  read to_amend
  if [ "$to_amend" == "y" ]; then
    git commit -a --amend --reuse-message HEAD
    echo "Amended to last commit"
  fi
fi

File diff suppressed because it is too large Load Diff

@ -1,129 +0,0 @@
# shellcheck disable=SC1113
#/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# Abort the script as soon as any command fails.
set -e

# Prints the message in $1 to stdout, prefixed with "[+] ".
function log() {
  printf '[+] %s\n' "$1"
}
# Prints the message in $1 to stdout, prefixed with "[!] ", then terminates
# the shell with exit status 1.
function fatal() {
  printf '[!] %s\n' "$1"
  exit 1
}
# Detect the host distribution family and store it in the variable whose
# NAME is passed as $1.
#   /etc/yum.conf present       -> "centos"
#   /etc/dpkg/dpkg.cfg present  -> "ubuntu"
# Any other host aborts the script through fatal().
function platform() {
  local __resultvar=$1
  if [[ -f "/etc/yum.conf" ]]; then
    # printf -v assigns to the named variable without going through eval.
    printf -v "$__resultvar" '%s' "centos"
  elif [[ -f "/etc/dpkg/dpkg.cfg" ]]; then
    printf -v "$__resultvar" '%s' "ubuntu"
  else
    fatal "Unknown operating system"
  fi
}

# Populate the global OS variable used by the helpers below.
platform OS
# Install a distribution package unless it is already installed.
#   $1    - package name (used for the "already installed" check)
#   $2... - extra options forwarded to the package manager together with $1
#
# The check queries the package database for exactly $1 instead of grepping
# the list of all installed packages, so that e.g. an installed "ruby-devel"
# is no longer mistaken for "ruby".
function package() {
  if [[ $OS = "ubuntu" ]]; then
    if dpkg-query -W -f='${Status}' "$1" 2>/dev/null |
        grep --quiet "install ok installed"; then
      log "$1 is already installed. skipping."
    else
      apt-get install "$@" -y
    fi
  elif [[ $OS = "centos" ]]; then
    if rpm -q "$1" > /dev/null 2>&1; then
      log "$1 is already installed. skipping."
    else
      yum install "$@" -y
    fi
  fi
}
# Export FPM_OUTPUT with the package format matching the detected OS:
# "deb" for ubuntu, "rpm" for centos. Any other value of OS leaves
# FPM_OUTPUT untouched.
function detect_fpm_output() {
  case "$OS" in
    ubuntu) export FPM_OUTPUT=deb ;;
    centos) export FPM_OUTPUT=rpm ;;
  esac
}

detect_fpm_output
# Install a Ruby gem unless it is already installed.
#   $1    - gem name (used for the "already installed" check)
#   $2... - extra options forwarded to `gem install` together with $1
#
# `gem list -i` takes a regular expression; anchoring it makes the check match
# the exact gem name rather than any gem whose name merely contains $1.
function gem_install() {
  if gem list -i "^$1\$" > /dev/null 2>&1; then
    log "$1 is already installed. skipping."
  else
    gem install "$@"
  fi
}
# Build the static library and package it with fpm.
#   $1 - RocksDB version string; passed to fpm as the package version.
# Requires exactly one argument; otherwise the script aborts via fatal().
function main() {
if [[ $# -ne 1 ]]; then
fatal "Usage: $0 <rocksdb_version>"
else
log "using rocksdb version: $1"
fi
# The toolchain setup below only runs when /vagrant exists
# (presumably inside the project's Vagrant box -- confirm).
if [[ -d /vagrant ]]; then
if [[ $OS = "ubuntu" ]]; then
package g++-4.8
export CXX=g++-4.8
# the deb would depend on libgflags2, but the static lib is the only thing
# installed by make install
package libgflags-dev
package ruby-all-dev
elif [[ $OS = "centos" ]]; then
# Register the devtools-1.1 yum repository (downloaded once) and use its
# gcc/g++ for the build.
pushd /etc/yum.repos.d
if [[ ! -f /etc/yum.repos.d/devtools-1.1.repo ]]; then
wget http://people.centos.org/tru/devtools-1.1/devtools-1.1.repo
fi
package devtoolset-1.1-gcc --enablerepo=testing-1.1-devtools-6
package devtoolset-1.1-gcc-c++ --enablerepo=testing-1.1-devtools-6
export CC=/opt/centos/devtoolset-1.1/root/usr/bin/gcc
export CPP=/opt/centos/devtoolset-1.1/root/usr/bin/cpp
export CXX=/opt/centos/devtoolset-1.1/root/usr/bin/c++
export PATH=$PATH:/opt/centos/devtoolset-1.1/root/usr/bin
popd
# gflags is installed straight from a release RPM when no installed
# package name contains "gflags".
if ! rpm -qa | grep --quiet gflags; then
rpm -i https://github.com/schuhschuh/gflags/releases/download/v2.1.0/gflags-devel-2.1.0-1.amd64.rpm
fi
package ruby
package ruby-devel
package rubygems
package rpm-build
fi
fi
# fpm is the packaging tool invoked at the end of this function.
gem_install fpm
make static_lib
# Library directory used for the staged install; for rpm output the
# distribution's %_libdir macro is used instead of /usr/lib.
LIBDIR=/usr/lib
if [[ $FPM_OUTPUT = "rpm" ]]; then
LIBDIR=$(rpm --eval '%_libdir')
fi
# Stage the installation into a fresh ./package directory ...
rm -rf package
make install DESTDIR=package PREFIX=/usr LIBDIR=$LIBDIR
# ... and turn the staged "usr" tree into a package of type $FPM_OUTPUT.
fpm \
-s dir \
-t $FPM_OUTPUT \
-C package \
-n rocksdb \
-v $1 \
--url http://rocksdb.org/ \
-m rocksdb@fb.com \
--license BSD \
--vendor Facebook \
--description "RocksDB is an embeddable persistent key-value store for fast storage." \
usr
}
# Forward the script's command-line arguments to main.
# shellcheck disable=SC2068
main $@

@ -1,38 +0,0 @@
#!/usr/bin/env perl
# Echo the output of `ps -wwf` and, for every listed process whose command is
# a relative path, dump its stack traces with pstack (falling back to gdb).
use strict;

# Fail loudly if ps cannot be started instead of silently printing nothing.
open(my $ps, "-|", "ps -wwf") or die "Cannot run ps: $!";

my $cols_known = 0;  # set once the header line has been parsed
my $cmd_col = 0;     # index of the CMD column
my $pid_col = 0;     # index of the PID column

while (<$ps>) {
  # Pass every ps line through unchanged.
  print;
  my @cols = split(/\s+/);

  if (!$cols_known && /CMD/) {
    # Parse relevant ps column headers
    for (my $i = 0; $i <= $#cols; $i++) {
      if ($cols[$i] eq "CMD") {
        $cmd_col = $i;
      }
      if ($cols[$i] eq "PID") {
        $pid_col = $i;
      }
    }
    $cols_known = 1;
  } else {
    my $pid = $cols[$pid_col];
    my $cmd = $cols[$cmd_col];

    # Match numeric PID and relative path command
    # -> The intention is only to dump stack traces for hangs in code under
    # test, which means we probably just built it and are executing by
    # relative path (e.g. ./my_test or foo/bar_test) rather then by absolute
    # path (e.g. /usr/bin/time) or PATH search (e.g. grep).
    if ($pid =~ /^[0-9]+$/ && $cmd =~ /^[^\/ ]+[\/]/) {
      print "Dumping stacks for $pid...\n";
      # $pid was validated as purely numeric above, so interpolating it into
      # the shell command is safe.
      system("pstack $pid || gdb -batch -p $pid -ex 'thread apply all bt'");
    }
  }
}
close $ps;

@ -1,396 +0,0 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
set -e
# Number of keys used as the base size for the benchmarks below.
NUM=10000000

# Optional positional arguments:
#   $1 - data directory for the benchmark databases
#   $2 - path prefix for the per-benchmark statistics files
if [ $# -eq 1 ];then
  DATA_DIR=$1
elif [ $# -eq 2 ];then
  DATA_DIR=$1
  STAT_FILE=$2
fi

# On the production build servers, set data and stat
# files/directories not in /tmp or else the tempdir cleaning
# scripts will make you very unhappy.
DATA_DIR=${DATA_DIR:-$(mktemp -t -d rocksdb_XXXX)}
STAT_FILE=${STAT_FILE:-$(mktemp -t -u rocksdb_test_stats_XXXX)}

# Remove the data directory and every "$STAT_FILE.<benchmark>" file.
# The expansions are quoted so that a path containing whitespace or glob
# characters can never make `rm -rf` act on unintended files; the trailing
# ".*" stays outside the quotes so it is still expanded as a glob.
function cleanup {
  rm -rf "$DATA_DIR"
  rm -f "$STAT_FILE".*
}

# Run the cleanup whenever the script exits, including on failure (set -e).
trap cleanup EXIT
# Build the release binaries; ./db_bench from this build is used for every
# benchmark below.
make release
# measure fillseq + fill up the DB for overwrite benchmark
# (creates a fresh DB in $DATA_DIR; output is captured in ${STAT_FILE}.fillseq)
./db_bench \
--benchmarks=fillseq \
--db=$DATA_DIR \
--use_existing_db=0 \
--bloom_bits=10 \
--num=$NUM \
--writes=$NUM \
--cache_size=6442450944 \
--cache_numshardbits=6 \
--table_cache_numshardbits=4 \
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_wal=1 \
--sync=0 > ${STAT_FILE}.fillseq
# measure overwrite performance
# (reuses the DB written above, NUM/10 writes, 8 threads;
# output is captured in ${STAT_FILE}.overwrite)
./db_bench \
--benchmarks=overwrite \
--db=$DATA_DIR \
--use_existing_db=1 \
--bloom_bits=10 \
--num=$NUM \
--writes=$((NUM / 10)) \
--cache_size=6442450944 \
--cache_numshardbits=6 \
--table_cache_numshardbits=4 \
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_wal=1 \
--sync=0 \
--threads=8 > ${STAT_FILE}.overwrite
# fill up the db for readrandom benchmark (1GB total size)
# (fresh DB, single thread; the output of this preparation run is discarded)
./db_bench \
--benchmarks=fillseq \
--db=$DATA_DIR \
--use_existing_db=0 \
--bloom_bits=10 \
--num=$NUM \
--writes=$NUM \
--cache_size=6442450944 \
--cache_numshardbits=6 \
--table_cache_numshardbits=4 \
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_wal=1 \
--sync=0 \
--threads=1 > /dev/null
# measure readrandom with 6GB block cache
# (NUM/5 reads, 16 threads; output is captured in ${STAT_FILE}.readrandom)
./db_bench \
--benchmarks=readrandom \
--db=$DATA_DIR \
--use_existing_db=1 \
--bloom_bits=10 \
--num=$NUM \
--reads=$((NUM / 5)) \
--cache_size=6442450944 \
--cache_numshardbits=6 \
--table_cache_numshardbits=4 \
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_wal=1 \
--sync=0 \
--threads=16 > ${STAT_FILE}.readrandom
# measure readrandom with 6GB block cache and tailing iterator
# (same as the previous run plus --use_tailing_iterator=1;
# output is captured in ${STAT_FILE}.readrandomtailing)
./db_bench \
--benchmarks=readrandom \
--db=$DATA_DIR \
--use_existing_db=1 \
--bloom_bits=10 \
--num=$NUM \
--reads=$((NUM / 5)) \
--cache_size=6442450944 \
--cache_numshardbits=6 \
--table_cache_numshardbits=4 \
--open_files=55000 \
--use_tailing_iterator=1 \
--statistics=1 \
--histogram=1 \
--disable_wal=1 \
--sync=0 \
--threads=16 > ${STAT_FILE}.readrandomtailing
# measure readrandom with 100MB block cache
# (--cache_size=104857600 instead of 6442450944;
# output is captured in ${STAT_FILE}.readrandomsmallblockcache)
./db_bench \
--benchmarks=readrandom \
--db=$DATA_DIR \
--use_existing_db=1 \
--bloom_bits=10 \
--num=$NUM \
--reads=$((NUM / 5)) \
--cache_size=104857600 \
--cache_numshardbits=6 \
--table_cache_numshardbits=4 \
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_wal=1 \
--sync=0 \
--threads=16 > ${STAT_FILE}.readrandomsmallblockcache
# measure readrandom with 8k data in memtable
# (overwrite with --writes=512 runs first, then readrandom on the same DB;
# output is captured in ${STAT_FILE}.readrandom_mem_sst)
./db_bench \
--benchmarks=overwrite,readrandom \
--db=$DATA_DIR \
--use_existing_db=1 \
--bloom_bits=10 \
--num=$NUM \
--reads=$((NUM / 5)) \
--writes=512 \
--cache_size=6442450944 \
--cache_numshardbits=6 \
--table_cache_numshardbits=4 \
--write_buffer_size=1000000000 \
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_wal=1 \
--sync=0 \
--threads=16 > ${STAT_FILE}.readrandom_mem_sst
# fill up the db for readrandom benchmark with filluniquerandom (1GB total size)
# (fresh DB with NUM/4 keys, single thread; output is discarded)
./db_bench \
--benchmarks=filluniquerandom \
--db=$DATA_DIR \
--use_existing_db=0 \
--bloom_bits=10 \
--num=$((NUM / 4)) \
--writes=$((NUM / 4)) \
--cache_size=6442450944 \
--cache_numshardbits=6 \
--table_cache_numshardbits=4 \
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_wal=1 \
--sync=0 \
--threads=1 > /dev/null
# dummy test just to compact the data
# (a short readrandom run of NUM/1000 reads; output is discarded)
./db_bench \
--benchmarks=readrandom \
--db=$DATA_DIR \
--use_existing_db=1 \
--bloom_bits=10 \
--num=$((NUM / 1000)) \
--reads=$((NUM / 1000)) \
--cache_size=6442450944 \
--cache_numshardbits=6 \
--table_cache_numshardbits=4 \
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_wal=1 \
--sync=0 \
--threads=16 > /dev/null
# measure readrandom after load with filluniquerandom with 6GB block cache
# (auto compactions are disabled for this run;
# output is captured in ${STAT_FILE}.readrandom_filluniquerandom)
./db_bench \
--benchmarks=readrandom \
--db=$DATA_DIR \
--use_existing_db=1 \
--bloom_bits=10 \
--num=$((NUM / 4)) \
--reads=$((NUM / 4)) \
--cache_size=6442450944 \
--cache_numshardbits=6 \
--table_cache_numshardbits=4 \
--open_files=55000 \
--disable_auto_compactions=1 \
--statistics=1 \
--histogram=1 \
--disable_wal=1 \
--sync=0 \
--threads=16 > ${STAT_FILE}.readrandom_filluniquerandom
# measure readwhilewriting after load with filluniquerandom with 6GB block cache
# (write rate limit is 110 * 1024; output is captured in
# ${STAT_FILE}.readwhilewriting)
./db_bench \
--benchmarks=readwhilewriting \
--db=$DATA_DIR \
--use_existing_db=1 \
--bloom_bits=10 \
--num=$((NUM / 4)) \
--reads=$((NUM / 4)) \
--benchmark_write_rate_limit=$(( 110 * 1024 )) \
--write_buffer_size=100000000 \
--cache_size=6442450944 \
--cache_numshardbits=6 \
--table_cache_numshardbits=4 \
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_wal=1 \
--sync=0 \
--threads=16 > ${STAT_FILE}.readwhilewriting
# measure memtable performance -- none of the data gets flushed to disk
# (fresh DB, fillrandom followed by readrandom with a 1GB write buffer and
# 10-byte values; output is captured in ${STAT_FILE}.memtablefillreadrandom)
# The benchmark list no longer ends with a stray comma, which produced an
# empty benchmark name.
./db_bench \
  --benchmarks=fillrandom,readrandom \
  --db=$DATA_DIR \
  --use_existing_db=0 \
  --num=$((NUM / 10)) \
  --reads=$NUM \
  --cache_size=6442450944 \
  --cache_numshardbits=6 \
  --table_cache_numshardbits=4 \
  --write_buffer_size=1000000000 \
  --open_files=55000 \
  --statistics=1 \
  --histogram=1 \
  --disable_wal=1 \
  --sync=0 \
  --value_size=10 \
  --threads=16 > ${STAT_FILE}.memtablefillreadrandom
# Flags shared by all in-memory benchmarks below. The database and its WAL
# live under /dev/shm/rocksdb, compression is off and the plain table format
# with mmap reads is used. The variable is expanded unquoted at the call
# sites so that it splits into individual flags.
common_in_mem_args="--db=/dev/shm/rocksdb \
--num_levels=6 \
--key_size=20 \
--prefix_size=12 \
--keys_per_prefix=10 \
--value_size=100 \
--compression_type=none \
--compression_ratio=1 \
--write_buffer_size=134217728 \
--max_write_buffer_number=4 \
--level0_file_num_compaction_trigger=8 \
--level0_slowdown_writes_trigger=16 \
--level0_stop_writes_trigger=24 \
--target_file_size_base=134217728 \
--max_bytes_for_level_base=1073741824 \
--disable_wal=0 \
--wal_dir=/dev/shm/rocksdb \
--sync=0 \
--verify_checksum=1 \
--delete_obsolete_files_period_micros=314572800 \
--use_plain_table=1 \
--open_files=-1 \
--mmap_read=1 \
--mmap_write=0 \
--bloom_bits=10 \
--bloom_locality=1 \
--perf_level=0"
# prepare a in-memory DB with 50M keys, total DB size is ~6G
# (filluniquerandom into a fresh DB, single thread; output is discarded)
./db_bench \
$common_in_mem_args \
--statistics=0 \
--max_background_compactions=16 \
--max_background_flushes=16 \
--benchmarks=filluniquerandom \
--use_existing_db=0 \
--num=52428800 \
--threads=1 > /dev/null
# Readwhilewriting
# Runs for 600 seconds with 32 threads against the in-memory DB prepared
# above; output is captured in ${STAT_FILE}.readwhilewriting_in_ram.
# A space now separates the --benchmarks value from the line continuation so
# the flag cannot be fused with the following --use_existing_db flag.
./db_bench \
  $common_in_mem_args \
  --statistics=1 \
  --max_background_compactions=4 \
  --max_background_flushes=0 \
  --benchmarks=readwhilewriting \
  --use_existing_db=1 \
  --duration=600 \
  --threads=32 \
  --benchmark_write_rate_limit=9502720 > ${STAT_FILE}.readwhilewriting_in_ram
# Seekrandomwhilewriting
# (600 seconds, 32 threads, tailing iterator, in-memory DB;
# output is captured in ${STAT_FILE}.seekwhilewriting_in_ram)
./db_bench \
$common_in_mem_args \
--statistics=1 \
--max_background_compactions=4 \
--max_background_flushes=0 \
--benchmarks=seekrandomwhilewriting \
--use_existing_db=1 \
--use_tailing_iterator=1 \
--duration=600 \
--threads=32 \
--benchmark_write_rate_limit=9502720 > ${STAT_FILE}.seekwhilewriting_in_ram
# measure fillseq with bunch of column families
# (500 column families, 1MB write buffer, fresh DB in $DATA_DIR;
# output is captured in ${STAT_FILE}.fillseq_lots_column_families)
./db_bench \
--benchmarks=fillseq \
--num_column_families=500 \
--write_buffer_size=1048576 \
--db=$DATA_DIR \
--use_existing_db=0 \
--num=$NUM \
--writes=$NUM \
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_wal=1 \
--sync=0 > ${STAT_FILE}.fillseq_lots_column_families
# measure overwrite performance with bunch of column families
# (reuses the DB written above, NUM/10 writes, 8 threads;
# output is captured in ${STAT_FILE}.overwrite_lots_column_families)
./db_bench \
--benchmarks=overwrite \
--num_column_families=500 \
--write_buffer_size=1048576 \
--db=$DATA_DIR \
--use_existing_db=1 \
--num=$NUM \
--writes=$((NUM / 10)) \
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_wal=1 \
--sync=0 \
--threads=8 > ${STAT_FILE}.overwrite_lots_column_families
# send data to ods
#   $1 - metric key
#   $2 - metric value
# When JENKINS_HOME is empty or unset (developer machine) the pair is only
# printed. Otherwise the value is submitted over HTTP; a missing value is
# reported on stderr and nothing is sent.
function send_to_ods {
  # Keep key/value local so they do not leak into the rest of the script.
  local key="$1"
  local value="$2"

  # Quoted so that a JENKINS_HOME containing whitespace does not turn the
  # test into a syntax error.
  if [ -z "$JENKINS_HOME" ]; then
    # running on devbox, just print out the values
    echo $1 $2
    return
  fi

  if [ -z "$value" ];then
    echo >&2 "ERROR: Key $key doesn't have a value."
    return
  fi
  curl --silent "https://www.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build&key=$key&value=$value" \
    --connect-timeout 60
}
# Extract throughput and latency figures for one benchmark from a db_bench
# output file and report them through send_to_ods.
#   $1 - benchmark name to grep for in the output file
#   $2 - key suffix; metrics are reported as rocksdb.build.<suffix>.<metric>
#   $3 - db_bench output file
function send_benchmark_to_ods {
bench="$1"
bench_key="$2"
file="$3"
# Fifth whitespace-separated field of the line(s) mentioning the benchmark.
QPS=$(grep $bench $file | awk '{print $5}')
# Fields 3, 5 and 7 of the "Percentiles" line found within the 6 lines that
# follow a line mentioning the benchmark.
P50_MICROS=$(grep $bench $file -A 6 | grep "Percentiles" | awk '{print $3}' )
P75_MICROS=$(grep $bench $file -A 6 | grep "Percentiles" | awk '{print $5}' )
P99_MICROS=$(grep $bench $file -A 6 | grep "Percentiles" | awk '{print $7}' )
# The values are passed unquoted, so send_to_ods only sees the first word of
# each one.
send_to_ods rocksdb.build.$bench_key.qps $QPS
send_to_ods rocksdb.build.$bench_key.p50_micros $P50_MICROS
send_to_ods rocksdb.build.$bench_key.p75_micros $P75_MICROS
send_to_ods rocksdb.build.$bench_key.p99_micros $P99_MICROS
}
# Report every benchmark measured above.
# Arguments: <benchmark name in the db_bench output> <ODS key suffix> <output file>.
# ${STAT_FILE}.memtablefillreadrandom holds two benchmarks (fillrandom and
# readrandom) and is therefore reported twice under different keys.
send_benchmark_to_ods overwrite overwrite $STAT_FILE.overwrite
send_benchmark_to_ods fillseq fillseq $STAT_FILE.fillseq
send_benchmark_to_ods readrandom readrandom $STAT_FILE.readrandom
send_benchmark_to_ods readrandom readrandom_tailing $STAT_FILE.readrandomtailing
send_benchmark_to_ods readrandom readrandom_smallblockcache $STAT_FILE.readrandomsmallblockcache
send_benchmark_to_ods readrandom readrandom_memtable_sst $STAT_FILE.readrandom_mem_sst
send_benchmark_to_ods readrandom readrandom_fillunique_random $STAT_FILE.readrandom_filluniquerandom
send_benchmark_to_ods fillrandom memtablefillrandom $STAT_FILE.memtablefillreadrandom
send_benchmark_to_ods readrandom memtablereadrandom $STAT_FILE.memtablefillreadrandom
send_benchmark_to_ods readwhilewriting readwhilewriting $STAT_FILE.readwhilewriting
send_benchmark_to_ods readwhilewriting readwhilewriting_in_ram ${STAT_FILE}.readwhilewriting_in_ram
send_benchmark_to_ods seekrandomwhilewriting seekwhilewriting_in_ram ${STAT_FILE}.seekwhilewriting_in_ram
send_benchmark_to_ods fillseq fillseq_lots_column_families ${STAT_FILE}.fillseq_lots_column_families
send_benchmark_to_ods overwrite overwrite_lots_column_families ${STAT_FILE}.overwrite_lots_column_families

@ -1,493 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# This script lets you run the RocksDB tests concurrently,
# utilizing all the available cores.
Param(
[switch]$EnableJE = $false, # Look for and use test executable, append _je to listed exclusions
[switch]$RunAll = $false, # Will attempt to discover all *_test[_je].exe binaries and run all
# of them as Google suites, i.e. it will run test cases concurrently,
# except those mentioned as $Run; those will run as individual test cases.
# Any executables excluded with $ExcludeExes are skipped.
# It will also not run any individual test cases
# excluded by $ExcludeCases
[switch]$RunAllExe = $false, # Look for and use test executables, append _je to exclusions automatically
# It will attempt to run them in parallel w/o breaking them up into individual
# test cases. Those listed with $ExcludeExes will be excluded
[string]$SuiteRun = "", # Split test suites in test cases and run in parallel, not compatible with $RunAll
[string]$Run = "", # Run specified executables in parallel but do not split to test cases
[string]$ExcludeCases = "", # Exclude test cases, expects a comma separated list, no spaces
# Takes effect when $RunAll or $SuiteRun is specified. Must have the full
# test case name including a group and a parameter if any
[string]$ExcludeExes = "", # Exclude exes from consideration, expects a comma separated list,
# no spaces. Takes effect only when $RunAll is specified
[string]$WorkFolder = "", # Direct tests to use that folder. SSD or Ram drive are better options.
# Number of async tasks that would run concurrently. Recommend a number below 64.
# However, CPU utilization really depends on the storage media. Recommend a RAM based disk.
# A value of 1 will run everything serially
[int]$Concurrency = 8,
[int]$Limit = -1 # -1 means do not limit for test purposes
)
# Folders and commands must be fullpath to run assuming
# the current folder is at the root of the git enlistment
$StartDate = (Get-Date)
# Emit the start time to the output.
$StartDate

$DebugPreference = "Continue"

# These tests are not google test suites and we should guard
# Against running them as suites
$RunOnly = New-Object System.Collections.Generic.HashSet[string]
$RunOnly.Add("c_test") | Out-Null
$RunOnly.Add("compact_on_deletion_collector_test") | Out-Null
$RunOnly.Add("merge_test") | Out-Null
$RunOnly.Add("stringappend_test") | Out-Null # Apparently incorrectly written
$RunOnly.Add("backup_engine_test") | Out-Null # Disabled
$RunOnly.Add("timer_queue_test") | Out-Null # Not a gtest

# Reject mutually exclusive options. The messages name the options literally;
# interpolating the variables would print their values (e.g. "True and db_test")
# instead of telling the user which switches conflict.
if($RunAll -and $SuiteRun -ne "") {
    Write-Error "-RunAll and -SuiteRun are not compatible"
    exit 1
}

if($RunAllExe -and $Run -ne "") {
    Write-Error "-RunAllExe and -Run are not compatible"
    exit 1
}
# If running under Appveyor assume that root
# (APPVEYOR_BUILD_FOLDER); otherwise derive the root from this script's
# location by stripping the trailing "\build_tools" component.
[string]$Appveyor = $Env:APPVEYOR_BUILD_FOLDER
if($Appveyor -ne "") {
$RootFolder = $Appveyor
} else {
$RootFolder = $PSScriptRoot -replace '\\build_tools', ''
}
# Test logs go to <root>\db_logs\, test binaries are expected in <root>\build\Debug\.
$LogFolder = -Join($RootFolder, "\db_logs\")
$BinariesFolder = -Join($RootFolder, "\build\Debug\")
# Resolve the working folder. Precedence: -WorkFolder argument, then the
# TEST_TMPDIR environment variable, then <root>\db_tests\. Except when it was
# taken from TEST_TMPDIR, the choice is exported back as TEST_TMPDIR.
if($WorkFolder -eq "") {
# If TEST_TMPDIR is set use it
[string]$var = $Env:TEST_TMPDIR
if($var -eq "") {
$WorkFolder = -Join($RootFolder, "\db_tests\")
$Env:TEST_TMPDIR = $WorkFolder
} else {
$WorkFolder = $var
}
} else {
# Override from a command line
$Env:TEST_TMPDIR = $WorkFolder
}
Write-Output "Root: $RootFolder, WorkFolder: $WorkFolder"
Write-Output "BinariesFolder: $BinariesFolder, LogFolder: $LogFolder"
# Create test directories in the current folder
# (errors, e.g. for already existing folders, are ignored)
md -Path $WorkFolder -ErrorAction Ignore | Out-Null
md -Path $LogFolder -ErrorAction Ignore | Out-Null
# Set of fully-qualified test case names that must not be run.
# NOTE(review): the parameter documentation asks for a comma separated list,
# while the value is split on a single space here -- presumably this relies on
# how PowerShell converts an array argument to [string]; confirm.
$ExcludeCasesSet = New-Object System.Collections.Generic.HashSet[string]
if($ExcludeCases -ne "") {
Write-Host "ExcludeCases: $ExcludeCases"
$l = $ExcludeCases -split ' '
ForEach($t in $l) {
$ExcludeCasesSet.Add($t) | Out-Null
}
}
# Set of test executable names (without the .exe and _je suffixes) that must
# not be run. Split on a space in the same way as $ExcludeCases above.
$ExcludeExesSet = New-Object System.Collections.Generic.HashSet[string]
if($ExcludeExes -ne "") {
Write-Host "ExcludeExe: $ExcludeExes"
$l = $ExcludeExes -split ' '
ForEach($t in $l) {
$ExcludeExesSet.Add($t) | Out-Null
}
}
# Extract the names of the tests of a Google Test executable by running it
# with --gtest_list_tests.
# This filter removes the "#"-introduced comments, and expands to
# fully-qualified names by changing input like this:
#
# DBTest.
# Empty
# WriteEmptyBatch
# MultiThreaded/MultiThreadedDBTest.
# MultiThreaded/0 # GetParam() = 0
# MultiThreaded/1 # GetParam() = 1
# RibbonTypeParamTest/0. # TypeParam = struct DefaultTypesAndSettings
# CompactnessAndBacktrackAndFpRate
# Extremes
# FindOccupancyForSuccessRate
#
# into this:
#
# DBTest.Empty
# DBTest.WriteEmptyBatch
# MultiThreaded/MultiThreadedDBTest.MultiThreaded/0
# MultiThreaded/MultiThreadedDBTest.MultiThreaded/1
# RibbonTypeParamTest/0.CompactnessAndBacktrackAndFpRate
# RibbonTypeParamTest/0.Extremes
# RibbonTypeParamTest/0.FindOccupancyForSuccessRate
#
# Output into the parameter in a form TestName -> Log File Name
#   $GTestExe  - path of the test executable to query
#   $HashTable - receives one entry per test case that is not excluded
function ExtractTestCases([string]$GTestExe, $HashTable) {
$Tests = @()
# Run the executable to get a list of tests and store it into the $Tests array
&$GTestExe --gtest_list_tests | tee -Variable Tests | Out-Null
# Current group
$Group=""
ForEach( $l in $Tests) {
# remove trailing comment if any
$l = $l -replace '\s+\#.*',''
# Leading whitespace is fine
$l = $l -replace '^\s+',''
# Trailing dot is a test group but no whitespace
if ($l -match "\.$" -and $l -notmatch "\s+") {
$Group = $l
} else {
# Otherwise it is a test name, remove leading space
$test = $l
# Prefix the test name with its group to get the fully-qualified name
$test = "$Group$test"
if($ExcludeCasesSet.Contains($test)) {
Write-Warning "$test case is excluded"
continue
}
# create a log name: dots and slashes are replaced with underscores
$test_log = $test -replace '[\./]','_'
$test_log += ".log"
$log_path = -join ($LogFolder, $test_log)
# Add to a hashtable
$HashTable.Add($test, $log_path);
}
}
}
# The function removes a trailing .exe suffix if any,
# creates a name for the log file,
# then adds the test name into
# a HashTable in the form test_name -> log_path
#   $token     - test executable name, with or without the .exe extension
#   $HashTable - table that receives the new entry
function MakeAndAdd([string]$token, $HashTable) {
    # The dot is escaped so that only a literal ".exe" extension is stripped;
    # an unescaped '.' would also strip e.g. the "_exe" of "foo_exe".
    $test_name = $token -replace '\.exe$', ''
    $log_name = -join ($test_name, ".log")
    $log_path = -join ($LogFolder, $log_name)
    $HashTable.Add($test_name, $log_path)
}
# This function takes a list of Suites to run,
# lists all the test cases in each of the suites
# and populates HashOfHashes,
# ordered by suite(exe) @{ Exe = @{ TestCase = LogName }}
#   $ListOfSuites - suite (executable) names without the _je/.exe suffixes
#   $HashOfHashes - receives suite -> (test case -> log path)
# Terminates the script with exit code 1 when no test case could be extracted.
function ProcessSuites($ListOfSuites, $HashOfHashes) {

    $suite_list = $ListOfSuites

    # Problem: if you run --gtest_list_tests on
    # a non Google Test executable then it will start executing
    # and we will get nowhere
    ForEach($suite in $suite_list) {

        if($RunOnly.Contains($suite)) {
            Write-Warning "$suite is excluded from running as Google test suite"
            continue
        }

        if($EnableJE) {
            $suite += "_je"
        }

        $Cases = [ordered]@{}
        $suite_exe = -Join ($BinariesFolder, $suite)
        ExtractTestCases -GTestExe $suite_exe -HashTable $Cases
        if($Cases.Count -gt 0) {
            $HashOfHashes.Add($suite, $Cases);
        }
    }

    # Check the table this function was asked to fill rather than a
    # script-scope variable, and report the suites that were actually
    # processed (the -SuiteRun value is empty in -RunAll mode).
    if($HashOfHashes.Count -lt 1) {
        Write-Error "Failed to extract tests from $ListOfSuites"
        exit 1
    }
}
# This will contain all test executables to run
# Hash table that contains all non suite
# Test executable to run (test name -> log path)
$TestExes = [ordered]@{}

# Check for test exe that are not
# Google Test Suites
# Since this is explicitly mentioned it is not subject
# for exclusions
if($Run -ne "") {

    $test_list = $Run -split ' '
    ForEach($t in $test_list) {

        if($EnableJE) {
            $t += "_je"
        }
        MakeAndAdd -token $t -HashTable $TestExes
    }

    if($TestExes.Count -lt 1) {
        Write-Error "Failed to extract tests from $Run"
        exit 1
    }
} elseif($RunAllExe) {
    # Discover all the test binaries
    if($EnableJE) {
        $pattern = "*_test_je.exe"
    } else {
        $pattern = "*_test.exe"
    }

    $search_path = -join ($BinariesFolder, $pattern)
    Write-Host "Binaries Search Path: $search_path"

    $DiscoveredExe = @()
    dir -Path $search_path | ForEach-Object {
        $DiscoveredExe += ($_.Name)
    }

    # Remove exclusions
    ForEach($e in $DiscoveredExe) {
        # Strip the literal ".exe" extension (dot escaped), then the optional
        # "_je" suffix, to get the name used in the exclusion list.
        $e = $e -replace '\.exe$', ''
        $bare_name = $e -replace '_je$', ''

        if($ExcludeExesSet.Contains($bare_name)) {
            Write-Warning "Test $e is excluded"
            continue
        }
        MakeAndAdd -token $e -HashTable $TestExes
    }

    if($TestExes.Count -lt 1) {
        Write-Error "Failed to discover test executables"
        exit 1
    }
}
# Ordered by exe @{ Exe = @{ TestCase = LogName }}
$CasesToRun = [ordered]@{}

if($SuiteRun -ne "") {
    # Explicit list of suites: split each of them into test cases.
    $suite_list = $SuiteRun -split ' '
    ProcessSuites -ListOfSuites $suite_list -HashOfHashes $CasesToRun
} elseif ($RunAll) {
    # Discover all the test binaries
    if($EnableJE) {
        $pattern = "*_test_je.exe"
    } else {
        $pattern = "*_test.exe"
    }

    $search_path = -join ($BinariesFolder, $pattern)
    Write-Host "Binaries Search Path: $search_path"

    $ListOfExe = @()
    dir -Path $search_path | ForEach-Object {
        $ListOfExe += ($_.Name)
    }

    # Exclude those in RunOnly from running as suites
    $ListOfSuites = @()
    ForEach($e in $ListOfExe) {

        # Strip the literal ".exe" extension (dot escaped), then the optional
        # "_je" suffix, to get the name used in the exclusion lists.
        $e = $e -replace '\.exe$', ''
        $bare_name = $e -replace '_je$', ''

        if($ExcludeExesSet.Contains($bare_name)) {
            Write-Warning "Test $e is excluded"
            continue
        }

        if($RunOnly.Contains($bare_name)) {
            # Not a Google Test suite: run the executable as a whole.
            MakeAndAdd -token $e -HashTable $TestExes
        } else {
            $ListOfSuites += $bare_name
        }
    }

    ProcessSuites -ListOfSuites $ListOfSuites -HashOfHashes $CasesToRun
}
# Invoke a test with a filter and redirect all output
# (stdout and stderr of the executable both go to the log file).
#   $exe  - path of the test executable
#   $test - value passed as --gtest_filter
#   $log  - log file path
$InvokeTestCase = {
param($exe, $test, $log);
&$exe --gtest_filter=$test > $log 2>&1
}
# Invoke all tests and redirect output
# (runs the executable without a filter; stdout and stderr go to the log file).
$InvokeTestAsync = {
param($exe, $log)
&$exe > $log 2>&1
}
# Hash that contains tests to rerun if any failed
# Those tests will be rerun sequentially
# $Rerun = [ordered]@{}
# Test limiting factor here
# (number of jobs started so far; compared against $Limit in RunJobs)
[int]$count = 0
# Overall status
[bool]$script:success = $true;
# Run all test cases and test executables as background jobs, keeping up to
# $ConcurrencyVal jobs in flight, and evaluate the log of every finished job.
#   $Suites         - @{ Exe = @{ TestCase = LogName }}; entries are removed as they are started
#   $TestCmds       - @{ Exe = LogName }; entries are removed as they are started
#   $ConcurrencyVal - maximum number of jobs running at the same time
# Sets $script:success to $false when any job fails.
function RunJobs($Suites, $TestCmds, [int]$ConcurrencyVal)
{
# Array to wait for any of the running jobs
$jobs = @()
# Hash JobToLog
$JobToLog = @{}
# Wait for all to finish and get the results
while(($JobToLog.Count -gt 0) -or
($TestCmds.Count -gt 0) -or
($Suites.Count -gt 0)) {
# Make sure we have maximum concurrent jobs running if anything
# and the $Limit either not set or allows to proceed
while(($JobToLog.Count -lt $ConcurrencyVal) -and
((($TestCmds.Count -gt 0) -or ($Suites.Count -gt 0)) -and
(($Limit -lt 0) -or ($count -lt $Limit)))) {
# We always favor suites to run if available
[string]$exe_name = ""
[string]$log_path = ""
$Cases = @{}
if($Suites.Count -gt 0) {
# Take the first suite ...
ForEach($e in $Suites.Keys) {
$exe_name = $e
$Cases = $Suites[$e]
break
}
# ... and the first remaining test case of that suite
[string]$test_case = ""
[string]$log_path = ""
ForEach($c in $Cases.Keys) {
$test_case = $c
$log_path = $Cases[$c]
break
}
Write-Host "Starting $exe_name::$test_case"
[string]$Exe = -Join ($BinariesFolder, $exe_name)
$job = Start-Job -Name "$exe_name::$test_case" -ArgumentList @($Exe,$test_case,$log_path) -ScriptBlock $InvokeTestCase
$JobToLog.Add($job, $log_path)
# The started case is removed; a suite without remaining cases is dropped
$Cases.Remove($test_case)
if($Cases.Count -lt 1) {
$Suites.Remove($exe_name)
}
} elseif ($TestCmds.Count -gt 0) {
# No suites left: start the first remaining whole-executable test
ForEach($e in $TestCmds.Keys) {
$exe_name = $e
$log_path = $TestCmds[$e]
break
}
Write-Host "Starting $exe_name"
[string]$Exe = -Join ($BinariesFolder, $exe_name)
$job = Start-Job -Name $exe_name -ScriptBlock $InvokeTestAsync -ArgumentList @($Exe,$log_path)
$JobToLog.Add($job, $log_path)
$TestCmds.Remove($exe_name)
} else {
Write-Error "In the job loop but nothing to run"
exit 1
}
++$count
} # End of Job starting loop
# Nothing is running (e.g. $Limit was reached with work still queued): stop
if($JobToLog.Count -lt 1) {
break
}
# Wait until any one of the running jobs finishes
$jobs = @()
foreach($k in $JobToLog.Keys) { $jobs += $k }
$completed = Wait-Job -Job $jobs -Any
$log = $JobToLog[$completed]
$JobToLog.Remove($completed)
$message = -join @($completed.Name, " State: ", ($completed.State))
$log_content = @(Get-Content $log)
if($completed.State -ne "Completed") {
# The job itself did not complete: report it with its whole log
$script:success = $false
Write-Warning $message
$log_content | Write-Warning
} else {
# Scan the log. If we find PASSED and no occurrence of FAILED
# then it is a success
[bool]$pass_found = $false
ForEach($l in $log_content) {
# Any failure marker overrides a pass marker seen earlier
if(($l -match "^\[\s+FAILED") -or
($l -match "Assertion failed:")) {
$pass_found = $false
break
}
if(($l -match "^\[\s+PASSED") -or
($l -match " : PASSED$") -or
($l -match "^PASS$") -or # Special c_test case
($l -match "Passed all tests!") ) {
$pass_found = $true
}
}
if(!$pass_found) {
$script:success = $false;
Write-Warning $message
$log_content | Write-Warning
} else {
Write-Host $message
}
}
# Remove cached job info from the system
# Should be no output
Receive-Job -Job $completed | Out-Null
}
}
# Run everything that was collected above.
RunJobs -Suites $CasesToRun -TestCmds $TestExes -ConcurrencyVal $Concurrency
# Print the total elapsed time of the run.
$EndDate = (Get-Date)
New-TimeSpan -Start $StartDate -End $EndDate |
ForEach-Object {
"Elapsed time: {0:g}" -f $_
}
# Propagate the overall result through the exit code: 1 on any failure, 0 otherwise.
if(!$script:success) {
# This does not succeed killing off jobs quick
# So we simply exit
# Remove-Job -Job $jobs -Force
# indicate failure using this exit code
exit 1
}
exit 0

@ -1,45 +0,0 @@
#!/bin/bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# Abort on the first failing command and trace every command that is run.
set -ex
# Versions of the source archives downloaded below.
ROCKSDB_VERSION="6.7.3"
ZSTD_VERSION="1.4.4"
echo "This script configures CentOS with everything needed to build and run RocksDB"
# Update the system, enable EPEL and install the compiler, the download tool
# and the compression/gflags packages used by the build.
yum update -y && yum install epel-release -y
yum install -y \
wget \
gcc-c++ \
snappy snappy-devel \
zlib zlib-devel \
bzip2 bzip2-devel \
lz4-devel \
libasan \
gflags
# /usr/local/rocksdb is a symlink to the versioned source directory.
mkdir -pv /usr/local/rocksdb-${ROCKSDB_VERSION}
ln -sfT /usr/local/rocksdb-${ROCKSDB_VERSION} /usr/local/rocksdb
# Download the zstd and RocksDB source archives into /tmp.
wget -qO /tmp/zstd-${ZSTD_VERSION}.tar.gz https://github.com/facebook/zstd/archive/v${ZSTD_VERSION}.tar.gz
wget -qO /tmp/rocksdb-${ROCKSDB_VERSION}.tar.gz https://github.com/facebook/rocksdb/archive/v${ROCKSDB_VERSION}.tar.gz
cd /tmp
# zstd is unpacked in /tmp, RocksDB directly under /usr/local.
tar xzvf zstd-${ZSTD_VERSION}.tar.gz
tar xzvf rocksdb-${ROCKSDB_VERSION}.tar.gz -C /usr/local/
echo "Installing ZSTD..."
pushd zstd-${ZSTD_VERSION}
make && make install
popd
echo "Compiling RocksDB..."
cd /usr/local/rocksdb
# The build and the example run are performed as the "vagrant" user, which
# therefore needs to own the source tree.
chown -R vagrant:vagrant /usr/local/rocksdb/
sudo -u vagrant make static_lib
cd examples/
# Build all examples and run one of them as a smoke test; /usr/local/lib is
# added to the library search path for both commands.
sudo -u vagrant LD_LIBRARY_PATH=/usr/local/lib/ make all
sudo -u vagrant LD_LIBRARY_PATH=/usr/local/lib/ ./c_simple_example

@ -1,57 +0,0 @@
# based on the official ubuntu 20.04 image
FROM ubuntu:20.04
# update system
RUN apt-get update && apt-get upgrade -y
# install basic tools
RUN apt-get install -y vim wget curl
# install tzdata non-interactively (the time zone is preset to Etc/UTC)
RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata
# install git and default compilers
RUN apt-get install -y git gcc g++ clang clang-tools
# install basic packages
RUN apt-get install -y lsb-release software-properties-common gnupg
# install gflags, tbb
RUN apt-get install -y libgflags-dev libtbb-dev
# install compression libs
RUN apt-get install -y libsnappy-dev zlib1g-dev libbz2-dev liblz4-dev libzstd-dev
# install cmake
RUN apt-get install -y cmake
RUN apt-get install -y libssl-dev
# install clang-13 using the installer script from apt.llvm.org
WORKDIR /root
RUN wget https://apt.llvm.org/llvm.sh
RUN chmod +x llvm.sh
RUN ./llvm.sh 13 all
# install gcc-7, 8, 10, 11, default is 9
# (gcc-11 is installed after adding the ubuntu-toolchain-r/test PPA)
RUN apt-get install -y gcc-7 g++-7
RUN apt-get install -y gcc-8 g++-8
RUN apt-get install -y gcc-10 g++-10
RUN add-apt-repository -y ppa:ubuntu-toolchain-r/test
RUN apt-get install -y gcc-11 g++-11
# install valgrind
RUN apt-get install -y valgrind
# install folly dependencies
RUN apt-get install -y libgoogle-glog-dev
# install openjdk 8
RUN apt-get install -y openjdk-8-jdk
ENV JAVA_HOME /usr/lib/jvm/java-1.8.0-openjdk-amd64
# install mingw
RUN apt-get install -y mingw-w64
# install gtest-parallel package (cloned from its master branch and put on PATH)
RUN git clone --single-branch --branch master --depth 1 https://github.com/google/gtest-parallel.git ~/gtest-parallel
ENV PATH $PATH:/root/gtest-parallel
# install libprotobuf-mutator for fuzzers test
# (pinned to a fixed commit, built with clang-13; protobuf is downloaded by its build)
RUN apt-get install -y ninja-build binutils liblzma-dev libz-dev pkg-config autoconf libtool
RUN git clone --branch v1.0 https://github.com/google/libprotobuf-mutator.git ~/libprotobuf-mutator && cd ~/libprotobuf-mutator && git checkout ffd86a32874e5c08a143019aad1aaf0907294c9f && mkdir build && cd build && cmake .. -GNinja -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang++-13 -DCMAKE_BUILD_TYPE=Release -DLIB_PROTO_MUTATOR_DOWNLOAD_PROTOBUF=ON && ninja && ninja install
# NOTE(review): the /usr/local/OFF/ entry looks unusual -- confirm it is intended
ENV PKG_CONFIG_PATH /usr/local/OFF/:/root/libprotobuf-mutator/build/external.protobuf/lib/pkgconfig/
ENV PROTOC_BIN /root/libprotobuf-mutator/build/external.protobuf/bin/protoc
# install google benchmark (pinned to v1.7.0)
RUN git clone --depth 1 --branch v1.7.0 https://github.com/google/benchmark.git ~/benchmark
RUN cd ~/benchmark && mkdir build && cd build && cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_GTEST_TESTS=0 && ninja && ninja install
# clean up: drop the apt package lists and the benchmark sources
RUN rm -rf /var/lib/apt/lists/*
RUN rm -rf /root/benchmark

@ -1,106 +0,0 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# Update the dependencies_platform010.sh file with the latest available versions.
#
# The script uses bash-only features (the `function` keyword, `local`,
# ${!name} indirection and ${name^^} upper-casing), so it must be run by bash
# rather than by a plain POSIX sh.

# Directory containing this script; the generated file is written next to it.
BASEDIR=$(dirname "$0")
# Path of the file being generated; set before the first log_* call.
OUTPUT=""
# Append the two-line banner (copyright notice and "generated file" remark)
# to the file named by $OUTPUT.
function log_header()
{
  {
    echo "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved."
    echo "# The file is generated using update_dependencies.sh."
  } >> "$OUTPUT"
}
# Append a "NAME=value" line to the file named by $OUTPUT.
#   $1 - NAME of the variable to record; its current value is looked up
#        through indirect expansion.
function log_variable()
{
  local __logged_name=$1
  printf '%s=%s\n' "$__logged_name" "${!__logged_name}" >> "$OUTPUT"
}
# Root of the third-party2 checkout in which libraries are looked up.
TP2_LATEST="/data/users/$USER/fbsource/fbcode/third-party2/"
## $1 => lib name
## $2 => lib version (if not provided, will try to pick latest)
## $3 => platform (if not provided, will try to pick latest gcc)
##
## get_lib_base will set a variable named ${LIB_NAME}_BASE to the lib location
## and append it to $OUTPUT through log_variable.
function get_lib_base()
{
local lib_name=$1
local lib_version=$2
local lib_platform=$3
local result="$TP2_LATEST/$lib_name/"
# Lib Version
if [ -z "$lib_version" ] || [ "$lib_version" = "LATEST" ]; then
# version is not provided, use latest
# (first entry of a reverse version-sorted directory listing)
result=`ls -dr1v $result/*/ | head -n1`
else
result="$result/$lib_version/"
fi
# Lib Platform
if [ -z "$lib_platform" ]; then
# platform is not provided, use latest gcc
result=`ls -dr1v $result/gcc-*[^fb]/ | head -n1`
else
echo $lib_platform
result="$result/$lib_platform/"
fi
# Pick the first sub-directory below the platform directory.
result=`ls -1d $result/*/ | head -n1`
echo Finding link $result
# lib_name => LIB_NAME_BASE
# (upper-cased, with every '-' replaced by '_')
local __res_var=${lib_name^^}"_BASE"
__res_var=`echo $__res_var | tr - _`
# LIB_NAME_BASE=$result
# (the path is resolved with readlink -f before it is assigned)
eval $__res_var=`readlink -f $result`
log_variable $__res_var
}
###########################################################
# platform010 dependencies #
###########################################################
# The output file is recreated from scratch on every run.
OUTPUT="$BASEDIR/dependencies_platform010.sh"
rm -f "$OUTPUT"
touch "$OUTPUT"
echo "Writing dependencies to $OUTPUT"
# Compilers locations
GCC_BASE=`readlink -f $TP2_LATEST/gcc/11.x/centos7-native/*/`
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/12/platform010/*/`
log_header
log_variable GCC_BASE
log_variable CLANG_BASE
# Libraries locations
# Arguments: <lib name> <version or LATEST> <platform>
get_lib_base libgcc 11.x platform010
get_lib_base glibc 2.34 platform010
get_lib_base snappy LATEST platform010
get_lib_base zlib LATEST platform010
get_lib_base bzip2 LATEST platform010
get_lib_base lz4 LATEST platform010
get_lib_base zstd LATEST platform010
get_lib_base gflags LATEST platform010
get_lib_base jemalloc LATEST platform010
get_lib_base numa LATEST platform010
get_lib_base libunwind LATEST platform010
get_lib_base tbb 2018_U5 platform010
get_lib_base liburing LATEST platform010
get_lib_base benchmark LATEST platform010
get_lib_base kernel-headers fb platform010
get_lib_base binutils LATEST centos7-native
get_lib_base valgrind LATEST platform010
get_lib_base lua 5.3.4 platform010
# Show how the regenerated file differs from the version tracked by git.
git diff $OUTPUT

@ -1,23 +0,0 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# Prints one component of the version (or all three joined by dots), read from
# include/rocksdb/version.h relative to the current directory.
# Usage: version.sh major|minor|patch|full
if [ "$#" = "0" ]; then
  echo "Usage: $0 major|minor|patch|full"
  exit 1
fi

# Any other argument prints nothing.
case "$1" in
  major)
    cat include/rocksdb/version.h | grep MAJOR | head -n1 | awk '{print $3}'
    ;;
  minor)
    cat include/rocksdb/version.h | grep MINOR | head -n1 | awk '{print $3}'
    ;;
  patch)
    cat include/rocksdb/version.h | grep PATCH | head -n1 | awk '{print $3}'
    ;;
  full)
    awk '/#define ROCKSDB/ { env[$2] = $3 }
END { printf "%s.%s.%s\n", env["ROCKSDB_MAJOR"],
env["ROCKSDB_MINOR"],
env["ROCKSDB_PATCH"] }' \
      include/rocksdb/version.h
    ;;
esac

@ -1,469 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "cache/cache_reservation_manager.h"
#include <cstddef>
#include <cstring>
#include <memory>
#include "cache/cache_entry_roles.h"
#include "rocksdb/cache.h"
#include "rocksdb/slice.h"
#include "test_util/testharness.h"
#include "util/coding.h"
namespace ROCKSDB_NAMESPACE {
class CacheReservationManagerTest : public ::testing::Test {
protected:
static constexpr std::size_t kSizeDummyEntry =
CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
static constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry;
static constexpr int kNumShardBits = 0; // 2^0 shard
static constexpr std::size_t kMetaDataChargeOverhead = 10000;
std::shared_ptr<Cache> cache = NewLRUCache(kCacheCapacity, kNumShardBits);
std::shared_ptr<CacheReservationManager> test_cache_rev_mng;
CacheReservationManagerTest() {
test_cache_rev_mng =
std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
cache);
}
};
// Checks that the cache key used for the manager's dummy entry can be
// reconstructed from the cache's next unique key, and that looking it up
// finds the entry.
TEST_F(CacheReservationManagerTest, GenerateCacheKey) {
  std::size_t new_mem_used = 1 * kSizeDummyEntry;
  Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
  ASSERT_EQ(s, Status::OK());
  ASSERT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry);
  ASSERT_LT(cache->GetPinnedUsage(),
            1 * kSizeDummyEntry + kMetaDataChargeOverhead);

  // Next unique Cache key
  CacheKey ckey = CacheKey::CreateUniqueForCacheLifetime(cache.get());
  // Get to the underlying values
  uint64_t* ckey_data = reinterpret_cast<uint64_t*>(&ckey);
  // Back it up to the one used by CRM (using CacheKey implementation details)
  ckey_data[1]--;

  // Specific key (subject to implementation details)
  EXPECT_EQ(ckey_data[0], 0);
  EXPECT_EQ(ckey_data[1], 2);

  Cache::Handle* handle = cache->Lookup(ckey.AsSlice());
  // Fatal on failure so that a null handle never reaches Release() below.
  ASSERT_NE(handle, nullptr)
      << "Failed to generate the cache key for the dummy entry correctly";
  // Clean up the returned handle from Lookup() to prevent memory leak
  cache->Release(handle);
}
// Requesting the same amount of memory twice must leave the reservation, the
// recorded memory usage and the cache's pinned usage unchanged.
TEST_F(CacheReservationManagerTest, KeepCacheReservationTheSame) {
  const std::size_t one_entry = 1 * kSizeDummyEntry;
  const std::size_t requested = one_entry;

  // First request establishes the baseline.
  Status status = test_cache_rev_mng->UpdateCacheReservation(requested);
  ASSERT_EQ(status, Status::OK());
  ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), one_entry);
  ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), requested);
  const std::size_t pinned_before = cache->GetPinnedUsage();
  ASSERT_GE(pinned_before, one_entry);
  ASSERT_LT(pinned_before, one_entry + kMetaDataChargeOverhead);

  // Second, identical request must be a no-op.
  status = test_cache_rev_mng->UpdateCacheReservation(requested);
  EXPECT_EQ(status, Status::OK())
      << "Failed to keep cache reservation the same when new_mem_used equals "
         "to current cache reservation";
  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), one_entry)
      << "Failed to bookkeep correctly when new_mem_used equals to current "
         "cache reservation";
  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), requested)
      << "Failed to bookkeep the used memory correctly when new_mem_used "
         "equals to current cache reservation";
  EXPECT_EQ(cache->GetPinnedUsage(), pinned_before)
      << "Failed to keep underlying dummy entries the same when new_mem_used "
         "equals to current cache reservation";
}
// Growing to an exact multiple of the dummy entry size must reserve exactly
// that amount.
TEST_F(CacheReservationManagerTest,
       IncreaseCacheReservationByMultiplesOfDummyEntrySize) {
  const std::size_t two_entries = 2 * kSizeDummyEntry;
  const std::size_t requested = two_entries;

  Status status = test_cache_rev_mng->UpdateCacheReservation(requested);

  EXPECT_EQ(status, Status::OK())
      << "Failed to increase cache reservation correctly";
  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), two_entries)
      << "Failed to bookkeep cache reservation increase correctly";
  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), requested)
      << "Failed to bookkeep the used memory correctly";
  // Pinned usage is bounded below by the reservation and above by the
  // reservation plus the metadata allowance.
  EXPECT_GE(cache->GetPinnedUsage(), two_entries)
      << "Failed to increase underlying dummy entries in cache correctly";
  EXPECT_LT(cache->GetPinnedUsage(), two_entries + kMetaDataChargeOverhead)
      << "Failed to increase underlying dummy entries in cache correctly";
}
// Growing to 2.5 dummy entries must round the reservation up to 3 entries
// while the recorded memory usage stays at the exact amount requested.
TEST_F(CacheReservationManagerTest,
       IncreaseCacheReservationNotByMultiplesOfDummyEntrySize) {
  const std::size_t requested = 2 * kSizeDummyEntry + kSizeDummyEntry / 2;
  const std::size_t three_entries = 3 * kSizeDummyEntry;

  Status status = test_cache_rev_mng->UpdateCacheReservation(requested);

  EXPECT_EQ(status, Status::OK())
      << "Failed to increase cache reservation correctly";
  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), three_entries)
      << "Failed to bookkeep cache reservation increase correctly";
  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), requested)
      << "Failed to bookkeep the used memory correctly";
  EXPECT_GE(cache->GetPinnedUsage(), three_entries)
      << "Failed to increase underlying dummy entries in cache correctly";
  EXPECT_LT(cache->GetPinnedUsage(), three_entries + kMetaDataChargeOverhead)
      << "Failed to increase underlying dummy entries in cache correctly";
}
// Exercises reservation growth against a strict-capacity cache that holds only
// four dummy entries:
//   1. over-capacity request fails with MemoryLimit,
//   2. shrinking afterwards succeeds,
//   3. over-capacity request fails again,
//   4. after raising the cache capacity the same request succeeds.
TEST(CacheReservationManagerIncreaseReservcationOnFullCacheTest,
     IncreaseCacheReservationOnFullCache) {
  constexpr std::size_t kSizeDummyEntry =
      CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
  constexpr std::size_t kSmallCacheCapacity = 4 * kSizeDummyEntry;
  constexpr std::size_t kBigCacheCapacity = 4096 * kSizeDummyEntry;
  constexpr std::size_t kMetaDataChargeOverhead = 10000;

  LRUCacheOptions lo;
  lo.capacity = kSmallCacheCapacity;
  lo.num_shard_bits = 0;  // 2^0 shard
  lo.strict_capacity_limit = true;
  std::shared_ptr<Cache> cache = NewLRUCache(lo);
  std::shared_ptr<CacheReservationManager> test_cache_rev_mng =
      std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
          cache);

  // Requests one byte more than the small cache can hold and checks that the
  // manager reports MemoryLimit, records the requested memory usage, and only
  // books the dummy entries that were actually inserted. Used twice below.
  const auto expect_failure_on_full_cache = [&]() {
    const std::size_t over_capacity = kSmallCacheCapacity + 1;
    Status full_status =
        test_cache_rev_mng->UpdateCacheReservation(over_capacity);
    EXPECT_EQ(full_status, Status::MemoryLimit())
        << "Failed to return status to indicate failure of dummy entry "
           "insertion "
           "during cache reservation on full cache";
    EXPECT_GE(test_cache_rev_mng->GetTotalReservedCacheSize(),
              1 * kSizeDummyEntry)
        << "Failed to bookkeep correctly before cache resevation failure "
           "happens "
           "due to full cache";
    EXPECT_LE(test_cache_rev_mng->GetTotalReservedCacheSize(),
              kSmallCacheCapacity)
        << "Failed to bookkeep correctly (i.e, bookkeep only successful dummy "
           "entry insertions) when encountering cache resevation failure due "
           "to "
           "full cache";
    EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), over_capacity)
        << "Failed to bookkeep the used memory correctly";
    EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry)
        << "Failed to insert underlying dummy entries correctly when "
           "encountering cache resevation failure due to full cache";
    EXPECT_LE(cache->GetPinnedUsage(), kSmallCacheCapacity)
        << "Failed to insert underlying dummy entries correctly when "
           "encountering cache resevation failure due to full cache";
  };

  // Step 1: the cache is too small for the request.
  expect_failure_on_full_cache();

  // Step 2: shrinking after the failure must still work.
  std::size_t new_mem_used = kSmallCacheCapacity / 2;  // 2 dummy entries
  Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
  EXPECT_EQ(s, Status::OK())
      << "Failed to decrease cache reservation after encountering cache "
         "reservation failure due to full cache";
  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
            2 * kSizeDummyEntry)
      << "Failed to bookkeep cache reservation decrease correctly after "
         "encountering cache reservation due to full cache";
  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
      << "Failed to bookkeep the used memory correctly";
  EXPECT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry)
      << "Failed to release underlying dummy entries correctly on cache "
         "reservation decrease after encountering cache resevation failure due "
         "to full cache";
  EXPECT_LT(cache->GetPinnedUsage(),
            2 * kSizeDummyEntry + kMetaDataChargeOverhead)
      << "Failed to release underlying dummy entries correctly on cache "
         "reservation decrease after encountering cache resevation failure due "
         "to full cache";

  // Step 3: create cache full again for subsequent tests
  expect_failure_on_full_cache();

  // Step 4: increase cache capacity so the previously failed insertion can
  // fully succeed
  cache->SetCapacity(kBigCacheCapacity);
  new_mem_used = kSmallCacheCapacity + 1;
  s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
  EXPECT_EQ(s, Status::OK())
      << "Failed to increase cache reservation after increasing cache capacity "
         "and mitigating cache full error";
  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
            5 * kSizeDummyEntry)
      << "Failed to bookkeep cache reservation increase correctly after "
         "increasing cache capacity and mitigating cache full error";
  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
      << "Failed to bookkeep the used memory correctly";
  EXPECT_GE(cache->GetPinnedUsage(), 5 * kSizeDummyEntry)
      << "Failed to insert underlying dummy entries correctly after increasing "
         "cache capacity and mitigating cache full error";
  EXPECT_LT(cache->GetPinnedUsage(),
            5 * kSizeDummyEntry + kMetaDataChargeOverhead)
      << "Failed to insert underlying dummy entries correctly after increasing "
         "cache capacity and mitigating cache full error";
}
// Shrinking from two dummy entries to exactly one must release one entry.
TEST_F(CacheReservationManagerTest,
       DecreaseCacheReservationByMultiplesOfDummyEntrySize) {
  const std::size_t two_entries = 2 * kSizeDummyEntry;
  const std::size_t one_entry = 1 * kSizeDummyEntry;

  // Set up: reserve two entries.
  std::size_t requested = two_entries;
  Status status = test_cache_rev_mng->UpdateCacheReservation(requested);
  ASSERT_EQ(status, Status::OK());
  ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), two_entries);
  ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), requested);
  ASSERT_GE(cache->GetPinnedUsage(), two_entries);
  ASSERT_LT(cache->GetPinnedUsage(), two_entries + kMetaDataChargeOverhead);

  // Shrink to one entry.
  requested = one_entry;
  status = test_cache_rev_mng->UpdateCacheReservation(requested);
  EXPECT_EQ(status, Status::OK())
      << "Failed to decrease cache reservation correctly";
  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), one_entry)
      << "Failed to bookkeep cache reservation decrease correctly";
  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), requested)
      << "Failed to bookkeep the used memory correctly";
  EXPECT_GE(cache->GetPinnedUsage(), one_entry)
      << "Failed to decrease underlying dummy entries in cache correctly";
  EXPECT_LT(cache->GetPinnedUsage(), one_entry + kMetaDataChargeOverhead)
      << "Failed to decrease underlying dummy entries in cache correctly";
}
// Shrinking from two dummy entries to half an entry must leave one whole
// entry reserved, while the recorded memory usage is the exact half entry.
TEST_F(CacheReservationManagerTest,
       DecreaseCacheReservationNotByMultiplesOfDummyEntrySize) {
  const std::size_t two_entries = 2 * kSizeDummyEntry;
  const std::size_t one_entry = 1 * kSizeDummyEntry;

  // Set up: reserve two entries.
  std::size_t requested = two_entries;
  Status status = test_cache_rev_mng->UpdateCacheReservation(requested);
  ASSERT_EQ(status, Status::OK());
  ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), two_entries);
  ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), requested);
  ASSERT_GE(cache->GetPinnedUsage(), two_entries);
  ASSERT_LT(cache->GetPinnedUsage(), two_entries + kMetaDataChargeOverhead);

  // Shrink to half an entry.
  requested = kSizeDummyEntry / 2;
  status = test_cache_rev_mng->UpdateCacheReservation(requested);
  EXPECT_EQ(status, Status::OK())
      << "Failed to decrease cache reservation correctly";
  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), one_entry)
      << "Failed to bookkeep cache reservation decrease correctly";
  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), requested)
      << "Failed to bookkeep the used memory correctly";
  EXPECT_GE(cache->GetPinnedUsage(), one_entry)
      << "Failed to decrease underlying dummy entries in cache correctly";
  EXPECT_LT(cache->GetPinnedUsage(), one_entry + kMetaDataChargeOverhead)
      << "Failed to decrease underlying dummy entries in cache correctly";
}
// With delayed decrease enabled, the reservation must stay at 8 entries while
// usage is 6 or 7 entries, and drop to 6 entries once usage falls just below
// 6 entries (i.e. below 3/4 of the current reservation).
TEST(CacheReservationManagerWithDelayedDecreaseTest,
     DecreaseCacheReservationWithDelayedDecrease) {
  constexpr std::size_t kSizeDummyEntry =
      CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
  constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry;
  constexpr std::size_t kMetaDataChargeOverhead = 10000;

  LRUCacheOptions cache_opts;
  cache_opts.capacity = kCacheCapacity;
  cache_opts.num_shard_bits = 0;
  std::shared_ptr<Cache> cache = NewLRUCache(cache_opts);
  std::shared_ptr<CacheReservationManager> test_cache_rev_mng =
      std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
          cache, true /* delayed_decrease */);

  const std::size_t eight_entries = 8 * kSizeDummyEntry;
  const std::size_t six_entries = 6 * kSizeDummyEntry;

  // Set up: reserve eight entries.
  std::size_t requested = eight_entries;
  Status status = test_cache_rev_mng->UpdateCacheReservation(requested);
  ASSERT_EQ(status, Status::OK());
  ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), eight_entries);
  ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), requested);
  const std::size_t pinned_at_eight = cache->GetPinnedUsage();
  ASSERT_GE(pinned_at_eight, eight_entries);
  ASSERT_LT(pinned_at_eight, eight_entries + kMetaDataChargeOverhead);

  // Usage of 6 and then 7 entries: both are at least 3/4 of the 8-entry
  // reservation, so nothing may be released yet.
  const std::size_t delayed_targets[] = {6 * kSizeDummyEntry,
                                         7 * kSizeDummyEntry};
  for (const std::size_t target : delayed_targets) {
    requested = target;
    status = test_cache_rev_mng->UpdateCacheReservation(requested);
    EXPECT_EQ(status, Status::OK())
        << "Failed to delay decreasing cache reservation";
    EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), eight_entries)
        << "Failed to bookkeep correctly when delaying cache reservation "
           "decrease";
    EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), requested)
        << "Failed to bookkeep the used memory correctly";
    EXPECT_EQ(cache->GetPinnedUsage(), pinned_at_eight)
        << "Failed to delay decreasing underlying dummy entries in cache";
  }

  // One byte below 6 entries: now the reservation must shrink to 6 entries.
  requested = 6 * kSizeDummyEntry - 1;
  status = test_cache_rev_mng->UpdateCacheReservation(requested);
  EXPECT_EQ(status, Status::OK())
      << "Failed to decrease cache reservation correctly when new_mem_used < "
         "GetTotalReservedCacheSize() * 3 / 4 on delayed decrease mode";
  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), six_entries)
      << "Failed to bookkeep correctly when new_mem_used < "
         "GetTotalReservedCacheSize() * 3 / 4 on delayed decrease mode";
  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), requested)
      << "Failed to bookkeep the used memory correctly";
  EXPECT_GE(cache->GetPinnedUsage(), six_entries)
      << "Failed to decrease underlying dummy entries in cache when "
         "new_mem_used < GetTotalReservedCacheSize() * 3 / 4 on delayed "
         "decrease mode";
  EXPECT_LT(cache->GetPinnedUsage(), six_entries + kMetaDataChargeOverhead)
      << "Failed to decrease underlying dummy entries in cache when "
         "new_mem_used < GetTotalReservedCacheSize() * 3 / 4 on delayed "
         "decrease mode";
}
// Destroying the manager must release every dummy entry it still holds, so
// the cache's pinned usage returns to zero.
TEST(CacheReservationManagerDestructorTest,
     ReleaseRemainingDummyEntriesOnDestruction) {
  constexpr std::size_t kSizeDummyEntry =
      CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
  constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry;
  constexpr std::size_t kMetaDataChargeOverhead = 10000;

  LRUCacheOptions cache_opts;
  cache_opts.capacity = kCacheCapacity;
  cache_opts.num_shard_bits = 0;
  std::shared_ptr<Cache> cache = NewLRUCache(cache_opts);

  {
    // The manager lives only inside this scope.
    std::shared_ptr<CacheReservationManager> scoped_manager =
        std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
            cache);
    const std::size_t one_entry = 1 * kSizeDummyEntry;
    Status status = scoped_manager->UpdateCacheReservation(one_entry);
    ASSERT_EQ(status, Status::OK());
    ASSERT_GE(cache->GetPinnedUsage(), one_entry);
    ASSERT_LT(cache->GetPinnedUsage(), one_entry + kMetaDataChargeOverhead);
  }

  EXPECT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry)
      << "Failed to release remaining underlying dummy entries in cache in "
         "CacheReservationManager's destructor";
}
// Covers the handle-based API: MakeCacheReservation() hands out handles that
// each account for an increment of memory, destroying a handle gives that
// increment back, and outstanding handles keep the reservation alive after
// the caller drops its own pointer to the manager.
TEST(CacheReservationHandleTest, HandleTest) {
  constexpr std::size_t kOneGigabyte = 1024 * 1024 * 1024;
  // Taken from the manager, like the other tests in this file, so the
  // "reserved == used" checks below hold for whole multiples of the entry
  // size instead of relying on a hard-coded copy of it.
  constexpr std::size_t kSizeDummyEntry =
      CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
  constexpr std::size_t kMetaDataChargeOverhead = 10000;

  LRUCacheOptions lo;
  lo.capacity = kOneGigabyte;
  lo.num_shard_bits = 0;
  std::shared_ptr<Cache> cache = NewLRUCache(lo);

  std::shared_ptr<CacheReservationManager> test_cache_rev_mng(
      std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
          cache));

  std::size_t mem_used = 0;
  const std::size_t incremental_mem_used_handle_1 = 1 * kSizeDummyEntry;
  const std::size_t incremental_mem_used_handle_2 = 2 * kSizeDummyEntry;
  std::unique_ptr<CacheReservationManager::CacheReservationHandle> handle_1,
      handle_2;

  // To test consecutive CacheReservationManager::MakeCacheReservation works
  // correctly in terms of returning the handle as well as updating cache
  // reservation and the latest total memory used
  Status s = test_cache_rev_mng->MakeCacheReservation(
      incremental_mem_used_handle_1, &handle_1);
  mem_used = mem_used + incremental_mem_used_handle_1;
  ASSERT_EQ(s, Status::OK());
  EXPECT_TRUE(handle_1 != nullptr);
  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used);
  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used);
  EXPECT_GE(cache->GetPinnedUsage(), mem_used);
  EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);

  s = test_cache_rev_mng->MakeCacheReservation(incremental_mem_used_handle_2,
                                               &handle_2);
  mem_used = mem_used + incremental_mem_used_handle_2;
  ASSERT_EQ(s, Status::OK());
  EXPECT_TRUE(handle_2 != nullptr);
  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used);
  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used);
  EXPECT_GE(cache->GetPinnedUsage(), mem_used);
  EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);

  // To test
  // CacheReservationManager::CacheReservationHandle::~CacheReservationHandle()
  // works correctly in releasing the cache reserved for the handle
  handle_1.reset();
  EXPECT_TRUE(handle_1 == nullptr);
  mem_used = mem_used - incremental_mem_used_handle_1;
  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used);
  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used);
  EXPECT_GE(cache->GetPinnedUsage(), mem_used);
  EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);

  // To test the actual CacheReservationManager object won't be deallocated
  // as long as there remain handles pointing to it.
  // We strongly recommend deallocating CacheReservationManager object only
  // after all its handles are deallocated to keep things easy to reason about
  test_cache_rev_mng.reset();
  EXPECT_GE(cache->GetPinnedUsage(), mem_used);
  EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);

  handle_2.reset();
  // The CacheReservationManager object is now deallocated since all the handles
  // and its original pointer are gone
  mem_used = mem_used - incremental_mem_used_handle_2;
  EXPECT_EQ(mem_used, 0);
  EXPECT_EQ(cache->GetPinnedUsage(), mem_used);
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack trace handler, lets GoogleTest
// consume its command-line flags, then runs every registered test.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

969
cache/cache_test.cc vendored

@ -1,969 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "rocksdb/cache.h"
#include <forward_list>
#include <functional>
#include <iostream>
#include <string>
#include <vector>
#include "cache/lru_cache.h"
#include "cache/typed_cache.h"
#include "port/stack_trace.h"
#include "test_util/secondary_cache_test_util.h"
#include "test_util/testharness.h"
#include "util/coding.h"
#include "util/string_util.h"
// HyperClockCache only supports 16-byte keys, so some of the tests
// originally written for LRUCache do not work on the other caches.
// Those tests were adapted to use 16-byte keys. We kept the original ones.
// TODO: Remove the original tests if they ever become unused.
namespace ROCKSDB_NAMESPACE {
namespace {
// Conversions between numeric keys/values and the types expected by Cache.
// Builds a 16-byte cache key from `k`: the 4 bytes written by PutFixed32
// followed by 12 bytes of 'a' padding.
std::string EncodeKey16Bytes(int k) {
  std::string key;
  PutFixed32(&key, k);
  // Pad up to the 16 bytes required for the output.
  key.append(12, 'a');
  return key;
}
// Recovers the integer from a 16-byte key; only the first 4 bytes are read,
// the remaining bytes are ignored.
int DecodeKey16Bytes(const Slice& k) {
  assert(k.size() == 16);
  const char* const bytes = k.data();
  return DecodeFixed32(bytes);
}
// Builds a 4-byte cache key holding `k` as written by PutFixed32.
std::string EncodeKey32Bits(int k) {
  std::string key;
  PutFixed32(&key, k);
  return key;
}
// Recovers the integer from a 4-byte key.
int DecodeKey32Bits(const Slice& k) {
  assert(k.size() == 4);
  const char* const bytes = k.data();
  return DecodeFixed32(bytes);
}
// Packs an integer into the cache's value type by reinterpreting its bits;
// nothing is allocated and the result must never be dereferenced.
Cache::ObjectPtr EncodeValue(uintptr_t v) {
  const Cache::ObjectPtr packed = reinterpret_cast<Cache::ObjectPtr>(v);
  return packed;
}
// Inverse of EncodeValue: reads the pointer's bits back as an integer and
// narrows the result to int.
int DecodeValue(void* v) {
  const auto raw_bits = reinterpret_cast<uintptr_t>(v);
  return static_cast<int>(raw_bits);
}
// Helper for entries whose values need no cleanup: the deleter does nothing.
const Cache::CacheItemHelper kDumbHelper{
    CacheEntryRole::kMisc, [](Cache::ObjectPtr, MemoryAllocator*) {}};
// Helper whose deleter treats the stored value as a Cache* and erases the key
// "foo" from that cache.
const Cache::CacheItemHelper kEraseOnDeleteHelper1{
    CacheEntryRole::kMisc,
    [](Cache::ObjectPtr value, MemoryAllocator* /*alloc*/) {
      static_cast<Cache*>(value)->Erase("foo");
    }};
// Same as kEraseOnDeleteHelper1 but erases the 16-byte encoding of key 1234.
const Cache::CacheItemHelper kEraseOnDeleteHelper2{
    CacheEntryRole::kMisc,
    [](Cache::ObjectPtr value, MemoryAllocator* /*alloc*/) {
      static_cast<Cache*>(value)->Erase(EncodeKey16Bytes(1234));
    }};
} // anonymous namespace
// Parameterized fixture shared by the cache tests. It owns two caches of
// different size (cache_ and cache2_), records every value passed to the
// deleter in deleted_values_, and offers int-keyed Insert/Lookup/Erase
// wrappers so tests do not deal with key encoding themselves.
class CacheTest : public testing::Test,
                  public secondary_cache_test_util::WithCacheTypeParam {
 public:
  // Fixture instance that Deleter() reports to; set by the constructor.
  static CacheTest* current_;
  // Copy of the test parameter, set by the constructor.
  static std::string type_;

  // Deleter installed through kHelper: logs the decoded value on the current
  // fixture instead of freeing anything.
  static void Deleter(Cache::ObjectPtr v, MemoryAllocator*) {
    current_->deleted_values_.push_back(DecodeValue(v));
  }
  static const Cache::CacheItemHelper kHelper;

  static const int kCacheSize = 1000;
  static const int kNumShardBits = 4;

  static const int kCacheSize2 = 100;
  static const int kNumShardBits2 = 2;

  std::vector<int> deleted_values_;
  std::shared_ptr<Cache> cache_;
  std::shared_ptr<Cache> cache2_;

  CacheTest()
      : cache_(NewCache(kCacheSize, kNumShardBits, false)),
        cache2_(NewCache(kCacheSize2, kNumShardBits2, false)) {
    current_ = this;
    type_ = GetParam();
  }

  ~CacheTest() override = default;

  // These functions encode/decode keys in tests cases that use
  // int keys.
  // Currently, HyperClockCache requires keys to be 16B long, whereas
  // LRUCache doesn't, so the encoding depends on the cache type.
  std::string EncodeKey(int k) {
    if (GetParam() == kHyperClock) {
      return EncodeKey16Bytes(k);
    }
    return EncodeKey32Bits(k);
  }

  int DecodeKey(const Slice& k) {
    if (GetParam() == kHyperClock) {
      return DecodeKey16Bytes(k);
    }
    return DecodeKey32Bits(k);
  }

  // Returns the decoded value stored under `key` in `cache`, or -1 when the
  // key is absent. The handle obtained for the lookup is released before
  // returning. The cache is taken by const reference: these helpers only
  // borrow it, so no extra shared_ptr owner is created per call.
  int Lookup(const std::shared_ptr<Cache>& cache, int key) {
    Cache::Handle* handle = cache->Lookup(EncodeKey(key));
    if (handle == nullptr) {
      return -1;
    }
    const int value = DecodeValue(cache->Value(handle));
    cache->Release(handle);
    return value;
  }

  // Inserts key -> value with the given charge at HIGH priority, using
  // kHelper so that destruction is recorded in deleted_values_.
  void Insert(const std::shared_ptr<Cache>& cache, int key, int value,
              int charge = 1) {
    EXPECT_OK(cache->Insert(EncodeKey(key), EncodeValue(value), &kHelper,
                            charge, /*handle*/ nullptr, Cache::Priority::HIGH));
  }

  void Erase(const std::shared_ptr<Cache>& cache, int key) {
    cache->Erase(EncodeKey(key));
  }

  // Convenience wrappers operating on cache_.
  int Lookup(int key) { return Lookup(cache_, key); }

  void Insert(int key, int value, int charge = 1) {
    Insert(cache_, key, value, charge);
  }

  void Erase(int key) { Erase(cache_, key); }

  // Convenience wrappers operating on cache2_.
  int Lookup2(int key) { return Lookup(cache2_, key); }

  void Insert2(int key, int value, int charge = 1) {
    Insert(cache2_, key, value, charge);
  }

  void Erase2(int key) { Erase(cache2_, key); }
};
// Out-of-class definitions of CacheTest's static members.
// kHelper routes entry destruction to CacheTest::Deleter.
const Cache::CacheItemHelper CacheTest::kHelper{CacheEntryRole::kMisc,
                                                &CacheTest::Deleter};
CacheTest* CacheTest::current_ = nullptr;
std::string CacheTest::type_{};
// Derived fixture that adds nothing to CacheTest.
// NOTE(review): presumably exists so LRU-only tests can be registered under
// their own suite name - confirm against the instantiation site.
class LRUCacheTest : public CacheTest {};
// Checks GetUsage() on two caches of equal capacity: one that does not charge
// metadata and one that fully charges it. First with few entries (everything
// fits), then with enough entries to overload the caches.
TEST_P(CacheTest, UsageTest) {
  const auto type = GetParam();
  const bool is_lru = (type == kLRU);
  const bool is_hyper_clock = (type == kHyperClock);

  // cache is std::shared_ptr and will be automatically cleaned up.
  const size_t kCapacity = 100000;
  auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata);
  auto precise_cache = NewCache(kCapacity, 0, false, kFullChargeCacheMetadata);
  ASSERT_EQ(0, cache->GetUsage());
  size_t baseline_meta_usage = precise_cache->GetUsage();
  if (!is_hyper_clock) {
    ASSERT_EQ(0, baseline_meta_usage);
  }

  size_t expected_usage = 0;
  char value[10] = "abcdef";
  // make sure everything will be cached
  for (int i = 1; i < 100; ++i) {
    const std::string key = is_lru ? std::string(i, 'a') : EncodeKey(i);
    const auto kv_size = key.size() + 5;
    ASSERT_OK(cache->Insert(key, value, &kDumbHelper, kv_size));
    ASSERT_OK(precise_cache->Insert(key, value, &kDumbHelper, kv_size));
    expected_usage += kv_size;
    ASSERT_EQ(expected_usage, cache->GetUsage());
    if (is_hyper_clock) {
      ASSERT_EQ(baseline_meta_usage + expected_usage,
                precise_cache->GetUsage());
    } else {
      ASSERT_LT(expected_usage, precise_cache->GetUsage());
    }
  }

  // Dropping all unreferenced entries returns both caches to their baseline.
  cache->EraseUnRefEntries();
  precise_cache->EraseUnRefEntries();
  ASSERT_EQ(0, cache->GetUsage());
  ASSERT_EQ(baseline_meta_usage, precise_cache->GetUsage());

  // make sure the cache will be overloaded
  for (size_t i = 1; i < kCapacity; ++i) {
    const std::string key =
        is_lru ? std::to_string(i) : EncodeKey(static_cast<int>(1000 + i));
    ASSERT_OK(cache->Insert(key, value, &kDumbHelper, key.size() + 5));
    ASSERT_OK(precise_cache->Insert(key, value, &kDumbHelper, key.size() + 5));
  }

  // the usage should be close to the capacity
  ASSERT_GT(kCapacity, cache->GetUsage());
  ASSERT_GT(kCapacity, precise_cache->GetUsage());
  ASSERT_LT(kCapacity * 0.95, cache->GetUsage());
  if (is_hyper_clock) {
    // estimated value size of 1 is weird for clock cache, because
    // almost all of the capacity will be used for metadata, and due to only
    // using power of 2 table sizes, we might hit strict occupancy limit
    // before hitting capacity limit.
    ASSERT_LT(kCapacity * 0.80, precise_cache->GetUsage());
  } else {
    ASSERT_LT(kCapacity * 0.95, precise_cache->GetUsage());
  }
}
// TODO: This test takes longer than expected on ClockCache because the
// value size estimate given at construction is too sloppy. Fix this.
// Why is it so slow? The cache is constructed with an estimate of 1, but
// then the charge is claimed to be 21. This will cause the hash table
// to be extremely sparse, which in turn means clock needs to scan too
// many slots to find victims.
// Checks GetPinnedUsage() on two caches of equal capacity: one that does not
// charge metadata (`cache`) and one that fully charges it (`precise_cache`).
// `pinned_usage` is the test's own running total of the charges of entries
// that currently have at least one outstanding handle; it must equal the
// non-charging cache's pinned usage and stay below the precise cache's.
TEST_P(CacheTest, PinnedUsageTest) {
auto type = GetParam();
// cache is std::shared_ptr and will be automatically cleaned up.
const size_t kCapacity = 200000;
auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata);
auto precise_cache = NewCache(kCapacity, 8, false, kFullChargeCacheMetadata);
size_t baseline_meta_usage = precise_cache->GetUsage();
if (type != kHyperClock) {
ASSERT_EQ(0, baseline_meta_usage);
}
size_t pinned_usage = 0;
char value[10] = "abcdef";
// Handles still held by the test; all are released near the end.
std::forward_list<Cache::Handle*> unreleased_handles;
std::forward_list<Cache::Handle*> unreleased_handles_in_precise_cache;
// Add entries. Unpin some of them after insertion. Then, pin some of them
// again. Check GetPinnedUsage().
for (int i = 1; i < 100; ++i) {
std::string key;
if (type == kLRU) {
key = std::string(i, 'a');
} else {
key = EncodeKey(i);
}
auto kv_size = key.size() + 5;
Cache::Handle* handle;
Cache::Handle* handle_in_precise_cache;
// Inserting with a handle out-parameter leaves the new entry pinned.
ASSERT_OK(cache->Insert(key, value, &kDumbHelper, kv_size, &handle));
// Plain assert: this check is compiled out when NDEBUG is defined.
assert(handle);
ASSERT_OK(precise_cache->Insert(key, value, &kDumbHelper, kv_size,
&handle_in_precise_cache));
assert(handle_in_precise_cache);
pinned_usage += kv_size;
ASSERT_EQ(pinned_usage, cache->GetPinnedUsage());
ASSERT_LT(pinned_usage, precise_cache->GetPinnedUsage());
// Even i: unpin right away. Odd i: keep the handle until the end.
if (i % 2 == 0) {
cache->Release(handle);
precise_cache->Release(handle_in_precise_cache);
pinned_usage -= kv_size;
ASSERT_EQ(pinned_usage, cache->GetPinnedUsage());
ASSERT_LT(pinned_usage, precise_cache->GetPinnedUsage());
} else {
unreleased_handles.push_front(handle);
unreleased_handles_in_precise_cache.push_front(handle_in_precise_cache);
}
// Multiples of 3: take one more handle through Lookup and keep it.
if (i % 3 == 0) {
unreleased_handles.push_front(cache->Lookup(key));
auto x = precise_cache->Lookup(key);
assert(x);
unreleased_handles_in_precise_cache.push_front(x);
// If i % 2 == 0, then the entry was unpinned before Lookup, so pinned
// usage increased
if (i % 2 == 0) {
pinned_usage += kv_size;
}
ASSERT_EQ(pinned_usage, cache->GetPinnedUsage());
ASSERT_LT(pinned_usage, precise_cache->GetPinnedUsage());
}
}
auto precise_cache_pinned_usage = precise_cache->GetPinnedUsage();
ASSERT_LT(pinned_usage, precise_cache_pinned_usage);
// check that overloading the cache does not change the pinned usage
for (size_t i = 1; i < 2 * kCapacity; ++i) {
std::string key;
if (type == kLRU) {
key = std::to_string(i);
} else {
key = EncodeKey(static_cast<int>(1000 + i));
}
// No handle is requested here, so these inserts pin nothing.
ASSERT_OK(cache->Insert(key, value, &kDumbHelper, key.size() + 5));
ASSERT_OK(precise_cache->Insert(key, value, &kDumbHelper, key.size() + 5));
}
ASSERT_EQ(pinned_usage, cache->GetPinnedUsage());
ASSERT_EQ(precise_cache_pinned_usage, precise_cache->GetPinnedUsage());
// Erasing unreferenced entries must not touch the pinned ones either.
cache->EraseUnRefEntries();
precise_cache->EraseUnRefEntries();
ASSERT_EQ(pinned_usage, cache->GetPinnedUsage());
ASSERT_EQ(precise_cache_pinned_usage, precise_cache->GetPinnedUsage());
// release handles for pinned entries to prevent memory leaks
for (auto handle : unreleased_handles) {
cache->Release(handle);
}
for (auto handle : unreleased_handles_in_precise_cache) {
precise_cache->Release(handle);
}
ASSERT_EQ(0, cache->GetPinnedUsage());
ASSERT_EQ(0, precise_cache->GetPinnedUsage());
// With nothing pinned any more, a final erase brings total usage back to
// the baseline measured at the start.
cache->EraseUnRefEntries();
precise_cache->EraseUnRefEntries();
ASSERT_EQ(0, cache->GetUsage());
ASSERT_EQ(baseline_meta_usage, precise_cache->GetUsage());
}
// Basic lookups: absent keys miss, inserted keys hit, and re-inserting an
// existing key destroys exactly one of the two values (which one depends on
// the cache type).
TEST_P(CacheTest, HitAndMiss) {
  const bool is_hyper_clock = (GetParam() == kHyperClock);

  ASSERT_EQ(-1, Lookup(100));

  Insert(100, 101);
  ASSERT_EQ(101, Lookup(100));
  ASSERT_EQ(-1, Lookup(200));
  ASSERT_EQ(-1, Lookup(300));

  Insert(200, 201);
  ASSERT_EQ(101, Lookup(100));
  ASSERT_EQ(201, Lookup(200));
  ASSERT_EQ(-1, Lookup(300));

  // Insert a second value under an existing key.
  Insert(100, 102);
  // ClockCache usually doesn't overwrite on Insert
  const int surviving_value = is_hyper_clock ? 101 : 102;
  const int destroyed_value = is_hyper_clock ? 102 : 101;
  ASSERT_EQ(surviving_value, Lookup(100));
  ASSERT_EQ(201, Lookup(200));
  ASSERT_EQ(-1, Lookup(300));

  ASSERT_EQ(1U, deleted_values_.size());
  ASSERT_EQ(destroyed_value, deleted_values_[0]);
}
// Inserting the same key twice must leave the second value visible.
TEST_P(CacheTest, InsertSameKey) {
  const bool overwrite_guaranteed = (GetParam() != kHyperClock);
  if (!overwrite_guaranteed) {
    ROCKSDB_GTEST_BYPASS(
        "ClockCache doesn't guarantee Insert overwrite same key.");
    return;
  }

  Insert(1, 1);
  Insert(1, 2);
  ASSERT_EQ(2, Lookup(1));
}
// Verifies that Erase() removes only the targeted key, records exactly one
// deleted value for it, and leaves other entries untouched.
TEST_P(CacheTest, Erase) {
  // Erasing a key that was never inserted must not record any deletion.
  Erase(200);
  ASSERT_EQ(0U, deleted_values_.size());
  Insert(100, 101);
  Insert(200, 201);
  // Erase one of the two entries: it disappears, the other stays, and its
  // value shows up in deleted_values_.
  Erase(100);
  ASSERT_EQ(-1, Lookup(100));
  ASSERT_EQ(201, Lookup(200));
  ASSERT_EQ(1U, deleted_values_.size());
  ASSERT_EQ(101, deleted_values_[0]);
  // Erasing the same key again changes nothing: the deletion count stays at 1.
  Erase(100);
  ASSERT_EQ(-1, Lookup(100));
  ASSERT_EQ(201, Lookup(200));
  ASSERT_EQ(1U, deleted_values_.size());
}
// An entry that still has an outstanding handle must stay alive (and keep
// counting towards usage) even after it has been overwritten or erased.
TEST_P(CacheTest, EntriesArePinned) {
  if (GetParam() == kHyperClock) {
    ROCKSDB_GTEST_BYPASS(
        "ClockCache doesn't guarantee Insert overwrite same key.");
    return;
  }

  // Pin the first version of key 100.
  Insert(100, 101);
  Cache::Handle* old_version = cache_->Lookup(EncodeKey(100));
  ASSERT_EQ(101, DecodeValue(cache_->Value(old_version)));
  ASSERT_EQ(1U, cache_->GetUsage());

  // Overwrite the key and pin the second version too. Both versions are held,
  // so nothing has been deleted and usage covers both.
  Insert(100, 102);
  Cache::Handle* new_version = cache_->Lookup(EncodeKey(100));
  ASSERT_EQ(102, DecodeValue(cache_->Value(new_version)));
  ASSERT_EQ(0U, deleted_values_.size());
  ASSERT_EQ(2U, cache_->GetUsage());

  // Dropping the handle on the overwritten version deletes it.
  cache_->Release(old_version);
  ASSERT_EQ(1U, deleted_values_.size());
  ASSERT_EQ(101, deleted_values_[0]);
  ASSERT_EQ(1U, cache_->GetUsage());

  // Erasing hides the key from lookups, but the pinned value is not deleted
  // yet and still counts towards usage.
  Erase(100);
  ASSERT_EQ(-1, Lookup(100));
  ASSERT_EQ(1U, deleted_values_.size());
  ASSERT_EQ(1U, cache_->GetUsage());

  // Releasing the last handle finally deletes the second version.
  cache_->Release(new_version);
  ASSERT_EQ(2U, deleted_values_.size());
  ASSERT_EQ(102, deleted_values_[1]);
  ASSERT_EQ(0U, cache_->GetUsage());
}
// A key that is looked up after every insertion must survive heavy churn,
// while a key that is never touched again gets evicted.
TEST_P(CacheTest, EvictionPolicy) {
  Insert(100, 101);
  Insert(200, 201);

  // Overfill the cache while repeatedly touching key 100.
  const int churn_count = 2 * kCacheSize;
  for (int n = 0; n < churn_count; ++n) {
    Insert(1000 + n, 2000 + n);
    ASSERT_EQ(101, Lookup(100));
  }

  ASSERT_EQ(101, Lookup(100));
  ASSERT_EQ(-1, Lookup(200));
}
// An entry with outstanding external references must survive churn; once the
// last reference is released, the same churn must push it out.
TEST_P(CacheTest, ExternalRefPinsEntries) {
  Insert(100, 101);
  Cache::Handle* pinned = cache_->Lookup(EncodeKey(100));
  ASSERT_TRUE(cache_->Ref(pinned));
  ASSERT_EQ(101, DecodeValue(cache_->Value(pinned)));
  ASSERT_EQ(1U, cache_->GetUsage());

  // Clock cache is even more stateful and needs more churn to evict.
  const bool needs_extra_churn = (GetParam() == kHyperClock);

  for (int round = 0; round < 3; ++round) {
    if (round > 0) {
      // Round 1 undoes the Ref(), round 2 undoes the Lookup(). After round 2
      // no external reference remains, so the churn below should evict the
      // entry.
      cache_->Release(pinned);
    }

    // Twice the cache size (plus slack): the usage bit in block cache keeps
    // key 100 alive through the first kCacheSize insertions.
    for (int n = 0; n < 2 * kCacheSize + 100; ++n) {
      Insert(1000 + n, 2000 + n);
    }
    if (needs_extra_churn) {
      for (int n = 0; n < kCacheSize; ++n) {
        Insert(11000 + n, 11000 + n);
      }
    }

    // While at least one external reference is outstanding the entry stays.
    if (round < 2) {
      ASSERT_EQ(101, Lookup(100));
    }
  }

  ASSERT_EQ(-1, Lookup(100));
}
// Entries that hold an extra reference must survive massive over-insertion,
// whereas unreferenced entries inserted before and after them are evicted.
TEST_P(CacheTest, EvictionPolicyRef) {
  constexpr int kPerGroup = 4;

  // Keys 100..103: inserted, never referenced.
  for (int k = 0; k < kPerGroup; ++k) {
    Insert(100 + k, 101 + k);
  }

  // Keys 200..203: inserted and then pinned through a lookup handle.
  for (int k = 0; k < kPerGroup; ++k) {
    Insert(200 + k, 101 + k);
  }
  Cache::Handle* pinned[kPerGroup];
  for (int k = 0; k < kPerGroup; ++k) {
    pinned[k] = cache_->Lookup(EncodeKey(200 + k));
  }

  // Keys 300..303: inserted, never referenced.
  for (int k = 0; k < kPerGroup; ++k) {
    Insert(300 + k, 101 + k);
  }

  // Insert entries much more than cache capacity.
  for (int n = 0; n < 100 * kCacheSize; ++n) {
    Insert(1000 + n, 2000 + n);
  }

  // The unreferenced groups are gone ...
  for (int k = 0; k < kPerGroup; ++k) {
    ASSERT_EQ(-1, Lookup(100 + k));
  }
  for (int k = 0; k < kPerGroup; ++k) {
    ASSERT_EQ(-1, Lookup(300 + k));
  }
  // ... while the pinned group is still present with its original values.
  for (int k = 0; k < kPerGroup; ++k) {
    ASSERT_EQ(101 + k, Lookup(200 + k));
  }

  // Cleaning up all the handles.
  for (int k = 0; k < kPerGroup; ++k) {
    cache_->Release(pinned[k]);
  }
}
// Inserting an item larger than the capacity triggers eviction on an empty
// cache; the insert must still report success.
TEST_P(CacheTest, EvictEmptyCache) {
  std::string oversized_key;
  if (GetParam() == kLRU) {
    oversized_key = "foo";
  } else {
    oversized_key = EncodeKey(1000);
  }

  auto cache = NewCache(1, 0, false);
  ASSERT_OK(cache->Insert(oversized_key, nullptr, &kDumbHelper, 10));
}
// Uses a helper whose deleter erases an item from the cache, i.e. re-enters
// the cache while an Erase() is already in progress.
TEST_P(CacheTest, EraseFromDeleter) {
  const bool is_lru = (GetParam() == kLRU);
  std::shared_ptr<Cache> cache = NewCache(10, 0, false);

  // Key format and erase-on-delete helper are selected per cache type.
  std::string first_key;
  std::string second_key;
  const Cache::CacheItemHelper* reentrant_helper;
  if (is_lru) {
    first_key = "foo";
    second_key = "bar";
    reentrant_helper = &kEraseOnDeleteHelper1;
  } else {
    first_key = EncodeKey(1234);
    second_key = EncodeKey(5678);
    reentrant_helper = &kEraseOnDeleteHelper2;
  }

  ASSERT_OK(cache->Insert(first_key, nullptr, &kDumbHelper, 1));
  // The second entry carries the cache itself as its value.
  ASSERT_OK(cache->Insert(second_key, cache.get(), reentrant_helper, 1));

  cache->Erase(second_key);

  // Afterwards neither key can be found.
  ASSERT_EQ(nullptr, cache->Lookup(first_key));
  ASSERT_EQ(nullptr, cache->Lookup(second_key));
}
// An erased key stays invisible to lookups while handles obtained before the
// erase are still outstanding, and after they are released one by one.
TEST_P(CacheTest, ErasedHandleState) {
  // Insert a key and take two handles on it; both refer to the same entry.
  Insert(100, 1000);
  Cache::Handle* first_ref = cache_->Lookup(EncodeKey(100));
  Cache::Handle* second_ref = cache_->Lookup(EncodeKey(100));
  ASSERT_EQ(first_ref, second_ref);
  ASSERT_EQ(DecodeValue(cache_->Value(first_ref)), 1000);
  ASSERT_EQ(DecodeValue(cache_->Value(second_ref)), 1000);

  // Once erased, the key can no longer be found.
  Erase(100);
  ASSERT_EQ(-1, Lookup(100));

  // Releasing one handle does not bring it back.
  cache_->Release(first_ref);
  ASSERT_EQ(-1, Lookup(100));

  cache_->Release(second_ref);
}
// Inserts a mix of light and heavy entries worth twice the cache size, then
// sums the weight of everything still cached. That sum must not exceed the
// capacity by more than 10%.
TEST_P(CacheTest, HeavyEntries) {
  const int kLight = 1;
  const int kHeavy = 10;
  // Odd indices are light, even indices are heavy.
  const auto weight_of = [=](int idx) { return (idx & 1) ? kLight : kHeavy; };

  int inserted_count = 0;
  for (int total_weight = 0; total_weight < 2 * kCacheSize;
       ++inserted_count) {
    const int w = weight_of(inserted_count);
    Insert(inserted_count, 1000 + inserted_count, w);
    total_weight += w;
  }

  int cached_weight = 0;
  for (int i = 0; i < inserted_count; ++i) {
    const int found = Lookup(i);
    if (found < 0) {
      continue;  // not cached any more
    }
    cached_weight += weight_of(i);
    ASSERT_EQ(1000 + i, found);
  }
  ASSERT_LE(cached_weight, kCacheSize + kCacheSize / 10);
}
// Two consecutive NewId() calls must not return the same id.
TEST_P(CacheTest, NewId) {
  const uint64_t first_id = cache_->NewId();
  const uint64_t second_id = cache_->NewId();
  ASSERT_NE(first_id, second_id);
}
// Release(handle, true) on the only reference must erase the entry right
// away, which is observable through the recorded deleted values.
TEST_P(CacheTest, ReleaseAndErase) {
  std::shared_ptr<Cache> cache = NewCache(5, 0, false);

  Cache::Handle* inserted;
  Status insert_status = cache->Insert(EncodeKey(100), EncodeValue(100),
                                       &kHelper, 1, &inserted);
  ASSERT_TRUE(insert_status.ok());
  ASSERT_EQ(5U, cache->GetCapacity());
  ASSERT_EQ(1U, cache->GetUsage());
  ASSERT_EQ(0U, deleted_values_.size());

  auto was_erased = cache->Release(inserted, true);
  ASSERT_TRUE(was_erased);
  // This tests that deleter has been called.
  ASSERT_EQ(1U, deleted_values_.size());
}
// A plain Release(handle) on a cache with spare capacity must keep the entry:
// it reports "not erased" and no deleted value is recorded.
TEST_P(CacheTest, ReleaseWithoutErase) {
  std::shared_ptr<Cache> cache = NewCache(5, 0, false);

  Cache::Handle* inserted;
  Status insert_status = cache->Insert(EncodeKey(100), EncodeValue(100),
                                       &kHelper, 1, &inserted);
  ASSERT_TRUE(insert_status.ok());
  ASSERT_EQ(5U, cache->GetCapacity());
  ASSERT_EQ(1U, cache->GetUsage());
  ASSERT_EQ(0U, deleted_values_.size());

  auto was_erased = cache->Release(inserted);
  ASSERT_FALSE(was_erased);
  // This tests that deleter is not called. When cache has free capacity it is
  // not expected to immediately erase the released items.
  ASSERT_EQ(0U, deleted_values_.size());
}
namespace {

// Minimal cached object used by the typed-cache tests below: it wraps a
// single int and is tagged with the kMisc cache entry role.
class Value {
 public:
  static constexpr auto kCacheEntryRole = CacheEntryRole::kMisc;

  explicit Value(int v) : v_{v} {}

  int v_;
};

// Typed view over a shared cache that stores Value objects, and its handle.
using SharedCache = BasicTypedSharedCacheInterface<Value>;
using TypedHandle = SharedCache::TypedHandle;

}  // namespace
// Exercises SetCapacity() in both directions: growing the capacity must leave
// usage untouched, and shrinking it below the current usage must bring usage
// down to the new capacity once enough entries are unpinned.
TEST_P(CacheTest, SetCapacity) {
  auto type = GetParam();
  if (type == kHyperClock) {
    ROCKSDB_GTEST_BYPASS(
        "FastLRUCache and HyperClockCache don't support arbitrary capacity "
        "adjustments.");
    return;
  }
  // test1: increase capacity
  // lets create a cache with capacity 5,
  // then, insert 5 elements, then increase capacity
  // to 10, returned capacity should be 10, usage=5
  SharedCache cache{NewCache(5, 0, false)};
  std::vector<TypedHandle*> handles(10);
  // Insert 5 entries, but not releasing.
  for (int i = 0; i < 5; i++) {
    std::string key = EncodeKey(i + 1);
    Status s = cache.Insert(key, new Value(i + 1), 1, &handles[i]);
    ASSERT_TRUE(s.ok());
  }
  ASSERT_EQ(5U, cache.get()->GetCapacity());
  ASSERT_EQ(5U, cache.get()->GetUsage());
  cache.get()->SetCapacity(10);
  ASSERT_EQ(10U, cache.get()->GetCapacity());
  ASSERT_EQ(5U, cache.get()->GetUsage());

  // test2: decrease capacity
  // insert 5 more elements to cache, then release 5,
  // then decrease capacity to 7, final capacity should be 7
  // and usage should be 7
  for (int i = 5; i < 10; i++) {
    std::string key = EncodeKey(i + 1);
    Status s = cache.Insert(key, new Value(i + 1), 1, &handles[i]);
    ASSERT_TRUE(s.ok());
  }
  ASSERT_EQ(10U, cache.get()->GetCapacity());
  ASSERT_EQ(10U, cache.get()->GetUsage());
  // Releasing the first five handles alone does not change usage: the cache
  // is exactly at capacity, not over it.
  for (int i = 0; i < 5; i++) {
    cache.Release(handles[i]);
  }
  ASSERT_EQ(10U, cache.get()->GetCapacity());
  ASSERT_EQ(10U, cache.get()->GetUsage());
  cache.get()->SetCapacity(7);
  // Unsigned literals, consistent with the checks above (both getters are
  // compared against unsigned values throughout this test).
  ASSERT_EQ(7U, cache.get()->GetCapacity());
  ASSERT_EQ(7U, cache.get()->GetUsage());

  // release remaining 5 to keep valgrind happy
  for (int i = 5; i < 10; i++) {
    cache.Release(handles[i]);
  }

  // Make sure this doesn't crash or upset ASAN/valgrind
  cache.get()->DisownData();
}
// Checks the strict-capacity-limit flag, both when toggled at runtime and
// when set at construction: with the flag off, inserts beyond capacity
// succeed; with it on, an insert that requests a handle fails with
// MemoryLimit, while an insert without a handle reports OK but the entry is
// not retained.
TEST_P(LRUCacheTest, SetStrictCapacityLimit) {
  // test1: set the flag to false. Insert more keys than capacity. See if they
  // all go through.
  SharedCache cache{NewCache(5, 0, false)};
  std::vector<TypedHandle*> handles(10);
  Status s;
  for (int i = 0; i < 10; i++) {
    std::string key = EncodeKey(i + 1);
    s = cache.Insert(key, new Value(i + 1), 1, &handles[i]);
    ASSERT_OK(s);
    ASSERT_NE(nullptr, handles[i]);
  }
  // Usage is compared against unsigned literals, consistent with the other
  // usage/capacity checks in this file.
  ASSERT_EQ(10U, cache.get()->GetUsage());

  // test2: set the flag to true. Insert and check if it fails.
  std::string extra_key = EncodeKey(100);
  Value* extra_value = new Value(0);
  cache.get()->SetStrictCapacityLimit(true);
  TypedHandle* handle;
  s = cache.Insert(extra_key, extra_value, 1, &handle);
  ASSERT_TRUE(s.IsMemoryLimit());
  ASSERT_EQ(nullptr, handle);
  ASSERT_EQ(10U, cache.get()->GetUsage());

  for (int i = 0; i < 10; i++) {
    cache.Release(handles[i]);
  }

  // test3: init with flag being true.
  SharedCache cache2{NewCache(5, 0, true)};
  for (int i = 0; i < 5; i++) {
    std::string key = EncodeKey(i + 1);
    s = cache2.Insert(key, new Value(i + 1), 1, &handles[i]);
    ASSERT_OK(s);
    ASSERT_NE(nullptr, handles[i]);
  }
  s = cache2.Insert(extra_key, extra_value, 1, &handle);
  ASSERT_TRUE(s.IsMemoryLimit());
  ASSERT_EQ(nullptr, handle);
  // test insert without handle
  s = cache2.Insert(extra_key, extra_value, 1);
  // As if the key had been inserted into the cache but got evicted
  // immediately.
  ASSERT_OK(s);
  ASSERT_EQ(5U, cache2.get()->GetUsage());
  ASSERT_EQ(nullptr, cache2.Lookup(extra_key));

  for (int i = 0; i < 5; i++) {
    cache2.Release(handles[i]);
  }
}
// Pins one more entry than the cache can hold, verifies that the cache runs
// over capacity while everything is pinned, and then checks how usage drops
// back once all handles have been released.
TEST_P(CacheTest, OverCapacity) {
  size_t n = 10;
  const int entry_count = static_cast<int>(n + 1);

  // a LRUCache with n entries and one shard only
  SharedCache cache{NewCache(n, 0, false)};
  std::vector<TypedHandle*> pinned(n + 1);

  // Insert n+1 entries, but not releasing.
  for (int idx = 0; idx < entry_count; ++idx) {
    std::string key = EncodeKey(idx + 1);
    Status s = cache.Insert(key, new Value(idx + 1), 1, &pinned[idx]);
    ASSERT_TRUE(s.ok());
  }

  // Guess what's in the cache now? Every entry must be findable.
  for (int idx = 0; idx < entry_count; ++idx) {
    std::string key = EncodeKey(idx + 1);
    auto found = cache.Lookup(key);
    ASSERT_TRUE(found != nullptr);
    if (found) cache.Release(found);
  }

  // the cache is over capacity since nothing could be evicted
  ASSERT_EQ(n + 1U, cache.get()->GetUsage());
  for (int idx = 0; idx < entry_count; ++idx) {
    cache.Release(pinned[idx]);
  }

  if (GetParam() != kHyperClock) {
    // LRUCache checks for over-capacity in Release.
    // cache is exactly at capacity now with minimal eviction
    ASSERT_EQ(n, cache.get()->GetUsage());

    // element 0 is evicted and the rest is there
    // This is consistent with the LRU policy since the element 0
    // was released first
    for (int idx = 0; idx < entry_count; ++idx) {
      std::string key = EncodeKey(idx + 1);
      auto found = cache.Lookup(key);
      if (found) {
        ASSERT_NE(static_cast<size_t>(idx), 0U);
        cache.Release(found);
      } else {
        ASSERT_EQ(static_cast<size_t>(idx), 0U);
      }
    }
  } else {
    // Make sure eviction is triggered.
    ASSERT_OK(cache.Insert(EncodeKey(-1), nullptr, 1, &pinned[0]));

    // cache is under capacity now since elements were released
    ASSERT_GE(n, cache.get()->GetUsage());

    // clean up
    cache.Release(pinned[0]);
  }
}
// ApplyToAllEntries() must visit every inserted entry exactly once and report
// its key, value and charge. Entries are compared as "key,value,charge"
// strings after sorting, so visiting order does not matter.
TEST_P(CacheTest, ApplyToAllEntriesTest) {
  std::vector<std::string> visited;
  const auto record_entry = [&](const Slice& key, Cache::ObjectPtr value,
                                size_t charge,
                                const Cache::CacheItemHelper* helper) {
    const std::string key_text = std::to_string(DecodeKey(key));
    const std::string value_text = std::to_string(DecodeValue(value));
    const std::string charge_text = std::to_string(charge);
    visited.push_back(key_text + "," + value_text + "," + charge_text);
    assert(helper == &CacheTest::kHelper);
  };

  std::vector<std::string> expected;
  visited.clear();
  for (int k = 0; k < 10; ++k) {
    Insert(k, k * 2, k + 1);
    expected.push_back(std::to_string(k) + "," + std::to_string(k * 2) + "," +
                       std::to_string(k + 1));
  }

  cache_->ApplyToAllEntries(record_entry, /*opts*/ {});

  std::sort(expected.begin(), expected.end());
  std::sort(visited.begin(), visited.end());
  ASSERT_EQ(expected.size(), visited.size());
  for (size_t pos = 0; pos < expected.size(); ++pos) {
    EXPECT_EQ(expected[pos], visited[pos]);
  }
}
// Runs ApplyToAllEntries() on a second thread while the main thread keeps
// inserting, and checks that the pre-existing ("special") entries are each
// reported exactly once and that nothing was evicted.
TEST_P(CacheTest, ApplyToAllEntriesDuringResize) {
  // This is a mini-stress test of ApplyToAllEntries, to ensure
  // items in the cache that are neither added nor removed
  // during ApplyToAllEntries are counted exactly once.
  // Insert some entries that we expect to be seen exactly once
  // during iteration.
  // The two charges differ so the callback can tell special entries apart
  // from the ones added concurrently.
  constexpr int kSpecialCharge = 2;
  constexpr int kNotSpecialCharge = 1;
  constexpr int kSpecialCount = 100;
  size_t expected_usage = 0;
  for (int i = 0; i < kSpecialCount; ++i) {
    Insert(i, i * 2, kSpecialCharge);
    expected_usage += kSpecialCharge;
  }
  // For callback
  // Written only from apply_thread; read on this thread after join().
  int special_count = 0;
  const auto callback = [&](const Slice&, Cache::ObjectPtr, size_t charge,
                            const Cache::CacheItemHelper*) {
    if (charge == static_cast<size_t>(kSpecialCharge)) {
      ++special_count;
    }
  };
  // Start counting
  std::thread apply_thread([&]() {
    // Use small average_entries_per_lock to make the problem difficult
    Cache::ApplyToAllEntriesOptions opts;
    opts.average_entries_per_lock = 2;
    cache_->ApplyToAllEntries(callback, opts);
  });
  // In parallel, add more entries, enough to cause resize but not enough
  // to cause ejections. (Note: if any cache shard is over capacity, there
  // will be ejections)
  for (int i = kSpecialCount * 1; i < kSpecialCount * 5; ++i) {
    Insert(i, i * 2, kNotSpecialCharge);
    expected_usage += kNotSpecialCharge;
  }
  apply_thread.join();
  // verify no evictions
  ASSERT_EQ(cache_->GetUsage(), expected_usage);
  // verify everything seen in ApplyToAllEntries
  ASSERT_EQ(special_count, kSpecialCount);
}
// Checks the number of shard bits chosen by NewCache(capacity) for several
// capacities, relative to the implementation's minimum shard size.
TEST_P(CacheTest, DefaultShardBits) {
  // Prevent excessive allocation (to save time & space)
  estimated_value_size_ = 100000;
  // Implementations use different minimum shard sizes
  size_t min_shard_size =
      (GetParam() == kHyperClock ? 32U * 1024U : 512U) * 1024U;

  // Each dynamic_cast result is checked before use so that an unexpected
  // cache type fails the test instead of dereferencing a null pointer.
  std::shared_ptr<Cache> cache = NewCache(32U * min_shard_size);
  ShardedCacheBase* sc = dynamic_cast<ShardedCacheBase*>(cache.get());
  ASSERT_NE(nullptr, sc);
  ASSERT_EQ(5, sc->GetNumShardBits());

  // Slightly less than one minimum-size shard: no sharding at all.
  cache = NewCache(min_shard_size / 1000U * 999U);
  sc = dynamic_cast<ShardedCacheBase*>(cache.get());
  ASSERT_NE(nullptr, sc);
  ASSERT_EQ(0, sc->GetNumShardBits());

  cache = NewCache(3U * 1024U * 1024U * 1024U);
  sc = dynamic_cast<ShardedCacheBase*>(cache.get());
  ASSERT_NE(nullptr, sc);
  // current maximum of 6
  ASSERT_EQ(6, sc->GetNumShardBits());

  // Only attempted when size_t is wider than 4 bytes.
  if constexpr (sizeof(size_t) > 4) {
    cache = NewCache(128U * min_shard_size);
    sc = dynamic_cast<ShardedCacheBase*>(cache.get());
    ASSERT_NE(nullptr, sc);
    // current maximum of 6
    ASSERT_EQ(6, sc->GetNumShardBits());
  }
}
// A handle obtained from Lookup() must expose the value, charge and item
// helper of the entry that was inserted.
TEST_P(CacheTest, GetChargeAndDeleter) {
  Insert(1, 2);

  Cache::Handle* entry = cache_->Lookup(EncodeKey(1));
  ASSERT_EQ(2, DecodeValue(cache_->Value(entry)));
  ASSERT_EQ(1, cache_->GetCharge(entry));
  ASSERT_EQ(&CacheTest::kHelper, cache_->GetCacheItemHelper(entry));

  cache_->Release(entry);
}
// Instantiate every CacheTest case once per value returned by
// GetTestingCacheTypes().
INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest,
                        secondary_cache_test_util::GetTestingCacheTypes());
// LRUCacheTest cases are instantiated for the kLRU value only.
INSTANTIATE_TEST_CASE_P(CacheTestInstance, LRUCacheTest,
                        testing::Values(secondary_cache_test_util::kLRU));
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack trace handler, lets GoogleTest
// consume its command-line flags, and returns the result of running all tests.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

@ -1,980 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "cache/compressed_secondary_cache.h"
#include <array>
#include <iterator>
#include <memory>
#include <tuple>
#include "memory/jemalloc_nodump_allocator.h"
#include "rocksdb/convenience.h"
#include "test_util/secondary_cache_test_util.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
namespace ROCKSDB_NAMESPACE {
using secondary_cache_test_util::GetTestingCacheTypes;
using secondary_cache_test_util::WithCacheType;
// Test keys shared by the cases below. Each key is exactly 16 bytes for HCC
// compatibility: 4 underscores, 4 spaces, 4 underscores, then "keyN".
const std::string key0 = "____    ____key0";
const std::string key1 = "____    ____key1";
const std::string key2 = "____    ____key2";
const std::string key3 = "____    ____key3";
class CompressedSecondaryCacheTestBase : public testing::Test,
public WithCacheType {
public:
CompressedSecondaryCacheTestBase() {}
~CompressedSecondaryCacheTestBase() override = default;
protected:
// Drives a fixed Insert/Lookup sequence for key1 and key2 against
// `sec_cache` and checks the returned handles together with the
// compressed-secondary-cache perf-context counters.
//
// sec_cache: the secondary cache under test.
// sec_cache_is_compressed: when true, the uncompressed/compressed byte
//   counters are expected to have specific non-zero values; when false they
//   are expected to stay at 0.
void BasicTestHelper(std::shared_ptr<SecondaryCache> sec_cache,
                     bool sec_cache_is_compressed) {
  // Start from zeroed counters so the absolute values asserted below hold.
  get_perf_context()->Reset();
  bool kept_in_sec_cache{true};
  // Look up a non-existent key.
  std::unique_ptr<SecondaryCacheResultHandle> handle0 =
      sec_cache->Lookup(key0, GetHelper(), this, true, /*advise_erase=*/true,
                        kept_in_sec_cache);
  ASSERT_EQ(handle0, nullptr);
  Random rnd(301);
  // Insert and Lookup the item k1 for the first time.
  std::string str1(rnd.RandomString(1000));
  TestItem item1(str1.data(), str1.length());
  // A dummy handle is inserted if the item is inserted for the first time.
  ASSERT_OK(sec_cache->Insert(key1, &item1, GetHelper()));
  ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 1);
  ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, 0);
  ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes, 0);
  // Only the dummy is present, so the lookup finds nothing.
  std::unique_ptr<SecondaryCacheResultHandle> handle1_1 =
      sec_cache->Lookup(key1, GetHelper(), this, true, /*advise_erase=*/false,
                        kept_in_sec_cache);
  ASSERT_EQ(handle1_1, nullptr);
  // Insert and Lookup the item k1 for the second time and advise erasing it.
  ASSERT_OK(sec_cache->Insert(key1, &item1, GetHelper()));
  ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_real_count, 1);
  std::unique_ptr<SecondaryCacheResultHandle> handle1_2 =
      sec_cache->Lookup(key1, GetHelper(), this, true, /*advise_erase=*/true,
                        kept_in_sec_cache);
  ASSERT_NE(handle1_2, nullptr);
  ASSERT_FALSE(kept_in_sec_cache);
  if (sec_cache_is_compressed) {
    ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes,
              1000);
    ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes,
              1007);
  } else {
    ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, 0);
    ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes, 0);
  }
  // The unique_ptr takes ownership of the object returned by Value().
  std::unique_ptr<TestItem> val1 =
      std::unique_ptr<TestItem>(static_cast<TestItem*>(handle1_2->Value()));
  ASSERT_NE(val1, nullptr);
  ASSERT_EQ(memcmp(val1->Buf(), item1.Buf(), item1.Size()), 0);
  // Lookup the item k1 again.
  // The previous lookup advised erasing, so k1 is no longer found.
  std::unique_ptr<SecondaryCacheResultHandle> handle1_3 =
      sec_cache->Lookup(key1, GetHelper(), this, true, /*advise_erase=*/true,
                        kept_in_sec_cache);
  ASSERT_EQ(handle1_3, nullptr);
  // Insert and Lookup the item k2.
  std::string str2(rnd.RandomString(1000));
  TestItem item2(str2.data(), str2.length());
  ASSERT_OK(sec_cache->Insert(key2, &item2, GetHelper()));
  ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 2);
  std::unique_ptr<SecondaryCacheResultHandle> handle2_1 =
      sec_cache->Lookup(key2, GetHelper(), this, true, /*advise_erase=*/false,
                        kept_in_sec_cache);
  ASSERT_EQ(handle2_1, nullptr);
  ASSERT_OK(sec_cache->Insert(key2, &item2, GetHelper()));
  ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_real_count, 2);
  // The byte counters are cumulative over both real inserts (k1 and k2).
  if (sec_cache_is_compressed) {
    ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes,
              2000);
    ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes,
              2014);
  } else {
    ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, 0);
    ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes, 0);
  }
  std::unique_ptr<SecondaryCacheResultHandle> handle2_2 =
      sec_cache->Lookup(key2, GetHelper(), this, true, /*advise_erase=*/false,
                        kept_in_sec_cache);
  ASSERT_NE(handle2_2, nullptr);
  std::unique_ptr<TestItem> val2 =
      std::unique_ptr<TestItem>(static_cast<TestItem*>(handle2_2->Value()));
  ASSERT_NE(val2, nullptr);
  ASSERT_EQ(memcmp(val2->Buf(), item2.Buf(), item2.Size()), 0);
  // Exercise WaitAll() on the two handles that produced values.
  std::vector<SecondaryCacheResultHandle*> handles = {handle1_2.get(),
                                                      handle2_2.get()};
  sec_cache->WaitAll(handles);
  // Drops only this function's reference; the parameter was passed by value.
  sec_cache.reset();
}
void BasicTest(bool sec_cache_is_compressed, bool use_jemalloc) {
CompressedSecondaryCacheOptions opts;
opts.capacity = 2048;
opts.num_shard_bits = 0;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
opts.compression_type = CompressionType::kNoCompression;
sec_cache_is_compressed = false;
}
} else {
opts.compression_type = CompressionType::kNoCompression;
}
if (use_jemalloc) {
JemallocAllocatorOptions jopts;
std::shared_ptr<MemoryAllocator> allocator;
std::string msg;
if (JemallocNodumpAllocator::IsSupported(&msg)) {
Status s = NewJemallocNodumpAllocator(jopts, &allocator);
if (s.ok()) {
opts.memory_allocator = allocator;
}
} else {
ROCKSDB_GTEST_BYPASS("JEMALLOC not supported");
}
}
std::shared_ptr<SecondaryCache> sec_cache =
NewCompressedSecondaryCache(opts);
BasicTestHelper(sec_cache, sec_cache_is_compressed);
}
// Exercises failure paths of a small (capacity 1100, single shard) compressed
// secondary cache: eviction of an older item by a newer one, a Lookup() whose
// object creation is forced to fail, and an Insert() that uses the failing
// helper.
//
// sec_cache_is_compressed: request compression (requires LZ4 support).
void FailsTest(bool sec_cache_is_compressed) {
  CompressedSecondaryCacheOptions secondary_cache_opts;
  if (sec_cache_is_compressed) {
    if (!LZ4_Supported()) {
      ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
      secondary_cache_opts.compression_type = CompressionType::kNoCompression;
    }
  } else {
    secondary_cache_opts.compression_type = CompressionType::kNoCompression;
  }
  secondary_cache_opts.capacity = 1100;
  secondary_cache_opts.num_shard_bits = 0;
  std::shared_ptr<SecondaryCache> sec_cache =
      NewCompressedSecondaryCache(secondary_cache_opts);
  // Insert and Lookup the first item.
  Random rnd(301);
  std::string str1(rnd.RandomString(1000));
  TestItem item1(str1.data(), str1.length());
  // Insert a dummy handle.
  ASSERT_OK(sec_cache->Insert(key1, &item1, GetHelper()));
  // Insert k1.
  ASSERT_OK(sec_cache->Insert(key1, &item1, GetHelper()));
  // Insert and Lookup the second item.
  std::string str2(rnd.RandomString(200));
  TestItem item2(str2.data(), str2.length());
  // Insert a dummy handle, k1 is not evicted.
  ASSERT_OK(sec_cache->Insert(key2, &item2, GetHelper()));
  bool kept_in_sec_cache{false};
  // NOTE(review): the comment above says k1 is not evicted at this point,
  // yet this lookup of key1 is asserted to return nullptr - confirm which of
  // the two reflects the intended behavior.
  std::unique_ptr<SecondaryCacheResultHandle> handle1 =
      sec_cache->Lookup(key1, GetHelper(), this, true, /*advise_erase=*/false,
                        kept_in_sec_cache);
  ASSERT_EQ(handle1, nullptr);
  // Insert k2 and k1 is evicted.
  ASSERT_OK(sec_cache->Insert(key2, &item2, GetHelper()));
  std::unique_ptr<SecondaryCacheResultHandle> handle2 =
      sec_cache->Lookup(key2, GetHelper(), this, true, /*advise_erase=*/false,
                        kept_in_sec_cache);
  ASSERT_NE(handle2, nullptr);
  // The unique_ptr takes ownership of the object returned by Value().
  std::unique_ptr<TestItem> val2 =
      std::unique_ptr<TestItem>(static_cast<TestItem*>(handle2->Value()));
  ASSERT_NE(val2, nullptr);
  ASSERT_EQ(memcmp(val2->Buf(), item2.Buf(), item2.Size()), 0);
  // Insert k1 again and a dummy handle is inserted.
  ASSERT_OK(sec_cache->Insert(key1, &item1, GetHelper()));
  std::unique_ptr<SecondaryCacheResultHandle> handle1_1 =
      sec_cache->Lookup(key1, GetHelper(), this, true, /*advise_erase=*/false,
                        kept_in_sec_cache);
  ASSERT_EQ(handle1_1, nullptr);
  // Create Fails.
  // With creation forced to fail, even the present key k2 yields no handle.
  SetFailCreate(true);
  std::unique_ptr<SecondaryCacheResultHandle> handle2_1 =
      sec_cache->Lookup(key2, GetHelper(), this, true, /*advise_erase=*/true,
                        kept_in_sec_cache);
  ASSERT_EQ(handle2_1, nullptr);
  // Save Fails.
  std::string str3 = rnd.RandomString(10);
  TestItem item3(str3.data(), str3.length());
  // The Status is OK because a dummy handle is inserted.
  ASSERT_OK(sec_cache->Insert(key3, &item3, GetHelperFail()));
  // The second insert of the same key is the one expected to report an error.
  ASSERT_NOK(sec_cache->Insert(key3, &item3, GetHelperFail()));
  sec_cache.reset();
}
// End-to-end test of a primary cache (capacity 1300, strict capacity limit)
// stacked on a compressed secondary cache (capacity 6000, single shard).
// Items are sized so that only one fits in the primary cache at a time; the
// test follows dummy/real insertions into the secondary cache through the
// perf-context counters and then exercises SetCapacity()/GetCapacity() on the
// secondary cache.
//
// sec_cache_is_compressed: request compression (requires LZ4 support); also
//   selects which byte-counter values are expected.
// enable_custom_split_merge: forwarded to the secondary cache options.
void BasicIntegrationTest(bool sec_cache_is_compressed,
                          bool enable_custom_split_merge) {
  CompressedSecondaryCacheOptions secondary_cache_opts;
  if (sec_cache_is_compressed) {
    if (!LZ4_Supported()) {
      ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
      secondary_cache_opts.compression_type = CompressionType::kNoCompression;
      sec_cache_is_compressed = false;
    }
  } else {
    secondary_cache_opts.compression_type = CompressionType::kNoCompression;
  }
  secondary_cache_opts.capacity = 6000;
  secondary_cache_opts.num_shard_bits = 0;
  secondary_cache_opts.enable_custom_split_merge = enable_custom_split_merge;
  std::shared_ptr<SecondaryCache> secondary_cache =
      NewCompressedSecondaryCache(secondary_cache_opts);
  std::shared_ptr<Cache> cache = NewCache(
      /*_capacity =*/1300, /*_num_shard_bits =*/0,
      /*_strict_capacity_limit =*/true, secondary_cache);
  std::shared_ptr<Statistics> stats = CreateDBStatistics();
  // Start from zeroed counters so the absolute values asserted below hold.
  get_perf_context()->Reset();
  Random rnd(301);
  std::string str1 = rnd.RandomString(1001);
  auto item1_1 = new TestItem(str1.data(), str1.length());
  ASSERT_OK(cache->Insert(key1, item1_1, GetHelper(), str1.length()));
  std::string str2 = rnd.RandomString(1012);
  auto item2_1 = new TestItem(str2.data(), str2.length());
  // After this Insert, primary cache contains k2 and secondary cache contains
  // k1's dummy item.
  ASSERT_OK(cache->Insert(key2, item2_1, GetHelper(), str2.length()));
  ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 1);
  ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, 0);
  ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes, 0);
  std::string str3 = rnd.RandomString(1024);
  auto item3_1 = new TestItem(str3.data(), str3.length());
  // After this Insert, primary cache contains k3 and secondary cache contains
  // k1's dummy item and k2's dummy item.
  ASSERT_OK(cache->Insert(key3, item3_1, GetHelper(), str3.length()));
  ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 2);
  // After this Insert, primary cache contains k1 and secondary cache contains
  // k1's dummy item, k2's dummy item, and k3's dummy item.
  auto item1_2 = new TestItem(str1.data(), str1.length());
  ASSERT_OK(cache->Insert(key1, item1_2, GetHelper(), str1.length()));
  ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 3);
  // After this Insert, primary cache contains k2 and secondary cache contains
  // k1's item, k2's dummy item, and k3's dummy item.
  auto item2_2 = new TestItem(str2.data(), str2.length());
  ASSERT_OK(cache->Insert(key2, item2_2, GetHelper(), str2.length()));
  ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_real_count, 1);
  if (sec_cache_is_compressed) {
    ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes,
              str1.length());
    ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes,
              1008);
  } else {
    ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, 0);
    ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes, 0);
  }
  // After this Insert, primary cache contains k3 and secondary cache contains
  // k1's item and k2's item.
  auto item3_2 = new TestItem(str3.data(), str3.length());
  ASSERT_OK(cache->Insert(key3, item3_2, GetHelper(), str3.length()));
  ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_real_count, 2);
  // The byte counters are cumulative over both real inserts (k1 and k2).
  if (sec_cache_is_compressed) {
    ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes,
              str1.length() + str2.length());
    ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes,
              2027);
  } else {
    ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, 0);
    ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes, 0);
  }
  // k3 is still in the primary cache and must match what was inserted last.
  Cache::Handle* handle;
  handle = cache->Lookup(key3, GetHelper(), this, Cache::Priority::LOW,
                         stats.get());
  ASSERT_NE(handle, nullptr);
  auto val3 = static_cast<TestItem*>(cache->Value(handle));
  ASSERT_NE(val3, nullptr);
  ASSERT_EQ(memcmp(val3->Buf(), item3_2->Buf(), item3_2->Size()), 0);
  cache->Release(handle);
  // Look up a non-existent key.
  handle = cache->Lookup(key0, GetHelper(), this, Cache::Priority::LOW,
                         stats.get());
  ASSERT_EQ(handle, nullptr);
  // This Lookup should just insert a dummy handle in the primary cache
  // and the k1 is still in the secondary cache.
  handle = cache->Lookup(key1, GetHelper(), this, Cache::Priority::LOW,
                         stats.get());
  ASSERT_NE(handle, nullptr);
  ASSERT_EQ(get_perf_context()->block_cache_standalone_handle_count, 1);
  auto val1_1 = static_cast<TestItem*>(cache->Value(handle));
  ASSERT_NE(val1_1, nullptr);
  ASSERT_EQ(memcmp(val1_1->Buf(), str1.data(), str1.size()), 0);
  cache->Release(handle);
  // This Lookup should erase k1 from the secondary cache and insert
  // it into primary cache; then k3 is demoted.
  // k2 and k3 are in secondary cache.
  handle = cache->Lookup(key1, GetHelper(), this, Cache::Priority::LOW,
                         stats.get());
  ASSERT_NE(handle, nullptr);
  ASSERT_EQ(get_perf_context()->block_cache_standalone_handle_count, 1);
  ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_real_count, 3);
  cache->Release(handle);
  // k2 is still in secondary cache.
  handle = cache->Lookup(key2, GetHelper(), this, Cache::Priority::LOW,
                         stats.get());
  ASSERT_NE(handle, nullptr);
  ASSERT_EQ(get_perf_context()->block_cache_standalone_handle_count, 2);
  cache->Release(handle);
  // Testing SetCapacity().
  // With the secondary capacity at 0, the lookup of k3 finds nothing.
  ASSERT_OK(secondary_cache->SetCapacity(0));
  handle = cache->Lookup(key3, GetHelper(), this, Cache::Priority::LOW,
                         stats.get());
  ASSERT_EQ(handle, nullptr);
  ASSERT_OK(secondary_cache->SetCapacity(7000));
  size_t capacity;
  ASSERT_OK(secondary_cache->GetCapacity(capacity));
  ASSERT_EQ(capacity, 7000);
  auto item1_3 = new TestItem(str1.data(), str1.length());
  // After this Insert, primary cache contains k1.
  // NOTE(review): the charge is str2.length() although the item is built from
  // str1 - looks like a copy/paste slip; confirm whether it is intentional.
  ASSERT_OK(cache->Insert(key1, item1_3, GetHelper(), str2.length()));
  ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 3);
  ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_real_count, 4);
  auto item2_3 = new TestItem(str2.data(), str2.length());
  // After this Insert, primary cache contains k2 and secondary cache contains
  // k1's dummy item.
  // NOTE(review): the charge is str1.length() although the item is built from
  // str2 - confirm.
  ASSERT_OK(cache->Insert(key2, item2_3, GetHelper(), str1.length()));
  ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 4);
  auto item1_4 = new TestItem(str1.data(), str1.length());
  // After this Insert, primary cache contains k1 and secondary cache contains
  // k1's dummy item and k2's dummy item.
  // NOTE(review): the charge is str2.length() although the item is built from
  // str1 - confirm.
  ASSERT_OK(cache->Insert(key1, item1_4, GetHelper(), str2.length()));
  ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 5);
  auto item2_4 = new TestItem(str2.data(), str2.length());
  // After this Insert, primary cache contains k2 and secondary cache contains
  // k1's real item and k2's dummy item.
  ASSERT_OK(cache->Insert(key2, item2_4, GetHelper(), str2.length()));
  ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_real_count, 5);
  // This Lookup should just insert a dummy handle in the primary cache
  // and the k1 is still in the secondary cache.
  handle = cache->Lookup(key1, GetHelper(), this, Cache::Priority::LOW,
                         stats.get());
  ASSERT_NE(handle, nullptr);
  cache->Release(handle);
  ASSERT_EQ(get_perf_context()->block_cache_standalone_handle_count, 3);
  // Release the primary cache before the secondary cache it was built on.
  cache.reset();
  secondary_cache.reset();
}
void BasicIntegrationFailTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 6000;
secondary_cache_opts.num_shard_bits = 0;
std::shared_ptr<SecondaryCache> secondary_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
std::shared_ptr<Cache> cache = NewCache(
/*_capacity=*/1300, /*_num_shard_bits=*/0,
/*_strict_capacity_limit=*/false, secondary_cache);
Random rnd(301);
std::string str1 = rnd.RandomString(1001);
auto item1 = std::make_unique<TestItem>(str1.data(), str1.length());
ASSERT_OK(cache->Insert(key1, item1.get(), GetHelper(), str1.length()));
item1.release(); // Appease clang-analyze "potential memory leak"
Cache::Handle* handle;
handle = cache->Lookup(key2, nullptr, this, Cache::Priority::LOW);
ASSERT_EQ(handle, nullptr);
handle = cache->Lookup(key2, GetHelper(), this, Cache::Priority::LOW);
ASSERT_EQ(handle, nullptr);
Cache::AsyncLookupHandle ah;
ah.key = key2;
ah.helper = GetHelper();
ah.create_context = this;
ah.priority = Cache::Priority::LOW;
cache->StartAsyncLookup(ah);
cache->Wait(ah);
ASSERT_EQ(ah.Result(), nullptr);
cache.reset();
secondary_cache.reset();
}
void IntegrationSaveFailTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 6000;
secondary_cache_opts.num_shard_bits = 0;
std::shared_ptr<SecondaryCache> secondary_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
std::shared_ptr<Cache> cache = NewCache(
/*_capacity=*/1300, /*_num_shard_bits=*/0,
/*_strict_capacity_limit=*/true, secondary_cache);
Random rnd(301);
std::string str1 = rnd.RandomString(1001);
auto item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert(key1, item1, GetHelperFail(), str1.length()));
std::string str2 = rnd.RandomString(1002);
auto item2 = new TestItem(str2.data(), str2.length());
// k1 should be demoted to the secondary cache.
ASSERT_OK(cache->Insert(key2, item2, GetHelperFail(), str2.length()));
Cache::Handle* handle;
handle = cache->Lookup(key2, GetHelperFail(), this, Cache::Priority::LOW);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
// This lookup should fail, since k1 demotion would have failed.
handle = cache->Lookup(key1, GetHelperFail(), this, Cache::Priority::LOW);
ASSERT_EQ(handle, nullptr);
// Since k1 was not promoted, k2 should still be in cache.
handle = cache->Lookup(key2, GetHelperFail(), this, Cache::Priority::LOW);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
cache.reset();
secondary_cache.reset();
}
void IntegrationCreateFailTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 6000;
secondary_cache_opts.num_shard_bits = 0;
std::shared_ptr<SecondaryCache> secondary_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
std::shared_ptr<Cache> cache = NewCache(
/*_capacity=*/1300, /*_num_shard_bits=*/0,
/*_strict_capacity_limit=*/true, secondary_cache);
Random rnd(301);
std::string str1 = rnd.RandomString(1001);
auto item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert(key1, item1, GetHelper(), str1.length()));
std::string str2 = rnd.RandomString(1002);
auto item2 = new TestItem(str2.data(), str2.length());
// k1 should be demoted to the secondary cache.
ASSERT_OK(cache->Insert(key2, item2, GetHelper(), str2.length()));
Cache::Handle* handle;
SetFailCreate(true);
handle = cache->Lookup(key2, GetHelper(), this, Cache::Priority::LOW);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
// This lookup should fail, since k1 creation would have failed
handle = cache->Lookup(key1, GetHelper(), this, Cache::Priority::LOW);
ASSERT_EQ(handle, nullptr);
// Since k1 didn't get promoted, k2 should still be in cache
handle = cache->Lookup(key2, GetHelper(), this, Cache::Priority::LOW);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
cache.reset();
secondary_cache.reset();
}
void IntegrationFullCapacityTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 6000;
secondary_cache_opts.num_shard_bits = 0;
std::shared_ptr<SecondaryCache> secondary_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
std::shared_ptr<Cache> cache = NewCache(
/*_capacity=*/1300, /*_num_shard_bits=*/0,
/*_strict_capacity_limit=*/false, secondary_cache);
Random rnd(301);
std::string str1 = rnd.RandomString(1001);
auto item1_1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert(key1, item1_1, GetHelper(), str1.length()));
std::string str2 = rnd.RandomString(1002);
std::string str2_clone{str2};
auto item2 = new TestItem(str2.data(), str2.length());
// After this Insert, primary cache contains k2 and secondary cache contains
// k1's dummy item.
ASSERT_OK(cache->Insert(key2, item2, GetHelper(), str2.length()));
// After this Insert, primary cache contains k1 and secondary cache contains
// k1's dummy item and k2's dummy item.
auto item1_2 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert(key1, item1_2, GetHelper(), str1.length()));
auto item2_2 = new TestItem(str2.data(), str2.length());
// After this Insert, primary cache contains k2 and secondary cache contains
// k1's item and k2's dummy item.
ASSERT_OK(cache->Insert(key2, item2_2, GetHelper(), str2.length()));
Cache::Handle* handle2;
handle2 = cache->Lookup(key2, GetHelper(), this, Cache::Priority::LOW);
ASSERT_NE(handle2, nullptr);
cache->Release(handle2);
// k1 promotion should fail because cache is at capacity and
// strict_capacity_limit is true, but the lookup should still succeed.
// A k1's dummy item is inserted into primary cache.
Cache::Handle* handle1;
handle1 = cache->Lookup(key1, GetHelper(), this, Cache::Priority::LOW);
ASSERT_NE(handle1, nullptr);
cache->Release(handle1);
// Since k1 didn't get inserted, k2 should still be in cache
handle2 = cache->Lookup(key2, GetHelper(), this, Cache::Priority::LOW);
ASSERT_NE(handle2, nullptr);
cache->Release(handle2);
cache.reset();
secondary_cache.reset();
}
void SplitValueIntoChunksTest() {
JemallocAllocatorOptions jopts;
std::shared_ptr<MemoryAllocator> allocator;
std::string msg;
if (JemallocNodumpAllocator::IsSupported(&msg)) {
Status s = NewJemallocNodumpAllocator(jopts, &allocator);
if (!s.ok()) {
ROCKSDB_GTEST_BYPASS("JEMALLOC not supported");
}
} else {
ROCKSDB_GTEST_BYPASS("JEMALLOC not supported");
}
using CacheValueChunk = CompressedSecondaryCache::CacheValueChunk;
std::unique_ptr<CompressedSecondaryCache> sec_cache =
std::make_unique<CompressedSecondaryCache>(1000, 0, true, 0.5, 0.0,
allocator);
Random rnd(301);
// 8500 = 8169 + 233 + 98, so there should be 3 chunks after split.
size_t str_size{8500};
std::string str = rnd.RandomString(static_cast<int>(str_size));
size_t charge{0};
CacheValueChunk* chunks_head =
sec_cache->SplitValueIntoChunks(str, kLZ4Compression, charge);
ASSERT_EQ(charge, str_size + 3 * (sizeof(CacheValueChunk) - 1));
CacheValueChunk* current_chunk = chunks_head;
ASSERT_EQ(current_chunk->size, 8192 - sizeof(CacheValueChunk) + 1);
current_chunk = current_chunk->next;
ASSERT_EQ(current_chunk->size, 256 - sizeof(CacheValueChunk) + 1);
current_chunk = current_chunk->next;
ASSERT_EQ(current_chunk->size, 98);
sec_cache->GetHelper(true)->del_cb(chunks_head, /*alloc*/ nullptr);
}
void MergeChunksIntoValueTest() {
using CacheValueChunk = CompressedSecondaryCache::CacheValueChunk;
Random rnd(301);
size_t size1{2048};
std::string str1 = rnd.RandomString(static_cast<int>(size1));
CacheValueChunk* current_chunk = reinterpret_cast<CacheValueChunk*>(
new char[sizeof(CacheValueChunk) - 1 + size1]);
CacheValueChunk* chunks_head = current_chunk;
memcpy(current_chunk->data, str1.data(), size1);
current_chunk->size = size1;
size_t size2{256};
std::string str2 = rnd.RandomString(static_cast<int>(size2));
current_chunk->next = reinterpret_cast<CacheValueChunk*>(
new char[sizeof(CacheValueChunk) - 1 + size2]);
current_chunk = current_chunk->next;
memcpy(current_chunk->data, str2.data(), size2);
current_chunk->size = size2;
size_t size3{31};
std::string str3 = rnd.RandomString(static_cast<int>(size3));
current_chunk->next = reinterpret_cast<CacheValueChunk*>(
new char[sizeof(CacheValueChunk) - 1 + size3]);
current_chunk = current_chunk->next;
memcpy(current_chunk->data, str3.data(), size3);
current_chunk->size = size3;
current_chunk->next = nullptr;
std::string str = str1 + str2 + str3;
std::unique_ptr<CompressedSecondaryCache> sec_cache =
std::make_unique<CompressedSecondaryCache>(1000, 0, true, 0.5, 0.0);
size_t charge{0};
CacheAllocationPtr value =
sec_cache->MergeChunksIntoValue(chunks_head, charge);
ASSERT_EQ(charge, size1 + size2 + size3);
std::string value_str{value.get(), charge};
ASSERT_EQ(strcmp(value_str.data(), str.data()), 0);
while (chunks_head != nullptr) {
CacheValueChunk* tmp_chunk = chunks_head;
chunks_head = chunks_head->next;
tmp_chunk->Free();
}
}
void SplictValueAndMergeChunksTest() {
JemallocAllocatorOptions jopts;
std::shared_ptr<MemoryAllocator> allocator;
std::string msg;
if (JemallocNodumpAllocator::IsSupported(&msg)) {
Status s = NewJemallocNodumpAllocator(jopts, &allocator);
if (!s.ok()) {
ROCKSDB_GTEST_BYPASS("JEMALLOC not supported");
}
} else {
ROCKSDB_GTEST_BYPASS("JEMALLOC not supported");
}
using CacheValueChunk = CompressedSecondaryCache::CacheValueChunk;
std::unique_ptr<CompressedSecondaryCache> sec_cache =
std::make_unique<CompressedSecondaryCache>(1000, 0, true, 0.5, 0.0,
allocator);
Random rnd(301);
// 8500 = 8169 + 233 + 98, so there should be 3 chunks after split.
size_t str_size{8500};
std::string str = rnd.RandomString(static_cast<int>(str_size));
size_t charge{0};
CacheValueChunk* chunks_head =
sec_cache->SplitValueIntoChunks(str, kLZ4Compression, charge);
ASSERT_EQ(charge, str_size + 3 * (sizeof(CacheValueChunk) - 1));
CacheAllocationPtr value =
sec_cache->MergeChunksIntoValue(chunks_head, charge);
ASSERT_EQ(charge, str_size);
std::string value_str{value.get(), charge};
ASSERT_EQ(strcmp(value_str.data(), str.data()), 0);
sec_cache->GetHelper(true)->del_cb(chunks_head, /*alloc*/ nullptr);
}
};
// Fixture parameterized by a single string; Type() returns that test
// parameter unchanged.
class CompressedSecondaryCacheTest
: public CompressedSecondaryCacheTestBase,
public testing::WithParamInterface<std::string> {
const std::string& Type() override { return GetParam(); }
};
// Instantiates the fixture with the values returned by GetTestingCacheTypes().
INSTANTIATE_TEST_CASE_P(CompressedSecondaryCacheTest,
CompressedSecondaryCacheTest, GetTestingCacheTypes());
// Fixture whose parameter is a (compressed?, use jemalloc?, cache type)
// tuple. The two flags are copied into members at construction; the string is
// exposed through Type().
class CompressedSecCacheTestWithCompressAndAllocatorParam
    : public CompressedSecondaryCacheTestBase,
      public ::testing::WithParamInterface<
          std::tuple<bool, bool, std::string>> {
 public:
  CompressedSecCacheTestWithCompressAndAllocatorParam()
      : sec_cache_is_compressed_(std::get<0>(GetParam())),
        use_jemalloc_(std::get<1>(GetParam())) {}

  const std::string& Type() override { return std::get<2>(GetParam()); }

  // First tuple element.
  bool sec_cache_is_compressed_;
  // Second tuple element.
  bool use_jemalloc_;
};
// Runs the fixture's BasicTest() with the compression and jemalloc flags taken
// from the test parameter.
// NOTE(review): the test name "BasicTes" looks like a typo for "BasicTest";
// renaming it would change the name seen by test filters -- confirm first.
TEST_P(CompressedSecCacheTestWithCompressAndAllocatorParam, BasicTes) {
BasicTest(sec_cache_is_compressed_, use_jemalloc_);
}
// Parameter space: both values of each bool flag combined with the values
// returned by GetTestingCacheTypes().
INSTANTIATE_TEST_CASE_P(CompressedSecCacheTests,
CompressedSecCacheTestWithCompressAndAllocatorParam,
::testing::Combine(testing::Bool(), testing::Bool(),
GetTestingCacheTypes()));
// Fixture whose parameter is a (compressed?, cache type) tuple. The flag is
// copied into a member at construction; the string is exposed through Type().
class CompressedSecondaryCacheTestWithCompressionParam
    : public CompressedSecondaryCacheTestBase,
      public ::testing::WithParamInterface<std::tuple<bool, std::string>> {
 public:
  CompressedSecondaryCacheTestWithCompressionParam()
      : sec_cache_is_compressed_(std::get<0>(GetParam())) {}

  const std::string& Type() override { return std::get<1>(GetParam()); }

  // First tuple element; tests in this file may reset it to false.
  bool sec_cache_is_compressed_;
};
// Builds the secondary cache from a "compressed_secondary_cache://" URI via
// SecondaryCache::CreateFromString() and runs BasicTestHelper() on it.
TEST_P(CompressedSecondaryCacheTestWithCompressionParam, BasicTestFromString) {
  std::string sec_cache_uri;
  if (sec_cache_is_compressed_ && LZ4_Supported()) {
    sec_cache_uri =
        "compressed_secondary_cache://"
        "capacity=2048;num_shard_bits=0;compression_type=kLZ4Compression;"
        "compress_format_version=2";
  } else {
    if (sec_cache_is_compressed_) {
      // Compression was requested but LZ4 is not available.
      ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
      sec_cache_is_compressed_ = false;
    }
    sec_cache_uri =
        "compressed_secondary_cache://"
        "capacity=2048;num_shard_bits=0;compression_type=kNoCompression";
  }

  std::shared_ptr<SecondaryCache> sec_cache{nullptr};
  Status create_status = SecondaryCache::CreateFromString(
      ConfigOptions(), sec_cache_uri, &sec_cache);
  EXPECT_OK(create_status);

  BasicTestHelper(sec_cache, sec_cache_is_compressed_);
}
// Same as BasicTestFromString, but every URI additionally sets
// enable_custom_split_merge=true.
TEST_P(CompressedSecondaryCacheTestWithCompressionParam,
       BasicTestFromStringWithSplit) {
  std::string sec_cache_uri;
  if (sec_cache_is_compressed_ && LZ4_Supported()) {
    sec_cache_uri =
        "compressed_secondary_cache://"
        "capacity=2048;num_shard_bits=0;compression_type=kLZ4Compression;"
        "compress_format_version=2;enable_custom_split_merge=true";
  } else {
    if (sec_cache_is_compressed_) {
      // Compression was requested but LZ4 is not available.
      ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
      sec_cache_is_compressed_ = false;
    }
    sec_cache_uri =
        "compressed_secondary_cache://"
        "capacity=2048;num_shard_bits=0;compression_type=kNoCompression;"
        "enable_custom_split_merge=true";
  }

  std::shared_ptr<SecondaryCache> sec_cache{nullptr};
  Status create_status = SecondaryCache::CreateFromString(
      ConfigOptions(), sec_cache_uri, &sec_cache);
  EXPECT_OK(create_status);

  BasicTestHelper(sec_cache, sec_cache_is_compressed_);
}
// The five tests below are thin wrappers: each forwards the fixture's
// sec_cache_is_compressed_ flag to the fixture method of the same name.

// Delegates to FailsTest().
TEST_P(CompressedSecondaryCacheTestWithCompressionParam, FailsTest) {
FailsTest(sec_cache_is_compressed_);
}
// Delegates to BasicIntegrationFailTest().
TEST_P(CompressedSecondaryCacheTestWithCompressionParam,
BasicIntegrationFailTest) {
BasicIntegrationFailTest(sec_cache_is_compressed_);
}
// Delegates to IntegrationSaveFailTest().
TEST_P(CompressedSecondaryCacheTestWithCompressionParam,
IntegrationSaveFailTest) {
IntegrationSaveFailTest(sec_cache_is_compressed_);
}
// Delegates to IntegrationCreateFailTest().
TEST_P(CompressedSecondaryCacheTestWithCompressionParam,
IntegrationCreateFailTest) {
IntegrationCreateFailTest(sec_cache_is_compressed_);
}
// Delegates to IntegrationFullCapacityTest().
TEST_P(CompressedSecondaryCacheTestWithCompressionParam,
IntegrationFullCapacityTest) {
IntegrationFullCapacityTest(sec_cache_is_compressed_);
}
// For every cache entry role, inserts a 1000-byte item twice into a
// compressed secondary cache configured with a random do_not_compress_roles
// set, looks it up again, and checks via the perf context whether the entry
// went through compression.
TEST_P(CompressedSecondaryCacheTestWithCompressionParam, EntryRoles) {
  CompressedSecondaryCacheOptions opts;
  opts.capacity = 2048;
  opts.num_shard_bits = 0;

  if (!sec_cache_is_compressed_) {
    opts.compression_type = CompressionType::kNoCompression;
  } else if (!LZ4_Supported()) {
    ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
    return;
  }

  // Select a random subset to include, for fast test
  Random& tls_rnd = *Random::GetTLSInstance();
  CacheEntryRoleSet uncompressed_roles;
  for (uint32_t role_idx = 0; role_idx < kNumCacheEntryRoles; ++role_idx) {
    // A few included on average, but decent chance of zero
    if (tls_rnd.OneIn(5)) {
      uncompressed_roles.Add(static_cast<CacheEntryRole>(role_idx));
    }
  }
  opts.do_not_compress_roles = uncompressed_roles;

  std::shared_ptr<SecondaryCache> sec_cache = NewCompressedSecondaryCache(opts);

  // Fixed seed to ensure consistent compressibility (doesn't compress)
  std::string junk(Random(301).RandomString(1000));

  for (uint32_t role_idx = 0; role_idx < kNumCacheEntryRoles; ++role_idx) {
    const CacheEntryRole role = static_cast<CacheEntryRole>(role_idx);

    // Uniquify `junk`
    junk[0] = static_cast<char>(role_idx);
    TestItem item{junk.data(), junk.length()};
    Slice ith_key(junk.data(), 16);

    get_perf_context()->Reset();
    // The first insert is counted as a dummy insert, the second as a real one.
    ASSERT_OK(sec_cache->Insert(ith_key, &item, GetHelper(role)));
    ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 1U);

    ASSERT_OK(sec_cache->Insert(ith_key, &item, GetHelper(role)));
    ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_real_count, 1U);

    bool kept_in_sec_cache{true};
    std::unique_ptr<SecondaryCacheResultHandle> handle =
        sec_cache->Lookup(ith_key, GetHelper(role), this, true,
                          /*advise_erase=*/true, kept_in_sec_cache);
    ASSERT_NE(handle, nullptr);

    // Lookup returns the right data
    std::unique_ptr<TestItem> val{static_cast<TestItem*>(handle->Value())};
    ASSERT_NE(val, nullptr);
    ASSERT_EQ(memcmp(val->Buf(), item.Buf(), item.Size()), 0);

    const bool expect_compressed =
        sec_cache_is_compressed_ && !uncompressed_roles.Contains(role);
    if (!expect_compressed) {
      ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, 0);
      ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes, 0);
    } else {
      ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes,
                1000);
      ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes,
                1007);
    }
  }
}
// Parameter space for CompressedSecondaryCacheTestWithCompressionParam: both
// values of the bool flag combined with the values returned by
// GetTestingCacheTypes().
INSTANTIATE_TEST_CASE_P(CompressedSecCacheTests,
CompressedSecondaryCacheTestWithCompressionParam,
testing::Combine(testing::Bool(),
GetTestingCacheTypes()));
// Fixture whose parameter is a (compressed?, custom split/merge?, cache type)
// tuple. The two flags are copied into members at construction; the string is
// exposed through Type().
class CompressedSecCacheTestWithCompressAndSplitParam
    : public CompressedSecondaryCacheTestBase,
      public ::testing::WithParamInterface<
          std::tuple<bool, bool, std::string>> {
 public:
  CompressedSecCacheTestWithCompressAndSplitParam()
      : sec_cache_is_compressed_(std::get<0>(GetParam())),
        enable_custom_split_merge_(std::get<1>(GetParam())) {}

  const std::string& Type() override { return std::get<2>(GetParam()); }

  // First tuple element.
  bool sec_cache_is_compressed_;
  // Second tuple element.
  bool enable_custom_split_merge_;
};
// Runs the fixture's BasicIntegrationTest() with the compression and
// split/merge flags taken from the test parameter.
TEST_P(CompressedSecCacheTestWithCompressAndSplitParam, BasicIntegrationTest) {
BasicIntegrationTest(sec_cache_is_compressed_, enable_custom_split_merge_);
}
// Parameter space: both values of each bool flag combined with the values
// returned by GetTestingCacheTypes().
INSTANTIATE_TEST_CASE_P(CompressedSecCacheTests,
CompressedSecCacheTestWithCompressAndSplitParam,
::testing::Combine(testing::Bool(), testing::Bool(),
GetTestingCacheTypes()));
// The three tests below are thin wrappers around the fixture methods of the
// same name; they take no parameters beyond the fixture's cache type.

// Delegates to SplitValueIntoChunksTest().
TEST_P(CompressedSecondaryCacheTest, SplitValueIntoChunksTest) {
SplitValueIntoChunksTest();
}
// Delegates to MergeChunksIntoValueTest().
TEST_P(CompressedSecondaryCacheTest, MergeChunksIntoValueTest) {
MergeChunksIntoValueTest();
}
// Delegates to SplictValueAndMergeChunksTest().
TEST_P(CompressedSecondaryCacheTest, SplictValueAndMergeChunksTest) {
SplictValueAndMergeChunksTest();
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack trace handler, lets GoogleTest
// consume its command-line flags, and returns the result of running all
// registered tests.
int main(int argc, char** argv) {
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

File diff suppressed because it is too large Load Diff

@ -1,82 +0,0 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# Generates gcov-based coverage reports under $PWD/COVERAGE_REPORT:
#   - coverage_report_all.txt:    all files that have a .gcno under $ROOT
#   - coverage_report_recent.txt: only the files touched by the HEAD commit
#   - an lcov/genhtml HTML report, only when $HTML is set and lcov is usable
#
# Usage: <this script> [python-interpreter]   (defaults to `which python3`)

# Exit on error.
set -e

if [ -n "$USE_CLANG" ]; then
  echo "Error: Coverage test is supported only for gcc."
  exit 1
fi

ROOT=".."
# Fetch right version of gcov
if [ -d /mnt/gvfs/third-party -a -z "$CXX" ]; then
  source $ROOT/build_tools/fbcode_config_platform010.sh
  GCOV=$GCC_BASE/bin/gcov
else
  GCOV=$(which gcov)
fi
echo -e "Using $GCOV"

COVERAGE_DIR="$PWD/COVERAGE_REPORT"
mkdir -p $COVERAGE_DIR

# Find all gcno files to generate the coverage report

PYTHON=${1:-`which python3`}
echo -e "Using $PYTHON"
GCNO_FILES=`find $ROOT -name "*.gcno"`
$GCOV --preserve-paths --relative-only --no-output $GCNO_FILES 2>/dev/null |
  # Parse the raw gcov report to more human readable form.
  $PYTHON $ROOT/coverage/parse_gcov_output.py |
  # Write the output to both stdout and report file.
  tee $COVERAGE_DIR/coverage_report_all.txt &&
  echo -e "Generated coverage report for all files: $COVERAGE_DIR/coverage_report_all.txt\n"

# TODO: we also need to get the files of the latest commits.
# Get the most recently committed files.
LATEST_FILES=`
  git show --pretty="format:" --name-only HEAD |
  grep -v "^$" |
  paste -s -d,`
RECENT_REPORT=$COVERAGE_DIR/coverage_report_recent.txt

echo -e "Recently updated files: $LATEST_FILES\n" > $RECENT_REPORT
# The parser declares this option as "--interested-files" / "-i". With a single
# leading dash it would be read as "-i" with the value "nterested-files", so
# the long form is spelled out here. The list is quoted so that it is always
# passed as exactly one argument.
$GCOV --preserve-paths --relative-only --no-output $GCNO_FILES 2>/dev/null |
  $PYTHON $ROOT/coverage/parse_gcov_output.py --interested-files "$LATEST_FILES" |
  tee -a $RECENT_REPORT &&
  echo -e "Generated coverage report for recently updated files: $RECENT_REPORT\n"

# Unless otherwise specified, we'll not generate html report by default
if [ -z "$HTML" ]; then
  exit 0
fi

# Generate the html report. If we cannot find lcov in this machine, we'll simply
# skip this step.
echo "Generating the html coverage report..."

# Silence `which` itself (not `true`) when lcov is missing.
LCOV=$(which lcov 2>/dev/null || true)
if [ -z "$LCOV" ]
then
  echo "Skip: Cannot find lcov to generate the html report."
  exit 0
fi

# Bail out when the reported version does NOT match 1.1. The value is quoted
# because the version line contains spaces.
LCOV_VERSION=$(lcov -v | grep 1.1 || true)
if [ -z "$LCOV_VERSION" ]
then
  echo "Not supported lcov version. Expect lcov 1.1."
  exit 0
fi

(cd $ROOT; lcov --no-external \
     --capture  \
     --directory $PWD \
     --gcov-tool $GCOV \
     --output-file $COVERAGE_DIR/coverage.info)

genhtml $COVERAGE_DIR/coverage.info -o $COVERAGE_DIR

echo "HTML Coverage report is generated in $COVERAGE_DIR"

@ -1,128 +0,0 @@
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import print_function
import optparse
import re
import sys
# the gcov report follows certain pattern. Each file will have two lines
# of report, from which we can extract the file name, total lines and coverage
# percentage.
def parse_gcov_report(gcov_input):
    """Parse raw gcov output into per-file and overall coverage.

    The gcov report follows a fixed pattern: a ``File '<name>'`` line followed
    by a ``Lines executed:<pct>% of <count>`` line. A "Lines executed" line
    that is not preceded by a "File" line is taken as the overall summary.

    Args:
        gcov_input: iterable of report lines (for example ``sys.stdin`` or a
            list of strings).

    Returns:
        A ``(per_file_coverage, total_coverage)`` tuple. ``per_file_coverage``
        maps file name to ``(coverage_percent, line_count)``;
        ``total_coverage`` is such a pair for the summary line, or ``None``
        when the input has no summary line.
    """
    per_file_coverage = {}
    total_coverage = None
    # No "File" line seen yet. Without this, input that starts with a
    # "Lines executed" line would hit an unbound local below.
    current_file = None

    # Read from the argument rather than unconditionally from sys.stdin.
    for line in gcov_input:
        line = line.strip()

        # --First line of the coverage report (with file name in it)?
        match_obj = re.match("^File '(.*)'$", line)
        if match_obj:
            # fetch the file name from the first line of the report.
            current_file = match_obj.group(1)
            continue

        # -- Second line of the file report (with coverage percentage)
        match_obj = re.match("^Lines executed:(.*)% of (.*)", line)
        if match_obj:
            coverage = float(match_obj.group(1))
            lines = int(match_obj.group(2))

            if current_file is not None:
                per_file_coverage[current_file] = (coverage, lines)
                current_file = None
            else:
                # If current_file is not set, we reach the last line of report,
                # which contains the summarized coverage percentage.
                total_coverage = (coverage, lines)
            continue

        # Any other line (empty, or a file without executable lines) ends the
        # current file record.
        current_file = None

    return per_file_coverage, total_coverage
def get_option_parser():
    """Build the command-line parser for the coverage report tool.

    The parser has one option, ``--interested-files`` / ``-i``, whose value is
    stored under ``filenames``.
    """
    description = (
        "Parse the gcov output and generate more human-readable code "
        "coverage report."
    )
    files_help = (
        "Comma separated files names. if specified, we will display "
        "the coverage report only for interested source files. "
        "Otherwise we will display the coverage report for all "
        "source files."
    )

    option_parser = optparse.OptionParser(description)
    option_parser.add_option(
        "--interested-files", "-i", dest="filenames", help=files_help
    )
    return option_parser
def display_file_coverage(per_file_coverage, total_coverage):
    """Print a tab-separated coverage table to stdout.

    Args:
        per_file_coverage: non-empty mapping of file name to
            ``(coverage_percent, line_count)``.
        total_coverage: ``(coverage_percent, line_count)`` for the footer row,
            or a falsy value to omit the footer.
    """
    # The file-name column is as wide as the longest file name.
    name_width = max(map(len, per_file_coverage))
    name_column = "%" + str(name_width) + "s"

    # Separator width is determined by 3 column sizes:
    # file name, coverage percentage and lines.
    rule = "-" * (name_width + 10 + 20)

    # -- Header
    print((name_column + "\t%s\t%s") % ("Filename", "Coverage", "Lines"))
    print(rule)

    # -- Body: one row per file.
    row_format = name_column + "\t%5.2f%%\t%10d"
    for fname, (coverage, lines) in per_file_coverage.items():
        print(row_format % (fname, coverage, lines))

    # -- Footer
    if total_coverage:
        total_pct, total_lines = total_coverage[0], total_coverage[1]
        print(rule)
        print(row_format % ("Total", total_pct, total_lines))
def report_coverage():
    """Read a gcov report from stdin and print the coverage table.

    With ``--interested-files`` the table is limited to those files and the
    total row is dropped. If nothing is left to show, a message is written to
    stderr instead.
    """
    options, _ = get_option_parser().parse_args()

    if options.filenames is None:
        interested_files = set()
    else:
        interested_files = {
            name.strip() for name in options.filenames.split(",")
        }

    # To make things simple, right now we only read gcov report from the input
    per_file_coverage, total_coverage = parse_gcov_report(sys.stdin)

    # Check if we need to display coverage info for interested files.
    if interested_files:
        per_file_coverage = {
            fname: per_file_coverage[fname]
            for fname in interested_files
            if fname in per_file_coverage
        }
        # If we only interested in several files, it makes no sense to report
        # the total_coverage
        total_coverage = None

    if not per_file_coverage:
        print("Cannot find coverage info for the given files.", file=sys.stderr)
        return
    display_file_coverage(per_file_coverage, total_coverage)


# Only produce the report when run as a script.
if __name__ == "__main__":
    report_coverage()

File diff suppressed because it is too large Load Diff

@ -1,491 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include <mutex>
#include <string>
#include <thread>
#include <vector>
#include "db/db_impl/db_impl.h"
#include "port/port.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "test_util/sync_point.h"
#include "test_util/testharness.h"
#include "util/cast_util.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
// Fixture for the CompactFiles tests in this file. It only provides the
// default Env and a database path; each test opens and deletes its own DB.
class CompactFilesTest : public testing::Test {
public:
CompactFilesTest() {
env_ = Env::Default();
db_name_ = test::PerThreadDBPath("compact_files_test");
}
// Database path, obtained from test::PerThreadDBPath("compact_files_test").
std::string db_name_;
// Set to Env::Default() in the constructor.
Env* env_;
};
// An EventListener that remembers the path of each flushed file.
// Every access to the recorded list is guarded by mutex_.
class FlushedFileCollector : public EventListener {
 public:
  FlushedFileCollector() = default;
  ~FlushedFileCollector() override = default;

  // Records the path of the file reported by a completed flush.
  void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override {
    std::lock_guard<std::mutex> lock(mutex_);
    flushed_files_.push_back(info.file_path);
  }

  // Returns a snapshot of the paths recorded so far. The copy is made while
  // the lock is held; copying the vector in one go avoids the per-element
  // double copy of a by-value range-for followed by push_back.
  std::vector<std::string> GetFlushedFiles() {
    std::lock_guard<std::mutex> lock(mutex_);
    return flushed_files_;
  }

  // Forgets all recorded paths.
  void ClearFlushedFiles() {
    std::lock_guard<std::mutex> lock(mutex_);
    flushed_files_.clear();
  }

 private:
  std::vector<std::string> flushed_files_;
  std::mutex mutex_;
};
// Creates several L0 files while sync points are enabled, then calls
// CompactFiles() on the first two L0 files with output level 0 and expects it
// to succeed.
TEST_F(CompactFilesTest, L0ConflictsFiles) {
  Options options;
  // to trigger compaction more easily
  const int kWriteBufferSize = 10000;
  const int kLevel0Trigger = 2;
  options.create_if_missing = true;
  options.compaction_style = kCompactionStyleLevel;
  // Small slowdown and stop trigger for experimental purpose.
  options.level0_slowdown_writes_trigger = 20;
  options.level0_stop_writes_trigger = 20;
  options.write_buffer_size = kWriteBufferSize;
  options.level0_file_num_compaction_trigger = kLevel0Trigger;
  options.compression = kNoCompression;

  DB* db = nullptr;
  ASSERT_OK(DestroyDB(db_name_, options));
  Status s = DB::Open(options, db_name_, &db);
  // Use test assertions rather than C assert() so the open result is also
  // checked when assert() is compiled out, as the other tests here do.
  ASSERT_OK(s);
  ASSERT_NE(db, nullptr);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
      {"CompactFilesImpl:0", "BackgroundCallCompaction:0"},
      {"BackgroundCallCompaction:1", "CompactFilesImpl:1"},
  });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // create couple files
  // Background compaction starts and waits in BackgroundCallCompaction:0
  for (int i = 0; i < kLevel0Trigger * 4; ++i) {
    ASSERT_OK(db->Put(WriteOptions(), std::to_string(i), ""));
    ASSERT_OK(db->Put(WriteOptions(), std::to_string(100 - i), ""));
    ASSERT_OK(db->Flush(FlushOptions()));
  }

  ROCKSDB_NAMESPACE::ColumnFamilyMetaData meta;
  db->GetColumnFamilyMetaData(&meta);
  std::string file1;
  for (auto& file : meta.levels[0].files) {
    ASSERT_EQ(0, meta.levels[0].level);
    if (file1.empty()) {
      // Remember the first L0 file; the compaction is issued once a second
      // one is seen.
      file1 = file.db_path + "/" + file.name;
    } else {
      std::string file2 = file.db_path + "/" + file.name;
      // Another thread starts a compact files and creates an L0 compaction
      // The background compaction then notices that there is an L0 compaction
      // already in progress and doesn't do an L0 compaction
      // Once the background compaction finishes, the compact files finishes
      ASSERT_OK(db->CompactFiles(ROCKSDB_NAMESPACE::CompactionOptions(),
                                 {file1, file2}, 0));
      break;
    }
  }
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  delete db;
}
// Places one file each on L5, L4 and L3 plus one on L0, then checks that
// CompactFiles() over the files of every level except L3 is rejected with
// InvalidArgument for output levels 0..4 and succeeds for output level 5.
// A helper thread writes and flushes two more keys, ordered relative to the
// compaction through sync points.
TEST_F(CompactFilesTest, MultipleLevel) {
Options options;
options.create_if_missing = true;
options.level_compaction_dynamic_level_bytes = true;
options.num_levels = 6;
// Add listener
FlushedFileCollector* collector = new FlushedFileCollector();
options.listeners.emplace_back(collector);
DB* db = nullptr;
ASSERT_OK(DestroyDB(db_name_, options));
Status s = DB::Open(options, db_name_, &db);
ASSERT_OK(s);
ASSERT_NE(db, nullptr);
// create couple files in L0, L3, L4 and L5
for (int i = 5; i > 2; --i) {
// Start each round with an empty list so only this round's flush is moved.
collector->ClearFlushedFiles();
ASSERT_OK(db->Put(WriteOptions(), std::to_string(i), ""));
ASSERT_OK(db->Flush(FlushOptions()));
// Ensure background work is fully finished including listener callbacks
// before accessing listener state.
ASSERT_OK(static_cast_with_check<DBImpl>(db)->TEST_WaitForBackgroundWork());
auto l0_files = collector->GetFlushedFiles();
// Compact the just-flushed file(s) to level i and verify level i then
// reports exactly one file.
ASSERT_OK(db->CompactFiles(CompactionOptions(), l0_files, i));
std::string prop;
ASSERT_TRUE(db->GetProperty(
"rocksdb.num-files-at-level" + std::to_string(i), &prop));
ASSERT_EQ("1", prop);
}
// One more flushed file, which is not compacted to a deeper level here.
ASSERT_OK(db->Put(WriteOptions(), std::to_string(0), ""));
ASSERT_OK(db->Flush(FlushOptions()));
ColumnFamilyMetaData meta;
db->GetColumnFamilyMetaData(&meta);
// Compact files except the file in L3
std::vector<std::string> files;
for (int i = 0; i < 6; ++i) {
if (i == 3) continue;
for (auto& file : meta.levels[i].files) {
files.push_back(file.db_path + "/" + file.name);
}
}
// NOTE(review): presumably each {A, B} pair makes sync point B wait until A
// has been reached -- confirm against SyncPoint::LoadDependency.
SyncPoint::GetInstance()->LoadDependency({
{"CompactionJob::Run():Start", "CompactFilesTest.MultipleLevel:0"},
{"CompactFilesTest.MultipleLevel:1", "CompactFilesImpl:3"},
});
SyncPoint::GetInstance()->EnableProcessing();
// Helper thread: between its two sync points it writes "bar" and "foo" and
// flushes them.
std::thread thread([&] {
TEST_SYNC_POINT("CompactFilesTest.MultipleLevel:0");
ASSERT_OK(db->Put(WriteOptions(), "bar", "v2"));
ASSERT_OK(db->Put(WriteOptions(), "foo", "v2"));
ASSERT_OK(db->Flush(FlushOptions()));
TEST_SYNC_POINT("CompactFilesTest.MultipleLevel:1");
});
// Compaction cannot move up the data to higher level
// here we have input file from level 5, so the output level has to be >= 5
for (int invalid_output_level = 0; invalid_output_level < 5;
invalid_output_level++) {
s = db->CompactFiles(CompactionOptions(), files, invalid_output_level);
std::cout << s.ToString() << std::endl;
ASSERT_TRUE(s.IsInvalidArgument());
}
// Output level 5 is valid and is expected to succeed.
ASSERT_OK(db->CompactFiles(CompactionOptions(), files, 5));
SyncPoint::GetInstance()->DisableProcessing();
thread.join();
delete db;
}
// Writes enough data to produce flushed files, compacts them to level 1 with
// CompactFiles(), and verifies that none of the input files exists any more.
TEST_F(CompactFilesTest, ObsoleteFiles) {
  // to trigger compaction more easily
  const int kWriteBufferSize = 65536;

  Options options;
  options.create_if_missing = true;
  // Disable RocksDB background compaction.
  options.compaction_style = kCompactionStyleNone;
  options.level0_slowdown_writes_trigger = (1 << 30);
  options.level0_stop_writes_trigger = (1 << 30);
  options.write_buffer_size = kWriteBufferSize;
  options.max_write_buffer_number = 2;
  options.compression = kNoCompression;

  // Add listener
  FlushedFileCollector* flush_listener = new FlushedFileCollector();
  options.listeners.emplace_back(flush_listener);

  DB* db = nullptr;
  ASSERT_OK(DestroyDB(db_name_, options));
  Status open_status = DB::Open(options, db_name_, &db);
  ASSERT_OK(open_status);
  ASSERT_NE(db, nullptr);

  // create couple files
  for (int key = 1000; key < 2000; ++key) {
    ASSERT_OK(db->Put(WriteOptions(), std::to_string(key),
                      std::string(kWriteBufferSize / 10, 'a' + (key % 26))));
  }

  auto compaction_inputs = flush_listener->GetFlushedFiles();
  ASSERT_OK(db->CompactFiles(CompactionOptions(), compaction_inputs, 1));
  ASSERT_OK(static_cast_with_check<DBImpl>(db)->TEST_WaitForCompact());

  // verify all compaction input files are deleted
  for (const auto& input_path : compaction_inputs) {
    ASSERT_EQ(Status::NotFound(), env_->FileExists(input_path));
  }
  delete db;
}
// Compacts two batches of flushed L0 files back into level 0 while
// max_compaction_bytes is tiny (5000). The test passes as long as both
// CompactFiles() calls succeed and no internal assertion fires.
TEST_F(CompactFilesTest, NotCutOutputOnLevel0) {
  Options options;
  options.create_if_missing = true;
  // Disable RocksDB background compaction.
  options.compaction_style = kCompactionStyleNone;
  options.level0_slowdown_writes_trigger = 1000;
  options.level0_stop_writes_trigger = 1000;
  options.write_buffer_size = 65536;
  options.max_write_buffer_number = 2;
  options.compression = kNoCompression;
  options.max_compaction_bytes = 5000;

  // Add listener
  FlushedFileCollector* collector = new FlushedFileCollector();
  options.listeners.emplace_back(collector);

  DB* db = nullptr;
  ASSERT_OK(DestroyDB(db_name_, options));
  Status s = DB::Open(options, db_name_, &db);
  // Use gtest assertions rather than assert(): assert() is compiled out under
  // NDEBUG, which would leave the open status unchecked and `db` unverified
  // before it is dereferenced below.
  ASSERT_OK(s);
  ASSERT_NE(db, nullptr);

  // First batch of files.
  for (int i = 0; i < 500; ++i) {
    ASSERT_OK(db->Put(WriteOptions(), std::to_string(i),
                      std::string(1000, 'a' + (i % 26))));
  }
  ASSERT_OK(static_cast_with_check<DBImpl>(db)->TEST_WaitForFlushMemTable());
  auto l0_files_1 = collector->GetFlushedFiles();
  collector->ClearFlushedFiles();

  // Second batch, overwriting the same keys.
  for (int i = 0; i < 500; ++i) {
    ASSERT_OK(db->Put(WriteOptions(), std::to_string(i),
                      std::string(1000, 'a' + (i % 26))));
  }
  ASSERT_OK(static_cast_with_check<DBImpl>(db)->TEST_WaitForFlushMemTable());
  auto l0_files_2 = collector->GetFlushedFiles();

  ASSERT_OK(db->CompactFiles(CompactionOptions(), l0_files_1, 0));
  ASSERT_OK(db->CompactFiles(CompactionOptions(), l0_files_2, 0));
  // no assertion failure
  delete db;
}
// Starts CompactFiles() on five flushed L0 files in a separate thread and,
// coordinated through sync points, flushes a sixth file from the test thread
// while that call is in progress. Afterwards the DB must reopen successfully.
TEST_F(CompactFilesTest, CapturingPendingFiles) {
Options options;
options.create_if_missing = true;
// Disable RocksDB background compaction.
options.compaction_style = kCompactionStyleNone;
// Always do full scans for obsolete files (needed to reproduce the issue).
options.delete_obsolete_files_period_micros = 0;
// Add listener.
FlushedFileCollector* collector = new FlushedFileCollector();
options.listeners.emplace_back(collector);
DB* db = nullptr;
ASSERT_OK(DestroyDB(db_name_, options));
Status s = DB::Open(options, db_name_, &db);
ASSERT_OK(s);
assert(db);
// Create 5 files.
for (int i = 0; i < 5; ++i) {
ASSERT_OK(db->Put(WriteOptions(), "key" + std::to_string(i), "value"));
ASSERT_OK(db->Flush(FlushOptions()));
}
// Ensure background work is fully finished including listener callbacks
// before accessing listener state.
ASSERT_OK(static_cast_with_check<DBImpl>(db)->TEST_WaitForBackgroundWork());
auto l0_files = collector->GetFlushedFiles();
EXPECT_EQ(5, l0_files.size());
// NOTE(review): each pair is presumably {predecessor, successor}, i.e. the
// test thread waits for "CompactFilesImpl:2" and the compaction waits at
// "CompactFilesImpl:3" for marker ":1" below — confirm against SyncPoint.
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
{"CompactFilesImpl:2", "CompactFilesTest.CapturingPendingFiles:0"},
{"CompactFilesTest.CapturingPendingFiles:1", "CompactFilesImpl:3"},
});
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
// Start compacting files.
ROCKSDB_NAMESPACE::port::Thread compaction_thread(
[&] { EXPECT_OK(db->CompactFiles(CompactionOptions(), l0_files, 1)); });
// In the meantime flush another file.
TEST_SYNC_POINT("CompactFilesTest.CapturingPendingFiles:0");
ASSERT_OK(db->Put(WriteOptions(), "key5", "value"));
ASSERT_OK(db->Flush(FlushOptions()));
TEST_SYNC_POINT("CompactFilesTest.CapturingPendingFiles:1");
// The thread captures `db` and `l0_files` by reference, so it has to be
// joined before either goes away.
compaction_thread.join();
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
delete db;
// Make sure we can reopen the DB.
s = DB::Open(options, db_name_, &db);
ASSERT_OK(s);
assert(db);
delete db;
}
// Runs CompactFiles() with a compaction filter that itself calls DB::Get()
// on the database being compacted; the test passes if the compaction
// completes successfully.
TEST_F(CompactFilesTest, CompactionFilterWithGetSv) {
  class FilterWithGet : public CompactionFilter {
   public:
    // Issues a Get() against the attached DB (if any) and then asks for the
    // entry to be filtered out by returning true.
    bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/,
                std::string* /*new_value*/,
                bool* /*value_changed*/) const override {
      if (db_ == nullptr) {
        return true;
      }
      std::string res;
      db_->Get(ReadOptions(), "", &res);
      return true;
    }

    void SetDB(DB* db) { db_ = db; }

    const char* Name() const override { return "FilterWithGet"; }

   private:
    // Explicitly null until SetDB() is called. Filter() reads this member, so
    // it must not depend on how the object happens to be constructed
    // (previously only the `new FilterWithGet()` value-initialization zeroed
    // it).
    DB* db_ = nullptr;
  };

  std::shared_ptr<FilterWithGet> cf(new FilterWithGet());

  Options options;
  options.create_if_missing = true;
  options.compaction_filter = cf.get();

  DB* db = nullptr;
  ASSERT_OK(DestroyDB(db_name_, options));
  Status s = DB::Open(options, db_name_, &db);
  ASSERT_OK(s);

  // The filter can only be pointed at the DB once it has been opened.
  cf->SetDB(db);

  // Write one L0 file
  ASSERT_OK(db->Put(WriteOptions(), "K1", "V1"));
  ASSERT_OK(db->Flush(FlushOptions()));

  // Compact all L0 files using CompactFiles
  ROCKSDB_NAMESPACE::ColumnFamilyMetaData meta;
  db->GetColumnFamilyMetaData(&meta);
  for (auto& file : meta.levels[0].files) {
    std::string fname = file.db_path + "/" + file.name;
    ASSERT_OK(
        db->CompactFiles(ROCKSDB_NAMESPACE::CompactionOptions(), {fname}, 0));
  }

  delete db;
}
// For each compaction style, compacts one flushed L0 file into L1 with
// CompactionOptions::compression set to kDisableCompressionOption and checks
// that every resulting table reports Zlib, the per-level setting for L1.
// Skipped (early return) when zlib or snappy is unavailable.
TEST_F(CompactFilesTest, SentinelCompressionType) {
if (!Zlib_Supported()) {
fprintf(stderr, "zlib compression not supported, skip this test\n");
return;
}
if (!Snappy_Supported()) {
fprintf(stderr, "snappy compression not supported, skip this test\n");
return;
}
// Check that passing `CompressionType::kDisableCompressionOption` to
// `CompactFiles` causes it to use the column family compression options.
for (auto compaction_style : {CompactionStyle::kCompactionStyleLevel,
CompactionStyle::kCompactionStyleUniversal,
CompactionStyle::kCompactionStyleNone}) {
// Start every iteration from an empty database.
ASSERT_OK(DestroyDB(db_name_, Options()));
Options options;
options.compaction_style = compaction_style;
// L0: Snappy, L1: Zlib, L2: Snappy
options.compression_per_level = {CompressionType::kSnappyCompression,
CompressionType::kZlibCompression,
CompressionType::kSnappyCompression};
options.create_if_missing = true;
FlushedFileCollector* collector = new FlushedFileCollector();
options.listeners.emplace_back(collector);
DB* db = nullptr;
ASSERT_OK(DB::Open(options, db_name_, &db));
ASSERT_OK(db->Put(WriteOptions(), "key", "val"));
ASSERT_OK(db->Flush(FlushOptions()));
// Ensure background work is fully finished including listener callbacks
// before accessing listener state.
ASSERT_OK(static_cast_with_check<DBImpl>(db)->TEST_WaitForBackgroundWork());
auto l0_files = collector->GetFlushedFiles();
ASSERT_EQ(1, l0_files.size());
// L0->L1 compaction, so output should be Zlib-compressed
CompactionOptions compaction_opts;
compaction_opts.compression = CompressionType::kDisableCompressionOption;
ASSERT_OK(db->CompactFiles(compaction_opts, l0_files, 1));
ROCKSDB_NAMESPACE::TablePropertiesCollection all_tables_props;
ASSERT_OK(db->GetPropertiesOfAllTables(&all_tables_props));
// Every table present after the compaction must report Zlib.
for (const auto& name_and_table_props : all_tables_props) {
ASSERT_EQ(CompressionTypeToString(CompressionType::kZlibCompression),
name_and_table_props.second->compression_name);
}
delete db;
}
}
// Runs a manual CompactFiles() that fills in a CompactionJobInfo and checks
// the reported levels, column family, reason, compression and status.
TEST_F(CompactFilesTest, GetCompactionJobInfo) {
  Options opts;
  opts.create_if_missing = true;
  // Disable RocksDB background compaction.
  opts.compaction_style = kCompactionStyleNone;
  opts.level0_slowdown_writes_trigger = 1000;
  opts.level0_stop_writes_trigger = 1000;
  opts.write_buffer_size = 65536;
  opts.max_write_buffer_number = 2;
  opts.compression = kNoCompression;
  opts.max_compaction_bytes = 5000;

  // Listener that is asked for the flushed file names below.
  FlushedFileCollector* flushed = new FlushedFileCollector();
  opts.listeners.emplace_back(flushed);

  DB* db = nullptr;
  ASSERT_OK(DestroyDB(db_name_, opts));
  Status open_status = DB::Open(opts, db_name_, &db);
  ASSERT_OK(open_status);
  assert(db);

  // Populate the DB with 500 one-kilobyte values.
  for (int key_id = 0; key_id < 500; ++key_id) {
    ASSERT_OK(db->Put(WriteOptions(), std::to_string(key_id),
                      std::string(1000, 'a' + (key_id % 26))));
  }
  ASSERT_OK(static_cast_with_check<DBImpl>(db)->TEST_WaitForFlushMemTable());
  auto input_files = flushed->GetFlushedFiles();

  CompactionOptions compact_opts;
  compact_opts.compression = CompressionType::kLZ4Compression;
  CompactionJobInfo job_info{};
  ASSERT_OK(
      db->CompactFiles(compact_opts, input_files, 0, -1, nullptr, &job_info));

  // The job info must mirror what was requested above.
  ASSERT_EQ(job_info.base_input_level, 0);
  ASSERT_EQ(job_info.cf_id, db->DefaultColumnFamily()->GetID());
  ASSERT_EQ(job_info.cf_name, db->DefaultColumnFamily()->GetName());
  ASSERT_EQ(job_info.compaction_reason, CompactionReason::kManualCompaction);
  ASSERT_EQ(job_info.compression, CompressionType::kLZ4Compression);
  ASSERT_EQ(job_info.output_level, 0);
  ASSERT_OK(job_info.status);
  // no assertion failure
  delete db;
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack-trace handler, lets googletest
// consume its command-line flags, and returns the result of the test run.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

@ -1,678 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include <array>
#include <map>
#include <string>
#include "memtable/stl_wrappers.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/hash.h"
#include "util/kv_map.h"
#include "util/random.h"
#include "util/string_util.h"
#include "utilities/merge_operators.h"
namespace ROCKSDB_NAMESPACE {
namespace {
// Comparator currently under test. The ComparatorDBTest fixture assigns it,
// and DoRandomIteraratorTest uses it to order its in-memory reference map.
static const Comparator* kTestComparator = nullptr;
// Iterator over an in-memory stl_wrappers::KVMap. The iterator is invalid
// exactly when its position equals the map's end().
class KVIter : public Iterator {
 public:
  explicit KVIter(const stl_wrappers::KVMap* map)
      : map_(map), iter_(map_->end()) {}

  bool Valid() const override { return !(iter_ == map_->end()); }

  void SeekToFirst() override { iter_ = map_->begin(); }

  // Positions on the greatest key, or leaves the iterator invalid when the
  // map is empty.
  void SeekToLast() override {
    iter_ = map_->end();
    if (!map_->empty()) {
      --iter_;
    }
  }

  // Positions on the first entry whose key is not less than `k`.
  void Seek(const Slice& k) override {
    const std::string target = k.ToString();
    iter_ = map_->lower_bound(target);
  }

  // Positions on the last entry whose key is not greater than `k`.
  void SeekForPrev(const Slice& k) override {
    const std::string target = k.ToString();
    iter_ = map_->upper_bound(target);
    Prev();
  }

  void Next() override { ++iter_; }

  // Stepping back from the first entry invalidates the iterator.
  void Prev() override {
    if (iter_ != map_->begin()) {
      --iter_;
    } else {
      iter_ = map_->end();
    }
  }

  Slice key() const override { return iter_->first; }
  Slice value() const override { return iter_->second; }
  Status status() const override { return Status::OK(); }

 private:
  const stl_wrappers::KVMap* const map_;
  stl_wrappers::KVMap::const_iterator iter_;
};
// Asserts that both iterators agree on validity and, when valid, point at
// the same key and value.
void AssertItersEqual(Iterator* iter1, Iterator* iter2) {
  const bool first_is_valid = iter1->Valid();
  ASSERT_EQ(first_is_valid, iter2->Valid());
  if (!first_is_valid) {
    return;
  }
  const std::string expected_key = iter1->key().ToString();
  ASSERT_EQ(expected_key, iter2->key().ToString());
  const std::string expected_value = iter1->value().ToString();
  ASSERT_EQ(expected_value, iter2->value().ToString());
}
// Randomized consistency check of `db` (expected to be empty on entry)
// against an in-memory reference map ordered by kTestComparator.
//
// Phase 1 applies `num_writes` random Put/Delete operations, drawing keys
// from `source_strings`, to both the DB and the reference map. When
// `num_trigger_flush` is positive the DB is flushed every `num_trigger_flush`
// writes.
// Phase 2 performs `num_iter_ops` random operations (SeekToFirst, SeekToLast,
// Seek, Next, Prev, or a point Get) and after each one asserts that the DB
// iterator and the reference iterator agree.
//
// `rnd` supplies all randomness, so a given seed reproduces the same run.
void DoRandomIteraratorTest(DB* db, std::vector<std::string> source_strings,
                            Random* rnd, int num_writes, int num_iter_ops,
                            int num_trigger_flush) {
  stl_wrappers::KVMap map((stl_wrappers::LessOfComparator(kTestComparator)));

  for (int i = 0; i < num_writes; i++) {
    if (num_trigger_flush > 0 && i != 0 && i % num_trigger_flush == 0) {
      // A failed flush must fail the test instead of being silently dropped.
      ASSERT_OK(db->Flush(FlushOptions()));
    }

    int type = rnd->Uniform(2);
    int index = rnd->Uniform(static_cast<int>(source_strings.size()));
    auto& key = source_strings[index];
    switch (type) {
      case 0:
        // put
        map[key] = key;
        ASSERT_OK(db->Put(WriteOptions(), key, key));
        break;
      case 1:
        // delete (erasing an absent key is a no-op)
        map.erase(key);
        ASSERT_OK(db->Delete(WriteOptions(), key));
        break;
      default:
        assert(false);
    }
  }

  std::unique_ptr<Iterator> iter(db->NewIterator(ReadOptions()));
  std::unique_ptr<Iterator> result_iter(new KVIter(&map));

  // Whether the iterators were valid after the previous step; Next/Prev are
  // only attempted from a valid position.
  bool is_valid = false;
  for (int i = 0; i < num_iter_ops; i++) {
    // Random walk and make sure iter and result_iter returns the
    // same key and value
    int type = rnd->Uniform(6);
    ASSERT_OK(iter->status());
    switch (type) {
      case 0:
        // Seek to First
        iter->SeekToFirst();
        result_iter->SeekToFirst();
        break;
      case 1:
        // Seek to last
        iter->SeekToLast();
        result_iter->SeekToLast();
        break;
      case 2: {
        // Seek to random key
        auto key_idx = rnd->Uniform(static_cast<int>(source_strings.size()));
        auto key = source_strings[key_idx];
        iter->Seek(key);
        result_iter->Seek(key);
        break;
      }
      case 3:
        // Next
        if (is_valid) {
          iter->Next();
          result_iter->Next();
        } else {
          continue;
        }
        break;
      case 4:
        // Prev
        if (is_valid) {
          iter->Prev();
          result_iter->Prev();
        } else {
          continue;
        }
        break;
      default: {
        // Point lookup of a random key, checked against the reference map.
        assert(type == 5);
        auto key_idx = rnd->Uniform(static_cast<int>(source_strings.size()));
        auto key = source_strings[key_idx];
        std::string result;
        auto status = db->Get(ReadOptions(), key, &result);
        auto expected = map.find(key);
        if (expected == map.end()) {
          ASSERT_TRUE(status.IsNotFound());
        } else {
          // The lookup itself has to succeed before the value is meaningful.
          ASSERT_OK(status);
          ASSERT_EQ(expected->second, result);
        }
        break;
      }
    }
    AssertItersEqual(iter.get(), result_iter.get());
    is_valid = iter->Valid();
  }
}
// Orders keys by the floating-point number they spell out; keys that parse
// to the same number fall back to Slice::compare() so the order stays total.
class DoubleComparator : public Comparator {
 public:
  DoubleComparator() {}

  const char* Name() const override { return "DoubleComparator"; }

  // Returns a negative value, zero, or a positive value when `a` sorts
  // before, equal to, or after `b`.
  int Compare(const Slice& a, const Slice& b) const override {
#ifndef CYGWIN
    double da = std::stod(a.ToString());
    double db = std::stod(b.ToString());
#else
    // The second operand must be parsed from `b`; this branch used to parse
    // `a` twice, which made every pair of keys compare as numerically equal.
    double da = std::strtod(a.ToString().c_str(), nullptr /* endptr */);
    double db = std::strtod(b.ToString().c_str(), nullptr /* endptr */);
#endif
    if (da == db) {
      return a.compare(b);
    } else if (da > db) {
      return 1;
    } else {
      return -1;
    }
  }

  // Key shortening is intentionally disabled for this comparator.
  void FindShortestSeparator(std::string* /*start*/,
                             const Slice& /*limit*/) const override {}

  void FindShortSuccessor(std::string* /*key*/) const override {}
};
// Orders keys by a seeded 32-bit hash of their bytes; keys whose hashes
// collide are ordered by Slice::compare().
class HashComparator : public Comparator {
 public:
  HashComparator() {}

  const char* Name() const override { return "HashComparator"; }

  int Compare(const Slice& a, const Slice& b) const override {
    const uint32_t hash_of_a = Hash(a.data(), a.size(), 66);
    const uint32_t hash_of_b = Hash(b.data(), b.size(), 66);
    if (hash_of_a > hash_of_b) {
      return 1;
    }
    if (hash_of_a < hash_of_b) {
      return -1;
    }
    // Equal hashes: fall back to comparing the keys themselves.
    return a.compare(b);
  }

  // No key shortening for this comparator.
  void FindShortestSeparator(std::string* /*start*/,
                             const Slice& /*limit*/) const override {}

  void FindShortSuccessor(std::string* /*key*/) const override {}
};
class TwoStrComparator : public Comparator {
public:
TwoStrComparator() {}
const char* Name() const override { return "TwoStrComparator"; }
int Compare(const Slice& a, const Slice& b) const override {
assert(a.size() >= 2);
assert(b.size() >= 2);
size_t size_a1 = static_cast<size_t>(a[0]);
size_t size_b1 = static_cast<size_t>(b[0]);
size_t size_a2 = static_cast<size_t>(a[1]);
size_t size_b2 = static_cast<size_t>(b[1]);
assert(size_a1 + size_a2 + 2 == a.size());
assert(size_b1 + size_b2 + 2 == b.size());
Slice a1 = Slice(a.data() + 2, size_a1);
Slice b1 = Slice(b.data() + 2, size_b1);
Slice a2 = Slice(a.data() + 2 + size_a1, size_a2);
Slice b2 = Slice(b.data() + 2 + size_b1, size_b2);
if (a1 != b1) {
return a1.compare(b1);
}
return a2.compare(b2);
}
void FindShortestSeparator(std::string* /*start*/,
const Slice& /*limit*/) const override {}
void FindShortSuccessor(std::string* /*key*/) const override {}
};
} // anonymous namespace
// Fixture for the comparator tests, parameterized on the block-based table
// format_version. It owns the DB handle and the options last used to open
// it, and keeps the file-level kTestComparator in sync with the comparator
// selected through SetOwnedComparator().
class ComparatorDBTest
: public testing::Test,
virtual public ::testing::WithParamInterface<uint32_t> {
private:
std::string dbname_;
Env* env_;
DB* db_;
Options last_options_;
// Holds the comparator when the fixture was asked to own it.
std::unique_ptr<const Comparator> comparator_guard;
public:
// Resets kTestComparator to the bytewise comparator, installs a block-based
// table factory using the parameterized format version, and destroys any
// database left at dbname_.
ComparatorDBTest() : env_(Env::Default()), db_(nullptr) {
kTestComparator = BytewiseComparator();
dbname_ = test::PerThreadDBPath("comparator_db_test");
BlockBasedTableOptions toptions;
toptions.format_version = GetParam();
last_options_.table_factory.reset(
ROCKSDB_NAMESPACE::NewBlockBasedTableFactory(toptions));
EXPECT_OK(DestroyDB(dbname_, last_options_));
}
// Closes and destroys the database, then restores the bytewise comparator.
~ComparatorDBTest() override {
delete db_;
EXPECT_OK(DestroyDB(dbname_, last_options_));
kTestComparator = BytewiseComparator();
}
DB* GetDB() { return db_; }
// Makes `cmp` the comparator for kTestComparator and for subsequent opens.
// With owner == true the fixture takes ownership of `cmp`; otherwise any
// previously owned comparator is released and `cmp` is only referenced.
void SetOwnedComparator(const Comparator* cmp, bool owner = true) {
if (owner) {
comparator_guard.reset(cmp);
} else {
comparator_guard.reset();
}
kTestComparator = cmp;
last_options_.comparator = cmp;
}
// Return the current option configuration.
Options* GetOptions() { return &last_options_; }
void DestroyAndReopen() {
// Destroy using last options
Destroy();
ASSERT_OK(TryReopen());
}
// Closes the DB handle and destroys the database files.
void Destroy() {
delete db_;
db_ = nullptr;
ASSERT_OK(DestroyDB(dbname_, last_options_));
}
// Closes any open handle and opens the database with last_options_
// (create_if_missing forced on), returning the open status.
Status TryReopen() {
delete db_;
db_ = nullptr;
last_options_.create_if_missing = true;
return DB::Open(last_options_, dbname_, &db_);
}
};
// Run every ComparatorDBTest case twice: once with the default table format
// version and once with the latest one.
INSTANTIATE_TEST_CASE_P(FormatDef, ComparatorDBTest,
testing::Values(test::kDefaultFormatVersion));
INSTANTIATE_TEST_CASE_P(FormatLatest, ComparatorDBTest,
testing::Values(kLatestFormatVersion));
// Random iterator test over nine single-letter keys with the comparator the
// fixture installs by default, repeated for five seeds.
TEST_P(ComparatorDBTest, Bytewise) {
  const std::vector<std::string> candidate_keys = {"a", "b", "c", "d", "e",
                                                   "f", "g", "h", "i"};
  for (int seed = 301; seed < 306; seed++) {
    DestroyAndReopen();
    Random rnd(seed);
    DoRandomIteraratorTest(GetDB(), candidate_keys, &rnd, 8, 100, 3);
  }
}
// Random iterator test with test::SimpleSuffixReverseComparator. Keys are one
// of five random 8-character prefixes followed by a random suffix of up to
// seven characters.
TEST_P(ComparatorDBTest, SimpleSuffixReverseComparator) {
  SetOwnedComparator(new test::SimpleSuffixReverseComparator());

  for (int seed = 301; seed < 316; seed++) {
    GetOptions()->comparator = kTestComparator;
    DestroyAndReopen();
    Random rnd(seed);

    // Randomly generate 5 prefixes
    std::vector<std::string> prefixes;
    for (int p = 0; p < 5; p++) {
      prefixes.push_back(rnd.HumanReadableString(8));
    }

    // Build 20 keys; the prefix is picked first, then the suffix is drawn.
    std::vector<std::string> keys;
    for (int k = 0; k < 20; k++) {
      const int chosen_prefix = rnd.Uniform(static_cast<int>(prefixes.size()));
      const auto suffix_length = rnd.Uniform(8);
      std::string key = prefixes[chosen_prefix];
      key += rnd.HumanReadableString(suffix_length);
      keys.push_back(key);
    }

    DoRandomIteraratorTest(GetDB(), keys, &rnd, 30, 600, 66);
  }
}
// Random iterator test with test::Uint64Comparator(), which the fixture only
// references (owner == false). Keys are the raw 8 bytes of random 64-bit
// integers.
TEST_P(ComparatorDBTest, Uint64Comparator) {
  SetOwnedComparator(test::Uint64Comparator(), false /* owner */);

  for (int seed = 301; seed < 316; seed++) {
    GetOptions()->comparator = kTestComparator;
    DestroyAndReopen();
    Random rnd(seed);
    Random64 rnd64(seed);

    // Randomly generate source keys
    std::vector<std::string> keys;
    for (int k = 0; k < 100; k++) {
      uint64_t raw = rnd64.Next();
      std::string encoded(8, '\0');
      memcpy(&encoded[0], static_cast<void*>(&raw), 8);
      keys.push_back(encoded);
    }

    DoRandomIteraratorTest(GetDB(), keys, &rnd, 200, 1000, 66);
  }
}
// Random iterator test with DoubleComparator. Each key is the decimal text of
// a random 32-bit integer divided by a random power of ten (10^0 .. 10^7).
TEST_P(ComparatorDBTest, DoubleComparator) {
  SetOwnedComparator(new DoubleComparator());

  for (int seed = 301; seed < 316; seed++) {
    GetOptions()->comparator = kTestComparator;
    DestroyAndReopen();
    Random rnd(seed);

    // Randomly generate source keys
    std::vector<std::string> keys;
    for (int k = 0; k < 100; k++) {
      const uint32_t numerator = rnd.Next();
      uint32_t remaining_digits = rnd.Uniform(8);
      double divisor = 1.0;
      while (remaining_digits > 0) {
        divisor *= 10.0;
        --remaining_digits;
      }
      keys.push_back(std::to_string(numerator / divisor));
    }

    DoRandomIteraratorTest(GetDB(), keys, &rnd, 200, 1000, 66);
  }
}
// Random iterator test with HashComparator over 100 keys produced by
// test::RandomKey(&rnd, 8).
TEST_P(ComparatorDBTest, HashComparator) {
  SetOwnedComparator(new HashComparator());

  for (int seed = 301; seed < 316; seed++) {
    GetOptions()->comparator = kTestComparator;
    DestroyAndReopen();
    Random rnd(seed);

    // Randomly generate source keys
    std::vector<std::string> keys;
    int remaining = 100;
    while (remaining-- > 0) {
      keys.push_back(test::RandomKey(&rnd, 8));
    }

    DoRandomIteraratorTest(GetDB(), keys, &rnd, 200, 1000, 66);
  }
}
// Random iterator test with TwoStrComparator. Every key starts with two
// length bytes (each 0..7) followed by two random strings of those lengths.
TEST_P(ComparatorDBTest, TwoStrComparator) {
  SetOwnedComparator(new TwoStrComparator());

  for (int seed = 301; seed < 316; seed++) {
    GetOptions()->comparator = kTestComparator;
    DestroyAndReopen();
    Random rnd(seed);

    // Randomly generate source keys
    std::vector<std::string> keys;
    for (int k = 0; k < 100; k++) {
      // Both lengths are drawn before either payload is generated.
      const uint32_t first_len = rnd.Uniform(8);
      const uint32_t second_len = rnd.Uniform(8);

      std::string key;
      key.push_back(static_cast<char>(first_len));
      key.push_back(static_cast<char>(second_len));
      key += test::RandomKey(&rnd, first_len);
      key += test::RandomKey(&rnd, second_len);
      keys.push_back(key);
    }

    DoRandomIteraratorTest(GetDB(), keys, &rnd, 200, 1000, 66);
  }
}
namespace {
void VerifyNotSuccessor(const Slice& s, const Slice& t) {
auto bc = BytewiseComparator();
auto rbc = ReverseBytewiseComparator();
ASSERT_FALSE(bc->IsSameLengthImmediateSuccessor(s, t));
ASSERT_FALSE(rbc->IsSameLengthImmediateSuccessor(s, t));
ASSERT_FALSE(bc->IsSameLengthImmediateSuccessor(t, s));
ASSERT_FALSE(rbc->IsSameLengthImmediateSuccessor(t, s));
}
void VerifySuccessor(const Slice& s, const Slice& t) {
auto bc = BytewiseComparator();
auto rbc = ReverseBytewiseComparator();
ASSERT_TRUE(bc->IsSameLengthImmediateSuccessor(s, t));
ASSERT_FALSE(rbc->IsSameLengthImmediateSuccessor(s, t));
ASSERT_FALSE(bc->IsSameLengthImmediateSuccessor(t, s));
// Should be true but that increases exposure to a design bug in
// auto_prefix_mode, so currently set to FALSE
ASSERT_FALSE(rbc->IsSameLengthImmediateSuccessor(t, s));
}
} // anonymous namespace
// Table of hand-picked pairs for IsSameLengthImmediateSuccessor, covering
// length mismatches, differences away from the last byte, identical strings,
// and last-byte increments with and without carry.
TEST_P(ComparatorDBTest, IsSameLengthImmediateSuccessor) {
  // different length
  VerifyNotSuccessor(Slice("abcxy"), Slice("abcxyz"));
  VerifyNotSuccessor(Slice("abcxyz"), Slice("abcxy"));

  // not last byte different
  VerifyNotSuccessor(Slice("abc1xyz"), Slice("abc2xyz"));

  // same string
  VerifyNotSuccessor(Slice("abcxyz"), Slice("abcxyz"));

  // last byte incremented by one
  VerifySuccessor(Slice("abcxy"), Slice("abcxz"));
  VerifySuccessor(Slice("\x50\x8a\xac"), Slice("\x50\x8a\xad"));

  // Increment carrying over 0xff bytes. Lengths are explicit because the
  // successors contain NUL bytes.
  VerifySuccessor(Slice("\x50\x8a\xff", 3), Slice("\x50\x8b\x00", 3));
  VerifySuccessor(Slice("\x50\x8a\xff\xff", 4), Slice("\x50\x8b\x00\x00", 4));

  // one past the immediate successor
  VerifyNotSuccessor(Slice("\x50\x8a\xff\xff", 4),
                     Slice("\x50\x8b\x00\x01", 4));
}
// Fixed examples for FindShortestSeparator on the bytewise and
// reverse-bytewise comparators.
TEST_P(ComparatorDBTest, FindShortestSeparator) {
  {
    std::string start = "abc1xyz";
    const std::string limit = "abc3xy";
    BytewiseComparator()->FindShortestSeparator(&start, limit);
    ASSERT_EQ("abc2", start);

    start = "abc5xyztt";
    ReverseBytewiseComparator()->FindShortestSeparator(&start, limit);
    ASSERT_EQ("abc5", start);
  }
  {
    std::string start = "abc3";
    const std::string limit = "abc2xy";
    ReverseBytewiseComparator()->FindShortestSeparator(&start, limit);
    ASSERT_EQ("abc3", start);
  }
  {
    std::string start = "abc3xyz";
    const std::string limit = "abc2xy";
    ReverseBytewiseComparator()->FindShortestSeparator(&start, limit);
    ASSERT_EQ("abc3", start);
  }
  {
    std::string start = "abc3xyz";
    const std::string limit = "abc2";
    ReverseBytewiseComparator()->FindShortestSeparator(&start, limit);
    ASSERT_EQ("abc3", start);
  }
  {
    // Here only the ordering relative to the inputs is checked.
    const std::string original_start = "abc2xy";
    std::string start = original_start;
    const std::string limit = "abc2";
    ReverseBytewiseComparator()->FindShortestSeparator(&start, limit);
    ASSERT_TRUE(original_start >= start);
    ASSERT_TRUE(start > limit);
  }
}
// Randomized property test for FindShortestSeparator / FindShortSuccessor on
// the bytewise and reverse-bytewise comparators. Each of the 1000 attempts
// builds a random string s1 and a related string s2, then checks ordering and
// length bounds of the computed separators and successors.
TEST_P(ComparatorDBTest, SeparatorSuccessorRandomizeTest) {
// Char list for boundary cases.
std::array<unsigned char, 6> char_list{{0, 1, 2, 253, 254, 255}};
Random rnd(301);
for (int attempts = 0; attempts < 1000; attempts++) {
uint32_t size1 = rnd.Skewed(4);
uint32_t size2;
if (rnd.OneIn(2)) {
// size2 to be random size
size2 = rnd.Skewed(4);
} else {
// size2 is within [-2, +2] of size1, clamped at zero
int diff = static_cast<int>(rnd.Uniform(5)) - 2;
int tmp_size2 = static_cast<int>(size1) + diff;
if (tmp_size2 < 0) {
tmp_size2 = 0;
}
size2 = static_cast<uint32_t>(tmp_size2);
}
std::string s1;
std::string s2;
for (uint32_t i = 0; i < size1; i++) {
if (rnd.OneIn(2)) {
// Use random byte
s1 += static_cast<char>(rnd.Uniform(256));
} else {
// Use one byte in char_list
char c = static_cast<char>(char_list[rnd.Uniform(sizeof(char_list))]);
s1 += c;
}
}
// First set s2 to be the same as s1, and then modify s2.
s2 = s1;
s2.resize(size2);
// We start from the back of the string
if (size2 > 0) {
uint32_t pos = size2 - 1;
do {
// Positions beyond the end of s1 always get a random byte.
if (pos >= size1 || rnd.OneIn(4)) {
// For 1/4 chance, use random byte
s2[pos] = static_cast<char>(rnd.Uniform(256));
} else if (rnd.OneIn(4)) {
// In 1/4 chance, stop here.
break;
} else {
// Create a char within [-2, +2] of the matching char of s1.
int diff = static_cast<int>(rnd.Uniform(5)) - 2;
// char may be signed or unsigned based on platform.
int s1_char = static_cast<int>(static_cast<unsigned char>(s1[pos]));
int s2_char = s1_char + diff;
if (s2_char < 0) {
s2_char = 0;
}
if (s2_char > 255) {
s2_char = 255;
}
s2[pos] = static_cast<char>(s2_char);
}
// pos is unsigned, so compare before decrementing to stop after index 0.
} while (pos-- != 0);
}
// Test separators
for (int rev = 0; rev < 2; rev++) {
if (rev == 1) {
// switch s1 and s2
std::string t = s1;
s1 = s2;
s2 = t;
}
std::string separator = s1;
BytewiseComparator()->FindShortestSeparator(&separator, s2);
std::string rev_separator = s1;
ReverseBytewiseComparator()->FindShortestSeparator(&rev_separator, s2);
if (s1 == s2) {
ASSERT_EQ(s1, separator);
ASSERT_EQ(s2, rev_separator);
} else if (s1 < s2) {
// Bytewise separator lies in [s1, s2); the reverse comparator must leave
// s1 unchanged.
ASSERT_TRUE(s1 <= separator);
ASSERT_TRUE(s2 > separator);
ASSERT_LE(separator.size(), std::max(s1.size(), s2.size()));
ASSERT_EQ(s1, rev_separator);
} else {
// Mirror case: reverse separator lies in (s2, s1]; the bytewise
// comparator must leave s1 unchanged.
ASSERT_TRUE(s1 >= rev_separator);
ASSERT_TRUE(s2 < rev_separator);
ASSERT_LE(rev_separator.size(), std::max(s1.size(), s2.size()));
ASSERT_EQ(s1, separator);
}
}
// Test successors
// (s1 and s2 were swapped by the loop above, so s1 here is the modified
// string.)
std::string succ = s1;
BytewiseComparator()->FindShortSuccessor(&succ);
ASSERT_TRUE(succ >= s1);
succ = s1;
ReverseBytewiseComparator()->FindShortSuccessor(&succ);
ASSERT_TRUE(succ <= s1);
}
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack-trace handler, lets googletest
// consume its command-line flags, and returns the result of the test run.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

File diff suppressed because it is too large Load Diff

@ -1,351 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/db_impl/db_impl.h"
#include "db/db_test_util.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "table/cuckoo/cuckoo_table_factory.h"
#include "table/cuckoo/cuckoo_table_reader.h"
#include "table/meta_blocks.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/cast_util.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
// Fixture for DB-level tests that use the cuckoo table factory. Owns the DB
// handle and provides small helpers for reopening, reading, writing and
// inspecting the per-level file counts.
class CuckooTableDBTest : public testing::Test {
private:
std::string dbname_;
Env* env_;
DB* db_;
public:
// Destroys any database left at dbname_ and opens a fresh one with
// CurrentOptions().
CuckooTableDBTest() : env_(Env::Default()) {
dbname_ = test::PerThreadDBPath("cuckoo_table_db_test");
EXPECT_OK(DestroyDB(dbname_, Options()));
db_ = nullptr;
Reopen();
}
~CuckooTableDBTest() override {
delete db_;
EXPECT_OK(DestroyDB(dbname_, Options()));
}
// Baseline options: cuckoo table factory, hash-linked-list memtable, mmap
// reads enabled and concurrent memtable writes disabled.
Options CurrentOptions() {
Options options;
options.table_factory.reset(NewCuckooTableFactory());
options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true));
options.allow_mmap_reads = true;
options.create_if_missing = true;
options.allow_concurrent_memtable_write = false;
return options;
}
DBImpl* dbfull() { return static_cast_with_check<DBImpl>(db_); }
// The following util methods are copied from plain_table_db_test.
// Closes the current handle and opens the database again, using `*options`
// when given and CurrentOptions() otherwise.
void Reopen(Options* options = nullptr) {
delete db_;
db_ = nullptr;
Options opts;
if (options != nullptr) {
opts = *options;
} else {
opts = CurrentOptions();
opts.create_if_missing = true;
}
ASSERT_OK(DB::Open(opts, dbname_, &db_));
}
// Closes the DB, destroys its files and reopens it with `*options`, which
// must not be null.
void DestroyAndReopen(Options* options) {
assert(options);
ASSERT_OK(db_->Close());
delete db_;
db_ = nullptr;
ASSERT_OK(DestroyDB(dbname_, *options));
Reopen(options);
}
Status Put(const Slice& k, const Slice& v) {
return db_->Put(WriteOptions(), k, v);
}
Status Delete(const std::string& k) { return db_->Delete(WriteOptions(), k); }
// Returns the stored value, "NOT_FOUND" when the key is absent, or the
// status text for any other error.
std::string Get(const std::string& k) {
ReadOptions options;
std::string result;
Status s = db_->Get(options, k, &result);
if (s.IsNotFound()) {
result = "NOT_FOUND";
} else if (!s.ok()) {
result = s.ToString();
}
return result;
}
// Reads the "rocksdb.num-files-at-level<N>" property and parses it as an int.
int NumTableFilesAtLevel(int level) {
std::string property;
EXPECT_TRUE(db_->GetProperty(
"rocksdb.num-files-at-level" + std::to_string(level), &property));
return atoi(property.c_str());
}
// Return spread of files per level
// as a comma-separated list of counts (e.g. "0,2"), with trailing levels
// that hold no files cut off.
std::string FilesPerLevel() {
std::string result;
size_t last_non_zero_offset = 0;
for (int level = 0; level < db_->NumberLevels(); level++) {
int f = NumTableFilesAtLevel(level);
char buf[100];
snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f);
result += buf;
if (f > 0) {
// Remember where the text ends after the last level that has files.
last_non_zero_offset = result.size();
}
}
result.resize(last_non_zero_offset);
return result;
}
};
// Flushes three successive memtables (three puts, three more puts, three
// deletes) and after each flush checks the table properties, the L0 file
// count and the values visible through Get().
TEST_F(CuckooTableDBTest, Flush) {
  // Try with empty DB first.
  ASSERT_TRUE(dbfull() != nullptr);
  ASSERT_EQ("NOT_FOUND", Get("key2"));

  // Add some values to db.
  Options options = CurrentOptions();
  Reopen(&options);

  ASSERT_OK(Put("key1", "v1"));
  ASSERT_OK(Put("key2", "v2"));
  ASSERT_OK(Put("key3", "v3"));
  ASSERT_OK(dbfull()->TEST_FlushMemTable());

  TablePropertiesCollection ptc;
  // The call goes through the DB base interface; a derived-to-base pointer
  // conversion is a static_cast, not a reinterpret_cast.
  ASSERT_OK(static_cast<DB*>(dbfull())->GetPropertiesOfAllTables(&ptc));
  VerifySstUniqueIds(ptc);
  ASSERT_EQ(1U, ptc.size());
  ASSERT_EQ(3U, ptc.begin()->second->num_entries);
  ASSERT_EQ("1", FilesPerLevel());

  ASSERT_EQ("v1", Get("key1"));
  ASSERT_EQ("v2", Get("key2"));
  ASSERT_EQ("v3", Get("key3"));
  ASSERT_EQ("NOT_FOUND", Get("key4"));

  // Now add more keys and flush.
  ASSERT_OK(Put("key4", "v4"));
  ASSERT_OK(Put("key5", "v5"));
  ASSERT_OK(Put("key6", "v6"));
  ASSERT_OK(dbfull()->TEST_FlushMemTable());

  ASSERT_OK(static_cast<DB*>(dbfull())->GetPropertiesOfAllTables(&ptc));
  VerifySstUniqueIds(ptc);
  ASSERT_EQ(2U, ptc.size());
  auto row = ptc.begin();
  ASSERT_EQ(3U, row->second->num_entries);
  ASSERT_EQ(3U, (++row)->second->num_entries);
  ASSERT_EQ("2", FilesPerLevel());
  ASSERT_EQ("v1", Get("key1"));
  ASSERT_EQ("v2", Get("key2"));
  ASSERT_EQ("v3", Get("key3"));
  ASSERT_EQ("v4", Get("key4"));
  ASSERT_EQ("v5", Get("key5"));
  ASSERT_EQ("v6", Get("key6"));

  // Delete the second batch; the flush of the deletes yields a third table
  // that is also expected to report three entries.
  ASSERT_OK(Delete("key6"));
  ASSERT_OK(Delete("key5"));
  ASSERT_OK(Delete("key4"));
  ASSERT_OK(dbfull()->TEST_FlushMemTable());
  ASSERT_OK(static_cast<DB*>(dbfull())->GetPropertiesOfAllTables(&ptc));
  VerifySstUniqueIds(ptc);
  ASSERT_EQ(3U, ptc.size());
  row = ptc.begin();
  ASSERT_EQ(3U, row->second->num_entries);
  ASSERT_EQ(3U, (++row)->second->num_entries);
  ASSERT_EQ(3U, (++row)->second->num_entries);
  ASSERT_EQ("3", FilesPerLevel());
  ASSERT_EQ("v1", Get("key1"));
  ASSERT_EQ("v2", Get("key2"));
  ASSERT_EQ("v3", Get("key3"));
  ASSERT_EQ("NOT_FOUND", Get("key4"));
  ASSERT_EQ("NOT_FOUND", Get("key5"));
  ASSERT_EQ("NOT_FOUND", Get("key6"));
}
// Writes the same key twice before a flush and checks that the flushed table
// holds two entries and that the later value wins.
TEST_F(CuckooTableDBTest, FlushWithDuplicateKeys) {
  Options options = CurrentOptions();
  Reopen(&options);
  ASSERT_OK(Put("key1", "v1"));
  ASSERT_OK(Put("key2", "v2"));
  ASSERT_OK(Put("key1", "v3"));  // Duplicate
  ASSERT_OK(dbfull()->TEST_FlushMemTable());

  TablePropertiesCollection ptc;
  // The call goes through the DB base interface; a derived-to-base pointer
  // conversion is a static_cast, not a reinterpret_cast.
  ASSERT_OK(static_cast<DB*>(dbfull())->GetPropertiesOfAllTables(&ptc));
  VerifySstUniqueIds(ptc);
  ASSERT_EQ(1U, ptc.size());
  ASSERT_EQ(2U, ptc.begin()->second->num_entries);
  ASSERT_EQ("1", FilesPerLevel());
  ASSERT_EQ("v3", Get("key1"));
  ASSERT_EQ("v2", Get("key2"));
}
namespace {
// Formats `i` as "key_______" followed by the number zero-padded to at least
// six characters.
static std::string Key(int i) {
  char formatted[100];
  snprintf(formatted, sizeof(formatted), "key_______%06d", i);
  return formatted;
}

// Returns an 8-byte string holding the in-memory bytes of `i`.
static std::string Uint64Key(uint64_t i) {
  std::string encoded(8, '\0');
  memcpy(&encoded[0], static_cast<void*>(&i), 8);
  return encoded;
}
}  // namespace.
// Exercises put, delete and update of Uint64Key-encoded keys across several
// flushes with test::Uint64Comparator() installed.
TEST_F(CuckooTableDBTest, Uint64Comparator) {
  Options opts = CurrentOptions();
  opts.comparator = test::Uint64Comparator();
  DestroyAndReopen(&opts);

  const std::string k1 = Uint64Key(1);
  const std::string k2 = Uint64Key(2);
  const std::string k3 = Uint64Key(3);
  const std::string k4 = Uint64Key(4);

  ASSERT_OK(Put(k1, "v1"));
  ASSERT_OK(Put(k2, "v2"));
  ASSERT_OK(Put(k3, "v3"));
  ASSERT_OK(dbfull()->TEST_FlushMemTable());

  ASSERT_EQ("v1", Get(k1));
  ASSERT_EQ("v2", Get(k2));
  ASSERT_EQ("v3", Get(k3));
  ASSERT_EQ("NOT_FOUND", Get(k4));

  // Add more keys.
  ASSERT_OK(Delete(k2));  // Delete.
  ASSERT_OK(dbfull()->TEST_FlushMemTable());
  ASSERT_OK(Put(k3, "v0"));  // Update.
  ASSERT_OK(Put(k4, "v4"));
  ASSERT_OK(dbfull()->TEST_FlushMemTable());

  ASSERT_EQ("v1", Get(k1));
  ASSERT_EQ("NOT_FOUND", Get(k2));
  ASSERT_EQ("v0", Get(k3));
  ASSERT_EQ("v4", Get(k4));
}
// Create a big L0 file and check it compacts into multiple files in L1.
TEST_F(CuckooTableDBTest, CompactionIntoMultipleFiles) {
  Options opts = CurrentOptions();
  opts.write_buffer_size = 270 << 10;
  // Two SST files should be created, each containing 14 keys.
  // Number of buckets will be 16. Total size ~156 KB.
  opts.target_file_size_base = 160 << 10;
  Reopen(&opts);

  const int kNumKeys = 28;
  // Value for key number `n`: 10000 copies of the n-th letter after 'a'.
  const auto value_for = [](int n) {
    return std::string(10000, static_cast<char>('a' + char(n)));
  };

  // Write 28 values, each 10016 B ~ 10KB
  for (int n = 0; n < kNumKeys; ++n) {
    ASSERT_OK(Put(Key(n), value_for(n)));
  }
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  ASSERT_EQ("1", FilesPerLevel());

  ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                                        true /* disallow trivial move */));
  ASSERT_EQ("0,2", FilesPerLevel());

  // Every value must still be readable after the compaction.
  for (int n = 0; n < kNumKeys; ++n) {
    ASSERT_EQ(value_for(n), Get(Key(n)));
  }
}
// Writes the same eleven keys in two separate batches so that each batch ends
// up in its own SST file, compacts, and checks that only the second batch's
// values remain readable.
TEST_F(CuckooTableDBTest, SameKeyInsertedInTwoDifferentFilesAndCompacted) {
  const int kNumKeys = 11;

  Options opts = CurrentOptions();
  opts.write_buffer_size = 100 << 10;  // 100KB
  opts.level0_file_num_compaction_trigger = 2;
  Reopen(&opts);

  // Write 11 values, each 10016 B
  const std::string first_value(10000, 'a');
  for (int k = 0; k < kNumKeys; ++k) {
    ASSERT_OK(Put(Key(k), first_value));
  }
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  ASSERT_EQ("1", FilesPerLevel());

  // Generate one more file in level-0, and should trigger level-0 compaction
  for (int k = 0; k < kNumKeys; ++k) {
    const std::string second_value(10000, 'a' + char(k));
    ASSERT_OK(Put(Key(k), second_value));
  }
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr));
  ASSERT_EQ("0,1", FilesPerLevel());

  // The per-key values from the second batch must win.
  for (int k = 0; k < kNumKeys; ++k) {
    ASSERT_EQ(std::string(10000, 'a' + char(k)), Get(Key(k)));
  }
}
// Flushes three batches of keys, reopening the DB between batches with a
// cuckoo table factory and then two different adaptive table factory
// configurations, and finally checks that each key reads back its newest value.
TEST_F(CuckooTableDBTest, AdaptiveTable) {
Options options = CurrentOptions();
// Ensure options compatible with PlainTable
options.prefix_extractor.reset(NewCappedPrefixTransform(8));
// Write some keys using cuckoo table.
options.table_factory.reset(NewCuckooTableFactory());
Reopen(&options);
ASSERT_OK(Put("key1", "v1"));
ASSERT_OK(Put("key2", "v2"));
ASSERT_OK(Put("key3", "v3"));
ASSERT_OK(dbfull()->TEST_FlushMemTable());
// Write some keys using plain table.
std::shared_ptr<TableFactory> block_based_factory(
NewBlockBasedTableFactory());
std::shared_ptr<TableFactory> plain_table_factory(NewPlainTableFactory());
std::shared_ptr<TableFactory> cuckoo_table_factory(NewCuckooTableFactory());
// The DB already exists from the first Reopen above.
options.create_if_missing = false;
// NOTE(review): judging by the surrounding comments, the first argument
// selects the format used for newly written tables — confirm against the
// NewAdaptiveTableFactory documentation.
options.table_factory.reset(
NewAdaptiveTableFactory(plain_table_factory, block_based_factory,
plain_table_factory, cuckoo_table_factory));
Reopen(&options);
ASSERT_OK(Put("key4", "v4"));
ASSERT_OK(Put("key1", "v5"));
ASSERT_OK(dbfull()->TEST_FlushMemTable());
// Write some keys using block based table.
options.table_factory.reset(
NewAdaptiveTableFactory(block_based_factory, block_based_factory,
plain_table_factory, cuckoo_table_factory));
Reopen(&options);
ASSERT_OK(Put("key5", "v6"));
ASSERT_OK(Put("key2", "v7"));
ASSERT_OK(dbfull()->TEST_FlushMemTable());
// key1 and key2 were overwritten in later batches; key3..key5 were written
// once each.
ASSERT_EQ("v5", Get("key1"));
ASSERT_EQ("v7", Get("key2"));
ASSERT_EQ("v3", Get("key3"));
ASSERT_EQ("v4", Get("key4"));
ASSERT_EQ("v6", Get("key5"));
}
} // namespace ROCKSDB_NAMESPACE
// Test entry point. The suite only runs when port::kLittleEndian is true;
// otherwise a skip notice is printed and the process still exits with 0.
int main(int argc, char** argv) {
  if (!ROCKSDB_NAMESPACE::port::kLittleEndian) {
    fprintf(stderr, "SKIPPED as Cuckoo table doesn't support Big Endian\n");
    return 0;
  }
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,499 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
// The introduction of SyncPoint effectively disabled building and running this
// test in Release builds, which is a pity because it is a good test.
#include "db/db_test_util.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "rocksdb/env.h"
#include "util/random.h"
namespace ROCKSDB_NAMESPACE {
// Fixture for the dynamic-level tests in this file. It only forwards a test
// name and env_do_fsync=true to DBTestBase.
class DBTestDynamicLevel : public DBTestBase {
public:
DBTestDynamicLevel()
: DBTestBase("db_dynamic_level_test", /*env_do_fsync=*/true) {}
};
// Loads three key ranges (deleting part of the middle one) with
// level_compaction_dynamic_level_bytes enabled, verifies the data before and
// after a reopen, then runs a full CompactRange and expects all files to sit in
// the last level. Repeated for shuffled/ordered inserts and for 1 and 3
// background compactions. Returns early when Snappy or LZ4 is unavailable.
TEST_F(DBTestDynamicLevel, DynamicLevelMaxBytesBase) {
if (!Snappy_Supported() || !LZ4_Supported()) {
return;
}
// Use InMemoryEnv, or it would be too slow.
std::unique_ptr<Env> env(NewMemEnv(env_));
const int kNKeys = 1000;
int keys[kNKeys];
// Checks that ranges [0, kNKeys) and [2*kNKeys, 3*kNKeys) are fully present
// and that, in the middle range, exactly the keys at keys[0 .. kNKeys/10) are
// gone. NOTE(review): ASSERT_* inside a lambda only returns from the lambda,
// so a failure here does not stop the enclosing test body.
auto verify_func = [&]() {
for (int i = 0; i < kNKeys; i++) {
ASSERT_NE("NOT_FOUND", Get(Key(i)));
ASSERT_NE("NOT_FOUND", Get(Key(kNKeys * 2 + i)));
if (i < kNKeys / 10) {
ASSERT_EQ("NOT_FOUND", Get(Key(kNKeys + keys[i])));
} else {
ASSERT_NE("NOT_FOUND", Get(Key(kNKeys + keys[i])));
}
}
};
Random rnd(301);
// ordered_insert == 0 shuffles the insertion order; 1 keeps it sequential.
for (int ordered_insert = 0; ordered_insert <= 1; ordered_insert++) {
for (int i = 0; i < kNKeys; i++) {
keys[i] = i;
}
if (ordered_insert == 0) {
RandomShuffle(std::begin(keys), std::end(keys), rnd.Next());
}
// Runs with max_background_compactions = 1 and 3.
for (int max_background_compactions = 1; max_background_compactions < 4;
max_background_compactions += 2) {
Options options;
options.env = env.get();
options.create_if_missing = true;
options.write_buffer_size = 2048;
options.max_write_buffer_number = 2;
options.level0_file_num_compaction_trigger = 2;
options.level0_slowdown_writes_trigger = 2;
options.level0_stop_writes_trigger = 2;
options.target_file_size_base = 2048;
options.level_compaction_dynamic_level_bytes = true;
options.max_bytes_for_level_base = 10240;
options.max_bytes_for_level_multiplier = 4;
options.max_background_compactions = max_background_compactions;
options.num_levels = 5;
options.compression_per_level.resize(3);
options.compression_per_level[0] = kNoCompression;
options.compression_per_level[1] = kLZ4Compression;
options.compression_per_level[2] = kSnappyCompression;
// NOTE(review): this overwrites the MemEnv assigned to options.env above, so
// the in-memory env created at the top appears to be unused — confirm whether
// that is intentional.
options.env = env_;
DestroyAndReopen(options);
for (int i = 0; i < kNKeys; i++) {
int key = keys[i];
ASSERT_OK(Put(Key(kNKeys + key), rnd.RandomString(102)));
ASSERT_OK(Put(Key(key), rnd.RandomString(102)));
ASSERT_OK(Put(Key(kNKeys * 2 + key), rnd.RandomString(102)));
// i / 10 only reaches indices 0 .. kNKeys/10 - 1, matching verify_func.
ASSERT_OK(Delete(Key(kNKeys + keys[i / 10])));
env_->SleepForMicroseconds(5000);
}
uint64_t int_prop;
ASSERT_TRUE(db_->GetIntProperty("rocksdb.background-errors", &int_prop));
ASSERT_EQ(0U, int_prop);
// Verify DB
// Second pass (j == 1) runs after the DB has been reopened.
for (int j = 0; j < 2; j++) {
verify_func();
if (j == 0) {
Reopen(options);
}
}
// Test compact range works
ASSERT_OK(
dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
// All data should be in the last level.
ColumnFamilyMetaData cf_meta;
db_->GetColumnFamilyMetaData(&cf_meta);
ASSERT_EQ(5U, cf_meta.levels.size());
for (int i = 0; i < 4; i++) {
ASSERT_EQ(0U, cf_meta.levels[i].files.size());
}
ASSERT_GT(cf_meta.levels[4U].files.size(), 0U);
verify_func();
Close();
}
}
env_->SetBackgroundThreads(1, Env::LOW);
env_->SetBackgroundThreads(1, Env::HIGH);
}
// Test specific cases in dynamic max bytes
// Writes data in stages with auto compactions toggled off and on, and after
// each stage checks the "rocksdb.base-level" property: 4, 4, 3, 3, 2 and
// finally 1. The last two stages use SyncPoint dependencies to order the
// flush relative to the running compaction.
TEST_F(DBTestDynamicLevel, DynamicLevelMaxBytesBase2) {
Random rnd(301);
int kMaxKey = 1000000;
Options options = CurrentOptions();
options.compression = kNoCompression;
options.create_if_missing = true;
options.write_buffer_size = 20480;
options.max_write_buffer_number = 2;
options.level0_file_num_compaction_trigger = 2;
options.level0_slowdown_writes_trigger = 9999;
options.level0_stop_writes_trigger = 9999;
options.target_file_size_base = 9102;
options.level_compaction_dynamic_level_bytes = true;
options.max_bytes_for_level_base = 40960;
options.max_bytes_for_level_multiplier = 4;
options.max_background_compactions = 2;
options.num_levels = 5;
options.max_compaction_bytes = 0; // Force not expanding in compactions
options.db_host_id = ""; // Setting this messes up the file size calculation
BlockBasedTableOptions table_options;
table_options.block_size = 1024;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
DestroyAndReopen(options);
ASSERT_OK(dbfull()->SetOptions({
{"disable_auto_compactions", "true"},
}));
uint64_t int_prop;
std::string str_prop;
// Initial base level is the last level
ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop));
ASSERT_EQ(4U, int_prop);
// Put about 28K to L0
for (int i = 0; i < 70; i++) {
ASSERT_OK(Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))),
rnd.RandomString(380)));
}
ASSERT_OK(dbfull()->SetOptions({
{"disable_auto_compactions", "false"},
}));
ASSERT_OK(Flush());
ASSERT_OK(dbfull()->TEST_WaitForCompact());
ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop));
ASSERT_EQ(4U, int_prop);
// Insert extra about 28K to L0. After they are compacted to L4, the base
// level should be changed to L3.
ASSERT_OK(dbfull()->SetOptions({
{"disable_auto_compactions", "true"},
}));
for (int i = 0; i < 70; i++) {
ASSERT_OK(Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))),
rnd.RandomString(380)));
}
ASSERT_OK(dbfull()->SetOptions({
{"disable_auto_compactions", "false"},
}));
ASSERT_OK(Flush());
ASSERT_OK(dbfull()->TEST_WaitForCompact());
ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop));
ASSERT_EQ(3U, int_prop);
// Levels 1 and 2 sit above the base level and must stay empty.
ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level1", &str_prop));
ASSERT_EQ("0", str_prop);
ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level2", &str_prop));
ASSERT_EQ("0", str_prop);
// Write even more data while leaving the base level at L3.
ASSERT_OK(dbfull()->SetOptions({
{"disable_auto_compactions", "true"},
}));
// Write about 40K more
for (int i = 0; i < 100; i++) {
ASSERT_OK(Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))),
rnd.RandomString(380)));
}
ASSERT_OK(dbfull()->SetOptions({
{"disable_auto_compactions", "false"},
}));
ASSERT_OK(Flush());
ASSERT_OK(dbfull()->TEST_WaitForCompact());
ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop));
ASSERT_EQ(3U, int_prop);
// Fill up L0, and then run an (auto) L0->Lmax compaction to raise the base
// level to 2.
ASSERT_OK(dbfull()->SetOptions({
{"disable_auto_compactions", "true"},
}));
// Write about 650K more.
// Each file is about 11KB, with 9KB of data.
for (int i = 0; i < 1300; i++) {
ASSERT_OK(Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))),
rnd.RandomString(380)));
}
// Make sure that the compaction starts before the last bit of data is
// flushed, so that the base level isn't raised to L1.
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
{"CompactionJob::Run():Start", "DynamicLevelMaxBytesBase2:0"},
});
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
ASSERT_OK(dbfull()->SetOptions({
{"disable_auto_compactions", "false"},
}));
// Per the dependency loaded above, this point waits for
// "CompactionJob::Run():Start" before the flush below is issued.
TEST_SYNC_POINT("DynamicLevelMaxBytesBase2:0");
ASSERT_OK(Flush());
ASSERT_OK(dbfull()->TEST_WaitForCompact());
ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop));
ASSERT_EQ(2U, int_prop);
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
// Write more data until the base level changes to L1. There will be
// a manual compaction going on at the same time.
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
{"CompactionJob::Run():Start", "DynamicLevelMaxBytesBase2:1"},
{"DynamicLevelMaxBytesBase2:2", "CompactionJob::Run():End"},
{"DynamicLevelMaxBytesBase2:compact_range_finish",
"FlushJob::WriteLevel0Table"},
});
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
// The manual compaction runs on its own thread and is joined after the final
// Flush() below.
ROCKSDB_NAMESPACE::port::Thread thread([this] {
TEST_SYNC_POINT("DynamicLevelMaxBytesBase2:compact_range_start");
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
TEST_SYNC_POINT("DynamicLevelMaxBytesBase2:compact_range_finish");
});
TEST_SYNC_POINT("DynamicLevelMaxBytesBase2:1");
for (int i = 0; i < 2; i++) {
ASSERT_OK(Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))),
rnd.RandomString(380)));
}
TEST_SYNC_POINT("DynamicLevelMaxBytesBase2:2");
ASSERT_OK(Flush());
thread.join();
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop));
ASSERT_EQ(1U, int_prop);
}
// Test specific cases in dynamic max bytes
// Runs CompactRange on an empty DB and again after loading data, recording the
// output level of every compaction reported at the
// "CompactionPicker::CompactRange:Return" sync point; expects levels 3 and 4
// as outputs and the base level to remain 3.
TEST_F(DBTestDynamicLevel, DynamicLevelMaxBytesCompactRange) {
Random rnd(301);
int kMaxKey = 1000000;
Options options = CurrentOptions();
options.create_if_missing = true;
options.write_buffer_size = 2048;
options.max_write_buffer_number = 2;
options.level0_file_num_compaction_trigger = 2;
options.level0_slowdown_writes_trigger = 9999;
options.level0_stop_writes_trigger = 9999;
options.target_file_size_base = 2;
options.level_compaction_dynamic_level_bytes = true;
options.max_bytes_for_level_base = 10240;
options.max_bytes_for_level_multiplier = 4;
options.max_background_compactions = 1;
const int kNumLevels = 5;
options.num_levels = kNumLevels;
options.max_compaction_bytes = 1; // Force not expanding in compactions
BlockBasedTableOptions table_options;
table_options.block_size = 1024;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
DestroyAndReopen(options);
// Compact against empty DB
ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
uint64_t int_prop;
std::string str_prop;
// Initial base level is the last level
ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop));
ASSERT_EQ(4U, int_prop);
// Put about 7K to L0
for (int i = 0; i < 140; i++) {
ASSERT_OK(
Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))), rnd.RandomString(80)));
}
ASSERT_OK(Flush());
ASSERT_OK(dbfull()->TEST_WaitForCompact());
if (NumTableFilesAtLevel(0) == 0) {
// Make sure level 0 is not empty
ASSERT_OK(
Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))), rnd.RandomString(80)));
ASSERT_OK(Flush());
}
ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop));
ASSERT_EQ(3U, int_prop);
ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level1", &str_prop));
ASSERT_EQ("0", str_prop);
ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level2", &str_prop));
ASSERT_EQ("0", str_prop);
// Reset any sync-point state before installing this test's callback.
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
std::set<int> output_levels;
// The callback argument is treated as a Compaction*; its output level is
// collected into output_levels.
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"CompactionPicker::CompactRange:Return", [&](void* arg) {
Compaction* compaction = reinterpret_cast<Compaction*>(arg);
output_levels.insert(compaction->output_level());
});
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
// Exactly two distinct output levels were observed: 3 and 4.
ASSERT_EQ(output_levels.size(), 2);
ASSERT_TRUE(output_levels.find(3) != output_levels.end());
ASSERT_TRUE(output_levels.find(4) != output_levels.end());
ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level0", &str_prop));
ASSERT_EQ("0", str_prop);
ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level3", &str_prop));
ASSERT_EQ("0", str_prop);
// Base level is still level 3.
ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop));
ASSERT_EQ(3U, int_prop);
}
// Inserts 3000 sequential keys with dynamic level bytes enabled and expects
// that no compaction hits the "DBImpl::BackgroundCompaction:NonTrivial" sync
// point, then reads every key back and checks the index encoded in its value.
TEST_F(DBTestDynamicLevel, DynamicLevelMaxBytesBaseInc) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.write_buffer_size = 2048;
  options.max_write_buffer_number = 2;
  options.level0_file_num_compaction_trigger = 2;
  options.level0_slowdown_writes_trigger = 2;
  options.level0_stop_writes_trigger = 2;
  options.target_file_size_base = 2048;
  options.level_compaction_dynamic_level_bytes = true;
  options.max_bytes_for_level_base = 10240;
  options.max_bytes_for_level_multiplier = 4;
  options.max_background_compactions = 2;
  options.num_levels = 5;
  options.max_compaction_bytes = 100000000;

  DestroyAndReopen(options);

  // Counts how often the non-trivial compaction sync point fires.
  int non_trivial = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:NonTrivial",
      [&](void* /*arg*/) { non_trivial++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Random rnd(301);
  const int total_keys = 3000;
  const int random_part_size = 100;
  // Each value is random_part_size random bytes followed by the key index
  // appended with PutFixed32.
  for (int i = 0; i < total_keys; i++) {
    std::string value = rnd.RandomString(random_part_size);
    PutFixed32(&value, static_cast<uint32_t>(i));
    ASSERT_OK(Put(Key(i), value));
  }
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  // The callback captures `non_trivial` by reference; drop it so it cannot
  // outlive this scope (matches the other tests in this file).
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();

  ASSERT_EQ(non_trivial, 0);

  for (int i = 0; i < total_keys; i++) {
    std::string value = Get(Key(i));
    // Guard the decode below: a short result (e.g. "NOT_FOUND") would
    // otherwise be read past its end.
    ASSERT_EQ(static_cast<size_t>(random_part_size) + sizeof(uint32_t),
              value.size());
    ASSERT_EQ(DecodeFixed32(value.c_str() + random_part_size),
              static_cast<uint32_t>(i));
  }

  env_->SetBackgroundThreads(1, Env::LOW);
  env_->SetBackgroundThreads(1, Env::HIGH);
}
// Populates a DB with level_compaction_dynamic_level_bytes = false, reopens it
// with the option set to true, runs a manual compaction on another thread
// while the main thread keeps verifying reads, then writes more data with auto
// compactions re-enabled and checks that levels 1 and 2 hold no files.
// The DISABLED_ prefix keeps GoogleTest from running it by default.
TEST_F(DBTestDynamicLevel, DISABLED_MigrateToDynamicLevelMaxBytesBase) {
Random rnd(301);
const int kMaxKey = 2000;
Options options;
options.create_if_missing = true;
options.write_buffer_size = 2048;
options.max_write_buffer_number = 8;
options.level0_file_num_compaction_trigger = 4;
options.level0_slowdown_writes_trigger = 4;
options.level0_stop_writes_trigger = 8;
options.target_file_size_base = 2048;
options.level_compaction_dynamic_level_bytes = false;
options.max_bytes_for_level_base = 10240;
options.max_bytes_for_level_multiplier = 4;
options.num_levels = 8;
DestroyAndReopen(options);
// Checks keys [kMaxKey, kMaxKey + num_keys) are present and that, among keys
// [0, num_keys), exactly the first tenth is deleted. NOTE(review): ASSERT_*
// inside a lambda only returns from the lambda, not from the test body.
auto verify_func = [&](int num_keys, bool if_sleep) {
for (int i = 0; i < num_keys; i++) {
ASSERT_NE("NOT_FOUND", Get(Key(kMaxKey + i)));
if (i < num_keys / 10) {
ASSERT_EQ("NOT_FOUND", Get(Key(i)));
} else {
ASSERT_NE("NOT_FOUND", Get(Key(i)));
}
if (if_sleep && i % 1000 == 0) {
// Without it, valgrind may choose not to give another
// thread a chance to run before finishing the function,
// causing the test to be extremely slow.
env_->SleepForMicroseconds(1);
}
}
};
int total_keys = 1000;
for (int i = 0; i < total_keys; i++) {
ASSERT_OK(Put(Key(i), rnd.RandomString(102)));
ASSERT_OK(Put(Key(kMaxKey + i), rnd.RandomString(102)));
ASSERT_OK(Delete(Key(i / 10)));
}
verify_func(total_keys, false);
ASSERT_OK(dbfull()->TEST_WaitForCompact());
// Switch the existing DB over to dynamic level bytes, with auto compactions
// off until the manual compaction below has finished.
options.level_compaction_dynamic_level_bytes = true;
options.disable_auto_compactions = true;
Reopen(options);
verify_func(total_keys, false);
std::atomic_bool compaction_finished;
compaction_finished = false;
// Issue manual compaction in one thread and still verify DB state
// in main thread.
ROCKSDB_NAMESPACE::port::Thread t([&]() {
CompactRangeOptions compact_options;
compact_options.change_level = true;
compact_options.target_level = options.num_levels - 1;
ASSERT_OK(dbfull()->CompactRange(compact_options, nullptr, nullptr));
compaction_finished.store(true);
});
// Keep verifying until the compaction thread signals completion; the body
// runs at least once.
do {
verify_func(total_keys, true);
} while (!compaction_finished.load());
t.join();
ASSERT_OK(dbfull()->SetOptions({
{"disable_auto_compactions", "false"},
}));
int total_keys2 = 2000;
for (int i = total_keys; i < total_keys2; i++) {
ASSERT_OK(Put(Key(i), rnd.RandomString(102)));
ASSERT_OK(Put(Key(kMaxKey + i), rnd.RandomString(102)));
ASSERT_OK(Delete(Key(i / 10)));
}
verify_func(total_keys2, false);
ASSERT_OK(dbfull()->TEST_WaitForCompact());
verify_func(total_keys2, false);
// Base level is not level 1
ASSERT_EQ(NumTableFilesAtLevel(1), 0);
ASSERT_EQ(NumTableFilesAtLevel(2), 0);
}
} // namespace ROCKSDB_NAMESPACE
// Test entry point: installs the stack trace handler, initializes GoogleTest
// with the command-line arguments, and returns the result of RUN_ALL_TESTS().
int main(int argc, char** argv) {
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

@ -1,126 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
#include "db/db_test_util.h"
#include "port/stack_trace.h"
#include "rocksdb/perf_context.h"
#include "test_util/sync_point.h"
#include <iostream>
#include <string>
namespace ROCKSDB_NAMESPACE {
// Fixture for the encryption tests; forwards a test name and
// env_do_fsync=true to DBTestBase.
class DBEncryptionTest : public DBTestBase {
 public:
  DBEncryptionTest()
      : DBTestBase("db_encryption_test", /*env_do_fsync=*/true) {}

  // Returns the Env to use for raw file access: the target wrapped by
  // encrypted_env_ when that is set, otherwise env_.
  Env* GetTargetEnv() {
    if (encrypted_env_ == nullptr) {
      return env_;
    }
    EnvWrapper* const wrapper = static_cast<EnvWrapper*>(encrypted_env_);
    return wrapper->target();
  }
};
// Writes two key/value pairs, closes the DB, and scans the raw contents of
// every file in the DB directory (except LOCK) via the target Env for the
// plaintext keys and values. With an encrypted env no plaintext may be found;
// otherwise at least four matches are expected.
TEST_F(DBEncryptionTest, CheckEncrypted) {
  ASSERT_OK(Put("foo567", "v1.fetdq"));
  ASSERT_OK(Put("bar123", "v2.dfgkjdfghsd"));
  Close();

  // Plaintext fragments to look for; "dfgk" is a substring of the second
  // value. Each fragment found in a file counts as one hit.
  const char* const kNeedles[] = {"foo567", "v1.fetdq", "bar123",
                                  "v2.dfgkjdfghsd", "dfgk"};

  // Open all files and look for the values we've put in there.
  // They should not be found if encrypted, otherwise
  // they should be found.
  std::vector<std::string> fileNames;
  auto status = env_->GetChildren(dbname_, &fileNames);
  ASSERT_OK(status);

  Env* target = GetTargetEnv();
  int hits = 0;
  for (const auto& fileName : fileNames) {
    if (fileName == "LOCK") {
      continue;
    }
    auto filePath = dbname_ + "/" + fileName;
    std::unique_ptr<SequentialFile> seqFile;
    auto envOptions = EnvOptions(CurrentOptions());
    status = target->NewSequentialFile(filePath, &seqFile, envOptions);
    ASSERT_OK(status);

    uint64_t fileSize;
    status = target->GetFileSize(filePath, &fileSize);
    ASSERT_OK(status);

    // The buffer must really be fileSize bytes long: reserve() only grows the
    // capacity, and writing past size() through data() is undefined behavior.
    const size_t bytesToRead = static_cast<size_t>(fileSize);
    std::string scratch(bytesToRead, '\0');
    Slice data;
    status = seqFile->Read(bytesToRead, &data, &scratch[0]);
    ASSERT_OK(status);

    // Materialize the contents once instead of once per needle.
    const std::string contents = data.ToString();
    for (const char* needle : kNeedles) {
      if (contents.find(needle) != std::string::npos) {
        hits++;
      }
    }
  }

  if (encrypted_env_) {
    ASSERT_EQ(hits, 0);
  } else {
    ASSERT_GE(hits, 4);
  }
}
// Creates an empty file through the target Env and checks that a 16-byte
// sequential read from it succeeds and yields no data.
TEST_F(DBEncryptionTest, ReadEmptyFile) {
  auto defaultEnv = GetTargetEnv();

  // create empty file for reading it back in later
  auto envOptions = EnvOptions(CurrentOptions());
  auto filePath = dbname_ + "/empty.empty";

  Status status;
  {
    // The writable file is closed when it goes out of scope, leaving an empty
    // file behind.
    std::unique_ptr<WritableFile> writableFile;
    status = defaultEnv->NewWritableFile(filePath, &writableFile, envOptions);
    ASSERT_OK(status);
  }

  std::unique_ptr<SequentialFile> seqFile;
  status = defaultEnv->NewSequentialFile(filePath, &seqFile, envOptions);
  ASSERT_OK(status);

  // Provide a scratch buffer that can actually hold the requested bytes; an
  // empty std::string's data() is not a valid 16-byte destination.
  const size_t kReadSize = 16;
  std::string scratch(kReadSize, '\0');
  Slice data;
  // reading back 16 bytes from the empty file shouldn't trigger an assertion.
  // it should just work and return an empty string
  status = seqFile->Read(kReadSize, &data, &scratch[0]);
  ASSERT_OK(status);

  ASSERT_TRUE(data.empty());
}
} // namespace ROCKSDB_NAMESPACE
// Test entry point: installs the stack trace handler, initializes GoogleTest
// with the command-line arguments, and returns the result of RUN_ALL_TESTS().
int main(int argc, char** argv) {
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

File diff suppressed because it is too large Load Diff

@ -1,262 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "db/db_test_util.h"
#include "port/stack_trace.h"
namespace ROCKSDB_NAMESPACE {
// Fixture for the in-place update tests in this file. It only forwards a test
// name and env_do_fsync=true to DBTestBase.
// NOTE(review): the tests refer to DBTestInPlaceUpdate::updateInPlace* callbacks
// that are not declared here — presumably inherited from DBTestBase; confirm.
class DBTestInPlaceUpdate : public DBTestBase {
public:
DBTestInPlaceUpdate()
: DBTestBase("db_inplace_update_test", /*env_do_fsync=*/true) {}
};
// With inplace_update_support on, overwrites one key in column family 1 using
// DummyString(i, 'a') for i = 10 down to 1 and expects a single entry for the
// key. Repeats for as long as ChangeCompactOptions() returns true.
TEST_F(DBTestInPlaceUpdate, InPlaceUpdate) {
  do {
    Options opts = CurrentOptions();
    opts.create_if_missing = true;
    opts.inplace_update_support = true;
    opts.env = env_;
    opts.write_buffer_size = 100000;
    opts.allow_concurrent_memtable_write = false;
    Reopen(opts);
    CreateAndReopenWithCF({"pikachu"}, opts);

    // Update key with values of smaller size
    const int kNumValues = 10;
    int remaining = kNumValues;
    while (remaining > 0) {
      const std::string value = DummyString(remaining, 'a');
      ASSERT_OK(Put(1, "key", value));
      ASSERT_EQ(value, Get(1, "key"));
      --remaining;
    }

    // Only 1 instance for that key.
    validateNumberOfEntries(1, 1);
  } while (ChangeCompactOptions());
}
// With inplace_update_support on, overwrites one key in column family 1 using
// DummyString(i, 'a') for i = 0 up to 9 and expects all ten writes to remain as
// separate entries. Repeats while ChangeCompactOptions() returns true.
TEST_F(DBTestInPlaceUpdate, InPlaceUpdateLargeNewValue) {
  do {
    Options opts = CurrentOptions();
    opts.create_if_missing = true;
    opts.inplace_update_support = true;
    opts.env = env_;
    opts.write_buffer_size = 100000;
    opts.allow_concurrent_memtable_write = false;
    Reopen(opts);
    CreateAndReopenWithCF({"pikachu"}, opts);

    // Update key with values of larger size
    const int kNumValues = 10;
    int step = 0;
    while (step < kNumValues) {
      const std::string value = DummyString(step, 'a');
      ASSERT_OK(Put(1, "key", value));
      ASSERT_EQ(value, Get(1, "key"));
      ++step;
    }

    // All 10 updates exist in the internal iterator
    validateNumberOfEntries(kNumValues, 1);
  } while (ChangeCompactOptions());
}
// Wide-column variant: writes an entity with a single "attr" column whose value
// is DummyString(i, 'a') for i = 10 down to 1 and expects one entry for the
// key. Repeats while ChangeCompactOptions() returns true.
TEST_F(DBTestInPlaceUpdate, InPlaceUpdateEntitySmallerNewValue) {
  do {
    Options opts = CurrentOptions();
    opts.create_if_missing = true;
    opts.inplace_update_support = true;
    opts.env = env_;
    opts.allow_concurrent_memtable_write = false;
    Reopen(opts);
    CreateAndReopenWithCF({"pikachu"}, opts);

    // Update key with values of smaller size
    constexpr char key[] = "key";
    constexpr int kNumValues = 10;
    int remaining = kNumValues;
    while (remaining > 0) {
      const std::string attr_value = DummyString(remaining, 'a');
      WideColumns columns{{"attr", attr_value}};
      ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[1], key, columns));
      // TODO: use Get to check entity once it's supported
      --remaining;
    }

    // Only 1 instance for that key.
    validateNumberOfEntries(1, 1);
  } while (ChangeCompactOptions());
}
// Wide-column variant: writes an entity with a single "attr" column whose value
// is DummyString(i, 'a') for i = 0 up to 9 and expects all ten writes to remain
// as separate entries. Repeats while ChangeCompactOptions() returns true.
TEST_F(DBTestInPlaceUpdate, InPlaceUpdateEntityLargerNewValue) {
  do {
    Options opts = CurrentOptions();
    opts.create_if_missing = true;
    opts.inplace_update_support = true;
    opts.env = env_;
    opts.allow_concurrent_memtable_write = false;
    Reopen(opts);
    CreateAndReopenWithCF({"pikachu"}, opts);

    // Update key with values of larger size
    constexpr char key[] = "key";
    constexpr int kNumValues = 10;
    int step = 0;
    while (step < kNumValues) {
      const std::string attr_value = DummyString(step, 'a');
      WideColumns columns{{"attr", attr_value}};
      ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[1], key, columns));
      // TODO: use Get to check entity once it's supported
      ++step;
    }

    // All 10 updates exist in the internal iterator
    validateNumberOfEntries(kNumValues, 1);
  } while (ChangeCompactOptions());
}
// Installs updateInPlaceSmallerSize as the in-place callback, then checks the
// values read back after an initial Put and after each of ten further Puts, and
// that a single entry remains. Repeats while ChangeCompactOptions() is true.
TEST_F(DBTestInPlaceUpdate, InPlaceUpdateCallbackSmallerSize) {
  do {
    Options opts = CurrentOptions();
    opts.create_if_missing = true;
    opts.inplace_update_support = true;
    opts.env = env_;
    opts.write_buffer_size = 100000;
    opts.inplace_callback =
        ROCKSDB_NAMESPACE::DBTestInPlaceUpdate::updateInPlaceSmallerSize;
    opts.allow_concurrent_memtable_write = false;
    Reopen(opts);
    CreateAndReopenWithCF({"pikachu"}, opts);

    // Update key with values of smaller size
    const int kNumValues = 10;
    // The first Put reads back as DummyString(kNumValues, 'c').
    ASSERT_OK(Put(1, "key", DummyString(kNumValues, 'a')));
    ASSERT_EQ(DummyString(kNumValues, 'c'), Get(1, "key"));

    // Each later Put of DummyString(n, 'a') reads back as
    // DummyString(n - 1, 'b').
    int remaining = kNumValues;
    while (remaining > 0) {
      ASSERT_OK(Put(1, "key", DummyString(remaining, 'a')));
      ASSERT_EQ(DummyString(remaining - 1, 'b'), Get(1, "key"));
      --remaining;
    }

    // Only 1 instance for that key.
    validateNumberOfEntries(1, 1);
  } while (ChangeCompactOptions());
}
// Installs updateInPlaceSmallerVarintSize as the in-place callback, then checks
// the values read back after an initial Put and after each of 265 further Puts,
// and that a single entry remains. Repeats while ChangeCompactOptions() is true.
TEST_F(DBTestInPlaceUpdate, InPlaceUpdateCallbackSmallerVarintSize) {
  do {
    Options opts = CurrentOptions();
    opts.create_if_missing = true;
    opts.inplace_update_support = true;
    opts.env = env_;
    opts.write_buffer_size = 100000;
    opts.inplace_callback =
        ROCKSDB_NAMESPACE::DBTestInPlaceUpdate::updateInPlaceSmallerVarintSize;
    opts.allow_concurrent_memtable_write = false;
    Reopen(opts);
    CreateAndReopenWithCF({"pikachu"}, opts);

    // Update key with values of smaller varint size
    const int kNumValues = 265;
    // The first Put reads back as DummyString(kNumValues, 'c').
    ASSERT_OK(Put(1, "key", DummyString(kNumValues, 'a')));
    ASSERT_EQ(DummyString(kNumValues, 'c'), Get(1, "key"));

    // Every later Put reads back as DummyString(1, 'b').
    int remaining = kNumValues;
    while (remaining > 0) {
      ASSERT_OK(Put(1, "key", DummyString(remaining, 'a')));
      ASSERT_EQ(DummyString(1, 'b'), Get(1, "key"));
      --remaining;
    }

    // Only 1 instance for that key.
    validateNumberOfEntries(1, 1);
  } while (ChangeCompactOptions());
}
// Installs updateInPlaceLargerSize as the in-place callback, writes
// DummyString(i, 'a') for i = 0 up to 9, checks each read returns
// DummyString(i, 'c'), and expects ten separate entries. Repeats while
// ChangeCompactOptions() returns true.
TEST_F(DBTestInPlaceUpdate, InPlaceUpdateCallbackLargeNewValue) {
  do {
    Options opts = CurrentOptions();
    opts.create_if_missing = true;
    opts.inplace_update_support = true;
    opts.env = env_;
    opts.write_buffer_size = 100000;
    opts.inplace_callback =
        ROCKSDB_NAMESPACE::DBTestInPlaceUpdate::updateInPlaceLargerSize;
    opts.allow_concurrent_memtable_write = false;
    Reopen(opts);
    CreateAndReopenWithCF({"pikachu"}, opts);

    // Update key with values of larger size
    const int kNumValues = 10;
    int step = 0;
    while (step < kNumValues) {
      ASSERT_OK(Put(1, "key", DummyString(step, 'a')));
      ASSERT_EQ(DummyString(step, 'c'), Get(1, "key"));
      ++step;
    }

    // No inplace updates. All updates are puts with new seq number
    // All 10 updates exist in the internal iterator
    validateNumberOfEntries(kNumValues, 1);
  } while (ChangeCompactOptions());
}
// Installs updateInPlaceNoAction as the in-place callback and checks that a Put
// leaves the key unreadable ("NOT_FOUND"). Repeats while ChangeCompactOptions()
// returns true.
TEST_F(DBTestInPlaceUpdate, InPlaceUpdateCallbackNoAction) {
  do {
    Options opts = CurrentOptions();
    opts.create_if_missing = true;
    opts.inplace_update_support = true;
    opts.env = env_;
    opts.write_buffer_size = 100000;
    opts.inplace_callback =
        ROCKSDB_NAMESPACE::DBTestInPlaceUpdate::updateInPlaceNoAction;
    opts.allow_concurrent_memtable_write = false;
    Reopen(opts);
    CreateAndReopenWithCF({"pikachu"}, opts);

    // Callback function requests no actions from db
    const std::string attempted_value = DummyString(1, 'a');
    ASSERT_OK(Put(1, "key", attempted_value));
    ASSERT_EQ(Get(1, "key"), "NOT_FOUND");
  } while (ChangeCompactOptions());
}
// With inplace_update_support on, checks that GetSnapshot() returns nullptr
// around each of two shrinking overwrites, that ReleaseSnapshot() accepts that
// nullptr, and that a single entry remains. Repeats while
// ChangeCompactOptions() returns true.
TEST_F(DBTestInPlaceUpdate, InPlaceUpdateAndSnapshot) {
  do {
    Options opts = CurrentOptions();
    opts.create_if_missing = true;
    opts.inplace_update_support = true;
    opts.env = env_;
    opts.write_buffer_size = 100000;
    opts.allow_concurrent_memtable_write = false;
    Reopen(opts);
    CreateAndReopenWithCF({"pikachu"}, opts);

    // Update key with values of smaller size, and
    // run GetSnapshot and ReleaseSnapshot
    const int kNumValues = 2;
    int remaining = kNumValues;
    while (remaining > 0) {
      const Snapshot* snapshot = db_->GetSnapshot();
      ASSERT_EQ(nullptr, snapshot);

      const std::string value = DummyString(remaining, 'a');
      ASSERT_OK(Put(1, "key", value));
      ASSERT_EQ(value, Get(1, "key"));

      // release s (nullptr)
      db_->ReleaseSnapshot(snapshot);
      --remaining;
    }

    // Only 1 instance for that key.
    validateNumberOfEntries(1, 1);
  } while (ChangeCompactOptions());
}
} // namespace ROCKSDB_NAMESPACE
// Test entry point: installs the stack trace handler, initializes GoogleTest
// with the command-line arguments, and returns the result of RUN_ALL_TESTS().
int main(int argc, char** argv) {
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

@ -1,589 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "db/db_test_util.h"
#include "port/stack_trace.h"
#include "test_util/testutil.h"
#include "util/random.h"
namespace ROCKSDB_NAMESPACE {
// Fixture for the I/O failure tests in this file. It only forwards a test name
// and env_do_fsync=true to DBTestBase.
class DBIOFailureTest : public DBTestBase {
public:
DBIOFailureTest() : DBTestBase("db_io_failure_test", /*env_do_fsync=*/true) {}
};
// Check that number of files does not grow when writes are dropped
// Sets env_->drop_writes_, runs five rounds of compaction attempts, and then
// checks the background error count, the file count, and the env's sleep
// counter. Repeats while ChangeCompactOptions() returns true.
TEST_F(DBIOFailureTest, DropWrites) {
do {
Options options = CurrentOptions();
options.env = env_;
options.paranoid_checks = false;
Reopen(options);
ASSERT_OK(Put("foo", "v1"));
ASSERT_EQ("v1", Get("foo"));
Compact("a", "z");
// Baseline file count, taken before writes start being dropped.
const size_t num_files = CountFiles();
// Force out-of-space errors
env_->drop_writes_.store(true, std::memory_order_release);
env_->sleep_counter_.Reset();
env_->SetMockSleep();
for (int i = 0; i < 5; i++) {
// The two universal-compaction configurations take the whole-range
// CompactRange path; every other configuration compacts level by level.
if (option_config_ != kUniversalCompactionMultiLevel &&
option_config_ != kUniversalSubcompactions) {
for (int level = 0; level < dbfull()->NumberLevels(); level++) {
// Stop before the last level unless that last level is level 0.
if (level > 0 && level == dbfull()->NumberLevels() - 1) {
break;
}
Status s =
dbfull()->TEST_CompactRange(level, nullptr, nullptr, nullptr,
true /* disallow trivial move */);
// Either success or a Corruption status is accepted while writes are dropped.
ASSERT_TRUE(s.ok() || s.IsCorruption());
}
} else {
Status s =
dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
ASSERT_TRUE(s.ok() || s.IsCorruption());
}
}
std::string property_value;
ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value));
// NOTE(review): presumably one background error per outer iteration — confirm.
ASSERT_EQ("5", property_value);
env_->drop_writes_.store(false, std::memory_order_release);
const size_t count = CountFiles();
ASSERT_LT(count, num_files + 3);
// Check that compaction attempts slept after errors
// TODO @krad: Figure out why ASSERT_EQ 5 keeps failing in certain compiler
// versions
ASSERT_GE(env_->sleep_counter_.Read(), 4);
} while (ChangeCompactOptions());
}
// Verifies that a flush failing because writes are dropped bumps the
// "rocksdb.background-errors" property from 0 to 1.
TEST_F(DBIOFailureTest, DropWritesFlush) {
  do {
    Options opts = CurrentOptions();
    opts.env = env_;
    opts.max_background_flushes = 1;
    Reopen(opts);

    ASSERT_OK(Put("foo", "v1"));

    // Start dropping writes to simulate an out-of-space device.
    env_->drop_writes_.store(true, std::memory_order_release);

    std::string bg_errors;
    // No background error has been recorded yet.
    ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &bg_errors));
    ASSERT_EQ("0", bg_errors);

    // The flush is expected to fail with a corruption status ("file is too
    // short").
    Status flush_status = dbfull()->TEST_FlushMemTable(true);
    ASSERT_TRUE(flush_status.IsCorruption());

    ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &bg_errors));
    ASSERT_EQ("1", bg_errors);

    env_->drop_writes_.store(false, std::memory_order_release);
  } while (ChangeCompactOptions());
}
// Verifies that a level-0 compaction reports an IOError flagged as NoSpace
// when the env simulates a full device.
TEST_F(DBIOFailureTest, NoSpaceCompactRange) {
  do {
    Options opts = CurrentOptions();
    opts.env = env_;
    opts.disable_auto_compactions = true;
    Reopen(opts);

    // Produce five tables, one key each.
    for (int table = 0; table < 5; ++table) {
      ASSERT_OK(Put(Key(table), Key(table) + "v"));
      ASSERT_OK(Flush());
    }

    // Simulate running out of space for the duration of the compaction.
    env_->no_space_.store(true, std::memory_order_release);
    Status compact_status =
        dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                                    true /* disallow trivial move */);
    ASSERT_TRUE(compact_status.IsIOError());
    ASSERT_TRUE(compact_status.IsNoSpace());
    env_->no_space_.store(false, std::memory_order_release);
  } while (ChangeCompactOptions());
}
// Verifies that at least one of twenty large Puts fails once the env's
// non_writeable_rate_ is set to 100.
TEST_F(DBIOFailureTest, NonWritableFileSystem) {
  do {
    Options opts = CurrentOptions();
    opts.write_buffer_size = 4096;
    opts.arena_block_size = 4096;
    opts.env = env_;
    Reopen(opts);

    ASSERT_OK(Put("foo", "v1"));
    env_->non_writeable_rate_.store(100);

    const std::string big(100000, 'x');
    int failed_puts = 0;
    for (int attempt = 0; attempt < 20; attempt++) {
      if (Put("foo", big).ok()) {
        continue;
      }
      // Count the failure and pause before the next attempt.
      failed_puts++;
      env_->SleepForMicroseconds(100000);
    }
    ASSERT_GT(failed_puts, 0);

    env_->non_writeable_rate_.store(0);
  } while (ChangeCompactOptions());
}
TEST_F(DBIOFailureTest, ManifestWriteError) {
  // Scenario being guarded against:
  //  (a) Compaction produces file F
  //  (b) Log record containing F is written to MANIFEST file, but Sync() fails
  //  (c) GC deletes F
  //  (d) After reopening DB, reads fail since deleted F is named in log record
  //
  // Two passes are run. Pass 0 injects the failure through
  // manifest_sync_error_; pass 1 is the same except that the failure is
  // injected through manifest_write_error_, so the log record never makes it
  // to the MANIFEST file.
  for (int pass = 0; pass < 2; pass++) {
    std::atomic<bool>* fault_flag = nullptr;
    if (pass == 0) {
      fault_flag = &env_->manifest_sync_error_;
    } else {
      fault_flag = &env_->manifest_write_error_;
    }

    // Insert foo=>bar mapping
    Options opts = CurrentOptions();
    opts.env = env_;
    opts.create_if_missing = true;
    opts.error_if_exists = false;
    opts.paranoid_checks = true;
    DestroyAndReopen(opts);
    ASSERT_OK(Put("foo", "bar"));
    ASSERT_EQ("bar", Get("foo"));

    // Flushing the memtable is expected to succeed.
    ASSERT_OK(Flush());
    ASSERT_EQ("bar", Get("foo"));
    const int last_level = 2;
    MoveFilesToLevel(2);
    // foo=>bar now lives in the last level.
    ASSERT_EQ(NumTableFilesAtLevel(last_level), 1);

    // With the fault armed, the merging compaction must fail.
    fault_flag->store(true, std::memory_order_release);
    ASSERT_NOK(dbfull()->TEST_CompactRange(last_level, nullptr, nullptr));
    ASSERT_EQ("bar", Get("foo"));

    fault_flag->store(false, std::memory_order_release);

    // paranoid_checks=true: later writes are rejected after the failure...
    ASSERT_NOK(Put("foo2", "bar2"));

    // ...but the existing data must still be readable.
    ASSERT_EQ("bar", Get("foo"));

    // Repeat the compaction with paranoid_checks=false.
    Close();
    opts.paranoid_checks = false;
    Reopen(opts);

    fault_flag->store(true, std::memory_order_release);
    Status compact_status =
        dbfull()->TEST_CompactRange(last_level, nullptr, nullptr);
    // The sync-error pass is expected to report OK here, while the
    // write-error pass must surface an IOError.
    if (pass == 0) {
      ASSERT_OK(compact_status);
    } else {
      ASSERT_TRUE(compact_status.IsIOError());
    }
    ASSERT_EQ("bar", Get("foo"));

    // Recovery: should not lose data
    fault_flag->store(false, std::memory_order_release);
    Reopen(opts);
    ASSERT_EQ("bar", Get("foo"));

    // paranoid_checks=false: writes are accepted again.
    ASSERT_OK(Put("foo2", "bar2"));
    ASSERT_EQ("bar", Get("foo"));
    ASSERT_EQ("bar2", Get("foo2"));
  }
}
TEST_F(DBIOFailureTest, PutFailsParanoid) {
  // What is checked:
  //  (a) A put fails in paranoid mode (simulated through log_write_error_)
  //  (b) Every later put fails too, even though the injected error is gone
  //  (c) With paranoid_checks = false, later puts succeed again
  Options opts = CurrentOptions();
  opts.env = env_;
  opts.create_if_missing = true;
  opts.error_if_exists = false;
  opts.paranoid_checks = true;
  DestroyAndReopen(opts);
  CreateAndReopenWithCF({"pikachu"}, opts);

  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "foo1", "bar1"));

  // Inject a log write error around a single put.
  env_->log_write_error_.store(true, std::memory_order_release);
  ASSERT_NOK(Put(1, "foo2", "bar2"));
  env_->log_write_error_.store(false, std::memory_order_release);

  // The following put is rejected as well...
  ASSERT_NOK(Put(1, "foo3", "bar3"));
  // ...while reads keep working.
  ASSERT_EQ("bar", Get(1, "foo"));

  // Same sequence with paranoid checks turned off.
  opts.paranoid_checks = false;
  DestroyAndReopen(opts);
  CreateAndReopenWithCF({"pikachu"}, opts);

  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "foo1", "bar1"));

  env_->log_write_error_.store(true, std::memory_order_release);
  ASSERT_NOK(Put(1, "foo2", "bar2"));
  env_->log_write_error_.store(false, std::memory_order_release);

  // This time the following put must go through.
  ASSERT_OK(Put(1, "foo3", "bar3"));
}
#if !(defined NDEBUG) || !defined(OS_WIN)
// Injects an IOError into the first "SpecialEnv::SStableFile::RangeSync" sync
// point hit and checks that the memtable flush reports it, that later writes
// are rejected, and that the first key is still readable after reopening.
TEST_F(DBIOFailureTest, FlushSstRangeSyncError) {
  Options opts = CurrentOptions();
  opts.env = env_;
  opts.create_if_missing = true;
  opts.error_if_exists = false;
  opts.paranoid_checks = true;
  opts.write_buffer_size = 256 * 1024 * 1024;
  opts.writable_file_max_buffer_size = 128 * 1024;
  opts.bytes_per_sync = 128 * 1024;
  opts.level0_file_num_compaction_trigger = 4;
  opts.memtable_factory.reset(test::NewSpecialSkipListFactory(10));

  BlockBasedTableOptions table_opts;
  table_opts.filter_policy.reset(NewBloomFilterPolicy(10));
  opts.table_factory.reset(NewBlockBasedTableFactory(table_opts));

  DestroyAndReopen(opts);
  CreateAndReopenWithCF({"pikachu"}, opts);

  const char* injected_msg = "range sync dummy error";
  std::atomic<int> range_sync_calls(0);
  auto* sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sync_point->SetCallBack(
      "SpecialEnv::SStableFile::RangeSync", [&](void* arg) {
        // Only the very first invocation overwrites the status.
        if (range_sync_calls.fetch_add(1) != 0) {
          return;
        }
        Status* injected = static_cast<Status*>(arg);
        *injected = Status::IOError(injected_msg);
      });
  sync_point->EnableProcessing();

  Random rnd(301);
  const std::string half_sync_value =
      rnd.RandomString(static_cast<int>(opts.bytes_per_sync / 2));
  const std::string value_512kb = rnd.RandomString(512 * 1024);

  ASSERT_OK(Put(1, "foo", "bar"));
  // First 1MB doesn't get range synced
  ASSERT_OK(Put(1, "foo0_0", value_512kb));
  ASSERT_OK(Put(1, "foo0_1", value_512kb));
  ASSERT_OK(Put(1, "foo1_1", half_sync_value));
  ASSERT_OK(Put(1, "foo1_2", half_sync_value));
  ASSERT_OK(Put(1, "foo1_3", half_sync_value));
  ASSERT_OK(Put(1, "foo2", "bar"));
  ASSERT_OK(Put(1, "foo3_1", half_sync_value));
  ASSERT_OK(Put(1, "foo3_2", half_sync_value));
  ASSERT_OK(Put(1, "foo3_3", half_sync_value));
  ASSERT_OK(Put(1, "foo4", "bar"));

  Status flush_status = dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
  ASSERT_TRUE(flush_status.IsIOError());
  ASSERT_STREQ(flush_status.getState(), injected_msg);

  // The failed flush must make subsequent writes fail.
  ASSERT_NOK(Put(1, "foo2", "bar3"));
  ASSERT_EQ("bar", Get(1, "foo"));

  sync_point->DisableProcessing();
  // ASSERT_GE(a, b) checks a >= b, so this bounds the callback to at most one
  // invocation. NOTE(review): if "at least once" was intended, the operands
  // are swapped - confirm.
  ASSERT_GE(1, range_sync_calls.load());

  ReopenWithColumnFamilies({"default", "pikachu"}, opts);
  ASSERT_EQ("bar", Get(1, "foo"));
}
// Builds two flushed files with auto compaction disabled, then injects an
// IOError into the first "SpecialEnv::SStableFile::RangeSync" sync point hit
// once auto compaction is re-enabled. Checks that the compaction reports the
// error, that later writes are rejected, and that data survives a reopen.
TEST_F(DBIOFailureTest, CompactSstRangeSyncError) {
  Options opts = CurrentOptions();
  opts.env = env_;
  opts.create_if_missing = true;
  opts.error_if_exists = false;
  opts.paranoid_checks = true;
  opts.write_buffer_size = 256 * 1024 * 1024;
  opts.writable_file_max_buffer_size = 128 * 1024;
  opts.bytes_per_sync = 128 * 1024;
  opts.level0_file_num_compaction_trigger = 2;
  opts.target_file_size_base = 256 * 1024 * 1024;
  opts.disable_auto_compactions = true;

  BlockBasedTableOptions table_opts;
  table_opts.filter_policy.reset(NewBloomFilterPolicy(10));
  opts.table_factory.reset(NewBlockBasedTableFactory(table_opts));

  DestroyAndReopen(opts);
  CreateAndReopenWithCF({"pikachu"}, opts);

  Random rnd(301);
  const std::string half_sync_value =
      rnd.RandomString(static_cast<int>(opts.bytes_per_sync / 2));
  const std::string value_512kb = rnd.RandomString(512 * 1024);

  // First file.
  ASSERT_OK(Put(1, "foo", "bar"));
  // First 1MB doesn't get range synced
  ASSERT_OK(Put(1, "foo0_0", value_512kb));
  ASSERT_OK(Put(1, "foo0_1", value_512kb));
  ASSERT_OK(Put(1, "foo1_1", half_sync_value));
  ASSERT_OK(Put(1, "foo1_2", half_sync_value));
  ASSERT_OK(Put(1, "foo1_3", half_sync_value));
  ASSERT_OK(Flush(1));

  // Second file.
  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "foo3_1", half_sync_value));
  ASSERT_OK(Put(1, "foo3_2", half_sync_value));
  ASSERT_OK(Put(1, "foo3_3", half_sync_value));
  ASSERT_OK(Put(1, "foo4", "bar"));
  ASSERT_OK(Flush(1));
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1]));

  const char* injected_msg = "range sync dummy error";
  std::atomic<int> range_sync_calls(0);
  auto* sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sync_point->SetCallBack(
      "SpecialEnv::SStableFile::RangeSync", [&](void* arg) {
        // Only the very first invocation overwrites the status.
        if (range_sync_calls.fetch_add(1) != 0) {
          return;
        }
        Status* injected = static_cast<Status*>(arg);
        *injected = Status::IOError(injected_msg);
      });
  sync_point->EnableProcessing();

  // Re-enable auto compaction for the column family and wait for it.
  ASSERT_OK(dbfull()->SetOptions(handles_[1],
                                 {
                                     {"disable_auto_compactions", "false"},
                                 }));
  Status compact_status = dbfull()->TEST_WaitForCompact();
  ASSERT_TRUE(compact_status.IsIOError());
  ASSERT_STREQ(compact_status.getState(), injected_msg);

  // The failed compaction must make subsequent writes fail.
  ASSERT_NOK(Put(1, "foo2", "bar3"));
  ASSERT_EQ("bar", Get(1, "foo"));

  sync_point->DisableProcessing();
  // ASSERT_GE(a, b) checks a >= b, so this bounds the callback to at most one
  // invocation. NOTE(review): if "at least once" was intended, the operands
  // are swapped - confirm.
  ASSERT_GE(1, range_sync_calls.load());

  ReopenWithColumnFamilies({"default", "pikachu"}, opts);
  ASSERT_EQ("bar", Get(1, "foo"));
}
// Injects an IOError into the first "SpecialEnv::SStableFile::Close" sync
// point hit and checks that the flush reports it, that later writes are
// rejected, and that the written values are readable before and after a
// reopen.
TEST_F(DBIOFailureTest, FlushSstCloseError) {
  Options opts = CurrentOptions();
  opts.env = env_;
  opts.create_if_missing = true;
  opts.error_if_exists = false;
  opts.paranoid_checks = true;
  opts.level0_file_num_compaction_trigger = 4;
  opts.memtable_factory.reset(test::NewSpecialSkipListFactory(2));

  DestroyAndReopen(opts);
  CreateAndReopenWithCF({"pikachu"}, opts);

  const char* injected_msg = "close dummy error";
  std::atomic<int> close_calls(0);
  auto* sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sync_point->SetCallBack("SpecialEnv::SStableFile::Close", [&](void* arg) {
    // Only the very first invocation overwrites the status.
    if (close_calls.fetch_add(1) != 0) {
      return;
    }
    Status* injected = static_cast<Status*>(arg);
    *injected = Status::IOError(injected_msg);
  });
  sync_point->EnableProcessing();

  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "foo1", "bar1"));
  ASSERT_OK(Put(1, "foo", "bar2"));

  Status flush_status = dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
  ASSERT_TRUE(flush_status.IsIOError());
  ASSERT_STREQ(flush_status.getState(), injected_msg);

  // The failed flush must make subsequent writes fail.
  ASSERT_NOK(Put(1, "foo2", "bar3"));
  ASSERT_EQ("bar2", Get(1, "foo"));
  ASSERT_EQ("bar1", Get(1, "foo1"));

  sync_point->DisableProcessing();

  ReopenWithColumnFamilies({"default", "pikachu"}, opts);
  ASSERT_EQ("bar2", Get(1, "foo"));
  ASSERT_EQ("bar1", Get(1, "foo1"));
}
// Builds three flushed files with auto compaction disabled, then injects an
// IOError into the first "SpecialEnv::SStableFile::Close" sync point hit once
// auto compaction is re-enabled. Checks that the compaction reports the
// error, that later writes are rejected, and that the newest value of "foo"
// is readable before and after a reopen.
TEST_F(DBIOFailureTest, CompactionSstCloseError) {
  Options opts = CurrentOptions();
  opts.env = env_;
  opts.create_if_missing = true;
  opts.error_if_exists = false;
  opts.paranoid_checks = true;
  opts.level0_file_num_compaction_trigger = 2;
  opts.disable_auto_compactions = true;

  DestroyAndReopen(opts);
  CreateAndReopenWithCF({"pikachu"}, opts);

  // Three flushes, each rewriting "foo" with a newer value.
  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "foo2", "bar"));
  ASSERT_OK(Flush(1));
  ASSERT_OK(Put(1, "foo", "bar2"));
  ASSERT_OK(Put(1, "foo2", "bar"));
  ASSERT_OK(Flush(1));
  ASSERT_OK(Put(1, "foo", "bar3"));
  ASSERT_OK(Put(1, "foo2", "bar"));
  ASSERT_OK(Flush(1));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  const char* injected_msg = "close dummy error";
  std::atomic<int> close_calls(0);
  auto* sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sync_point->SetCallBack("SpecialEnv::SStableFile::Close", [&](void* arg) {
    // Only the very first invocation overwrites the status.
    if (close_calls.fetch_add(1) != 0) {
      return;
    }
    Status* injected = static_cast<Status*>(arg);
    *injected = Status::IOError(injected_msg);
  });
  sync_point->EnableProcessing();

  // Re-enable auto compaction for the column family and wait for it.
  ASSERT_OK(dbfull()->SetOptions(handles_[1],
                                 {
                                     {"disable_auto_compactions", "false"},
                                 }));
  Status compact_status = dbfull()->TEST_WaitForCompact();
  ASSERT_TRUE(compact_status.IsIOError());
  ASSERT_STREQ(compact_status.getState(), injected_msg);

  // The failed compaction must make subsequent writes fail.
  ASSERT_NOK(Put(1, "foo2", "bar3"));
  ASSERT_EQ("bar3", Get(1, "foo"));

  sync_point->DisableProcessing();

  ReopenWithColumnFamilies({"default", "pikachu"}, opts);
  ASSERT_EQ("bar3", Get(1, "foo"));
}
// Injects an IOError into the first "SpecialEnv::SStableFile::Sync" sync
// point hit and checks that the flush reports it, that later writes are
// rejected, and that the written values are readable before and after a
// reopen.
TEST_F(DBIOFailureTest, FlushSstSyncError) {
  Options opts = CurrentOptions();
  opts.env = env_;
  opts.create_if_missing = true;
  opts.error_if_exists = false;
  opts.paranoid_checks = true;
  opts.use_fsync = false;
  opts.level0_file_num_compaction_trigger = 4;
  opts.memtable_factory.reset(test::NewSpecialSkipListFactory(2));

  DestroyAndReopen(opts);
  CreateAndReopenWithCF({"pikachu"}, opts);

  const char* injected_msg = "sync dummy error";
  std::atomic<int> sync_calls(0);
  auto* sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sync_point->SetCallBack("SpecialEnv::SStableFile::Sync", [&](void* arg) {
    // Only the very first invocation overwrites the status.
    if (sync_calls.fetch_add(1) != 0) {
      return;
    }
    Status* injected = static_cast<Status*>(arg);
    *injected = Status::IOError(injected_msg);
  });
  sync_point->EnableProcessing();

  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "foo1", "bar1"));
  ASSERT_OK(Put(1, "foo", "bar2"));

  Status flush_status = dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
  ASSERT_TRUE(flush_status.IsIOError());
  ASSERT_STREQ(flush_status.getState(), injected_msg);

  // The failed flush must make subsequent writes fail.
  ASSERT_NOK(Put(1, "foo2", "bar3"));
  ASSERT_EQ("bar2", Get(1, "foo"));
  ASSERT_EQ("bar1", Get(1, "foo1"));

  sync_point->DisableProcessing();

  ReopenWithColumnFamilies({"default", "pikachu"}, opts);
  ASSERT_EQ("bar2", Get(1, "foo"));
  ASSERT_EQ("bar1", Get(1, "foo1"));
}
// Builds three flushed files with auto compaction disabled, then injects an
// IOError into the first "SpecialEnv::SStableFile::Sync" sync point hit once
// auto compaction is re-enabled. Checks that the compaction reports the
// error, that later writes are rejected, and that the newest value of "foo"
// is readable before and after a reopen.
TEST_F(DBIOFailureTest, CompactionSstSyncError) {
  Options opts = CurrentOptions();
  opts.env = env_;
  opts.create_if_missing = true;
  opts.error_if_exists = false;
  opts.paranoid_checks = true;
  opts.level0_file_num_compaction_trigger = 2;
  opts.disable_auto_compactions = true;
  opts.use_fsync = false;

  DestroyAndReopen(opts);
  CreateAndReopenWithCF({"pikachu"}, opts);

  // Three flushes, each rewriting "foo" with a newer value.
  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "foo2", "bar"));
  ASSERT_OK(Flush(1));
  ASSERT_OK(Put(1, "foo", "bar2"));
  ASSERT_OK(Put(1, "foo2", "bar"));
  ASSERT_OK(Flush(1));
  ASSERT_OK(Put(1, "foo", "bar3"));
  ASSERT_OK(Put(1, "foo2", "bar"));
  ASSERT_OK(Flush(1));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  const char* injected_msg = "sync dummy error";
  std::atomic<int> sync_calls(0);
  auto* sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sync_point->SetCallBack("SpecialEnv::SStableFile::Sync", [&](void* arg) {
    // Only the very first invocation overwrites the status.
    if (sync_calls.fetch_add(1) != 0) {
      return;
    }
    Status* injected = static_cast<Status*>(arg);
    *injected = Status::IOError(injected_msg);
  });
  sync_point->EnableProcessing();

  // Re-enable auto compaction for the column family and wait for it.
  ASSERT_OK(dbfull()->SetOptions(handles_[1],
                                 {
                                     {"disable_auto_compactions", "false"},
                                 }));
  Status compact_status = dbfull()->TEST_WaitForCompact();
  ASSERT_TRUE(compact_status.IsIOError());
  ASSERT_STREQ(compact_status.getState(), injected_msg);

  // The failed compaction must make subsequent writes fail.
  ASSERT_NOK(Put(1, "foo2", "bar3"));
  ASSERT_EQ("bar3", Get(1, "foo"));

  sync_point->DisableProcessing();

  ReopenWithColumnFamilies({"default", "pikachu"}, opts);
  ASSERT_EQ("bar3", Get(1, "foo"));
}
#endif // !(defined NDEBUG) || !defined(OS_WIN)
} // namespace ROCKSDB_NAMESPACE
// Entry point of the test binary: installs the stack trace handler, lets
// GoogleTest consume its command-line flags, registers custom objects from
// the remaining arguments, and returns the result of running all tests.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  RegisterCustomObjects(argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

@ -1,658 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/db_iter.h"
#include "db/dbformat.h"
#include "rocksdb/comparator.h"
#include "rocksdb/options.h"
#include "rocksdb/slice.h"
#include "test_util/testharness.h"
#include "util/random.h"
#include "util/string_util.h"
#include "utilities/merge_operators.h"
#ifndef GFLAGS
// Built without gflags: provide a no-op parser and a fixed `verbose` flag so
// the rest of the file compiles unchanged.
void ParseCommandLineFlags(int*, char***, bool) {}
bool FLAGS_verbose = false;
#else
// Built with gflags: use the real parser and define the `verbose` flag.
#include "util/gflags_compat.h"
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
DEFINE_bool(verbose, false,
            "Print huge, detailed trace. Intended for debugging failures.");
#endif  // GFLAGS
namespace ROCKSDB_NAMESPACE {
class DBIteratorStressTest : public testing::Test {
public:
Env* env_;
DBIteratorStressTest() : env_(Env::Default()) {}
};
namespace {
// One internal-key entry of the synthetic data set.
struct Entry {
  std::string key;
  ValueType type;  // kTypeValue, kTypeDeletion, kTypeMerge
  uint64_t sequence;
  std::string ikey;  // internal key, made from `key`, `sequence` and `type`
  std::string value;
  // If false, we'll pretend that this entry doesn't exist.
  bool visible = true;

  // Orders by user key ascending; entries with equal keys are ordered by
  // (sequence, type) descending.
  bool operator<(const Entry& e) const {
    if (key == e.key) {
      return std::tie(e.sequence, e.type) < std::tie(sequence, type);
    }
    return key < e.key;
  }
};
// Data set shared by the iterators in this file.
struct Data {
  // All entries; the test sorts this vector before iterating over it.
  std::vector<Entry> entries;

  // Indices in `entries` with `visible` = false.
  std::vector<size_t> hidden;

  // Keys of entries whose `visible` changed since the last seek of iterators.
  std::set<std::string> recently_touched_keys;
};
// InternalIterator over a shared `Data` that, on each positioning call, may
// inject an error or hide/unhide entries, so that the iterator layered on top
// of it is exercised against failures and concurrent-looking mutations.
struct StressTestIterator : public InternalIterator {
  Data* data;
  Random64* rnd;
  InternalKeyComparator cmp;

  // Each operation will return error with this probability...
  double error_probability = 0;
  // ... and add/remove entries with this probability.
  double mutation_probability = 0;
  // The probability of adding vs removing entries will be chosen so that the
  // amount of removed entries stays somewhat close to this number.
  double target_hidden_fraction = 0;
  // If true, print all mutations to stdout for debugging.
  bool trace = false;

  // Current position as an index into data->entries. Any value outside
  // [0, entries.size()) means the iterator is not valid.
  int iter = -1;
  Status status_;

  StressTestIterator(Data* _data, Random64* _rnd, const Comparator* _cmp)
      : data(_data), rnd(_rnd), cmp(_cmp) {}

  // Valid iff `iter` indexes an existing entry; the status must be OK then.
  bool Valid() const override {
    if (iter >= 0 && iter < static_cast<int>(data->entries.size())) {
      assert(status_.ok());
      return true;
    }
    return false;
  }

  Status status() const override { return status_; }

  // With probability `error_probability`, sets `status_` to Incomplete or
  // IOError (chosen at random), invalidates the position and returns true.
  // Otherwise returns false and changes nothing.
  bool MaybeFail() {
    if (rnd->Next() >=
        static_cast<double>(std::numeric_limits<uint64_t>::max()) *
            error_probability) {
      return false;
    }
    if (rnd->Next() % 2) {
      status_ = Status::Incomplete("test");
    } else {
      status_ = Status::IOError("test");
    }
    if (trace) {
      std::cout << "injecting " << status_.ToString() << std::endl;
    }
    iter = -1;
    return true;
  }

  // With probability `mutation_probability`, hides and/or unhides random
  // entries (three mutations on average), recording every touched key in
  // data->recently_touched_keys.
  void MaybeMutate() {
    if (rnd->Next() >=
        static_cast<double>(std::numeric_limits<uint64_t>::max()) *
            mutation_probability) {
      return;
    }
    if (data->entries.empty()) {
      // Nothing to hide or unhide; the index computations below would
      // otherwise evaluate `% 0`.
      return;
    }
    do {
      // If too many entries are hidden, hide less, otherwise hide more.
      double hide_probability =
          data->hidden.size() > data->entries.size() * target_hidden_fraction
              ? 1. / 3
              : 2. / 3;
      if (data->hidden.empty()) {
        hide_probability = 1;
      }
      bool do_hide = rnd->Next() <
                     static_cast<double>(std::numeric_limits<uint64_t>::max()) *
                         hide_probability;
      if (data->hidden.empty()) {
        // Nothing can be unhidden, so hiding is the only option. The
        // comparison above is done in double precision, where the largest
        // uint64_t values round up to 2^64 and compare as "not less" even
        // with hide_probability == 1. Force the hide path so the unhide
        // branch never computes `% data->hidden.size()` with size 0. The
        // random number is still drawn above, so the RNG sequence is
        // unchanged.
        do_hide = true;
      }
      if (do_hide) {
        // Hide a random entry.
        size_t idx = rnd->Next() % data->entries.size();
        Entry& e = data->entries[idx];
        if (e.visible) {
          if (trace) {
            std::cout << "hiding idx " << idx << std::endl;
          }
          e.visible = false;
          data->hidden.push_back(idx);
          data->recently_touched_keys.insert(e.key);
        } else {
          // Already hidden. Let's go unhide something instead, just because
          // it's easy and it doesn't really matter what we do.
          do_hide = false;
        }
      }
      if (!do_hide) {
        // Unhide a random entry.
        size_t hi = rnd->Next() % data->hidden.size();
        size_t idx = data->hidden[hi];
        if (trace) {
          std::cout << "unhiding idx " << idx << std::endl;
        }
        Entry& e = data->entries[idx];
        assert(!e.visible);
        e.visible = true;
        // Remove index `hi` from `hidden` by swapping in the last element.
        data->hidden[hi] = data->hidden.back();
        data->hidden.pop_back();
        data->recently_touched_keys.insert(e.key);
      }
    } while (rnd->Next() % 3 != 0);  // do 3 mutations on average
  }

  // Advances `iter` past hidden entries; may stop at entries.size().
  void SkipForward() {
    while (iter < static_cast<int>(data->entries.size()) &&
           !data->entries[iter].visible) {
      ++iter;
    }
  }

  // Moves `iter` back past hidden entries; may stop at -1.
  void SkipBackward() {
    while (iter >= 0 && !data->entries[iter].visible) {
      --iter;
    }
  }

  void SeekToFirst() override {
    if (MaybeFail()) return;
    MaybeMutate();

    status_ = Status::OK();
    iter = 0;
    SkipForward();
  }

  void SeekToLast() override {
    if (MaybeFail()) return;
    MaybeMutate();

    status_ = Status::OK();
    iter = static_cast<int>(data->entries.size()) - 1;
    SkipBackward();
  }

  // Positions at the first visible entry whose internal key is >= target.
  void Seek(const Slice& target) override {
    if (MaybeFail()) return;
    MaybeMutate();

    status_ = Status::OK();
    // Binary search.
    auto it = std::partition_point(
        data->entries.begin(), data->entries.end(),
        [&](const Entry& e) { return cmp.Compare(e.ikey, target) < 0; });
    iter = static_cast<int>(it - data->entries.begin());
    SkipForward();
  }

  // Positions at the last visible entry whose internal key is <= target.
  void SeekForPrev(const Slice& target) override {
    if (MaybeFail()) return;
    MaybeMutate();

    status_ = Status::OK();
    // Binary search.
    auto it = std::partition_point(
        data->entries.begin(), data->entries.end(),
        [&](const Entry& e) { return cmp.Compare(e.ikey, target) <= 0; });
    iter = static_cast<int>(it - data->entries.begin());
    --iter;
    SkipBackward();
  }

  void Next() override {
    assert(Valid());
    if (MaybeFail()) return;
    MaybeMutate();
    ++iter;
    SkipForward();
  }

  void Prev() override {
    assert(Valid());
    if (MaybeFail()) return;
    MaybeMutate();
    --iter;
    SkipBackward();
  }

  Slice key() const override {
    assert(Valid());
    return data->entries[iter].ikey;
  }

  Slice value() const override {
    assert(Valid());
    return data->entries[iter].value;
  }

  bool IsKeyPinned() const override { return true; }
  bool IsValuePinned() const override { return true; }
};
// A small reimplementation of DBIter, supporting only some of the features,
// and doing everything in O(log n).
// Skips all keys that are in recently_touched_keys.
// Used by the stress test as the expected-result oracle: after each operation
// the test compares `key`/`value`/`Valid()` of this iterator against DBIter.
struct ReferenceIterator {
Data* data;
uint64_t sequence; // ignore entries with sequence number above this
// True when `key`/`value` hold a result; the test also resets this directly
// to force a reseek.
bool valid = false;
// Current user key; also serves as the search origin for FindNextKey().
std::string key;
// Value for `key`; only meaningful while `valid` is true.
std::string value;
ReferenceIterator(Data* _data, uint64_t _sequence)
: data(_data), sequence(_sequence) {}
bool Valid() const { return valid; }
// Finds the first entry with key
// greater/less/greater-or-equal/less-or-equal than `key`, depending on
// arguments: if `skip`, inequality is strict; if `forward`, it's
// greater/greater-or-equal, otherwise less/less-or-equal.
// Sets `key` to the result.
// If no such key exists, returns false. Doesn't check `visible`.
// Always clears `valid`; it is set again only by FindValueForCurrentKey().
bool FindNextKey(bool skip, bool forward) {
valid = false;
// `forward != skip` selects the strict `<` predicate: forward non-strict
// wants the first key >= `key`, backward strict wants the last key < `key`
// (the element just before the partition point).
auto it = std::partition_point(data->entries.begin(), data->entries.end(),
[&](const Entry& e) {
if (forward != skip) {
return e.key < key;
} else {
return e.key <= key;
}
});
if (forward) {
if (it != data->entries.end()) {
key = it->key;
return true;
}
} else {
if (it != data->entries.begin()) {
--it;
key = it->key;
return true;
}
}
return false;
}
// Computes the value visible for the current `key` at `sequence`.
// Returns false (leaving `valid` unchanged) if the key was recently touched,
// has no visible entry at or below `sequence`, or its newest visible entry is
// a deletion. Otherwise sets `value` and `valid` and returns true.
bool FindValueForCurrentKey() {
if (data->recently_touched_keys.count(key)) {
return false;
}
// Find the first entry for the key. The caller promises that it exists.
// Entries of one key are sorted by descending sequence, so this skips the
// ones with sequence number greater than `sequence`.
auto it = std::partition_point(data->entries.begin(), data->entries.end(),
[&](const Entry& e) {
if (e.key != key) {
return e.key < key;
}
return e.sequence > sequence;
});
// Find the first visible entry.
for (;; ++it) {
if (it == data->entries.end()) {
return false;
}
Entry& e = *it;
if (e.key != key) {
return false;
}
assert(e.sequence <= sequence);
if (!e.visible) continue;
if (e.type == kTypeDeletion) {
return false;
}
if (e.type == kTypeValue) {
value = e.value;
valid = true;
return true;
}
assert(e.type == kTypeMerge);
break;
}
// Collect merge operands.
// `it` still points at the merge entry found above, so `operands` gets at
// least one element. Collection stops at a deletion (excluded), at a plain
// value (included), or when the key changes.
std::vector<Slice> operands;
for (; it != data->entries.end(); ++it) {
Entry& e = *it;
if (e.key != key) {
break;
}
assert(e.sequence <= sequence);
if (!e.visible) continue;
if (e.type == kTypeDeletion) {
break;
}
operands.push_back(e.value);
if (e.type == kTypeValue) {
break;
}
}
// Do a merge.
// Operands were collected newest-first; join them oldest-first with ','.
value = operands.back().ToString();
for (int i = (int)operands.size() - 2; i >= 0; --i) {
value.append(",");
value.append(operands[i].data(), operands[i].size());
}
valid = true;
return true;
}
// Start at `key` and move until we encounter a valid value.
// `forward` defines the direction of movement.
// If `skip` is true, we're looking for key not equal to `key`.
void DoTheThing(bool skip, bool forward) {
// After the first candidate key, every further step must be strict so the
// search makes progress.
while (FindNextKey(skip, forward) && !FindValueForCurrentKey()) {
skip = true;
}
}
void Seek(const Slice& target) {
key = target.ToString();
DoTheThing(false, true);
}
void SeekForPrev(const Slice& target) {
key = target.ToString();
DoTheThing(false, false);
}
// The empty string is used as the seek target here.
void SeekToFirst() { Seek(""); }
// Starts from the key of the last entry and searches backwards (inclusive).
// Calls back() on data->entries, which requires a non-empty vector.
void SeekToLast() {
key = data->entries.back().key;
DoTheThing(false, false);
}
void Next() {
assert(Valid());
DoTheThing(true, true);
}
void Prev() {
assert(Valid());
DoTheThing(true, false);
}
};
} // anonymous namespace
// Use an internal iterator that sometimes returns errors and sometimes
// adds/removes entries on the fly. Do random operations on a DBIter and
// check results.
// TODO: can be improved for more coverage:
// * Override IsKeyPinned() and IsValuePinned() to actually use
// PinnedIteratorManager and check that there's no use-after free.
// * Try different combinations of prefix_extractor, total_order_seek,
// prefix_same_as_start, iterate_lower_bound, iterate_upper_bound.
TEST_F(DBIteratorStressTest, StressTest) {
// We use a deterministic RNG, and everything happens in a single thread.
// Consequently the order of every rnd.Next() call below is part of the
// test's behavior.
Random64 rnd(826909345792864532ll);
// Returns a random number in [0, max_key) as a decimal string, left-padded
// with '0' to the number of digits of max_key.
auto gen_key = [&](int max_key) {
assert(max_key > 0);
int len = 0;
int a = max_key;
while (a) {
a /= 10;
++len;
}
std::string s = std::to_string(rnd.Next() % static_cast<uint64_t>(max_key));
s.insert(0, len - (int)s.size(), '0');
return s;
};
Options options;
options.merge_operator = MergeOperators::CreateFromStringId("stringappend");
ReadOptions ropt;
// Outcome counters, one per branch of the result check below.
size_t num_matching = 0;
size_t num_at_end = 0;
size_t num_not_ok = 0;
size_t num_recently_removed = 0;
// Number of iterations for each combination of parameters
// (there are ~250 of those).
// Tweak this to change the test run time.
// As of the time of writing, the test takes ~4 seconds for value of 5000.
const int num_iterations = 5000;
// Enable this to print all the operations for debugging.
bool trace = FLAGS_verbose;
for (int num_entries : {5, 10, 100}) {
for (double key_space : {0.1, 1.0, 3.0}) {
for (ValueType prevalent_entry_type :
{kTypeValue, kTypeDeletion, kTypeMerge}) {
for (double error_probability : {0.01, 0.1}) {
for (double mutation_probability : {0.01, 0.5}) {
for (double target_hidden_fraction : {0.1, 0.5}) {
// Description of the parameter combination, attached to any failure
// reported inside this scope (prevalent_entry_type is not included).
std::string trace_str =
"entries: " + std::to_string(num_entries) +
", key_space: " + std::to_string(key_space) +
", error_probability: " + std::to_string(error_probability) +
", mutation_probability: " +
std::to_string(mutation_probability) +
", target_hidden_fraction: " +
std::to_string(target_hidden_fraction);
SCOPED_TRACE(trace_str);
if (trace) {
std::cout << trace_str << std::endl;
}
// Generate data.
Data data;
int max_key = (int)(num_entries * key_space) + 1;
for (int i = 0; i < num_entries; ++i) {
Entry e;
e.key = gen_key(max_key);
// 9 times out of 10 use the prevalent type, otherwise a random one.
if (rnd.Next() % 10 != 0) {
e.type = prevalent_entry_type;
} else {
const ValueType types[] = {kTypeValue, kTypeDeletion,
kTypeMerge};
e.type =
types[rnd.Next() % (sizeof(types) / sizeof(types[0]))];
}
e.sequence = i;
e.value = "v" + std::to_string(i);
ParsedInternalKey internal_key(e.key, e.sequence, e.type);
AppendInternalKey(&e.ikey, internal_key);
data.entries.push_back(e);
}
// Sort with Entry::operator< (key ascending, then sequence/type
// descending); both iterators binary-search this vector.
std::sort(data.entries.begin(), data.entries.end());
if (trace) {
std::cout << "entries:";
for (size_t i = 0; i < data.entries.size(); ++i) {
Entry& e = data.entries[i];
std::cout << "\n idx " << i << ": \"" << e.key << "\": \""
<< e.value << "\" seq: " << e.sequence << " type: "
<< (e.type == kTypeValue ? "val"
: e.type == kTypeDeletion ? "del"
: "merge");
}
std::cout << std::endl;
}
std::unique_ptr<Iterator> db_iter;
std::unique_ptr<ReferenceIterator> ref_iter;
for (int iteration = 0; iteration < num_iterations; ++iteration) {
SCOPED_TRACE(iteration);
// Create a new iterator every ~30 operations.
if (db_iter == nullptr || rnd.Next() % 30 == 0) {
// Sequence number for both iterators, in [0, entries.size() + 1].
uint64_t sequence = rnd.Next() % (data.entries.size() + 2);
ref_iter.reset(new ReferenceIterator(&data, sequence));
if (trace) {
std::cout << "new iterator, seq: " << sequence << std::endl;
}
// Raw pointer is handed to NewDBIterator() below.
// NOTE(review): presumably DBIter takes ownership of it - confirm.
auto internal_iter =
new StressTestIterator(&data, &rnd, BytewiseComparator());
internal_iter->error_probability = error_probability;
internal_iter->mutation_probability = mutation_probability;
internal_iter->target_hidden_fraction =
target_hidden_fraction;
internal_iter->trace = trace;
db_iter.reset(NewDBIterator(
env_, ropt, ImmutableOptions(options),
MutableCFOptions(options), BytewiseComparator(),
internal_iter, nullptr /* version */, sequence,
options.max_sequential_skip_in_iterations,
nullptr /*read_callback*/));
}
// Do a random operation. It's important to do it on ref_iter
// later than on db_iter to make sure ref_iter sees the correct
// recently_touched_keys.
// `old_key` is the starting point of the operation, used below to check
// the direction of movement.
std::string old_key;
bool forward = rnd.Next() % 2 > 0;
// Do Next()/Prev() ~90% of the time.
// A seek is forced whenever the reference iterator is not valid.
bool seek = !ref_iter->Valid() || rnd.Next() % 10 == 0;
if (trace) {
std::cout << iteration << ": ";
}
if (!seek) {
assert(db_iter->Valid());
old_key = ref_iter->key;
if (trace) {
std::cout << (forward ? "Next" : "Prev") << std::endl;
}
if (forward) {
db_iter->Next();
ref_iter->Next();
} else {
db_iter->Prev();
ref_iter->Prev();
}
} else {
// A seek starts a fresh window of "recently touched" keys.
data.recently_touched_keys.clear();
// Do SeekToFirst less often than Seek.
if (rnd.Next() % 4 == 0) {
if (trace) {
std::cout << (forward ? "SeekToFirst" : "SeekToLast")
<< std::endl;
}
if (forward) {
old_key = "";
db_iter->SeekToFirst();
ref_iter->SeekToFirst();
} else {
old_key = data.entries.back().key;
db_iter->SeekToLast();
ref_iter->SeekToLast();
}
} else {
old_key = gen_key(max_key);
if (trace) {
std::cout << (forward ? "Seek" : "SeekForPrev") << " \""
<< old_key << '"' << std::endl;
}
if (forward) {
db_iter->Seek(old_key);
ref_iter->Seek(old_key);
} else {
db_iter->SeekForPrev(old_key);
ref_iter->SeekForPrev(old_key);
}
}
}
// Check the result.
if (db_iter->Valid()) {
ASSERT_TRUE(db_iter->status().ok());
if (data.recently_touched_keys.count(
db_iter->key().ToString())) {
// Ended on a key that may have been mutated during the
// operation. Reference iterator skips such keys, so we
// can't check the exact result.
// Check that the key moved in the right direction.
// A seek may land on old_key itself; Next()/Prev() must move strictly.
if (forward) {
if (seek)
ASSERT_GE(db_iter->key().ToString(), old_key);
else
ASSERT_GT(db_iter->key().ToString(), old_key);
} else {
if (seek)
ASSERT_LE(db_iter->key().ToString(), old_key);
else
ASSERT_LT(db_iter->key().ToString(), old_key);
}
if (ref_iter->Valid()) {
// Check that DBIter didn't miss any non-mutated key.
if (forward) {
ASSERT_LT(db_iter->key().ToString(), ref_iter->key);
} else {
ASSERT_GT(db_iter->key().ToString(), ref_iter->key);
}
}
// Tell the next iteration of the loop to reseek the
// iterators.
ref_iter->valid = false;
++num_recently_removed;
} else {
// Untouched key: DBIter must agree exactly with the reference.
ASSERT_TRUE(ref_iter->Valid());
ASSERT_EQ(ref_iter->key, db_iter->key().ToString());
ASSERT_EQ(ref_iter->value, db_iter->value());
++num_matching;
}
} else if (db_iter->status().ok()) {
// DBIter reached the end without error; the reference must have too.
ASSERT_FALSE(ref_iter->Valid());
++num_at_end;
} else {
// Non-ok status. Nothing to check here.
// Tell the next iteration of the loop to reseek the
// iterators.
ref_iter->valid = false;
++num_not_ok;
}
}
}
}
}
}
}
}
// Check that all cases were hit many times.
EXPECT_GT(num_matching, 10000);
EXPECT_GT(num_at_end, 10000);
EXPECT_GT(num_not_ok, 10000);
EXPECT_GT(num_recently_removed, 10000);
std::cout << "stats:\n exact matches: " << num_matching
<< "\n end reached: " << num_at_end
<< "\n non-ok status: " << num_not_ok
<< "\n mutated on the fly: " << num_recently_removed << std::endl;
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point. GoogleTest consumes its own command-line arguments
// first, the remaining flags are parsed afterwards, and the result of running
// all registered tests becomes the process exit code.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  ParseCommandLineFlags(&argc, &argv, true);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,885 +0,0 @@
// Copyright (c) 2020-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include <memory>

#include "db/blob/blob_index.h"
#include "db/db_test_util.h"
#include "rocksdb/rocksdb_namespace.h"
namespace ROCKSDB_NAMESPACE {
// Kinds of single-entry write batches exercised by the tests in this file.
// `kNum` is not a real operation; it is a sentinel used as the exclusive upper
// bound when enumerating the operation types.
enum class WriteBatchOpType {
  kPut = 0,
  kDelete,
  kSingleDelete,
  kMerge,
  kPutEntity,
  kDeleteRange,
  kNum,
};

// `::testing::Range()` can only take the enum type if integer addition is
// defined for it. Returns the enumerator whose underlying value is `lhs + rhs`.
WriteBatchOpType operator+(WriteBatchOpType lhs, const int rhs) {
  const auto raw = static_cast<std::underlying_type_t<WriteBatchOpType>>(lhs);
  return static_cast<WriteBatchOpType>(raw + rhs);
}
// How a test hands its `WriteBatch` to the DB. `kNum` is a sentinel used as
// the exclusive upper bound when enumerating the modes.
enum class WriteMode {
  // `Write()` a `WriteBatch` constructed with `protection_bytes_per_key = 0`
  // and `WriteOptions::protection_bytes_per_key = 0`
  kWriteUnprotectedBatch = 0,
  // `Write()` a `WriteBatch` constructed with `protection_bytes_per_key > 0`.
  kWriteProtectedBatch,
  // `Write()` a `WriteBatch` constructed with `protection_bytes_per_key == 0`.
  // Protection is enabled via `WriteOptions::protection_bytes_per_key > 0`.
  kWriteOptionProtectedBatch,
  // TODO(ajkr): add a mode that uses `Write()` wrappers, e.g., `Put()`.
  kNum,
};

// `::testing::Range()` can only take the enum type if integer addition is
// defined for it. Returns the enumerator whose underlying value is `lhs + rhs`.
WriteMode operator+(WriteMode lhs, const int rhs) {
  const auto raw = static_cast<std::underlying_type_t<WriteMode>>(lhs);
  return static_cast<WriteMode>(raw + rhs);
}
// Builds a `WriteBatch` created with `protection_bytes_per_key` and adds one
// entry of kind `op_type` addressed to `cf_handle`.
//
// Returns the batch paired with the status of the operation that populated
// it. For `WriteBatchOpType::kNum` the function asserts; with assertions
// disabled it returns the empty batch and a default-constructed status.
std::pair<WriteBatch, Status> GetWriteBatch(ColumnFamilyHandle* cf_handle,
                                            size_t protection_bytes_per_key,
                                            WriteBatchOpType op_type) {
  WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */,
                   protection_bytes_per_key, 0 /* default_cf_ts_sz */);
  Status status;
  switch (op_type) {
    case WriteBatchOpType::kPut:
      status = batch.Put(cf_handle, "key", "val");
      break;
    case WriteBatchOpType::kDelete:
      status = batch.Delete(cf_handle, "key");
      break;
    case WriteBatchOpType::kSingleDelete:
      status = batch.SingleDelete(cf_handle, "key");
      break;
    case WriteBatchOpType::kMerge:
      status = batch.Merge(cf_handle, "key", "val");
      break;
    case WriteBatchOpType::kPutEntity:
      status = batch.PutEntity(
          cf_handle, "key", {{"attr_name1", "foo"}, {"attr_name2", "bar"}});
      break;
    case WriteBatchOpType::kDeleteRange:
      status = batch.DeleteRange(cf_handle, "begin", "end");
      break;
    case WriteBatchOpType::kNum:
      // Sentinel value, never a real operation.
      assert(false);
  }
  return {std::move(batch), std::move(status)};
}
// Shared base for the key-value checksum fixtures in this file. It forwards
// its constructor arguments to `DBTestBase` and adds a helper for choosing
// the column family handle a write batch should target.
class DbKvChecksumTestBase : public DBTestBase {
 public:
  DbKvChecksumTestBase(const std::string& path, bool env_do_fsync)
      : DBTestBase(path, env_do_fsync) {}

  // Returns the handle to pass to `GetWriteBatch()` for `op_type`. A null
  // `column_family` is replaced by the DB's default column family for
  // PutEntity only; in every other case `column_family` is returned as given.
  ColumnFamilyHandle* GetCFHandleToUse(ColumnFamilyHandle* column_family,
                                       WriteBatchOpType op_type) const {
    // Note: PutEntity cannot be called without column family
    const bool needs_default_cf =
        column_family == nullptr && op_type == WriteBatchOpType::kPutEntity;
    return needs_default_cf ? db_->DefaultColumnFamily() : column_family;
  }
};
// Fixture parameterized on (operation type, corruption addend, write mode,
// memtable protection bytes per key). It can write a single-entry batch in
// the configured mode and provides a sync-point callback that corrupts one
// more byte of an encoded entry on every invocation.
class DbKvChecksumTest
    : public DbKvChecksumTestBase,
      public ::testing::WithParamInterface<
          std::tuple<WriteBatchOpType, char, WriteMode,
                     uint32_t /* memtable_protection_bytes_per_key */>> {
 public:
  DbKvChecksumTest()
      : DbKvChecksumTestBase("db_kv_checksum_test", /*env_do_fsync=*/false),
        op_type_(std::get<0>(GetParam())),
        corrupt_byte_addend_(std::get<1>(GetParam())),
        write_mode_(std::get<2>(GetParam())),
        memtable_protection_bytes_per_key_(std::get<3>(GetParam())) {}

  // Writes one batch containing a single `op_type_` entry to `cf_handle`,
  // enabling protection as dictated by `write_mode_`. Returns the status of
  // `DB::Write()`, or `Status::NotSupported` for an unknown write mode.
  Status ExecuteWrite(ColumnFamilyHandle* cf_handle) {
    // Builds the batch with the requested per-key protection and writes it
    // using `write_opts`.
    const auto build_and_write = [this, cf_handle](
                                     size_t batch_protection_bytes,
                                     const WriteOptions& write_opts) {
      auto batch_and_status =
          GetWriteBatch(GetCFHandleToUse(cf_handle, op_type_),
                        batch_protection_bytes, op_type_);
      assert(batch_and_status.second.ok());
      return db_->Write(write_opts, &batch_and_status.first);
    };

    switch (write_mode_) {
      case WriteMode::kWriteUnprotectedBatch:
        // Default write option has protection_bytes_per_key = 0
        return build_and_write(0 /* protection_bytes_per_key */,
                               WriteOptions());
      case WriteMode::kWriteProtectedBatch:
        return build_and_write(8 /* protection_bytes_per_key */,
                               WriteOptions());
      case WriteMode::kWriteOptionProtectedBatch: {
        WriteOptions protected_opts;
        protected_opts.protection_bytes_per_key = 8;
        return build_and_write(0 /* protection_bytes_per_key */,
                               protected_opts);
      }
      case WriteMode::kNum:
        assert(false);
    }
    return Status::NotSupported("WriteMode " +
                                std::to_string(static_cast<int>(write_mode_)));
  }

  // Sync-point callback. `arg` points at a `Slice` over the encoded entry.
  // Adds `corrupt_byte_addend_` to the byte at `corrupt_byte_offset_` and
  // then advances the offset, so consecutive calls corrupt consecutive bytes.
  void CorruptNextByteCallBack(void* arg) {
    const Slice encoded = *static_cast<Slice*>(arg);
    if (entry_len_ == std::numeric_limits<size_t>::max()) {
      // We learn the entry size on the first attempt
      entry_len_ = encoded.size();
    }
    char* const bytes = const_cast<char*>(encoded.data());
    bytes[corrupt_byte_offset_++] += corrupt_byte_addend_;
  }

  // True while some byte of the entry has not been corrupted yet.
  bool MoreBytesToCorrupt() { return corrupt_byte_offset_ < entry_len_; }

 protected:
  WriteBatchOpType op_type_;
  char corrupt_byte_addend_;
  WriteMode write_mode_;
  uint32_t memtable_protection_bytes_per_key_;
  // Index of the next byte to corrupt.
  size_t corrupt_byte_offset_ = 0;
  // Size of the encoded entry; stays at the maximum `size_t` value until a
  // callback has observed an entry.
  size_t entry_len_ = std::numeric_limits<size_t>::max();
};
// Returns the name of `op_type` for use in generated test names. Asserts for
// the `kNum` sentinel or any other unknown value; with assertions disabled an
// empty string is returned for those.
std::string GetOpTypeString(const WriteBatchOpType& op_type) {
  const char* name = nullptr;
  switch (op_type) {
    case WriteBatchOpType::kPut:
      name = "Put";
      break;
    case WriteBatchOpType::kDelete:
      name = "Delete";
      break;
    case WriteBatchOpType::kSingleDelete:
      name = "SingleDelete";
      break;
    case WriteBatchOpType::kDeleteRange:
      name = "DeleteRange";
      break;
    case WriteBatchOpType::kMerge:
      name = "Merge";
      break;
    case WriteBatchOpType::kPutEntity:
      name = "PutEntity";
      break;
    case WriteBatchOpType::kNum:
      break;
  }
  if (name == nullptr) {
    assert(false);
    return "";
  }
  return name;
}
// Returns the name of `mode` for use in generated test names. The `kNum`
// sentinel asserts; it and any unknown value yield an empty string.
// NOTE(review): the third name carries a leading "k" unlike the other two;
// it is kept as-is because it is part of the generated test names.
std::string GetWriteModeString(const WriteMode& mode) {
  const char* name = "";
  switch (mode) {
    case WriteMode::kWriteUnprotectedBatch:
      name = "WriteUnprotectedBatch";
      break;
    case WriteMode::kWriteProtectedBatch:
      name = "WriteProtectedBatch";
      break;
    case WriteMode::kWriteOptionProtectedBatch:
      name = "kWriteOptionProtectedBatch";
      break;
    case WriteMode::kNum:
      assert(false);
      break;
  }
  return name;
}
// Instantiates DbKvChecksumTest for every operation type, three corruption
// addends, the two protected write modes, and memtable protection disabled.
INSTANTIATE_TEST_CASE_P(
    DbKvChecksumTest, DbKvChecksumTest,
    ::testing::Combine(::testing::Range(static_cast<WriteBatchOpType>(0),
                                        WriteBatchOpType::kNum),
                       ::testing::Values(2, 103, 251),
                       ::testing::Range(WriteMode::kWriteProtectedBatch,
                                        WriteMode::kNum),
                       ::testing::Values(0)),
    // Names each instance "<Op>Add<addend><WriteMode><memtable protection>".
    [](const testing::TestParamInfo<
        std::tuple<WriteBatchOpType, char, WriteMode, uint32_t>>& args) {
      const auto& param = args.param;
      // Go through `unsigned char` so an addend such as 251 is printed as 251
      // even where `char` is signed.
      const int addend = static_cast<unsigned char>(std::get<1>(param));
      std::string name = GetOpTypeString(std::get<0>(param));
      name += "Add";
      name += std::to_string(addend);
      name += GetWriteModeString(std::get<2>(param));
      name += std::to_string(static_cast<uint32_t>(std::get<3>(param)));
      return name;
    });
// TODO(ajkr): add a test that corrupts the `WriteBatch` contents. Such
// corruptions should only be detectable in `WriteMode::kWriteProtectedBatch`.
TEST_P(DbKvChecksumTest, MemTableAddCorrupted) {
  // This test repeatedly attempts to write `WriteBatch`es containing a single
  // entry of type `op_type_`. Each attempt has one byte corrupted in its
  // memtable entry by adding `corrupt_byte_addend_` to its original value. The
  // test repeats until an attempt has been made on each byte in the encoded
  // memtable entry. All attempts are expected to fail with
  // `Status::Corruption`.
  auto* const sync_point = SyncPoint::GetInstance();
  sync_point->SetCallBack("MemTable::Add:Encoded",
                          [this](void* arg) { CorruptNextByteCallBack(arg); });

  while (MoreBytesToCorrupt()) {
    // Failed memtable insert always leads to read-only mode, so we have to
    // reopen for every attempt.
    Options opts = CurrentOptions();
    if (op_type_ == WriteBatchOpType::kMerge) {
      opts.merge_operator = MergeOperators::CreateStringAppendOperator();
    }
    Reopen(opts);

    sync_point->EnableProcessing();
    ASSERT_TRUE(ExecuteWrite(nullptr /* cf_handle */).IsCorruption());
    sync_point->DisableProcessing();

    // In case the above callback is not invoked, this test will run
    // numeric_limits<size_t>::max() times until it reports an error (or will
    // exhaust disk space). Added this assert to report error early.
    ASSERT_TRUE(entry_len_ < std::numeric_limits<size_t>::max());
  }
}
TEST_P(DbKvChecksumTest, MemTableAddWithColumnFamilyCorrupted) {
  // This test repeatedly attempts to write `WriteBatch`es containing a single
  // entry of type `op_type_` to a non-default column family. Each attempt has
  // one byte corrupted in its memtable entry by adding `corrupt_byte_addend_`
  // to its original value. The test repeats until an attempt has been made on
  // each byte in the encoded memtable entry. All attempts are expected to fail
  // with `Status::Corruption`.
  Options opts = CurrentOptions();
  if (op_type_ == WriteBatchOpType::kMerge) {
    opts.merge_operator = MergeOperators::CreateStringAppendOperator();
  }
  CreateAndReopenWithCF({"pikachu"}, opts);

  auto* const sync_point = SyncPoint::GetInstance();
  sync_point->SetCallBack("MemTable::Add:Encoded",
                          [this](void* arg) { CorruptNextByteCallBack(arg); });

  while (MoreBytesToCorrupt()) {
    // Failed memtable insert always leads to read-only mode, so we have to
    // reopen for every attempt.
    ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, opts);

    sync_point->EnableProcessing();
    ASSERT_TRUE(ExecuteWrite(handles_[1]).IsCorruption());
    sync_point->DisableProcessing();

    // In case the above callback is not invoked, this test will run
    // numeric_limits<size_t>::max() times until it reports an error (or will
    // exhaust disk space). Added this assert to report error early.
    ASSERT_TRUE(entry_len_ < std::numeric_limits<size_t>::max());
  }
}
TEST_P(DbKvChecksumTest, NoCorruptionCase) {
  // Builds a protected single-entry batch and checks that its checksum
  // verifies when nothing has been corrupted.
  // If this test fails, we may have found a piece of malfunctioned hardware
  ColumnFamilyHandle* const cf = GetCFHandleToUse(nullptr, op_type_);
  auto built = GetWriteBatch(cf, 8 /* protection_bytes_per_key */, op_type_);
  ASSERT_OK(built.second);
  ASSERT_OK(built.first.VerifyChecksum());
}
TEST_P(DbKvChecksumTest, WriteToWALCorrupted) {
  // This test repeatedly attempts to write `WriteBatch`es containing a single
  // entry of type `op_type_`. Each attempt has one byte corrupted by adding
  // `corrupt_byte_addend_` to its original value. The test repeats until an
  // attempt has been made on each byte in the encoded write batch. All
  // attempts are expected to fail with `Status::Corruption`.
  Options opts = CurrentOptions();
  if (op_type_ == WriteBatchOpType::kMerge) {
    opts.merge_operator = MergeOperators::CreateStringAppendOperator();
  }

  auto* const sync_point = SyncPoint::GetInstance();
  sync_point->SetCallBack("DBImpl::WriteToWAL:log_entry",
                          [this](void* arg) { CorruptNextByteCallBack(arg); });

  // First 8 bytes are for sequence number which is not protected in write
  // batch
  corrupt_byte_offset_ = 8;
  while (MoreBytesToCorrupt()) {
    // Corrupted write batch leads to read-only mode, so we have to
    // reopen for every attempt.
    Reopen(opts);
    const auto wal_size_before = dbfull()->TEST_total_log_size();

    sync_point->EnableProcessing();
    ASSERT_TRUE(ExecuteWrite(nullptr /* cf_handle */).IsCorruption());
    // Confirm that nothing was written to WAL
    ASSERT_EQ(wal_size_before, dbfull()->TEST_total_log_size());
    ASSERT_TRUE(dbfull()->TEST_GetBGError().IsCorruption());
    sync_point->DisableProcessing();

    // In case the above callback is not invoked, this test will run
    // numeric_limits<size_t>::max() times until it reports an error (or will
    // exhaust disk space). Added this assert to report error early.
    ASSERT_TRUE(entry_len_ < std::numeric_limits<size_t>::max());
  }
}
TEST_P(DbKvChecksumTest, WriteToWALWithColumnFamilyCorrupted) {
  // This test repeatedly attempts to write `WriteBatch`es containing a single
  // entry of type `op_type_` to a non-default column family. Each attempt has
  // one byte corrupted by adding `corrupt_byte_addend_` to its original value.
  // The test repeats until an attempt has been made on each byte in the
  // encoded write batch. All attempts are expected to fail with
  // `Status::Corruption`.
  Options options = CurrentOptions();
  if (op_type_ == WriteBatchOpType::kMerge) {
    options.merge_operator = MergeOperators::CreateStringAppendOperator();
  }
  CreateAndReopenWithCF({"pikachu"}, options);
  SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::WriteToWAL:log_entry",
      [this](void* arg) { CorruptNextByteCallBack(arg); });
  // First 8 bytes are for sequence number which is not protected in write
  // batch
  corrupt_byte_offset_ = 8;
  while (MoreBytesToCorrupt()) {
    // Corrupted write batch leads to read-only mode, so we have to
    // reopen for every attempt.
    ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options);
    auto log_size_pre_write = dbfull()->TEST_total_log_size();
    SyncPoint::GetInstance()->EnableProcessing();
    // Write to the column family created above; passing a null handle here
    // would only exercise the default column family again.
    ASSERT_TRUE(ExecuteWrite(handles_[1]).IsCorruption());
    // Confirm that nothing was written to WAL
    ASSERT_EQ(log_size_pre_write, dbfull()->TEST_total_log_size());
    ASSERT_TRUE(dbfull()->TEST_GetBGError().IsCorruption());
    SyncPoint::GetInstance()->DisableProcessing();
    // In case the above callback is not invoked, this test will run
    // numeric_limits<size_t>::max() times until it reports an error (or will
    // exhaust disk space). Added this assert to report error early.
    ASSERT_TRUE(entry_len_ < std::numeric_limits<size_t>::max());
  }
}
// Fixture for tests that involve two write batches. Parameterized on the
// operation type of the first batch, the operation type of the second batch,
// and the addend used to corrupt a byte.
class DbKvChecksumTestMergedBatch
    : public DbKvChecksumTestBase,
      public ::testing::WithParamInterface<
          std::tuple<WriteBatchOpType, WriteBatchOpType, char>> {
 public:
  DbKvChecksumTestMergedBatch()
      : DbKvChecksumTestBase("db_kv_checksum_test", /*env_do_fsync=*/false),
        op_type1_(std::get<0>(GetParam())),
        op_type2_(std::get<1>(GetParam())),
        corrupt_byte_addend_(std::get<2>(GetParam())) {}

 protected:
  WriteBatchOpType op_type1_;
  WriteBatchOpType op_type2_;
  char corrupt_byte_addend_;
};
// Adds `corrupt_byte_addend` to the byte at index `offset` of the buffer that
// `content` refers to, modifying that buffer in place. Raises a test failure
// and returns without touching the buffer if `offset` is not inside `content`.
void CorruptWriteBatch(Slice* content, size_t offset,
                       char corrupt_byte_addend) {
  ASSERT_TRUE(offset < content->size());
  // The slice only exposes const data; cast it away to mutate the bytes.
  char* const target = const_cast<char*>(content->data()) + offset;
  *target += corrupt_byte_addend;
}
TEST_P(DbKvChecksumTestMergedBatch, NoCorruptionCase) {
  // Verify write batch checksum after write batch append
  auto first = GetWriteBatch(GetCFHandleToUse(nullptr, op_type1_),
                             8 /* protection_bytes_per_key */, op_type1_);
  ASSERT_OK(first.second);

  auto second = GetWriteBatch(GetCFHandleToUse(nullptr, op_type2_),
                              8 /* protection_bytes_per_key */, op_type2_);
  ASSERT_OK(second.second);

  // Append the second batch onto the first, then check the combined batch.
  WriteBatch& combined = first.first;
  ASSERT_OK(WriteBatchInternal::Append(&combined, &second.first));
  ASSERT_OK(combined.VerifyChecksum());
}
TEST_P(DbKvChecksumTestMergedBatch, WriteToWALCorrupted) {
// This test has two writers repeatedly attempt to write `WriteBatch`es
// containing a single entry of type op_type1_ and op_type2_ respectively. The
// leader of the write group writes the batch containing the entry of type
// op_type1_. One byte of the pre-merged write batches is corrupted by adding
// `corrupt_byte_addend_` to the batch's original value during each attempt.
// The test repeats until an attempt has been made on each byte in both
// pre-merged write batches. All attempts are expected to fail with
// `Status::Corruption`.
Options options = CurrentOptions();
if (op_type1_ == WriteBatchOpType::kMerge ||
op_type2_ == WriteBatchOpType::kMerge) {
options.merge_operator = MergeOperators::CreateStringAppendOperator();
}
// Both batches are built here to learn their encoded sizes; each attempt
// below rebuilds fresh batches before writing.
auto leader_batch_and_status =
GetWriteBatch(GetCFHandleToUse(nullptr, op_type1_),
8 /* protection_bytes_per_key */, op_type1_);
ASSERT_OK(leader_batch_and_status.second);
auto follower_batch_and_status =
GetWriteBatch(GetCFHandleToUse(nullptr, op_type2_),
8 /* protection_bytes_per_key */, op_type2_);
size_t leader_batch_size = leader_batch_and_status.first.GetDataSize();
size_t total_bytes =
leader_batch_size + follower_batch_and_status.first.GetDataSize();
// `corrupt_byte_offset` indexes the leader batch followed by the follower
// batch: offsets below `leader_batch_size` fall in the leader's batch, the
// rest (after subtracting `leader_batch_size`) in the follower's batch.
// First 8 bytes are for sequence number which is not protected in write batch
size_t corrupt_byte_offset = 8;
std::atomic<bool> follower_joined{false};
std::atomic<int> leader_count{0};
port::Thread follower_thread;
// This callback should only be called by the leader thread
SyncPoint::GetInstance()->SetCallBack(
"WriteThread::JoinBatchGroup:Wait2", [&](void* arg_leader) {
auto* leader = reinterpret_cast<WriteThread::Writer*>(arg_leader);
ASSERT_EQ(leader->state, WriteThread::STATE_GROUP_LEADER);
// This callback should only be called by the follower thread
SyncPoint::GetInstance()->SetCallBack(
"WriteThread::JoinBatchGroup:Wait", [&](void* arg_follower) {
auto* follower =
reinterpret_cast<WriteThread::Writer*>(arg_follower);
// The leader thread will wait on this bool and hence wait until
// this writer joins the write group
ASSERT_NE(follower->state, WriteThread::STATE_GROUP_LEADER);
// Corrupt the follower's batch only when the target byte lies in it.
if (corrupt_byte_offset >= leader_batch_size) {
Slice batch_content = follower->batch->Data();
CorruptWriteBatch(&batch_content,
corrupt_byte_offset - leader_batch_size,
corrupt_byte_addend_);
}
// Leader busy waits on this flag
follower_joined = true;
// So the follower does not enter the outer callback at
// WriteThread::JoinBatchGroup:Wait2
SyncPoint::GetInstance()->DisableProcessing();
});
// Start the other writer thread which will join the write group as
// follower
follower_thread = port::Thread([&]() {
follower_batch_and_status =
GetWriteBatch(GetCFHandleToUse(nullptr, op_type2_),
8 /* protection_bytes_per_key */, op_type2_);
ASSERT_OK(follower_batch_and_status.second);
ASSERT_TRUE(
db_->Write(WriteOptions(), &follower_batch_and_status.first)
.IsCorruption());
});
ASSERT_EQ(leader->batch->GetDataSize(), leader_batch_size);
// Corrupt the leader's batch only when the target byte lies in it.
if (corrupt_byte_offset < leader_batch_size) {
Slice batch_content = leader->batch->Data();
CorruptWriteBatch(&batch_content, corrupt_byte_offset,
corrupt_byte_addend_);
}
// Counted so the main loop can assert this callback ran exactly once per
// attempt.
leader_count++;
while (!follower_joined) {
// busy waiting
}
});
while (corrupt_byte_offset < total_bytes) {
// Reopen DB since it failed WAL write which leads to read-only mode
Reopen(options);
// Processing was disabled by the follower callback during the previous
// attempt, so it is re-enabled for each attempt.
SyncPoint::GetInstance()->EnableProcessing();
auto log_size_pre_write = dbfull()->TEST_total_log_size();
leader_batch_and_status =
GetWriteBatch(GetCFHandleToUse(nullptr, op_type1_),
8 /* protection_bytes_per_key */, op_type1_);
ASSERT_OK(leader_batch_and_status.second);
ASSERT_TRUE(db_->Write(WriteOptions(), &leader_batch_and_status.first)
.IsCorruption());
follower_thread.join();
// Prevent leader thread from entering this callback
SyncPoint::GetInstance()->ClearCallBack("WriteThread::JoinBatchGroup:Wait");
ASSERT_EQ(1, leader_count);
// Nothing should have been written to WAL
ASSERT_EQ(log_size_pre_write, dbfull()->TEST_total_log_size());
ASSERT_TRUE(dbfull()->TEST_GetBGError().IsCorruption());
corrupt_byte_offset++;
if (corrupt_byte_offset == leader_batch_size) {
// skip over the sequence number part of follower's write batch
corrupt_byte_offset += 8;
}
// Reset the per-attempt coordination state.
follower_joined = false;
leader_count = 0;
}
SyncPoint::GetInstance()->DisableProcessing();
}
TEST_P(DbKvChecksumTestMergedBatch, WriteToWALWithColumnFamilyCorrupted) {
// This test has two writers repeatedly attempt to write `WriteBatch`es
// containing a single entry of type op_type1_ and op_type2_ respectively. The
// leader of the write group writes the batch containing the entry of type
// op_type1_. One byte of the pre-merged write batches is corrupted by adding
// `corrupt_byte_addend_` to the batch's original value during each attempt.
// The test repeats until an attempt has been made on each byte in both
// pre-merged write batches. All attempts are expected to fail with
// `Status::Corruption`.
// Both writers target the column family "ramen" (handles_[1]).
Options options = CurrentOptions();
if (op_type1_ == WriteBatchOpType::kMerge ||
op_type2_ == WriteBatchOpType::kMerge) {
options.merge_operator = MergeOperators::CreateStringAppendOperator();
}
CreateAndReopenWithCF({"ramen"}, options);
// Both batches are built here to learn their encoded sizes; each attempt
// below rebuilds fresh batches before writing.
auto leader_batch_and_status =
GetWriteBatch(GetCFHandleToUse(handles_[1], op_type1_),
8 /* protection_bytes_per_key */, op_type1_);
ASSERT_OK(leader_batch_and_status.second);
auto follower_batch_and_status =
GetWriteBatch(GetCFHandleToUse(handles_[1], op_type2_),
8 /* protection_bytes_per_key */, op_type2_);
size_t leader_batch_size = leader_batch_and_status.first.GetDataSize();
size_t total_bytes =
leader_batch_size + follower_batch_and_status.first.GetDataSize();
// `corrupt_byte_offset` indexes the leader batch followed by the follower
// batch: offsets below `leader_batch_size` fall in the leader's batch, the
// rest (after subtracting `leader_batch_size`) in the follower's batch.
// First 8 bytes are for sequence number which is not protected in write batch
size_t corrupt_byte_offset = 8;
std::atomic<bool> follower_joined{false};
std::atomic<int> leader_count{0};
port::Thread follower_thread;
// This callback should only be called by the leader thread
SyncPoint::GetInstance()->SetCallBack(
"WriteThread::JoinBatchGroup:Wait2", [&](void* arg_leader) {
auto* leader = reinterpret_cast<WriteThread::Writer*>(arg_leader);
ASSERT_EQ(leader->state, WriteThread::STATE_GROUP_LEADER);
// This callback should only be called by the follower thread
SyncPoint::GetInstance()->SetCallBack(
"WriteThread::JoinBatchGroup:Wait", [&](void* arg_follower) {
auto* follower =
reinterpret_cast<WriteThread::Writer*>(arg_follower);
// The leader thread will wait on this bool and hence wait until
// this writer joins the write group
ASSERT_NE(follower->state, WriteThread::STATE_GROUP_LEADER);
// Corrupt the follower's batch only when the target byte lies in it.
if (corrupt_byte_offset >= leader_batch_size) {
Slice batch_content =
WriteBatchInternal::Contents(follower->batch);
CorruptWriteBatch(&batch_content,
corrupt_byte_offset - leader_batch_size,
corrupt_byte_addend_);
}
// Leader busy waits on this flag
follower_joined = true;
// So the follower does not enter the outer callback at
// WriteThread::JoinBatchGroup:Wait2
SyncPoint::GetInstance()->DisableProcessing();
});
// Start the other writer thread which will join the write group as
// follower
follower_thread = port::Thread([&]() {
follower_batch_and_status =
GetWriteBatch(GetCFHandleToUse(handles_[1], op_type2_),
8 /* protection_bytes_per_key */, op_type2_);
ASSERT_OK(follower_batch_and_status.second);
ASSERT_TRUE(
db_->Write(WriteOptions(), &follower_batch_and_status.first)
.IsCorruption());
});
ASSERT_EQ(leader->batch->GetDataSize(), leader_batch_size);
// Corrupt the leader's batch only when the target byte lies in it.
if (corrupt_byte_offset < leader_batch_size) {
Slice batch_content = WriteBatchInternal::Contents(leader->batch);
CorruptWriteBatch(&batch_content, corrupt_byte_offset,
corrupt_byte_addend_);
}
// Counted so the main loop can assert this callback ran exactly once per
// attempt.
leader_count++;
while (!follower_joined) {
// busy waiting
}
});
// NOTE(review): processing is also enabled at the top of every loop
// iteration below, so this call looks redundant — confirm before removing.
SyncPoint::GetInstance()->EnableProcessing();
while (corrupt_byte_offset < total_bytes) {
// Reopen DB since it failed WAL write which leads to read-only mode
ReopenWithColumnFamilies({kDefaultColumnFamilyName, "ramen"}, options);
// Processing was disabled by the follower callback during the previous
// attempt, so it is re-enabled for each attempt.
SyncPoint::GetInstance()->EnableProcessing();
auto log_size_pre_write = dbfull()->TEST_total_log_size();
leader_batch_and_status =
GetWriteBatch(GetCFHandleToUse(handles_[1], op_type1_),
8 /* protection_bytes_per_key */, op_type1_);
ASSERT_OK(leader_batch_and_status.second);
ASSERT_TRUE(db_->Write(WriteOptions(), &leader_batch_and_status.first)
.IsCorruption());
follower_thread.join();
// Prevent leader thread from entering this callback
SyncPoint::GetInstance()->ClearCallBack("WriteThread::JoinBatchGroup:Wait");
ASSERT_EQ(1, leader_count);
// Nothing should have been written to WAL
ASSERT_EQ(log_size_pre_write, dbfull()->TEST_total_log_size());
ASSERT_TRUE(dbfull()->TEST_GetBGError().IsCorruption());
corrupt_byte_offset++;
if (corrupt_byte_offset == leader_batch_size) {
// skip over the sequence number part of follower's write batch
corrupt_byte_offset += 8;
}
// Reset the per-attempt coordination state.
follower_joined = false;
leader_count = 0;
}
SyncPoint::GetInstance()->DisableProcessing();
}
// Instantiates DbKvChecksumTestMergedBatch for every pair of operation types
// combined with three corruption addends.
INSTANTIATE_TEST_CASE_P(
    DbKvChecksumTestMergedBatch, DbKvChecksumTestMergedBatch,
    ::testing::Combine(::testing::Range(static_cast<WriteBatchOpType>(0),
                                        WriteBatchOpType::kNum),
                       ::testing::Range(static_cast<WriteBatchOpType>(0),
                                        WriteBatchOpType::kNum),
                       ::testing::Values(2, 103, 251)),
    // Names each instance "<Op1><Op2>Add<addend>".
    [](const testing::TestParamInfo<
        std::tuple<WriteBatchOpType, WriteBatchOpType, char>>& args) {
      const auto& param = args.param;
      // Go through `unsigned char` so an addend such as 251 is printed as 251
      // even where `char` is signed.
      const int addend = static_cast<unsigned char>(std::get<2>(param));
      std::string name = GetOpTypeString(std::get<0>(param));
      name += GetOpTypeString(std::get<1>(param));
      name += "Add";
      name += std::to_string(addend);
      return name;
    });
// TODO: add test for transactions
// TODO: add test for corrupted write batch with WAL disabled
// Plain (non-parameterized) fixture; it only fixes the DB path and fsync
// setting passed to `DBTestBase`.
class DbKVChecksumWALToWriteBatchTest : public DBTestBase {
 public:
  DbKVChecksumWALToWriteBatchTest()
      : DBTestBase("db_kv_checksum_test", /*env_do_fsync=*/false) {
  }
};
TEST_F(DbKVChecksumWALToWriteBatchTest, WriteBatchChecksumHandoff) {
  // Writes one key, then reopens the DB with two sync-point callbacks
  // installed. The first saves a copy of the batch it is given and corrupts
  // that batch's first byte. The second checks that the checksum it is given
  // equals the XXH3 hash of the saved, uncorrupted content. The reopen is
  // expected to fail with `Status::Corruption`.
  Options options = CurrentOptions();
  Reopen(options);
  ASSERT_OK(db_->Put(WriteOptions(), "key", "val"));
  std::string content = "";
  SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::RecoverLogFiles:BeforeUpdateProtectionInfo:batch",
      [&](void* batch_ptr) {
        WriteBatch* batch = static_cast<WriteBatch*>(batch_ptr);
        // Snapshot the batch before it is corrupted.
        content.assign(batch->Data().data(), batch->GetDataSize());
        Slice batch_content = batch->Data();
        // Corrupt the first byte by adding 1 to it
        CorruptWriteBatch(&batch_content, 0, 1);
      });
  SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::RecoverLogFiles:BeforeUpdateProtectionInfo:checksum",
      [&](void* checksum_ptr) {
        // Verify that checksum is produced on the batch content
        uint64_t checksum = *static_cast<uint64_t*>(checksum_ptr);
        ASSERT_EQ(checksum, XXH3_64bits(content.data(), content.size()));
      });
  SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_TRUE(TryReopen(options).IsCorruption());
  SyncPoint::GetInstance()->DisableProcessing();
}
// TODO (cbi): add DeleteRange coverage once it is implemented
// Fixture for tests that corrupt an entry after it has been inserted into the
// memtable. It reuses the parameters and helpers of `DbKvChecksumTest` and
// adds a way to step over bytes that must not be corrupted.
class DbMemtableKVChecksumTest : public DbKvChecksumTest {
 public:
  DbMemtableKVChecksumTest() = default;

 protected:
  // Indices in the memtable entry that we will not corrupt.
  // For memtable entry format, see comments in MemTable::Add().
  // We do not corrupt key length and value length fields in this test
  // case since it causes segfault and ASAN will complain.
  // For this test case, key and value are all of length 3, so
  // key length field is at index 0 and value length field is at index 12.
  const std::set<size_t> index_not_to_corrupt{0, 12};

  // Advances `corrupt_byte_offset_` by one if it currently points at an index
  // listed in `index_not_to_corrupt`.
  void SkipNotToCorruptEntry() {
    const bool must_skip = index_not_to_corrupt.count(corrupt_byte_offset_) > 0;
    if (must_skip) {
      ++corrupt_byte_offset_;
    }
  }
};
// Instantiates DbMemtableKVChecksumTest for the operation types before
// `kDeleteRange`, three corruption addends, the write modes before
// `kWriteOptionProtectedBatch`, and three memtable protection sizes.
INSTANTIATE_TEST_CASE_P(
    DbMemtableKVChecksumTest, DbMemtableKVChecksumTest,
    ::testing::Combine(::testing::Range(static_cast<WriteBatchOpType>(0),
                                        WriteBatchOpType::kDeleteRange),
                       ::testing::Values(2, 103, 251),
                       ::testing::Range(static_cast<WriteMode>(0),
                                        WriteMode::kWriteOptionProtectedBatch),
                       // skip 1 byte checksum as it makes test flaky
                       ::testing::Values(2, 4, 8)),
    // Names each instance "<Op>Add<addend><WriteMode><memtable protection>".
    [](const testing::TestParamInfo<
        std::tuple<WriteBatchOpType, char, WriteMode, uint32_t>>& args) {
      const auto& param = args.param;
      // Go through `unsigned char` so an addend such as 251 is printed as 251
      // even where `char` is signed.
      const int addend = static_cast<unsigned char>(std::get<1>(param));
      std::string name = GetOpTypeString(std::get<0>(param));
      name += "Add";
      name += std::to_string(addend);
      name += GetWriteModeString(std::get<2>(param));
      name += std::to_string(static_cast<uint32_t>(std::get<3>(param)));
      return name;
    });
TEST_P(DbMemtableKVChecksumTest, GetWithCorruptAfterMemtableInsert) {
  // Each iteration writes one entry, corrupts one byte of it when it is read
  // back, and expects `Get()` to report `Status::Corruption`.
  auto* const sync_point = SyncPoint::GetInstance();
  // Record memtable entry size.
  // Not corrupting memtable entry here since it will segfault
  // or fail some asserts inside memtablerep implementation
  // e.g., when key_len is corrupted.
  sync_point->SetCallBack("MemTable::Add:BeforeReturn:Encoded",
                          [this](void* arg) {
                            entry_len_ = static_cast<Slice*>(arg)->size();
                          });
  // Corrupt the next byte of the entry and move on to the following one.
  sync_point->SetCallBack("Memtable::SaveValue:Begin:entry",
                          [this](void* entry) {
                            char* const bytes = *static_cast<char**>(entry);
                            bytes[corrupt_byte_offset_] += corrupt_byte_addend_;
                            ++corrupt_byte_offset_;
                          });
  sync_point->EnableProcessing();

  Options opts = CurrentOptions();
  opts.memtable_protection_bytes_per_key = memtable_protection_bytes_per_key_;
  if (op_type_ == WriteBatchOpType::kMerge) {
    opts.merge_operator = MergeOperators::CreateStringAppendOperator();
  }

  for (SkipNotToCorruptEntry(); MoreBytesToCorrupt();
       SkipNotToCorruptEntry()) {
    Reopen(opts);
    ASSERT_OK(ExecuteWrite(nullptr));
    std::string value;
    ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsCorruption());
    Destroy(opts);
  }
}
TEST_P(DbMemtableKVChecksumTest,
       GetWithColumnFamilyCorruptAfterMemtableInsert) {
  // Each iteration writes one entry to column family "pikachu", corrupts one
  // byte of it when it is read back, and expects `Get()` to report
  // `Status::Corruption`.
  auto* const sync_point = SyncPoint::GetInstance();
  // Record memtable entry size.
  // Not corrupting memtable entry here since it will segfault
  // or fail some asserts inside memtablerep implementation
  // e.g., when key_len is corrupted.
  sync_point->SetCallBack("MemTable::Add:BeforeReturn:Encoded",
                          [this](void* arg) {
                            entry_len_ = static_cast<Slice*>(arg)->size();
                          });
  // Corrupt the next byte of the entry and move on to the following one.
  sync_point->SetCallBack("Memtable::SaveValue:Begin:entry",
                          [this](void* entry) {
                            char* const bytes = *static_cast<char**>(entry);
                            bytes[corrupt_byte_offset_] += corrupt_byte_addend_;
                            ++corrupt_byte_offset_;
                          });
  sync_point->EnableProcessing();

  Options opts = CurrentOptions();
  opts.memtable_protection_bytes_per_key = memtable_protection_bytes_per_key_;
  if (op_type_ == WriteBatchOpType::kMerge) {
    opts.merge_operator = MergeOperators::CreateStringAppendOperator();
  }

  for (SkipNotToCorruptEntry(); MoreBytesToCorrupt();
       SkipNotToCorruptEntry()) {
    Reopen(opts);
    CreateAndReopenWithCF({"pikachu"}, opts);
    ASSERT_OK(ExecuteWrite(handles_[1]));
    std::string value;
    ASSERT_TRUE(
        db_->Get(ReadOptions(), handles_[1], "key", &value).IsCorruption());
    Destroy(opts);
  }
}
TEST_P(DbMemtableKVChecksumTest, IteratorWithCorruptAfterMemtableInsert) {
  // Each iteration writes one entry whose encoded form has one byte corrupted
  // by the sync-point callback, then expects an iterator positioned with
  // `SeekToFirst()` to be invalid and to report `Status::Corruption`.
  SyncPoint::GetInstance()->SetCallBack(
      "MemTable::Add:BeforeReturn:Encoded",
      [this](void* arg) { CorruptNextByteCallBack(arg); });
  SyncPoint::GetInstance()->EnableProcessing();
  Options options = CurrentOptions();
  options.memtable_protection_bytes_per_key =
      memtable_protection_bytes_per_key_;
  if (op_type_ == WriteBatchOpType::kMerge) {
    options.merge_operator = MergeOperators::CreateStringAppendOperator();
  }
  SkipNotToCorruptEntry();
  while (MoreBytesToCorrupt()) {
    Reopen(options);
    ASSERT_OK(ExecuteWrite(nullptr));
    // Owned by a smart pointer so the iterator is released even when one of
    // the assertions below fails and returns from the test early.
    std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
    it->SeekToFirst();
    ASSERT_FALSE(it->Valid());
    ASSERT_TRUE(it->status().IsCorruption());
    // Release the iterator before the DB is destroyed.
    it.reset();
    Destroy(options);
    SkipNotToCorruptEntry();
  }
}
TEST_P(DbMemtableKVChecksumTest,
       IteratorWithColumnFamilyCorruptAfterMemtableInsert) {
  // Each iteration writes one entry to column family "pikachu" whose encoded
  // form has one byte corrupted by the sync-point callback, then expects an
  // iterator over that column family positioned with `SeekToFirst()` to be
  // invalid and to report `Status::Corruption`.
  SyncPoint::GetInstance()->SetCallBack(
      "MemTable::Add:BeforeReturn:Encoded",
      [this](void* arg) { CorruptNextByteCallBack(arg); });
  SyncPoint::GetInstance()->EnableProcessing();
  Options options = CurrentOptions();
  options.memtable_protection_bytes_per_key =
      memtable_protection_bytes_per_key_;
  if (op_type_ == WriteBatchOpType::kMerge) {
    options.merge_operator = MergeOperators::CreateStringAppendOperator();
  }
  SkipNotToCorruptEntry();
  while (MoreBytesToCorrupt()) {
    Reopen(options);
    CreateAndReopenWithCF({"pikachu"}, options);
    ASSERT_OK(ExecuteWrite(handles_[1]));
    // Owned by a smart pointer so the iterator is released even when one of
    // the assertions below fails and returns from the test early.
    std::unique_ptr<Iterator> it(
        db_->NewIterator(ReadOptions(), handles_[1]));
    it->SeekToFirst();
    ASSERT_FALSE(it->Valid());
    ASSERT_TRUE(it->status().IsCorruption());
    // Release the iterator before the DB is destroyed.
    it.reset();
    Destroy(options);
    SkipNotToCorruptEntry();
  }
}
TEST_P(DbMemtableKVChecksumTest, FlushWithCorruptAfterMemtableInsert) {
  // Writes a single entry with one byte corrupted by the sync-point callback
  // and expects the subsequent flush to fail with `Status::Corruption`.
  auto* const sync_point = SyncPoint::GetInstance();
  sync_point->SetCallBack("MemTable::Add:BeforeReturn:Encoded",
                          [this](void* arg) { CorruptNextByteCallBack(arg); });
  sync_point->EnableProcessing();

  Options opts = CurrentOptions();
  opts.memtable_protection_bytes_per_key = memtable_protection_bytes_per_key_;
  if (op_type_ == WriteBatchOpType::kMerge) {
    opts.merge_operator = MergeOperators::CreateStringAppendOperator();
  }
  SkipNotToCorruptEntry();

  // Not corrupting each byte like other tests since Flush() is relatively
  // slow.
  Reopen(opts);
  ASSERT_OK(ExecuteWrite(nullptr));
  ASSERT_TRUE(Flush().IsCorruption());
  // DB enters read-only state when flush reads corrupted data
  ASSERT_TRUE(dbfull()->TEST_GetBGError().IsCorruption());
  Destroy(opts);
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack-trace handler, hands the
// command-line arguments to GoogleTest, and returns the result of running all
// registered tests.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

@ -1,297 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
// The introduction of SyncPoint effectively disabled building and running this
// test in Release builds, which is a pity because it is a good test.
#include "db/db_test_util.h"
#include "env/mock_env.h"
#include "port/stack_trace.h"
namespace ROCKSDB_NAMESPACE {
class DBTestXactLogIterator : public DBTestBase {
public:
DBTestXactLogIterator()
: DBTestBase("db_log_iter_test", /*env_do_fsync=*/true) {}
std::unique_ptr<TransactionLogIterator> OpenTransactionLogIter(
const SequenceNumber seq) {
std::unique_ptr<TransactionLogIterator> iter;
Status status = dbfull()->GetUpdatesSince(seq, &iter);
EXPECT_OK(status);
EXPECT_TRUE(iter->Valid());
return iter;
}
};
namespace {
// Reads batches from `iter` until it is no longer Valid().
//
// `count` is reset to 0 and then incremented once per batch. Every batch is
// expected to carry a sequence number strictly greater than the previous one,
// and the iterator status is expected to be OK after each batch. Once the
// iterator is exhausted its status is expected to be OK when `expect_ok` is
// true and not OK otherwise.
//
// Returns the `sequence` field of the last BatchResult fetched (the
// default-constructed BatchResult's value if nothing was read).
SequenceNumber ReadRecords(std::unique_ptr<TransactionLogIterator>& iter,
                           int& count, bool expect_ok = true) {
  count = 0;
  SequenceNumber prev_sequence = 0;
  BatchResult batch;
  for (; iter->Valid(); iter->Next()) {
    batch = iter->GetBatch();
    EXPECT_TRUE(batch.sequence > prev_sequence);
    ++count;
    prev_sequence = batch.sequence;
    EXPECT_OK(iter->status());
  }
  if (!expect_ok) {
    EXPECT_NOK(iter->status());
  } else {
    EXPECT_OK(iter->status());
  }
  return batch.sequence;
}
void ExpectRecords(const int expected_no_records,
std::unique_ptr<TransactionLogIterator>& iter) {
int num_records;
ReadRecords(iter, num_records);
ASSERT_EQ(num_records, expected_no_records);
}
} // anonymous namespace
// Writes three entries across two column families, checks that a log iterator
// opened at sequence 0 yields three records, then reopens the DB, writes three
// more entries and checks that the iterator now yields six records. Repeated
// for every configuration produced by ChangeCompactOptions().
TEST_F(DBTestXactLogIterator, TransactionLogIterator) {
  const std::vector<std::string> cf_names = {"default", "pikachu"};
  do {
    Options options = OptionsForLogIterTest();
    DestroyAndReopen(options);
    CreateAndReopenWithCF({"pikachu"}, options);

    ASSERT_OK(Put(0, "key1", DummyString(1024)));
    ASSERT_OK(Put(1, "key2", DummyString(1024)));
    ASSERT_OK(Put(1, "key2", DummyString(1024)));
    ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3U);
    {
      // Scoped so the iterator is released before the DB is reopened.
      auto first_iter = OpenTransactionLogIter(0);
      ExpectRecords(3, first_iter);
    }

    ReopenWithColumnFamilies(cf_names, options);
    env_->SleepForMicroseconds(2 * 1000 * 1000);

    ASSERT_OK(Put(0, "key4", DummyString(1024)));
    ASSERT_OK(Put(1, "key5", DummyString(1024)));
    ASSERT_OK(Put(0, "key6", DummyString(1024)));
    {
      auto second_iter = OpenTransactionLogIter(0);
      ExpectRecords(6, second_iter);
    }
  } while (ChangeCompactOptions());
}
#ifndef NDEBUG // sync point is not included with DNDEBUG build
// Uses sync-point dependencies to reproduce the race in which a log file is
// moved to the archive directory in the middle of listing WAL files, and
// checks that the log iterator still returns all five records.
TEST_F(DBTestXactLogIterator, TransactionLogIteratorRace) {
  // Each row holds four sync-point names; they are loaded as two dependency
  // pairs: {row[0], row[1]} and {row[2], row[3]}.
  static const char* const kRaceSyncPoints[][4] = {
      {"WalManager::GetSortedWalFiles:1", "WalManager::PurgeObsoleteFiles:1",
       "WalManager::PurgeObsoleteFiles:2", "WalManager::GetSortedWalFiles:2"},
      {"WalManager::GetSortedWalsOfType:1", "WalManager::PurgeObsoleteFiles:1",
       "WalManager::PurgeObsoleteFiles:2",
       "WalManager::GetSortedWalsOfType:2"}};

  auto* const sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  for (const auto& row : kRaceSyncPoints) {
    // Setup sync point dependency to reproduce the race condition of
    // a log file moved to archived dir, in the middle of GetSortedWalFiles
    sync_point->LoadDependency({
        {row[0], row[1]},
        {row[2], row[3]},
    });

    do {
      sync_point->ClearTrace();
      sync_point->DisableProcessing();

      Options options = OptionsForLogIterTest();
      DestroyAndReopen(options);
      ASSERT_OK(Put("key1", DummyString(1024)));
      ASSERT_OK(dbfull()->Flush(FlushOptions()));
      ASSERT_OK(Put("key2", DummyString(1024)));
      ASSERT_OK(dbfull()->Flush(FlushOptions()));
      ASSERT_OK(Put("key3", DummyString(1024)));
      ASSERT_OK(dbfull()->Flush(FlushOptions()));
      ASSERT_OK(Put("key4", DummyString(1024)));
      ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 4U);
      ASSERT_OK(dbfull()->FlushWAL(false));
      {
        auto baseline_iter = OpenTransactionLogIter(0);
        ExpectRecords(4, baseline_iter);
      }

      sync_point->EnableProcessing();
      // trigger async flush, and log move. Well, log move will
      // wait until the GetSortedWalFiles:1 to reproduce the race
      // condition
      FlushOptions async_flush;
      async_flush.wait = false;
      ASSERT_OK(dbfull()->Flush(async_flush));

      // "key5" would be written in a new memtable and log
      ASSERT_OK(Put("key5", DummyString(1024)));
      ASSERT_OK(dbfull()->FlushWAL(false));
      {
        // this iter would miss "key4" if not fixed
        auto racing_iter = OpenTransactionLogIter(0);
        ExpectRecords(5, racing_iter);
      }
    } while (ChangeCompactOptions());
  }
}
#endif
// Checks that an iterator which has run past the last record stays OK while
// invalid, and becomes valid again after Next() once another record has been
// written.
TEST_F(DBTestXactLogIterator, TransactionLogIteratorStallAtLastRecord) {
  do {
    Options options = OptionsForLogIterTest();
    DestroyAndReopen(options);
    ASSERT_OK(Put("key1", DummyString(1024)));

    auto log_iter = OpenTransactionLogIter(0);
    ASSERT_OK(log_iter->status());
    ASSERT_TRUE(log_iter->Valid());

    // Step past the only record: iterator is invalid but not in error.
    log_iter->Next();
    ASSERT_FALSE(log_iter->Valid());
    ASSERT_OK(log_iter->status());

    // A new write followed by Next() makes the iterator valid again.
    ASSERT_OK(Put("key2", DummyString(1024)));
    log_iter->Next();
    ASSERT_OK(log_iter->status());
    ASSERT_TRUE(log_iter->Valid());
  } while (ChangeCompactOptions());
}
// Writes two entries, flushes, reopens the DB, and checks that a log iterator
// opened at sequence 0 still yields both records.
TEST_F(DBTestXactLogIterator, TransactionLogIteratorCheckAfterRestart) {
  do {
    Options options = OptionsForLogIterTest();
    DestroyAndReopen(options);

    ASSERT_OK(Put("key1", DummyString(1024)));
    ASSERT_OK(Put("key2", DummyString(1023)));
    ASSERT_OK(dbfull()->Flush(FlushOptions()));

    Reopen(options);
    auto log_iter = OpenTransactionLogIter(0);
    ExpectRecords(2, log_iter);
  } while (ChangeCompactOptions());
}
// Truncates the first WAL file to half its size to create a gap, writes one
// more entry, and checks that reading from the start stops before the gap
// with a non-OK status while reading from just after the last sequence read
// yields exactly one record.
TEST_F(DBTestXactLogIterator, TransactionLogIteratorCorruptedLog) {
  do {
    Options options = OptionsForLogIterTest();
    DestroyAndReopen(options);

    for (int i = 0; i < 1024; ++i) {
      ASSERT_OK(Put("key" + std::to_string(i), DummyString(10)));
    }
    ASSERT_OK(Flush());
    ASSERT_OK(db_->FlushWAL(false));

    // Corrupt this log to create a gap
    ASSERT_OK(db_->DisableFileDeletions());
    VectorLogPtr wal_files;
    ASSERT_OK(db_->GetSortedWalFiles(wal_files));
    ASSERT_FALSE(wal_files.empty());
    const auto& first_wal = wal_files.front();
    const auto logfile_path = dbname_ + "/" + first_wal->PathName();
    ASSERT_OK(test::TruncateFile(env_, logfile_path,
                                 first_wal->SizeFileBytes() / 2));
    ASSERT_OK(db_->EnableFileDeletions());

    // Insert a new entry to a new log file
    ASSERT_OK(Put("key1025", DummyString(10)));
    ASSERT_OK(db_->FlushWAL(false));

    // Try to read from the beginning. Should stop before the gap and read less
    // than 1025 entries
    auto head_iter = OpenTransactionLogIter(0);
    int records_read = 0;
    SequenceNumber last_sequence_read =
        ReadRecords(head_iter, records_read, false);
    ASSERT_LT(last_sequence_read, 1025U);

    // Try to read past the gap, should be able to seek to key1025
    auto tail_iter = OpenTransactionLogIter(last_sequence_read + 1);
    ExpectRecords(1, tail_iter);
  } while (ChangeCompactOptions());
}
// Writes a four-operation WriteBatch across two column families, flushes both,
// reopens, writes one more entry, and checks that a log iterator opened at
// sequence 3 yields two records.
TEST_F(DBTestXactLogIterator, TransactionLogIteratorBatchOperations) {
  const std::vector<std::string> cf_names = {"default", "pikachu"};
  do {
    Options options = OptionsForLogIterTest();
    DestroyAndReopen(options);
    CreateAndReopenWithCF({"pikachu"}, options);

    WriteBatch batch;
    ASSERT_OK(batch.Put(handles_[1], "key1", DummyString(1024)));
    ASSERT_OK(batch.Put(handles_[0], "key2", DummyString(1024)));
    ASSERT_OK(batch.Put(handles_[1], "key3", DummyString(1024)));
    ASSERT_OK(batch.Delete(handles_[0], "key2"));
    ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));

    ASSERT_OK(Flush(1));
    ASSERT_OK(Flush(0));
    ReopenWithColumnFamilies(cf_names, options);
    ASSERT_OK(Put(1, "key4", DummyString(1024)));

    auto log_iter = OpenTransactionLogIter(3);
    ExpectRecords(2, log_iter);
  } while (ChangeCompactOptions());
}
// Writes a batch that interleaves Put/Delete operations with PutLogData
// blobs, then replays the first batch returned by the log iterator through a
// WriteBatch::Handler and checks that operations and blobs come back in the
// order they were written.
TEST_F(DBTestXactLogIterator, TransactionLogIteratorBlobs) {
  Options options = OptionsForLogIterTest();
  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);
  {
    WriteBatch batch;
    ASSERT_OK(batch.Put(handles_[1], "key1", DummyString(1024)));
    ASSERT_OK(batch.Put(handles_[0], "key2", DummyString(1024)));
    ASSERT_OK(batch.PutLogData(Slice("blob1")));
    ASSERT_OK(batch.Put(handles_[1], "key3", DummyString(1024)));
    ASSERT_OK(batch.PutLogData(Slice("blob2")));
    ASSERT_OK(batch.Delete(handles_[0], "key2"));
    ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
    ReopenWithColumnFamilies({"default", "pikachu"}, options);
  }

  auto res = OpenTransactionLogIter(0)->GetBatch();

  // Records a textual trace of every callback it receives.
  struct Handler : public WriteBatch::Handler {
    std::string seen;

    Status PutCF(uint32_t cf, const Slice& key, const Slice& value) override {
      seen.append("Put(")
          .append(std::to_string(cf))
          .append(", ")
          .append(key.ToString())
          .append(", ")
          .append(std::to_string(value.size()))
          .append(")");
      return Status::OK();
    }

    Status MergeCF(uint32_t cf, const Slice& key, const Slice& value) override {
      seen.append("Merge(")
          .append(std::to_string(cf))
          .append(", ")
          .append(key.ToString())
          .append(", ")
          .append(std::to_string(value.size()))
          .append(")");
      return Status::OK();
    }

    void LogData(const Slice& blob) override {
      seen.append("LogData(").append(blob.ToString()).append(")");
    }

    Status DeleteCF(uint32_t cf, const Slice& key) override {
      seen.append("Delete(")
          .append(std::to_string(cf))
          .append(", ")
          .append(key.ToString())
          .append(")");
      return Status::OK();
    }
  } handler;

  ASSERT_OK(res.writeBatchPtr->Iterate(&handler));
  ASSERT_EQ(
      "Put(1, key1, 1024)"
      "Put(0, key2, 1024)"
      "LogData(blob1)"
      "Put(1, key3, 1024)"
      "LogData(blob2)"
      "Delete(0, key2)",
      handler.seen);
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack-trace handler, hands the
// command-line arguments to GoogleTest, and returns the result of running all
// registered tests.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

@ -1,505 +0,0 @@
// Copyright (c) 2020-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "test_util/testharness.h"
#ifdef OS_LINUX
#include "env/io_posix.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
namespace ROCKSDB_NAMESPACE {
// Env wrapper that routes DB path registration and unregistration to a
// caller-supplied LogicalBlockSizeCache, so tests can inspect that cache.
class EnvWithCustomLogicalBlockSizeCache : public EnvWrapper {
 public:
  // `cache` is stored as a raw pointer and is not owned by this wrapper.
  EnvWithCustomLogicalBlockSizeCache(Env* env, LogicalBlockSizeCache* cache)
      : EnvWrapper(env), cache_(cache) {}

  // Forwards to the cache's RefAndCacheLogicalBlockSize() and returns its
  // status.
  Status RegisterDbPaths(const std::vector<std::string>& paths) override {
    return cache_->RefAndCacheLogicalBlockSize(paths);
  }

  // Forwards to the cache's UnrefAndTryRemoveCachedLogicalBlockSize(); always
  // returns OK.
  Status UnregisterDbPaths(const std::vector<std::string>& paths) override {
    cache_->UnrefAndTryRemoveCachedLogicalBlockSize(paths);
    return Status::OK();
  }

 private:
  LogicalBlockSizeCache* cache_;
};
// Fixture that wires a LogicalBlockSizeCache with stubbed size lookups into an
// Env wrapper, and provides a set of paths under a per-thread DB directory.
class DBLogicalBlockSizeCacheTest : public testing::Test {
 public:
  DBLogicalBlockSizeCacheTest()
      : dbname_(test::PerThreadDBPath("logical_block_size_cache_test")),
        data_path_0_(dbname_ + "/data_path_0"),
        data_path_1_(dbname_ + "/data_path_1"),
        cf_path_0_(dbname_ + "/cf_path_0"),
        cf_path_1_(dbname_ + "/cf_path_1") {
    // Stub: the "block size" of a file descriptor is the descriptor value.
    auto fd_block_size_stub = [](int fd) { return fd; };
    // Stub: every directory reports a block size of 1024.
    auto dir_block_size_stub = [](const std::string& /*dir*/, size_t* size) {
      *size = 1024;
      return Status::OK();
    };
    cache_ = std::make_unique<LogicalBlockSizeCache>(fd_block_size_stub,
                                                     dir_block_size_stub);
    env_ = std::make_unique<EnvWithCustomLogicalBlockSizeCache>(Env::Default(),
                                                                cache_.get());
  }

 protected:
  std::string dbname_;
  std::string data_path_0_;
  std::string data_path_1_;
  std::string cf_path_0_;
  std::string cf_path_1_;
  // Declared before env_ because env_ holds a raw pointer into cache_.
  std::unique_ptr<LogicalBlockSizeCache> cache_;
  std::unique_ptr<Env> env_;
};
// Tests that Open will cache the logical block size for data paths,
// and Close will remove the cached sizes. Runs once with DB::Open and once
// with DB::OpenForReadOnly.
TEST_F(DBLogicalBlockSizeCacheTest, OpenClose) {
  Options options;
  options.create_if_missing = true;
  options.env = env_.get();
  options.db_paths = {{data_path_0_, 2048}, {data_path_1_, 2048}};

  for (int pass = 0; pass < 2; ++pass) {
    DB* db = nullptr;
    if (pass == 0) {
      printf("Open\n");
      ASSERT_OK(DB::Open(options, dbname_, &db));
    } else {
      printf("OpenForReadOnly\n");
      ASSERT_OK(DB::OpenForReadOnly(options, dbname_, &db));
    }

    // Both data paths are cached with a single reference each.
    ASSERT_EQ(2, cache_->Size());
    ASSERT_TRUE(cache_->Contains(data_path_0_));
    ASSERT_EQ(1, cache_->GetRefCount(data_path_0_));
    ASSERT_TRUE(cache_->Contains(data_path_1_));
    ASSERT_EQ(1, cache_->GetRefCount(data_path_1_));

    // Close() alone empties the cache, before the DB object is deleted.
    ASSERT_OK(db->Close());
    ASSERT_EQ(0, cache_->Size());
    delete db;
  }

  ASSERT_OK(DestroyDB(dbname_, options, {}));
}
// Tests that Open will cache the logical block size for data paths,
// and delete the db pointer will remove the cached sizes. Runs once with
// DB::Open and once with DB::OpenForReadOnly.
TEST_F(DBLogicalBlockSizeCacheTest, OpenDelete) {
  Options options;
  options.create_if_missing = true;
  options.env = env_.get();

  for (int pass = 0; pass < 2; ++pass) {
    DB* db = nullptr;
    if (pass == 0) {
      printf("Open\n");
      ASSERT_OK(DB::Open(options, dbname_, &db));
    } else {
      printf("OpenForReadOnly\n");
      ASSERT_OK(DB::OpenForReadOnly(options, dbname_, &db));
    }

    // With no db_paths configured, only dbname_ is cached.
    ASSERT_EQ(1, cache_->Size());
    ASSERT_TRUE(cache_->Contains(dbname_));
    ASSERT_EQ(1, cache_->GetRefCount(dbname_));

    delete db;
    ASSERT_EQ(0, cache_->Size());
  }

  ASSERT_OK(DestroyDB(dbname_, options, {}));
}
// Tests that CreateColumnFamily will cache the cf_paths,
// drop the column family handle won't drop the cache,
// drop and then delete the column family handle will drop the cache.
TEST_F(DBLogicalBlockSizeCacheTest, CreateColumnFamily) {
  Options options;
  options.create_if_missing = true;
  options.env = env_.get();

  ColumnFamilyOptions cf_options;
  cf_options.cf_paths = {{cf_path_0_, 1024}, {cf_path_1_, 2048}};

  DB* db = nullptr;
  ASSERT_OK(DB::Open(options, dbname_, &db));
  ASSERT_EQ(1, cache_->Size());
  ASSERT_TRUE(cache_->Contains(dbname_));
  ASSERT_EQ(1, cache_->GetRefCount(dbname_));

  // Creating the column family adds both of its cf_paths to the cache.
  ColumnFamilyHandle* cf = nullptr;
  ASSERT_OK(db->CreateColumnFamily(cf_options, "cf", &cf));
  ASSERT_EQ(3, cache_->Size());
  ASSERT_TRUE(cache_->Contains(dbname_));
  ASSERT_EQ(1, cache_->GetRefCount(dbname_));
  ASSERT_TRUE(cache_->Contains(cf_path_0_));
  ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_));
  ASSERT_TRUE(cache_->Contains(cf_path_1_));
  ASSERT_EQ(1, cache_->GetRefCount(cf_path_1_));

  // Drop column family does not drop cache.
  ASSERT_OK(db->DropColumnFamily(cf));
  ASSERT_EQ(3, cache_->Size());
  ASSERT_TRUE(cache_->Contains(dbname_));
  ASSERT_EQ(1, cache_->GetRefCount(dbname_));
  ASSERT_TRUE(cache_->Contains(cf_path_0_));
  ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_));
  ASSERT_TRUE(cache_->Contains(cf_path_1_));
  ASSERT_EQ(1, cache_->GetRefCount(cf_path_1_));

  // Delete handle will drop cache.
  ASSERT_OK(db->DestroyColumnFamilyHandle(cf));
  ASSERT_TRUE(cache_->Contains(dbname_));
  ASSERT_EQ(1, cache_->GetRefCount(dbname_));

  delete db;
  ASSERT_EQ(0, cache_->Size());
  ASSERT_OK(DestroyDB(dbname_, options, {{"cf", cf_options}}));
}
// To test:
// (1) CreateColumnFamilies will cache the cf_paths in
// DBLogicalBlockSizeCache
// (2) Dropping column family handles associated with
// that cf_paths won't drop the cached cf_paths
// (3) Deleting all the column family handles associated
// with that cf_paths will drop the cached cf_paths
TEST_F(DBLogicalBlockSizeCacheTest, CreateColumnFamilies) {
  Options options;
  options.create_if_missing = true;
  options.env = env_.get();

  ColumnFamilyOptions cf_options;
  cf_options.cf_paths = {{cf_path_0_, 1024}};

  DB* db = nullptr;
  ASSERT_OK(DB::Open(options, dbname_, &db));
  ASSERT_EQ(1, cache_->Size());
  ASSERT_TRUE(cache_->Contains(dbname_));
  ASSERT_EQ(1, cache_->GetRefCount(dbname_));

  // Two column families share cf_path_0_, so it gets two references.
  std::vector<ColumnFamilyHandle*> cfs;
  ASSERT_OK(db->CreateColumnFamilies(cf_options, {"cf1", "cf2"}, &cfs));
  ASSERT_EQ(2, cache_->Size());
  ASSERT_TRUE(cache_->Contains(dbname_));
  ASSERT_EQ(1, cache_->GetRefCount(dbname_));
  ASSERT_TRUE(cache_->Contains(cf_path_0_));
  ASSERT_EQ(2, cache_->GetRefCount(cf_path_0_));

  // Drop column family does not drop cf_path_0_'s entry from cache
  for (ColumnFamilyHandle* handle : cfs) {
    ASSERT_OK(db->DropColumnFamily(handle));
    ASSERT_EQ(2, cache_->Size());
    ASSERT_TRUE(cache_->Contains(dbname_));
    ASSERT_EQ(1, cache_->GetRefCount(dbname_));
    ASSERT_TRUE(cache_->Contains(cf_path_0_));
    ASSERT_EQ(2, cache_->GetRefCount(cf_path_0_));
  }

  // Delete one cf handle will not drop cf_path_0_'s entry from cache because
  // another handle is still referencing cf_path_0_.
  ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[0]));
  ASSERT_EQ(2, cache_->Size());
  ASSERT_TRUE(cache_->Contains(dbname_));
  ASSERT_EQ(1, cache_->GetRefCount(dbname_));
  ASSERT_TRUE(cache_->Contains(cf_path_0_));

  // Delete all cf handles and ensure the ref count of cf_path_0_ in cache_
  // can be properly decreased by releasing any background reference to the
  // ColumnFamilyData during db deletion
  ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[1]));
  ASSERT_TRUE(cache_->Contains(dbname_));
  ASSERT_EQ(1, cache_->GetRefCount(dbname_));
  delete db;

  // Now cf_path_0_ in cache_ has been properly decreased and cf_path_0_'s entry
  // is dropped from cache
  ASSERT_EQ(0, cache_->Size());
  ASSERT_OK(
      DestroyDB(dbname_, options, {{"cf1", cf_options}, {"cf2", cf_options}}));
}
// Tests that Open two column families with the same cf_path will cache the
// cf_path and have 2 references to the cached size,
// drop the column family handle won't drop the cache,
// drop and then delete the column family handle will drop the cache.
// Runs once with DB::Open and once with DB::OpenForReadOnly.
TEST_F(DBLogicalBlockSizeCacheTest, OpenWithColumnFamilies) {
  Options options;
  options.create_if_missing = true;
  options.env = env_.get();

  ColumnFamilyOptions cf_options;
  cf_options.cf_paths = {{cf_path_0_, 1024}};

  for (int pass = 0; pass < 2; ++pass) {
    // Setup: create "cf1" and "cf2", then close the DB completely.
    DB* db = nullptr;
    ColumnFamilyHandle* cf1 = nullptr;
    ColumnFamilyHandle* cf2 = nullptr;
    ASSERT_OK(DB::Open(options, dbname_, &db));
    ASSERT_OK(db->CreateColumnFamily(cf_options, "cf1", &cf1));
    ASSERT_OK(db->CreateColumnFamily(cf_options, "cf2", &cf2));
    ASSERT_OK(db->DestroyColumnFamilyHandle(cf1));
    ASSERT_OK(db->DestroyColumnFamilyHandle(cf2));
    delete db;
    ASSERT_EQ(0, cache_->Size());

    std::vector<ColumnFamilyHandle*> cfs;
    if (pass == 0) {
      printf("Open\n");
      ASSERT_OK(DB::Open(options, dbname_,
                         {{"cf1", cf_options},
                          {"cf2", cf_options},
                          {"default", ColumnFamilyOptions()}},
                         &cfs, &db));
    } else {
      printf("OpenForReadOnly\n");
      ASSERT_OK(DB::OpenForReadOnly(options, dbname_,
                                    {{"cf1", cf_options},
                                     {"cf2", cf_options},
                                     {"default", ColumnFamilyOptions()}},
                                    &cfs, &db));
    }

    // Logical block sizes of dbname_ and cf_path_0_ are cached during Open.
    ASSERT_EQ(2, cache_->Size());
    ASSERT_TRUE(cache_->Contains(dbname_));
    ASSERT_EQ(1, cache_->GetRefCount(dbname_));
    ASSERT_TRUE(cache_->Contains(cf_path_0_));
    ASSERT_EQ(2, cache_->GetRefCount(cf_path_0_));

    // Drop handles won't drop the cache.
    ASSERT_OK(db->DropColumnFamily(cfs[0]));
    ASSERT_OK(db->DropColumnFamily(cfs[1]));
    ASSERT_EQ(2, cache_->Size());
    ASSERT_TRUE(cache_->Contains(dbname_));
    ASSERT_EQ(1, cache_->GetRefCount(dbname_));
    ASSERT_TRUE(cache_->Contains(cf_path_0_));
    ASSERT_EQ(2, cache_->GetRefCount(cf_path_0_));

    // Delete 1st handle won't drop the cache for cf_path_0_.
    ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[0]));
    ASSERT_EQ(2, cache_->Size());
    ASSERT_TRUE(cache_->Contains(dbname_));
    ASSERT_EQ(1, cache_->GetRefCount(dbname_));
    ASSERT_TRUE(cache_->Contains(cf_path_0_));
    ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_));

    // Delete 2nd handle will drop the cache for cf_path_0_.
    ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[1]));
    ASSERT_EQ(1, cache_->Size());
    ASSERT_TRUE(cache_->Contains(dbname_));
    ASSERT_EQ(1, cache_->GetRefCount(dbname_));

    // Delete the default handle won't affect the cache because db still refers
    // to the default CF.
    ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[2]));
    ASSERT_EQ(1, cache_->Size());
    ASSERT_TRUE(cache_->Contains(dbname_));
    ASSERT_EQ(1, cache_->GetRefCount(dbname_));

    delete db;
    ASSERT_EQ(0, cache_->Size());
  }

  ASSERT_OK(
      DestroyDB(dbname_, options, {{"cf1", cf_options}, {"cf2", cf_options}}));
}
// Tests that destroy column family without dropping won't drop the cache,
// because compaction and flush might still need to get logical block size
// when opening new files.
TEST_F(DBLogicalBlockSizeCacheTest, DestroyColumnFamilyHandle) {
  Options options;
  options.create_if_missing = true;
  options.env = env_.get();

  ColumnFamilyOptions cf_options;
  cf_options.cf_paths = {{cf_path_0_, 1024}};

  DB* db = nullptr;
  ASSERT_OK(DB::Open(options, dbname_, &db));
  ASSERT_EQ(1, cache_->Size());
  ASSERT_TRUE(cache_->Contains(dbname_));
  ASSERT_EQ(1, cache_->GetRefCount(dbname_));

  ColumnFamilyHandle* cf = nullptr;
  ASSERT_OK(db->CreateColumnFamily(cf_options, "cf", &cf));
  ASSERT_EQ(2, cache_->Size());
  ASSERT_TRUE(cache_->Contains(dbname_));
  ASSERT_EQ(1, cache_->GetRefCount(dbname_));
  ASSERT_TRUE(cache_->Contains(cf_path_0_));
  ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_));

  // Delete handle won't drop cache.
  ASSERT_OK(db->DestroyColumnFamilyHandle(cf));
  ASSERT_EQ(2, cache_->Size());
  ASSERT_TRUE(cache_->Contains(dbname_));
  ASSERT_EQ(1, cache_->GetRefCount(dbname_));
  ASSERT_TRUE(cache_->Contains(cf_path_0_));
  ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_));

  delete db;
  ASSERT_EQ(0, cache_->Size());

  // Open with column families.
  std::vector<ColumnFamilyHandle*> cfs;
  for (int pass = 0; pass < 2; ++pass) {
    if (pass == 0) {
      printf("Open\n");
      ASSERT_OK(DB::Open(
          options, dbname_,
          {{"cf", cf_options}, {"default", ColumnFamilyOptions()}}, &cfs, &db));
    } else {
      printf("OpenForReadOnly\n");
      ASSERT_OK(DB::OpenForReadOnly(
          options, dbname_,
          {{"cf", cf_options}, {"default", ColumnFamilyOptions()}}, &cfs, &db));
    }

    // cf_path_0_ and dbname_ are cached.
    ASSERT_EQ(2, cache_->Size());
    ASSERT_TRUE(cache_->Contains(dbname_));
    ASSERT_EQ(1, cache_->GetRefCount(dbname_));
    ASSERT_TRUE(cache_->Contains(cf_path_0_));
    ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_));

    // Deleting handle won't drop cache.
    ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[0]));
    ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[1]));
    ASSERT_EQ(2, cache_->Size());
    ASSERT_TRUE(cache_->Contains(dbname_));
    ASSERT_EQ(1, cache_->GetRefCount(dbname_));
    ASSERT_TRUE(cache_->Contains(cf_path_0_));
    ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_));

    delete db;
    ASSERT_EQ(0, cache_->Size());
  }

  ASSERT_OK(DestroyDB(dbname_, options, {{"cf", cf_options}}));
}
// Tests the cache behavior when there are multiple DBs sharing the same env
// with different db_paths and cf_paths.
TEST_F(DBLogicalBlockSizeCacheTest, MultiDBWithDifferentPaths) {
  Options options;
  options.create_if_missing = true;
  options.env = env_.get();

  // Both DBs live in sub-directories of dbname_, so create it first.
  ASSERT_OK(env_->CreateDirIfMissing(dbname_));

  // First DB, rooted at data_path_0_.
  DB* db0 = nullptr;
  ASSERT_OK(DB::Open(options, data_path_0_, &db0));
  ASSERT_EQ(1, cache_->Size());
  ASSERT_TRUE(cache_->Contains(data_path_0_));

  ColumnFamilyOptions cf_options0;
  cf_options0.cf_paths = {{cf_path_0_, 1024}};
  ColumnFamilyHandle* cf0 = nullptr;
  ASSERT_OK(db0->CreateColumnFamily(cf_options0, "cf", &cf0));
  ASSERT_EQ(2, cache_->Size());
  ASSERT_TRUE(cache_->Contains(data_path_0_));
  ASSERT_EQ(1, cache_->GetRefCount(data_path_0_));
  ASSERT_TRUE(cache_->Contains(cf_path_0_));
  ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_));

  // Second DB, rooted at data_path_1_.
  DB* db1 = nullptr;
  ASSERT_OK(DB::Open(options, data_path_1_, &db1));
  ASSERT_EQ(3, cache_->Size());
  ASSERT_TRUE(cache_->Contains(data_path_0_));
  ASSERT_EQ(1, cache_->GetRefCount(data_path_0_));
  ASSERT_TRUE(cache_->Contains(cf_path_0_));
  ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_));
  ASSERT_TRUE(cache_->Contains(data_path_1_));
  ASSERT_EQ(1, cache_->GetRefCount(data_path_1_));

  ColumnFamilyOptions cf_options1;
  cf_options1.cf_paths = {{cf_path_1_, 1024}};
  ColumnFamilyHandle* cf1 = nullptr;
  ASSERT_OK(db1->CreateColumnFamily(cf_options1, "cf", &cf1));
  ASSERT_EQ(4, cache_->Size());
  ASSERT_TRUE(cache_->Contains(data_path_0_));
  ASSERT_EQ(1, cache_->GetRefCount(data_path_0_));
  ASSERT_TRUE(cache_->Contains(cf_path_0_));
  ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_));
  ASSERT_TRUE(cache_->Contains(data_path_1_));
  ASSERT_EQ(1, cache_->GetRefCount(data_path_1_));
  ASSERT_TRUE(cache_->Contains(cf_path_1_));
  ASSERT_EQ(1, cache_->GetRefCount(cf_path_1_));

  // Closing the first DB leaves only the second DB's two paths cached.
  ASSERT_OK(db0->DestroyColumnFamilyHandle(cf0));
  delete db0;
  ASSERT_EQ(2, cache_->Size());
  ASSERT_TRUE(cache_->Contains(data_path_1_));
  ASSERT_EQ(1, cache_->GetRefCount(data_path_1_));
  ASSERT_TRUE(cache_->Contains(cf_path_1_));
  ASSERT_EQ(1, cache_->GetRefCount(cf_path_1_));
  ASSERT_OK(DestroyDB(data_path_0_, options, {{"cf", cf_options0}}));

  // Closing the second DB empties the cache.
  ASSERT_OK(db1->DestroyColumnFamilyHandle(cf1));
  delete db1;
  ASSERT_EQ(0, cache_->Size());
  ASSERT_OK(DestroyDB(data_path_1_, options, {{"cf", cf_options1}}));
}
// Tests the cache behavior when there are multiple DBs sharing the same env
// with the same db_paths and cf_paths.
TEST_F(DBLogicalBlockSizeCacheTest, MultiDBWithSamePaths) {
  Options options;
  options.create_if_missing = true;
  options.env = env_.get();
  options.db_paths = {{data_path_0_, 1024}};

  ColumnFamilyOptions cf_options;
  cf_options.cf_paths = {{cf_path_0_, 1024}};

  ASSERT_OK(env_->CreateDirIfMissing(dbname_));
  const std::string db0_name = dbname_ + "/db0";
  const std::string db1_name = dbname_ + "/db1";

  // First DB: one reference on each shared path.
  DB* db0 = nullptr;
  ASSERT_OK(DB::Open(options, db0_name, &db0));
  ASSERT_EQ(1, cache_->Size());
  ASSERT_TRUE(cache_->Contains(data_path_0_));
  ASSERT_EQ(1, cache_->GetRefCount(data_path_0_));

  ColumnFamilyHandle* cf0 = nullptr;
  ASSERT_OK(db0->CreateColumnFamily(cf_options, "cf", &cf0));
  ASSERT_EQ(2, cache_->Size());
  ASSERT_TRUE(cache_->Contains(data_path_0_));
  ASSERT_EQ(1, cache_->GetRefCount(data_path_0_));
  ASSERT_TRUE(cache_->Contains(cf_path_0_));
  ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_));

  // Second DB: the same paths gain a second reference, not a new entry.
  DB* db1 = nullptr;
  ASSERT_OK(DB::Open(options, db1_name, &db1));
  ASSERT_EQ(2, cache_->Size());
  ASSERT_TRUE(cache_->Contains(data_path_0_));
  ASSERT_EQ(2, cache_->GetRefCount(data_path_0_));
  ASSERT_TRUE(cache_->Contains(cf_path_0_));
  ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_));

  ColumnFamilyHandle* cf1 = nullptr;
  ASSERT_OK(db1->CreateColumnFamily(cf_options, "cf", &cf1));
  ASSERT_EQ(2, cache_->Size());
  ASSERT_TRUE(cache_->Contains(data_path_0_));
  ASSERT_EQ(2, cache_->GetRefCount(data_path_0_));
  ASSERT_TRUE(cache_->Contains(cf_path_0_));
  ASSERT_EQ(2, cache_->GetRefCount(cf_path_0_));

  // Closing the first DB drops one reference from each path.
  ASSERT_OK(db0->DestroyColumnFamilyHandle(cf0));
  delete db0;
  ASSERT_EQ(2, cache_->Size());
  ASSERT_TRUE(cache_->Contains(data_path_0_));
  ASSERT_EQ(1, cache_->GetRefCount(data_path_0_));
  ASSERT_TRUE(cache_->Contains(cf_path_0_));
  ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_));
  ASSERT_OK(DestroyDB(db0_name, options, {{"cf", cf_options}}));

  // Closing the second DB empties the cache.
  ASSERT_OK(db1->DestroyColumnFamilyHandle(cf1));
  delete db1;
  ASSERT_EQ(0, cache_->Size());
  ASSERT_OK(DestroyDB(db1_name, options, {{"cf", cf_options}}));
}
} // namespace ROCKSDB_NAMESPACE
#endif // OS_LINUX
// Test binary entry point: installs the stack-trace handler, hands the
// command-line arguments to GoogleTest, and returns the result of running all
// registered tests.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

@ -1,344 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include <memory>
#include <string>
#include "db/db_test_util.h"
#include "db/memtable.h"
#include "db/range_del_aggregator.h"
#include "port/stack_trace.h"
#include "rocksdb/memtablerep.h"
#include "rocksdb/slice_transform.h"
namespace ROCKSDB_NAMESPACE {
// Fixture for memtable tests; constructs DBTestBase with the
// "db_memtable_test" path and env_do_fsync enabled.
class DBMemTableTest : public DBTestBase {
 public:
  DBMemTableTest()
      : DBTestBase("db_memtable_test", /*env_do_fsync=*/true) {}
};
// MemTableRep that forwards every operation to a wrapped rep and records how
// InsertWithHint() was used: the number of calls and the hint value seen
// before and after the most recent call.
class MockMemTableRep : public MemTableRep {
 public:
  // Takes ownership of `rep` (it is stored in a std::unique_ptr).
  explicit MockMemTableRep(Allocator* allocator, MemTableRep* rep)
      : MemTableRep(allocator), rep_(rep) {}

  KeyHandle Allocate(const size_t len, char** buf) override {
    return rep_->Allocate(len, buf);
  }

  void Insert(KeyHandle handle) override { rep_->Insert(handle); }

  // Forwards to the wrapped rep, capturing `*hint` before and after the call.
  // `hint` itself is expected to be non-null.
  void InsertWithHint(KeyHandle handle, void** hint) override {
    num_insert_with_hint_++;
    EXPECT_NE(nullptr, hint);
    last_hint_in_ = *hint;
    rep_->InsertWithHint(handle, hint);
    last_hint_out_ = *hint;
  }

  bool Contains(const char* key) const override { return rep_->Contains(key); }

  void Get(const LookupKey& k, void* callback_args,
           bool (*callback_func)(void* arg, const char* entry)) override {
    rep_->Get(k, callback_args, callback_func);
  }

  size_t ApproximateMemoryUsage() override {
    return rep_->ApproximateMemoryUsage();
  }

  Iterator* GetIterator(Arena* arena) override {
    return rep_->GetIterator(arena);
  }

  // Value of `*hint` on entry to the most recent InsertWithHint() call, or
  // nullptr if InsertWithHint() has not been called yet.
  void* last_hint_in() { return last_hint_in_; }
  // Value of `*hint` on exit from the most recent InsertWithHint() call, or
  // nullptr if InsertWithHint() has not been called yet.
  void* last_hint_out() { return last_hint_out_; }
  // Number of InsertWithHint() calls so far.
  int num_insert_with_hint() { return num_insert_with_hint_; }

 private:
  std::unique_ptr<MemTableRep> rep_;
  // Initialised so the accessors never return an indeterminate pointer.
  void* last_hint_in_ = nullptr;
  void* last_hint_out_ = nullptr;
  int num_insert_with_hint_ = 0;
};
// Factory that wraps a SkipListFactory-created rep in a MockMemTableRep and
// remembers the most recently created mock and the last column family id it
// was asked to create a rep for.
class MockMemTableRepFactory : public MemTableRepFactory {
 public:
  // Creates a skip-list rep, wraps it in a MockMemTableRep, and returns the
  // mock. The returned pointer is also remembered for rep(); this factory
  // never deletes it.
  MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator& cmp,
                                 Allocator* allocator,
                                 const SliceTransform* transform,
                                 Logger* logger) override {
    SkipListFactory factory;
    MemTableRep* skiplist_rep =
        factory.CreateMemTableRep(cmp, allocator, transform, logger);
    mock_rep_ = new MockMemTableRep(allocator, skiplist_rep);
    return mock_rep_;
  }

  // Same as above, additionally recording `column_family_id`.
  MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator& cmp,
                                 Allocator* allocator,
                                 const SliceTransform* transform,
                                 Logger* logger,
                                 uint32_t column_family_id) override {
    last_column_family_id_ = column_family_id;
    return CreateMemTableRep(cmp, allocator, transform, logger);
  }

  const char* Name() const override { return "MockMemTableRepFactory"; }

  // Most recently created mock, or nullptr if none has been created yet.
  MockMemTableRep* rep() { return mock_rep_; }

  bool IsInsertConcurrentlySupported() const override { return false; }

  // Column family id passed to the most recent five-argument
  // CreateMemTableRep() call, or the maximum uint32_t value if there has been
  // no such call.
  uint32_t GetLastColumnFamilyId() { return last_column_family_id_; }

 private:
  // Non-owning; initialised so rep() never returns an indeterminate pointer.
  MockMemTableRep* mock_rep_ = nullptr;
  // Sentinel for "no id recorded yet": the maximum uint32_t value, spelled as
  // a cast of -1 rather than via std::numeric_limits.
  uint32_t last_column_family_id_ = static_cast<uint32_t>(-1);
};
// SliceTransform whose prefix is everything up to and including the first
// '_' in the key. Keys without '_' are outside its domain.
class TestPrefixExtractor : public SliceTransform {
 public:
  const char* Name() const override { return "TestPrefixExtractor"; }

  // Returns the prefix ending at (and including) the first '_', or an empty
  // Slice when the key contains no '_'.
  Slice Transform(const Slice& key) const override {
    const char* const sep = separator(key);
    return sep == nullptr ? Slice()
                          : Slice(key.data(), sep - key.data() + 1);
  }

  // A key is in the domain exactly when it contains a '_'.
  bool InDomain(const Slice& key) const override {
    const char* const sep = separator(key);
    return sep != nullptr;
  }

  bool InRange(const Slice& /*key*/) const override { return false; }

 private:
  // Pointer to the first '_' within the key's bytes, or nullptr if absent.
  const char* separator(const Slice& key) const {
    const void* const hit = memchr(key.data(), '_', key.size());
    return static_cast<const char*>(hit);
  }
};
// Test that MemTable::Add reports a TryAgain status when an entry with the
// same user key and sequence number has already been inserted. Covered paths:
// a plain memtable, a memtable configured with an insert-with-hint prefix
// extractor, and a memtable with allow_concurrent_memtable_write enabled.
TEST_F(DBMemTableTest, DuplicateSeq) {
SequenceNumber seq = 123;
std::string value;
MergeContext merge_context;
Options options;
InternalKeyComparator ikey_cmp(options.comparator);
ReadRangeDelAggregator range_del_agg(&ikey_cmp,
kMaxSequenceNumber /* upper_bound */);
// Create a MemTable
InternalKeyComparator cmp(BytewiseComparator());
auto factory = std::make_shared<SkipListFactory>();
options.memtable_factory = factory;
ImmutableOptions ioptions(options);
WriteBufferManager wb(options.db_write_buffer_size);
MemTable* mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb,
kMaxSequenceNumber, 0 /* column_family_id */);
// Write some keys and make sure Add reports TryAgain on duplicates
ASSERT_OK(
mem->Add(seq, kTypeValue, "key", "value2", nullptr /* kv_prot_info */));
ASSERT_TRUE(
mem->Add(seq, kTypeValue, "key", "value2", nullptr /* kv_prot_info */)
.IsTryAgain());
// Changing the type should still cause the duplicate key
ASSERT_TRUE(
mem->Add(seq, kTypeMerge, "key", "value2", nullptr /* kv_prot_info */)
.IsTryAgain());
// Changing the seq number will make the key fresh
ASSERT_OK(mem->Add(seq + 1, kTypeMerge, "key", "value2",
nullptr /* kv_prot_info */));
// Test with different types for duplicate keys
ASSERT_TRUE(
mem->Add(seq, kTypeDeletion, "key", "", nullptr /* kv_prot_info */)
.IsTryAgain());
ASSERT_TRUE(
mem->Add(seq, kTypeSingleDeletion, "key", "", nullptr /* kv_prot_info */)
.IsTryAgain());
// Test the duplicate keys under stress
for (int i = 0; i < 10000; i++) {
// Every tenth iteration (i % 10 == 1) reuses the previous sequence number,
// so that insert must be rejected; all other iterations advance seq first.
bool insert_dup = i % 10 == 1;
if (!insert_dup) {
seq++;
}
Status s = mem->Add(seq, kTypeValue, "foo", "value" + std::to_string(seq),
nullptr /* kv_prot_info */);
if (insert_dup) {
ASSERT_TRUE(s.IsTryAgain());
} else {
ASSERT_OK(s);
}
}
delete mem;
// Test with InsertWithHint
options.memtable_insert_with_hint_prefix_extractor.reset(
new TestPrefixExtractor()); // which uses _ to extract the prefix
// Rebuild the immutable options so the new memtable sees the extractor.
ioptions = ImmutableOptions(options);
mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb,
kMaxSequenceNumber, 0 /* column_family_id */);
// Insert a duplicate key with _ in it
ASSERT_OK(
mem->Add(seq, kTypeValue, "key_1", "value", nullptr /* kv_prot_info */));
ASSERT_TRUE(
mem->Add(seq, kTypeValue, "key_1", "value", nullptr /* kv_prot_info */)
.IsTryAgain());
delete mem;
// Test when InsertConcurrently will be invoked
options.allow_concurrent_memtable_write = true;
ioptions = ImmutableOptions(options);
mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb,
kMaxSequenceNumber, 0 /* column_family_id */);
MemTablePostProcessInfo post_process_info;
// The extra `true` argument and post_process_info are passed only on this
// path. NOTE(review): presumably `true` selects the concurrent-insert mode
// of Add -- confirm against MemTable::Add's signature.
ASSERT_OK(mem->Add(seq, kTypeValue, "key", "value",
nullptr /* kv_prot_info */, true, &post_process_info));
ASSERT_TRUE(mem->Add(seq, kTypeValue, "key", "value",
nullptr /* kv_prot_info */, true, &post_process_info)
.IsTryAgain());
delete mem;
}
// A simple test to verify that concurrent merge writes are functional: two
// threads add disjoint ranges of merge operands for the same key, and the
// merged result read back must equal the sum of all operands.
TEST_F(DBMemTableTest, ConcurrentMergeWrite) {
  int num_ops = 1000;
  std::string value;
  MergeContext merge_context;
  Options options;
  // A merge operator that is not sensitive to concurrent writes since in this
  // test we don't order the writes.
  options.merge_operator = MergeOperators::CreateUInt64AddOperator();

  // Create a MemTable. It is owned by a unique_ptr so that a failing ASSERT_*
  // in the test body (which returns immediately) cannot leak it.
  InternalKeyComparator cmp(BytewiseComparator());
  options.memtable_factory = std::make_shared<SkipListFactory>();
  options.allow_concurrent_memtable_write = true;
  ImmutableOptions ioptions(options);
  WriteBufferManager wb(options.db_write_buffer_size);
  std::unique_ptr<MemTable> mem(
      new MemTable(cmp, ioptions, MutableCFOptions(options), &wb,
                   kMaxSequenceNumber, 0 /* column_family_id */));

  // Put 0 as the base.
  PutFixed64(&value, static_cast<uint64_t>(0));
  ASSERT_OK(mem->Add(0, kTypeValue, "key", value, nullptr /* kv_prot_info */));
  value.clear();

  // Write Merge concurrently: the first thread covers [1, num_ops / 2), the
  // second covers [num_ops / 2, num_ops).
  ROCKSDB_NAMESPACE::port::Thread write_thread1([&]() {
    MemTablePostProcessInfo post_process_info1;
    std::string v1;
    for (int seq = 1; seq < num_ops / 2; seq++) {
      PutFixed64(&v1, seq);
      ASSERT_OK(mem->Add(seq, kTypeMerge, "key", v1, nullptr /* kv_prot_info */,
                         true, &post_process_info1));
      v1.clear();
    }
  });
  ROCKSDB_NAMESPACE::port::Thread write_thread2([&]() {
    MemTablePostProcessInfo post_process_info2;
    std::string v2;
    for (int seq = num_ops / 2; seq < num_ops; seq++) {
      PutFixed64(&v2, seq);
      ASSERT_OK(mem->Add(seq, kTypeMerge, "key", v2, nullptr /* kv_prot_info */,
                         true, &post_process_info2));
      v2.clear();
    }
  });
  write_thread1.join();
  write_thread2.join();

  Status status;
  ReadOptions roptions;
  SequenceNumber max_covering_tombstone_seq = 0;
  LookupKey lkey("key", kMaxSequenceNumber);
  bool res = mem->Get(lkey, &value, /*columns=*/nullptr, /*timestamp=*/nullptr,
                      &status, &merge_context, &max_covering_tombstone_seq,
                      roptions, false /* immutable_memtable */);
  ASSERT_OK(status);
  ASSERT_TRUE(res);

  // The expected value is 0 + 1 + ... + (num_ops - 1).
  uint64_t ivalue = DecodeFixed64(Slice(value).data());
  uint64_t sum = 0;
  for (int seq = 0; seq < num_ops; seq++) {
    sum += seq;
  }
  ASSERT_EQ(ivalue, sum);
  mem.reset();
}
// Checks the hint passed into and returned from the mock memtable rep when a
// hint prefix extractor is configured: keys sharing a prefix reuse the hint
// produced for that prefix, a new prefix starts from a null hint, and keys
// outside the extractor's domain do not go through insert-with-hint at all.
TEST_F(DBMemTableTest, InsertWithHint) {
  Options options;
  options.allow_concurrent_memtable_write = false;
  options.create_if_missing = true;
  options.memtable_factory.reset(new MockMemTableRepFactory());
  options.memtable_insert_with_hint_prefix_extractor.reset(
      new TestPrefixExtractor());
  options.env = env_;
  Reopen(options);
  // static_cast is the correct cast for a base-to-derived pointer conversion
  // (reinterpret_cast performs no pointer adjustment); this matches the
  // ColumnFamilyId test below.
  MockMemTableRep* rep =
      static_cast<MockMemTableRepFactory*>(options.memtable_factory.get())
          ->rep();

  // First key with prefix "foo": no incoming hint yet.
  ASSERT_OK(Put("foo_k1", "foo_v1"));
  ASSERT_EQ(nullptr, rep->last_hint_in());
  void* hint_foo = rep->last_hint_out();
  // Later "foo" keys receive and return the same hint.
  ASSERT_OK(Put("foo_k2", "foo_v2"));
  ASSERT_EQ(hint_foo, rep->last_hint_in());
  ASSERT_EQ(hint_foo, rep->last_hint_out());
  ASSERT_OK(Put("foo_k3", "foo_v3"));
  ASSERT_EQ(hint_foo, rep->last_hint_in());
  ASSERT_EQ(hint_foo, rep->last_hint_out());

  // A different prefix gets its own, distinct hint.
  ASSERT_OK(Put("bar_k1", "bar_v1"));
  ASSERT_EQ(nullptr, rep->last_hint_in());
  void* hint_bar = rep->last_hint_out();
  ASSERT_NE(hint_foo, hint_bar);
  ASSERT_OK(Put("bar_k2", "bar_v2"));
  ASSERT_EQ(hint_bar, rep->last_hint_in());
  ASSERT_EQ(hint_bar, rep->last_hint_out());
  ASSERT_EQ(5, rep->num_insert_with_hint());

  // A key without a prefix must not bump the insert-with-hint counter.
  ASSERT_OK(Put("NotInPrefixDomain", "vvv"));
  ASSERT_EQ(5, rep->num_insert_with_hint());

  ASSERT_EQ("foo_v1", Get("foo_k1"));
  ASSERT_EQ("foo_v2", Get("foo_k2"));
  ASSERT_EQ("foo_v3", Get("foo_k3"));
  ASSERT_EQ("bar_v1", Get("bar_k1"));
  ASSERT_EQ("bar_v2", Get("bar_k2"));
  ASSERT_EQ("vvv", Get("NotInPrefixDomain"));
}
// Verifies MemTableRepFactory is told the right column family id.
TEST_F(DBMemTableTest, ColumnFamilyId) {
  Options options;
  options.env = CurrentOptions().env;
  options.allow_concurrent_memtable_write = false;
  options.create_if_missing = true;
  // Keep a typed pointer to the mock factory; ownership is handed to
  // options.memtable_factory.
  auto* mock_factory = new MockMemTableRepFactory();
  options.memtable_factory.reset(mock_factory);
  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  // After writing to and flushing each column family, the factory must have
  // last been asked for a memtable rep for that same column family.
  for (uint32_t cf_id = 0; cf_id < 2; ++cf_id) {
    ASSERT_OK(Put(cf_id, "key", "val"));
    ASSERT_OK(Flush(cf_id));
    ASSERT_EQ(cf_id, mock_factory->GetLastColumnFamilyId());
  }
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack trace handler, lets GoogleTest
// consume its command-line flags, then runs every registered test.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

@ -1,488 +0,0 @@
// Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/db_test_util.h"
#include "port/stack_trace.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/utilities/debug.h"
#include "table/block_based/block_builder.h"
#include "test_util/sync_point.h"
#include "rocksdb/merge_operator.h"
#include "utilities/fault_injection_env.h"
#include "utilities/merge_operators.h"
#include "utilities/merge_operators/sortlist.h"
#include "utilities/merge_operators/string_append/stringappend2.h"
namespace ROCKSDB_NAMESPACE {
namespace {
class LimitedStringAppendMergeOp : public StringAppendTESTOperator {
public:
LimitedStringAppendMergeOp(int limit, char delim)
: StringAppendTESTOperator(delim), limit_(limit) {}
const char* Name() const override {
return "DBMergeOperatorTest::LimitedStringAppendMergeOp";
}
bool ShouldMerge(const std::vector<Slice>& operands) const override {
if (operands.size() > 0 && limit_ > 0 && operands.size() >= limit_) {
return true;
}
return false;
}
private:
size_t limit_ = 0;
};
} // anonymous namespace
// Fixture for the GetMergeOperands() tests; it passes env_do_fsync=true to
// DBTestBase.
class DBMergeOperandTest : public DBTestBase {
 public:
  DBMergeOperandTest()
      : DBTestBase(/*path=*/"db_merge_operand_test", /*env_do_fsync=*/true) {}
};
TEST_F(DBMergeOperandTest, CacheEvictedMergeOperandReadAfterFreeBug) {
  // Regression test: DB::GetMergeOperands used to read merge operands after
  // they had been mistakenly freed, which surfaced when the block cache was
  // full. See PR#9507 for more.
  Options options;
  options.create_if_missing = true;
  options.merge_operator = MergeOperators::CreateStringAppendOperator();
  options.env = env_;
  // Small cache to simulate cache full.
  BlockBasedTableOptions table_options;
  table_options.block_cache = NewLRUCache(1);
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);

  const char* const kOperands[] = {"v1", "v2", "v3", "v4"};
  int num_records = 4;
  int number_of_operands = 0;
  std::vector<PinnableSlice> values(num_records);
  GetMergeOperandsOptions merge_operands_info;
  merge_operands_info.expected_max_number_of_operands = num_records;

  // A flush separates consecutive operands; the last operand is written
  // without a following flush.
  for (int idx = 0; idx < num_records; ++idx) {
    if (idx > 0) {
      ASSERT_OK(Flush());
    }
    ASSERT_OK(Merge("k1", kOperands[idx]));
  }

  ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(),
                                  "k1", values.data(), &merge_operands_info,
                                  &number_of_operands));
  ASSERT_EQ(number_of_operands, 4);
  for (int idx = 0; idx < num_records; ++idx) {
    ASSERT_EQ(values[idx].ToString(), kOperands[idx]);
  }
}
TEST_F(DBMergeOperandTest, FlushedMergeOperandReadAfterFreeBug) {
  // Repro for a bug where a memtable containing a merge operand could be
  // deleted before the merge operand was saved to the result.
  Options options = CurrentOptions();
  options.merge_operator = MergeOperators::CreateStringAppendOperator();
  Reopen(options);
  ASSERT_OK(Merge("key", "value"));

  // Sync point dependencies: the flush may only start after
  // "PostMemTableGet:0" has been reached, and "PostMemTableGet:1" may only
  // proceed after the flush has finished.
  SyncPoint* const sync_points = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sync_points->LoadDependency(
      {{"DBImpl::GetImpl:PostMemTableGet:0",
        "DBMergeOperandTest::FlushedMergeOperandReadAfterFreeBug:PreFlush"},
       {"DBMergeOperandTest::FlushedMergeOperandReadAfterFreeBug:PostFlush",
        "DBImpl::GetImpl:PostMemTableGet:1"}});
  sync_points->EnableProcessing();

  port::Thread flush_thread([&]() {
    TEST_SYNC_POINT(
        "DBMergeOperandTest::FlushedMergeOperandReadAfterFreeBug:PreFlush");
    ASSERT_OK(Flush());
    TEST_SYNC_POINT(
        "DBMergeOperandTest::FlushedMergeOperandReadAfterFreeBug:PostFlush");
  });

  PinnableSlice value;
  GetMergeOperandsOptions merge_operands_info;
  merge_operands_info.expected_max_number_of_operands = 1;
  int number_of_operands;
  ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(),
                                  "key", &value, &merge_operands_info,
                                  &number_of_operands));
  ASSERT_EQ(1, number_of_operands);
  flush_thread.join();
}
// Walks GetMergeOperands() through keys whose entries live in the memtable,
// in a single SST file, in several SST files, in several levels and in an
// immutable memtable, with Put/Delete base entries mixed in.
TEST_F(DBMergeOperandTest, GetMergeOperandsBasic) {
  Options options;
  options.create_if_missing = true;
  // Use only the latest two merge operands.
  options.merge_operator = std::make_shared<LimitedStringAppendMergeOp>(2, ',');
  options.env = env_;
  Reopen(options);

  int num_records = 4;
  int number_of_operands = 0;
  std::vector<PinnableSlice> values(num_records);
  GetMergeOperandsOptions merge_operands_info;
  merge_operands_info.expected_max_number_of_operands = num_records;

  // Fetches the operands of `key` from the default column family into
  // `values`, using the current contents of `merge_operands_info`.
  auto fetch_operands = [&](const Slice& key) {
    return db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), key,
                                 values.data(), &merge_operands_info,
                                 &number_of_operands);
  };

  // k0 value in memtable
  ASSERT_OK(Put("k0", "PutARock"));
  ASSERT_OK(fetch_operands("k0"));
  ASSERT_EQ(values[0], "PutARock");

  // k0.1 value in SST
  ASSERT_OK(Put("k0.1", "RockInSST"));
  ASSERT_OK(Flush());
  ASSERT_OK(fetch_operands("k0.1"));
  ASSERT_EQ(values[0], "RockInSST");

  // All k1 values are in memtable.
  ASSERT_OK(Merge("k1", "a"));
  ASSERT_OK(Put("k1", "x"));
  ASSERT_OK(Merge("k1", "b"));
  ASSERT_OK(Merge("k1", "c"));
  ASSERT_OK(Merge("k1", "d"));
  ASSERT_OK(fetch_operands("k1"));
  ASSERT_EQ(values[0], "x");
  ASSERT_EQ(values[1], "b");
  ASSERT_EQ(values[2], "c");
  ASSERT_EQ(values[3], "d");

  // expected_max_number_of_operands is less than number of merge operands so
  // status should be Incomplete.
  merge_operands_info.expected_max_number_of_operands = num_records - 1;
  Status status = fetch_operands("k1");
  ASSERT_EQ(status.IsIncomplete(), true);
  merge_operands_info.expected_max_number_of_operands = num_records;

  // All k1.1 values are in memtable.
  ASSERT_OK(Merge("k1.1", "r"));
  ASSERT_OK(Delete("k1.1"));
  ASSERT_OK(Merge("k1.1", "c"));
  ASSERT_OK(Merge("k1.1", "k"));
  ASSERT_OK(Merge("k1.1", "s"));
  ASSERT_OK(fetch_operands("k1.1"));
  ASSERT_EQ(values[0], "c");
  ASSERT_EQ(values[1], "k");
  ASSERT_EQ(values[2], "s");

  // All k2 values are flushed to L0 into a single file.
  ASSERT_OK(Merge("k2", "q"));
  ASSERT_OK(Merge("k2", "w"));
  ASSERT_OK(Merge("k2", "e"));
  ASSERT_OK(Merge("k2", "r"));
  ASSERT_OK(Flush());
  ASSERT_OK(fetch_operands("k2"));
  ASSERT_EQ(values[0], "q");
  ASSERT_EQ(values[1], "w");
  ASSERT_EQ(values[2], "e");
  ASSERT_EQ(values[3], "r");

  // All k2.1 values are flushed to L0 into a single file.
  ASSERT_OK(Merge("k2.1", "m"));
  ASSERT_OK(Put("k2.1", "l"));
  ASSERT_OK(Merge("k2.1", "n"));
  ASSERT_OK(Merge("k2.1", "o"));
  ASSERT_OK(Flush());
  ASSERT_OK(fetch_operands("k2.1"));
  ASSERT_EQ(values[0], "l,n,o");

  // All k2.2 values are flushed to L0 into a single file.
  ASSERT_OK(Merge("k2.2", "g"));
  ASSERT_OK(Delete("k2.2"));
  ASSERT_OK(Merge("k2.2", "o"));
  ASSERT_OK(Merge("k2.2", "t"));
  ASSERT_OK(Flush());
  ASSERT_OK(fetch_operands("k2.2"));
  ASSERT_EQ(values[0], "o,t");

  // Do some compaction that will make the following tests more predictable
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  // All k3 values are flushed and are in different files.
  ASSERT_OK(Merge("k3", "ab"));
  ASSERT_OK(Flush());
  ASSERT_OK(Merge("k3", "bc"));
  ASSERT_OK(Flush());
  ASSERT_OK(Merge("k3", "cd"));
  ASSERT_OK(Flush());
  ASSERT_OK(Merge("k3", "de"));
  ASSERT_OK(fetch_operands("k3"));
  ASSERT_EQ(values[0], "ab");
  ASSERT_EQ(values[1], "bc");
  ASSERT_EQ(values[2], "cd");
  ASSERT_EQ(values[3], "de");

  // All k3.1 values are flushed and are in different files.
  ASSERT_OK(Merge("k3.1", "ab"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("k3.1", "bc"));
  ASSERT_OK(Flush());
  ASSERT_OK(Merge("k3.1", "cd"));
  ASSERT_OK(Flush());
  ASSERT_OK(Merge("k3.1", "de"));
  ASSERT_OK(fetch_operands("k3.1"));
  ASSERT_EQ(values[0], "bc");
  ASSERT_EQ(values[1], "cd");
  ASSERT_EQ(values[2], "de");

  // All k3.2 values are flushed and are in different files.
  ASSERT_OK(Merge("k3.2", "ab"));
  ASSERT_OK(Flush());
  ASSERT_OK(Delete("k3.2"));
  ASSERT_OK(Flush());
  ASSERT_OK(Merge("k3.2", "cd"));
  ASSERT_OK(Flush());
  ASSERT_OK(Merge("k3.2", "de"));
  ASSERT_OK(fetch_operands("k3.2"));
  ASSERT_EQ(values[0], "cd");
  ASSERT_EQ(values[1], "de");

  // All K4 values are in different levels
  ASSERT_OK(Merge("k4", "ba"));
  ASSERT_OK(Flush());
  MoveFilesToLevel(4);
  ASSERT_OK(Merge("k4", "cb"));
  ASSERT_OK(Flush());
  MoveFilesToLevel(3);
  ASSERT_OK(Merge("k4", "dc"));
  ASSERT_OK(Flush());
  MoveFilesToLevel(1);
  ASSERT_OK(Merge("k4", "ed"));
  ASSERT_OK(fetch_operands("k4"));
  ASSERT_EQ(values[0], "ba");
  ASSERT_EQ(values[1], "cb");
  ASSERT_EQ(values[2], "dc");
  ASSERT_EQ(values[3], "ed");

  // First 3 k5 values are in SST and next 4 k5 values are in Immutable
  // Memtable
  ASSERT_OK(Merge("k5", "who"));
  ASSERT_OK(Merge("k5", "am"));
  ASSERT_OK(Merge("k5", "i"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("k5", "remember"));
  ASSERT_OK(Merge("k5", "i"));
  ASSERT_OK(Merge("k5", "am"));
  ASSERT_OK(Merge("k5", "rocks"));
  ASSERT_OK(dbfull()->TEST_SwitchMemtable());
  ASSERT_OK(fetch_operands("k5"));
  ASSERT_EQ(values[0], "remember");
  ASSERT_EQ(values[1], "i");
  ASSERT_EQ(values[2], "am");
}
// Runs a GetMergeOperands() scenario with blob files enabled and
// min_blob_size set to 0; every key here starts from a Put base value.
TEST_F(DBMergeOperandTest, BlobDBGetMergeOperandsBasic) {
  Options options;
  options.create_if_missing = true;
  options.enable_blob_files = true;
  options.min_blob_size = 0;
  // Use only the latest two merge operands.
  options.merge_operator = std::make_shared<LimitedStringAppendMergeOp>(2, ',');
  options.env = env_;
  Reopen(options);

  int num_records = 4;
  int number_of_operands = 0;
  std::vector<PinnableSlice> values(num_records);
  GetMergeOperandsOptions merge_operands_info;
  merge_operands_info.expected_max_number_of_operands = num_records;

  // Fetches the operands of `key` from the default column family into
  // `values`, using the current contents of `merge_operands_info`.
  auto fetch_operands = [&](const Slice& key) {
    return db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), key,
                                 values.data(), &merge_operands_info,
                                 &number_of_operands);
  };

  // All k1 values are in memtable.
  ASSERT_OK(Put("k1", "x"));
  ASSERT_OK(Merge("k1", "b"));
  ASSERT_OK(Merge("k1", "c"));
  ASSERT_OK(Merge("k1", "d"));
  ASSERT_OK(fetch_operands("k1"));
  ASSERT_EQ(values[0], "x");
  ASSERT_EQ(values[1], "b");
  ASSERT_EQ(values[2], "c");
  ASSERT_EQ(values[3], "d");

  // expected_max_number_of_operands is less than number of merge operands so
  // status should be Incomplete.
  merge_operands_info.expected_max_number_of_operands = num_records - 1;
  Status status = fetch_operands("k1");
  ASSERT_EQ(status.IsIncomplete(), true);
  merge_operands_info.expected_max_number_of_operands = num_records;

  // All k2 values are flushed to L0 into a single file.
  ASSERT_OK(Put("k2", "q"));
  ASSERT_OK(Merge("k2", "w"));
  ASSERT_OK(Merge("k2", "e"));
  ASSERT_OK(Merge("k2", "r"));
  ASSERT_OK(Flush());
  ASSERT_OK(fetch_operands("k2"));
  ASSERT_EQ(values[0], "q,w,e,r");

  // Do some compaction that will make the following tests more predictable
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  // All k3 values are flushed and are in different files.
  ASSERT_OK(Put("k3", "ab"));
  ASSERT_OK(Flush());
  ASSERT_OK(Merge("k3", "bc"));
  ASSERT_OK(Flush());
  ASSERT_OK(Merge("k3", "cd"));
  ASSERT_OK(Flush());
  ASSERT_OK(Merge("k3", "de"));
  ASSERT_OK(fetch_operands("k3"));
  ASSERT_EQ(values[0], "ab");
  ASSERT_EQ(values[1], "bc");
  ASSERT_EQ(values[2], "cd");
  ASSERT_EQ(values[3], "de");

  // All K4 values are in different levels
  ASSERT_OK(Put("k4", "ba"));
  ASSERT_OK(Flush());
  MoveFilesToLevel(4);
  ASSERT_OK(Merge("k4", "cb"));
  ASSERT_OK(Flush());
  MoveFilesToLevel(3);
  ASSERT_OK(Merge("k4", "dc"));
  ASSERT_OK(Flush());
  MoveFilesToLevel(1);
  ASSERT_OK(Merge("k4", "ed"));
  ASSERT_OK(fetch_operands("k4"));
  ASSERT_EQ(values[0], "ba");
  ASSERT_EQ(values[1], "cb");
  ASSERT_EQ(values[2], "dc");
  ASSERT_EQ(values[3], "ed");
}
TEST_F(DBMergeOperandTest, GetMergeOperandsLargeResultOptimization) {
  // These constants are chosen to trigger the large result optimization
  // (pinning a bundle of `DBImpl` resources).
  const int kNumOperands = 1024;
  const int kOperandLen = 1024;

  Options options;
  options.create_if_missing = true;
  options.merge_operator = MergeOperators::CreateStringAppendOperator();
  DestroyAndReopen(options);

  // Write kNumOperands random operands for one key, remembering each of them.
  Random rnd(301);
  std::vector<std::string> expected_merge_operands;
  expected_merge_operands.reserve(kNumOperands);
  for (int n = 0; n < kNumOperands; ++n) {
    expected_merge_operands.emplace_back(rnd.RandomString(kOperandLen));
    ASSERT_OK(Merge("key", expected_merge_operands.back()));
  }

  GetMergeOperandsOptions merge_operands_info;
  merge_operands_info.expected_max_number_of_operands = kNumOperands;
  std::vector<PinnableSlice> merge_operands(kNumOperands);
  int num_merge_operands = 0;
  ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(),
                                  "key", merge_operands.data(),
                                  &merge_operands_info, &num_merge_operands));
  ASSERT_EQ(num_merge_operands, kNumOperands);

  // Ensures the large result optimization was used.
  for (const PinnableSlice& operand : merge_operands) {
    ASSERT_TRUE(operand.IsPinned());
  }

  // Add a Flush() to change the `SuperVersion` to challenge the resource
  // pinning.
  ASSERT_OK(Flush());
  for (int n = 0; n < kNumOperands; ++n) {
    ASSERT_EQ(expected_merge_operands[n], merge_operands[n]);
  }
}
TEST_F(DBMergeOperandTest, GetMergeOperandsBaseDeletionInImmMem) {
  // In this test, "k1" has a MERGE in a mutable memtable on top of a base
  // DELETE in an immutable memtable.
  Options opts = CurrentOptions();
  opts.max_write_buffer_number = 10;
  opts.min_write_buffer_number_to_merge = 10;
  opts.merge_operator = MergeOperators::CreateDeprecatedPutOperator();
  Reopen(opts);

  ASSERT_OK(Put("k1", "val"));
  ASSERT_OK(Flush());

  ASSERT_OK(Put("k0", "val"));
  ASSERT_OK(Delete("k1"));
  ASSERT_OK(Put("k2", "val"));
  ASSERT_OK(dbfull()->TEST_SwitchMemtable());
  ASSERT_OK(Merge("k1", "val"));

  // GetMergeOperands() must report exactly the one merge operand.
  {
    std::vector<PinnableSlice> values(2);
    GetMergeOperandsOptions merge_operands_info;
    merge_operands_info.expected_max_number_of_operands =
        static_cast<int>(values.size());
    const std::string key = "k1";
    int number_of_operands = 0;
    ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(),
                                    key, values.data(), &merge_operands_info,
                                    &number_of_operands));
    ASSERT_EQ(1, number_of_operands);
    const std::string from_db = values[0].ToString();
    ASSERT_EQ("val", from_db);
  }

  // A regular Get() must return the same value.
  {
    std::string val;
    ASSERT_OK(db_->Get(ReadOptions(), "k1", &val));
    ASSERT_EQ("val", val);
  }
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack trace handler, lets GoogleTest
// consume its command-line flags, then runs every registered test.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

@ -1,824 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include <string>
#include <vector>
#include "db/db_test_util.h"
#include "db/forward_iterator.h"
#include "port/stack_trace.h"
#include "rocksdb/merge_operator.h"
#include "util/random.h"
#include "utilities/merge_operators.h"
#include "utilities/merge_operators/string_append/stringappend2.h"
namespace ROCKSDB_NAMESPACE {
// ReadCallback that decides visibility by asking a SnapshotChecker whether a
// sequence number is in the snapshot identified by `snapshot_seq`.
class TestReadCallback : public ReadCallback {
 public:
  TestReadCallback(SnapshotChecker* snapshot_checker,
                   SequenceNumber snapshot_seq)
      : ReadCallback(snapshot_seq),
        snapshot_checker_(snapshot_checker),
        snapshot_seq_(snapshot_seq) {}

  // Visible only when the checker answers kInSnapshot for `seq`.
  bool IsVisibleFullCheck(SequenceNumber seq) override {
    const auto verdict = snapshot_checker_->CheckInSnapshot(seq, snapshot_seq_);
    return verdict == SnapshotCheckerResult::kInSnapshot;
  }

 private:
  SnapshotChecker* snapshot_checker_;
  SequenceNumber snapshot_seq_;
};
// Test merge operator functionality.
class DBMergeOperatorTest : public DBTestBase {
public:
DBMergeOperatorTest()
: DBTestBase("db_merge_operator_test", /*env_do_fsync=*/false) {}
std::string GetWithReadCallback(SnapshotChecker* snapshot_checker,
const Slice& key,
const Snapshot* snapshot = nullptr) {
SequenceNumber seq = snapshot == nullptr ? db_->GetLatestSequenceNumber()
: snapshot->GetSequenceNumber();
TestReadCallback read_callback(snapshot_checker, seq);
ReadOptions read_opt;
read_opt.snapshot = snapshot;
PinnableSlice value;
DBImpl::GetImplOptions get_impl_options;
get_impl_options.column_family = db_->DefaultColumnFamily();
get_impl_options.value = &value;
get_impl_options.callback = &read_callback;
Status s = dbfull()->GetImpl(read_opt, key, get_impl_options);
if (!s.ok()) {
return s.ToString();
}
return value.ToString();
}
};
// Checks that a merge operator whose ShouldMerge() fires at two operands
// makes Get() return only the latest two operands, wherever they are stored.
TEST_F(DBMergeOperatorTest, LimitMergeOperands) {
  // String-append operator that asks for a merge once at least `limit`
  // operands have been collected.
  class LimitedStringAppendMergeOp : public StringAppendTESTOperator {
   public:
    LimitedStringAppendMergeOp(int limit, char delim)
        : StringAppendTESTOperator(delim), limit_(limit) {}

    const char* Name() const override {
      return "DBMergeOperatorTest::LimitedStringAppendMergeOp";
    }

    bool ShouldMerge(const std::vector<Slice>& operands) const override {
      const bool limit_enabled = limit_ > 0;
      return !operands.empty() && limit_enabled && operands.size() >= limit_;
    }

   private:
    size_t limit_ = 0;
  };

  Options options;
  options.create_if_missing = true;
  // Use only the latest two merge operands.
  options.merge_operator = std::make_shared<LimitedStringAppendMergeOp>(2, ',');
  options.env = env_;
  Reopen(options);

  std::string value;
  // Reads `key` with default read options into `value`.
  auto read_key = [&](const Slice& key) {
    return db_->Get(ReadOptions(), key, &value);
  };

  // All K1 values are in memtable.
  ASSERT_OK(Merge("k1", "a"));
  ASSERT_OK(Merge("k1", "b"));
  ASSERT_OK(Merge("k1", "c"));
  ASSERT_OK(Merge("k1", "d"));
  ASSERT_OK(read_key("k1"));
  // Make sure that only the latest two merge operands are used. If this was
  // not the case the value would be "a,b,c,d".
  ASSERT_EQ(value, "c,d");

  // All K2 values are flushed to L0 into a single file.
  ASSERT_OK(Merge("k2", "a"));
  ASSERT_OK(Merge("k2", "b"));
  ASSERT_OK(Merge("k2", "c"));
  ASSERT_OK(Merge("k2", "d"));
  ASSERT_OK(Flush());
  ASSERT_OK(read_key("k2"));
  ASSERT_EQ(value, "c,d");

  // All K3 values are flushed and are in different files.
  ASSERT_OK(Merge("k3", "ab"));
  ASSERT_OK(Flush());
  ASSERT_OK(Merge("k3", "bc"));
  ASSERT_OK(Flush());
  ASSERT_OK(Merge("k3", "cd"));
  ASSERT_OK(Flush());
  ASSERT_OK(Merge("k3", "de"));
  ASSERT_OK(read_key("k3"));
  ASSERT_EQ(value, "cd,de");

  // All K4 values are in different levels
  ASSERT_OK(Merge("k4", "ab"));
  ASSERT_OK(Flush());
  MoveFilesToLevel(4);
  ASSERT_OK(Merge("k4", "bc"));
  ASSERT_OK(Flush());
  MoveFilesToLevel(3);
  ASSERT_OK(Merge("k4", "cd"));
  ASSERT_OK(Flush());
  MoveFilesToLevel(1);
  ASSERT_OK(Merge("k4", "de"));
  ASSERT_OK(read_key("k4"));
  ASSERT_EQ(value, "cd,de");
}
// A Get() on a key whose operands include "corrupted" must report Corruption,
// while both operands remain stored unmerged.
TEST_F(DBMergeOperatorTest, MergeErrorOnRead) {
  Options options;
  options.create_if_missing = true;
  options.merge_operator = std::make_shared<TestPutOperator>();
  options.env = env_;
  Reopen(options);

  ASSERT_OK(Merge("k1", "v1"));
  ASSERT_OK(Merge("k1", "corrupted"));

  std::string value;
  const Status read_status = db_->Get(ReadOptions(), "k1", &value);
  ASSERT_TRUE(read_status.IsCorruption());
  VerifyDBInternal({{"k1", "corrupted"}, {"k1", "v1"}});
}
// With max_successive_merges set to 3, the third Merge() still succeeds even
// though it carries the "corrupted" operand, and all operands stay unmerged.
TEST_F(DBMergeOperatorTest, MergeErrorOnWrite) {
  Options options;
  options.create_if_missing = true;
  options.merge_operator = std::make_shared<TestPutOperator>();
  options.max_successive_merges = 3;
  options.env = env_;
  Reopen(options);

  ASSERT_OK(Merge("k1", "v1"));
  ASSERT_OK(Merge("k1", "v2"));
  // Will trigger a merge when hitting max_successive_merges and the merge
  // will fail. The delta will be inserted nevertheless.
  ASSERT_OK(Merge("k1", "corrupted"));

  // Data should stay unmerged after the error.
  VerifyDBInternal({{"k1", "corrupted"}, {"k1", "v2"}, {"k1", "v1"}});
}
// Checks that an iterator becomes invalid with a Corruption status when it
// lands on a key whose operands include "corrupted", whether it gets there
// through Seek(), Prev() or Next(), and that the stored entries stay unmerged.
TEST_F(DBMergeOperatorTest, MergeErrorOnIteration) {
  Options options;
  options.create_if_missing = true;
  options.merge_operator.reset(new TestPutOperator());
  options.env = env_;

  DestroyAndReopen(options);
  ASSERT_OK(Merge("k1", "v1"));
  ASSERT_OK(Merge("k1", "corrupted"));
  ASSERT_OK(Put("k2", "v2"));

  // The iterator is owned by a unique_ptr so that a failing ASSERT_* (which
  // returns from the test body immediately) cannot leak it. It is reset
  // explicitly wherever the original code deleted it, so it never outlives
  // VerifyDBInternal() or DestroyAndReopen().
  std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
  // Seeking directly onto the failing key.
  iter->Seek("k1");
  ASSERT_FALSE(iter->Valid());
  ASSERT_TRUE(iter->status().IsCorruption());
  iter.reset();

  // Stepping backwards from a healthy key onto the failing key.
  iter.reset(db_->NewIterator(ReadOptions()));
  iter->Seek("k2");
  ASSERT_TRUE(iter->Valid());
  ASSERT_OK(iter->status());
  iter->Prev();
  ASSERT_FALSE(iter->Valid());
  ASSERT_TRUE(iter->status().IsCorruption());
  iter.reset();
  VerifyDBInternal({{"k1", "corrupted"}, {"k1", "v1"}, {"k2", "v2"}});

  DestroyAndReopen(options);
  ASSERT_OK(Merge("k1", "v1"));
  ASSERT_OK(Put("k2", "v2"));
  ASSERT_OK(Merge("k2", "corrupted"));

  // Stepping forwards from a healthy key onto the failing key.
  iter.reset(db_->NewIterator(ReadOptions()));
  iter->Seek("k1");
  ASSERT_TRUE(iter->Valid());
  ASSERT_OK(iter->status());
  iter->Next();
  ASSERT_FALSE(iter->Valid());
  ASSERT_TRUE(iter->status().IsCorruption());
  iter.reset();
  VerifyDBInternal({{"k1", "v1"}, {"k2", "corrupted"}, {"k2", "v2"}});
}
TEST_F(DBMergeOperatorTest, MergeOperatorFailsWithMustMerge) {
  // This is like a mini-stress test dedicated to `OpFailureScope::kMustMerge`.
  // Some or most of it might be deleted upon adding that option to the actual
  // stress test.
  //
  // "k0" and "k2" are stable (uncorrupted) keys before and after a corrupted
  // key ("k1"). The outer loop (`i`) varies which write (`j`) to "k1" triggers
  // the corruption. Inside that loop there are three cases:
  //
  // - Case 1: pure `Merge()`s
  // - Case 2: `Merge()`s on top of a `Put()`
  // - Case 3: `Merge()`s on top of a `Delete()`
  //
  // For each case we test query results before flush, after flush, and after
  // compaction, as well as cleanup after deletion+compaction. The queries
  // expect "k0" and "k2" to always be readable. "k1" is expected to be readable
  // only by APIs that do not require merging, such as `GetMergeOperands()`.
  const int kNumOperands = 3;
  Options options;
  options.merge_operator.reset(new TestPutOperator());
  options.env = env_;
  Reopen(options);

  // Compacts the whole key range with kForceOptimized bottommost-level
  // compaction and hands the status back to the caller.
  auto force_compact = [&]() {
    CompactRangeOptions cro;
    cro.bottommost_level_compaction =
        BottommostLevelCompaction::kForceOptimized;
    return db_->CompactRange(cro, nullptr, nullptr);
  };

  for (int i = 0; i < kNumOperands; ++i) {
    // Value of the j-th write to "k1": only the i-th one is the failing one.
    auto operand_at = [i](int j) -> const char* {
      return j == i ? "corrupted_must_merge" : "ok";
    };

    auto check_query = [&]() {
      // Point lookups: the neighbours are readable, "k1" fails to merge.
      {
        std::string value;
        ASSERT_OK(db_->Get(ReadOptions(), "k0", &value));
        Status s = db_->Get(ReadOptions(), "k1", &value);
        ASSERT_TRUE(s.IsCorruption());
        ASSERT_EQ(Status::SubCode::kMergeOperatorFailed, s.subcode());
        ASSERT_OK(db_->Get(ReadOptions(), "k2", &value));
      }
      // Iteration: stepping onto "k1" from either side reports corruption,
      // and the iterator can still be repositioned afterwards.
      {
        std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
        iter->SeekToFirst();
        ASSERT_TRUE(iter->Valid());
        ASSERT_EQ("k0", iter->key());
        iter->Next();
        ASSERT_TRUE(iter->status().IsCorruption());
        ASSERT_EQ(Status::SubCode::kMergeOperatorFailed,
                  iter->status().subcode());
        iter->SeekToLast();
        ASSERT_TRUE(iter->Valid());
        ASSERT_EQ("k2", iter->key());
        iter->Prev();
        ASSERT_TRUE(iter->status().IsCorruption());
        iter->Seek("k2");
        ASSERT_TRUE(iter->Valid());
        ASSERT_EQ("k2", iter->key());
      }
      // GetMergeOperands() still returns every operand of "k1".
      std::vector<PinnableSlice> values(kNumOperands);
      GetMergeOperandsOptions merge_operands_info;
      merge_operands_info.expected_max_number_of_operands = kNumOperands;
      int num_operands_found = 0;
      ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(),
                                      "k1", values.data(), &merge_operands_info,
                                      &num_operands_found));
      ASSERT_EQ(kNumOperands, num_operands_found);
      for (int j = 0; j < num_operands_found; ++j) {
        ASSERT_EQ(values[j], operand_at(j));
      }
    };

    ASSERT_OK(Put("k0", "val"));
    ASSERT_OK(Put("k2", "val"));

    // Case 1
    for (int j = 0; j < kNumOperands; ++j) {
      ASSERT_OK(Merge("k1", operand_at(j)));
    }
    check_query();
    ASSERT_OK(Flush());
    check_query();
    ASSERT_OK(force_compact());
    check_query();

    // Case 2
    for (int j = 0; j < kNumOperands; ++j) {
      const Slice val = operand_at(j);
      if (j == 0) {
        ASSERT_OK(Put("k1", val));
      } else {
        ASSERT_OK(Merge("k1", val));
      }
    }
    check_query();
    ASSERT_OK(Flush());
    check_query();
    ASSERT_OK(force_compact());
    check_query();

    // Case 3
    ASSERT_OK(Delete("k1"));
    for (int j = 0; j < kNumOperands; ++j) {
      ASSERT_OK(Merge("k1", operand_at(j)));
    }
    check_query();
    ASSERT_OK(Flush());
    check_query();
    ASSERT_OK(force_compact());
    check_query();

    // Verify obsolete data removal still happens
    ASSERT_OK(Delete("k0"));
    ASSERT_OK(Delete("k1"));
    ASSERT_OK(Delete("k2"));
    ASSERT_EQ("NOT_FOUND", Get("k0"));
    ASSERT_EQ("NOT_FOUND", Get("k1"));
    ASSERT_EQ("NOT_FOUND", Get("k2"));
    ASSERT_OK(force_compact());
    ASSERT_EQ("", FilesPerLevel());
  }
}
// Fixture parameterized on a bool that its tests use to set
// BlockBasedTableOptions::no_block_cache.
class MergeOperatorPinningTest : public DBMergeOperatorTest,
                                 public testing::WithParamInterface<bool> {
 public:
  MergeOperatorPinningTest() : disable_block_cache_(GetParam()) {}

  bool disable_block_cache_;
};

// Run every MergeOperatorPinningTest case with both parameter values.
INSTANTIATE_TEST_CASE_P(MergeOperatorPinningTest, MergeOperatorPinningTest,
                        ::testing::Bool());
// Spreads merge operands for a small key set across the memtable and several
// levels, with one entry per data block, then checks that the DB contents
// match the expected string-appended values.
TEST_P(MergeOperatorPinningTest, OperandsMultiBlocks) {
  Options options = CurrentOptions();
  BlockBasedTableOptions table_options;
  table_options.block_size = 1;  // every block will contain one entry
  table_options.no_block_cache = disable_block_cache_;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
  options.level0_slowdown_writes_trigger = (1 << 30);
  options.level0_stop_writes_trigger = (1 << 30);
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);

  const int kKeysPerFile = 10;
  const int kOperandsPerKeyPerFile = 7;
  const int kOperandSize = 100;
  // Files to write in L0 before compacting to lower level
  const int kFilesPerLevel = 3;

  Random rnd(301);
  std::map<std::string, std::string> true_data;
  int batch_num = 1;
  int lvl_to_fill = 4;
  int key_id = 0;
  for (;;) {
    // Write one batch: kKeysPerFile keys, kOperandsPerKeyPerFile operands each.
    for (int j = 0; j < kKeysPerFile; j++) {
      const std::string key = Key(key_id % 35);
      key_id++;
      for (int k = 0; k < kOperandsPerKeyPerFile; k++) {
        const std::string val = rnd.RandomString(kOperandSize);
        ASSERT_OK(db_->Merge(WriteOptions(), key, val));
        // Mirror the expected result: operands joined by ','.
        std::string& expected = true_data[key];
        if (!expected.empty()) {
          expected += ",";
        }
        expected += val;
      }
    }
    if (lvl_to_fill == -1) {
      // Keep last batch in memtable and stop
      break;
    }
    ASSERT_OK(Flush());
    // After every kFilesPerLevel flushes, move the files down (except when
    // the target is L0) and continue with the next level up.
    if (batch_num % kFilesPerLevel == 0) {
      if (lvl_to_fill != 0) {
        MoveFilesToLevel(lvl_to_fill);
      }
      lvl_to_fill--;
    }
    batch_num++;
  }

  // 3 L0 files
  // 1 L1 file
  // 3 L2 files
  // 1 L3 file
  // 3 L4 Files
  ASSERT_EQ(FilesPerLevel(), "3,1,3,1,3");
  VerifyDBFromMap(true_data);
}
// MergeOperator wrapper used by tests to observe full merges. FullMergeV2()
// and Name() are forwarded to the wrapped operator; before_merge_ and
// after_merge_ are invoked immediately before and after each forwarded
// FullMergeV2() call. Both hooks start out as no-ops and are public so a test
// can replace them after construction.
class MergeOperatorHook : public MergeOperator {
 public:
  explicit MergeOperatorHook(std::shared_ptr<MergeOperator> _merge_op)
      : merge_op_(_merge_op) {}

  // Runs before_merge_, the wrapped FullMergeV2, then after_merge_, and
  // returns whatever the wrapped operator returned.
  bool FullMergeV2(const MergeOperationInput& merge_in,
                   MergeOperationOutput* merge_out) const override {
    before_merge_();
    const bool merged = merge_op_->FullMergeV2(merge_in, merge_out);
    after_merge_();
    return merged;
  }

  // Reports the wrapped operator's name.
  const char* Name() const override { return merge_op_->Name(); }

  std::shared_ptr<MergeOperator> merge_op_;
  std::function<void()> before_merge_ = []() {};
  std::function<void()> after_merge_ = []() {};
};
// Writes kNumOperands merge operands per key, each operand in its own SST
// file, then reads everything back while a MergeOperatorHook evicts all table
// readers (and shrinks the block cache, when one is used) right before every
// full merge. The reads must still return the maximum operand of each key.
TEST_P(MergeOperatorPinningTest, EvictCacheBeforeMerge) {
  // Single source of truth for the block cache size (used when creating the
  // cache and when restoring its capacity after a merge).
  const size_t kBlockCacheCapacity = 64 * 1024 * 1024;

  Options options = CurrentOptions();
  auto merge_hook =
      std::make_shared<MergeOperatorHook>(MergeOperators::CreateMaxOperator());
  options.merge_operator = merge_hook;
  options.disable_auto_compactions = true;
  options.level0_slowdown_writes_trigger = (1 << 30);
  options.level0_stop_writes_trigger = (1 << 30);
  options.max_open_files = 20;

  BlockBasedTableOptions bbto;
  bbto.no_block_cache = disable_block_cache_;
  if (!bbto.no_block_cache) {
    bbto.block_cache = NewLRUCache(kBlockCacheCapacity);
  } else {
    bbto.block_cache = nullptr;
  }
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  DestroyAndReopen(options);

  const int kNumOperands = 30;
  const int kNumKeys = 1000;
  const int kOperandSize = 100;
  Random rnd(301);

  // 1000 keys every key have 30 operands, every operand is in a different file
  std::map<std::string, std::string> true_data;
  for (int i = 0; i < kNumOperands; i++) {
    for (int j = 0; j < kNumKeys; j++) {
      std::string k = Key(j);
      std::string v = rnd.RandomString(kOperandSize);
      ASSERT_OK(db_->Merge(WriteOptions(), k, v));
      // The max operator keeps the largest operand seen so far.
      true_data[k] = std::max(true_data[k], v);
    }
    ASSERT_OK(Flush());
  }

  std::vector<uint64_t> file_numbers = ListTableFiles(env_, dbname_);
  // Compare as size_t on both sides to avoid a signed/unsigned comparison.
  ASSERT_EQ(file_numbers.size(), static_cast<size_t>(kNumOperands));

  size_t merge_cnt = 0;
  // Code executed before merge operation
  merge_hook->before_merge_ = [&]() {
    // Evict all tables from cache before every merge operation
    auto* table_cache = dbfull()->TEST_table_cache();
    for (uint64_t num : file_numbers) {
      TableCache::Evict(table_cache, num);
    }
    // Decrease cache capacity to force all unrefed blocks to be evicted
    if (bbto.block_cache) {
      bbto.block_cache->SetCapacity(1);
    }
    merge_cnt++;
  };
  // Code executed after merge operation
  merge_hook->after_merge_ = [&]() {
    // Increase capacity again after doing the merge
    if (bbto.block_cache) {
      bbto.block_cache->SetCapacity(kBlockCacheCapacity);
    }
  };

  // Initialized so the assertion below never reads an indeterminate value,
  // whatever VerifyDBFromMap does with the out-parameter.
  size_t total_reads = 0;
  VerifyDBFromMap(true_data, &total_reads);
  // Every read performed by the verification must have triggered one merge.
  ASSERT_EQ(merge_cnt, total_reads);

  // The data must also survive a full manual compaction.
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  VerifyDBFromMap(true_data, &total_reads);
}
// Runs a writer thread and a tailing-iterator reader thread concurrently.
// The writer issues kNumWrites merges (about kNumOperands per key) with
// periodic flushes and full compactions; the reader follows the keys in order
// and checks that each key's merged value (max operator) equals the key.
TEST_P(MergeOperatorPinningTest, TailingIterator) {
  Options options = CurrentOptions();
  options.merge_operator = MergeOperators::CreateMaxOperator();
  BlockBasedTableOptions bbto;
  bbto.no_block_cache = disable_block_cache_;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  DestroyAndReopen(options);

  const int kNumOperands = 100;
  const int kNumWrites = 100000;

  std::function<void()> writer_func = [&]() {
    int k = 0;
    for (int i = 0; i < kNumWrites; i++) {
      ASSERT_OK(db_->Merge(WriteOptions(), Key(k), Key(k)));

      // Move on to the next key after every kNumOperands writes.
      if (i && i % kNumOperands == 0) {
        k++;
      }
      // Flush and compact at unrelated intervals so the operands of one key
      // get scattered over memtable and files.
      if (i && i % 127 == 0) {
        ASSERT_OK(Flush());
      }
      if (i && i % 317 == 0) {
        ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
      }
    }
  };

  std::function<void()> reader_func = [&]() {
    ReadOptions ro;
    ro.tailing = true;
    // Owned by a unique_ptr: a failing ASSERT_* below returns from this
    // lambda immediately, and a raw pointer would then never be deleted.
    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
    ASSERT_OK(iter->status());
    iter->SeekToFirst();
    for (int i = 0; i < (kNumWrites / kNumOperands); i++) {
      while (!iter->Valid()) {
        // wait for the key to be written
        env_->SleepForMicroseconds(100);
        iter->Seek(Key(i));
      }
      ASSERT_EQ(iter->key(), Key(i));
      ASSERT_EQ(iter->value(), Key(i));
      iter->Next();
    }
    ASSERT_OK(iter->status());
  };

  ROCKSDB_NAMESPACE::port::Thread writer_thread(writer_func);
  ROCKSDB_NAMESPACE::port::Thread reader_thread(reader_func);
  writer_thread.join();
  reader_thread.join();
}
// Regression test for a tailing iterator that must keep a memtable pinned
// while a merge spanning that memtable and an sst file is in progress.
TEST_F(DBMergeOperatorTest, TailingIteratorMemtableUnrefedBySomeoneElse) {
  Options options = CurrentOptions();
  options.merge_operator = MergeOperators::CreateStringAppendOperator();
  DestroyAndReopen(options);

  // Overview of the test:
  //  * There are two merge operands for the same key: one in an sst file,
  //    another in a memtable.
  //  * Seek a tailing iterator to this key.
  //  * As part of the seek, the iterator will:
  //      (a) first visit the operand in the memtable and tell ForwardIterator
  //          to pin this operand, then
  //      (b) move on to the operand in the sst file, then pass both operands
  //          to merge operator.
  //  * The memtable may get flushed and unreferenced by another thread between
  //    (a) and (b). The test simulates it by flushing the memtable inside a
  //    SyncPoint callback located between (a) and (b).
  //  * In this case it's ForwardIterator's responsibility to keep the memtable
  //    pinned until (b) is complete. There used to be a bug causing
  //    ForwardIterator to not pin it in some circumstances. This test
  //    reproduces it.

  ASSERT_OK(db_->Merge(WriteOptions(), "key", "sst"));
  ASSERT_OK(db_->Flush(FlushOptions()));  // Switch to SuperVersion A
  ASSERT_OK(db_->Merge(WriteOptions(), "key", "memtable"));

  // Pin SuperVersion A
  std::unique_ptr<Iterator> someone_else(db_->NewIterator(ReadOptions()));
  ASSERT_OK(someone_else->status());

  // Each flag records that its callback ran; the EXPECT_FALSE inside the
  // callback additionally checks that it ran only once.
  bool pushed_first_operand = false;
  bool stepped_to_next_operand = false;

  auto* const sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sync_point->SetCallBack(
      "DBIter::MergeValuesNewToOld:PushedFirstOperand", [&](void*) {
        EXPECT_FALSE(pushed_first_operand);
        pushed_first_operand = true;
        EXPECT_OK(db_->Flush(FlushOptions()));  // Switch to SuperVersion B
      });
  sync_point->SetCallBack(
      "DBIter::MergeValuesNewToOld:SteppedToNextOperand", [&](void*) {
        EXPECT_FALSE(stepped_to_next_operand);
        stepped_to_next_operand = true;
        someone_else.reset();  // Unpin SuperVersion A
      });
  sync_point->EnableProcessing();

  ReadOptions tailing_read_options;
  tailing_read_options.tailing = true;
  std::unique_ptr<Iterator> iter(db_->NewIterator(tailing_read_options));
  iter->Seek("key");

  ASSERT_OK(iter->status());
  ASSERT_TRUE(iter->Valid());
  EXPECT_EQ(std::string("sst,memtable"), iter->value().ToString());
  EXPECT_TRUE(pushed_first_operand);
  EXPECT_TRUE(stepped_to_next_operand);
}
// Checks that reads of a merged key honor a custom SnapshotChecker (through
// GetWithReadCallback) before and after flushes and a full compaction. The
// checker below hides selected sequence numbers from selected snapshots, so
// the expected string-append result differs per snapshot.
TEST_F(DBMergeOperatorTest, SnapshotCheckerAndReadCallback) {
  Options options = CurrentOptions();
  options.merge_operator = MergeOperators::CreateStringAppendOperator();
  DestroyAndReopen(options);

  // Hard-coded visibility table, indexed by snapshot sequence number.
  class TestSnapshotChecker : public SnapshotChecker {
   public:
    SnapshotCheckerResult CheckInSnapshot(
        SequenceNumber seq, SequenceNumber snapshot_seq) const override {
      return IsInSnapshot(seq, snapshot_seq)
                 ? SnapshotCheckerResult::kInSnapshot
                 : SnapshotCheckerResult::kNotInSnapshot;
    }

    bool IsInSnapshot(SequenceNumber seq, SequenceNumber snapshot_seq) const {
      switch (snapshot_seq) {
        case 0:
          return seq == 0;
        case 1:
          return seq <= 1;
        case 2:
          // seq = 2 not visible to snapshot with seq = 2
          return seq <= 1;
        case 3:
          return seq <= 3;
        case 4:
          // seq = 4 not visible to snapshot with seq = 4
          return seq <= 3;
        default:
          // seq > 4 is uncommitted
          return seq <= 4;
      }
    }
  };

  // NOTE(review): allocated with raw new and never deleted in this test;
  // presumably SetSnapshotChecker takes ownership - confirm.
  TestSnapshotChecker* snapshot_checker = new TestSnapshotChecker();
  dbfull()->SetSnapshotChecker(snapshot_checker);

  ASSERT_OK(Merge("foo", "v1"));
  ASSERT_EQ(1, db_->GetLatestSequenceNumber());
  ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo"));
  ASSERT_OK(Merge("foo", "v2"));
  ASSERT_EQ(2, db_->GetLatestSequenceNumber());
  // v2 is not visible to latest snapshot, which has seq = 2.
  ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo"));
  // Take a snapshot with seq = 2.
  const Snapshot* snapshot1 = db_->GetSnapshot();
  ASSERT_EQ(2, snapshot1->GetSequenceNumber());
  // v2 is not visible to snapshot1, which has seq = 2
  ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo", snapshot1));

  // Verify flush doesn't alter the result.
  ASSERT_OK(Flush());
  ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo", snapshot1));
  ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo"));

  ASSERT_OK(Merge("foo", "v3"));
  ASSERT_EQ(3, db_->GetLatestSequenceNumber());
  ASSERT_EQ("v1,v2,v3", GetWithReadCallback(snapshot_checker, "foo"));
  ASSERT_OK(Merge("foo", "v4"));
  ASSERT_EQ(4, db_->GetLatestSequenceNumber());
  // v4 is not visible to latest snapshot, which has seq = 4.
  ASSERT_EQ("v1,v2,v3", GetWithReadCallback(snapshot_checker, "foo"));
  const Snapshot* snapshot2 = db_->GetSnapshot();
  ASSERT_EQ(4, snapshot2->GetSequenceNumber());
  // v4 is not visible to snapshot2, which has seq = 4.
  ASSERT_EQ("v1,v2,v3",
            GetWithReadCallback(snapshot_checker, "foo", snapshot2));

  // Verify flush doesn't alter the result.
  ASSERT_OK(Flush());
  ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo", snapshot1));
  ASSERT_EQ("v1,v2,v3",
            GetWithReadCallback(snapshot_checker, "foo", snapshot2));
  ASSERT_EQ("v1,v2,v3", GetWithReadCallback(snapshot_checker, "foo"));

  ASSERT_OK(Merge("foo", "v5"));
  ASSERT_EQ(5, db_->GetLatestSequenceNumber());
  // v5 is uncommitted
  ASSERT_EQ("v1,v2,v3,v4", GetWithReadCallback(snapshot_checker, "foo"));

  // full manual compaction.
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  // Verify compaction doesn't alter the result.
  ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo", snapshot1));
  ASSERT_EQ("v1,v2,v3",
            GetWithReadCallback(snapshot_checker, "foo", snapshot2));
  ASSERT_EQ("v1,v2,v3,v4", GetWithReadCallback(snapshot_checker, "foo"));

  db_->ReleaseSnapshot(snapshot1);
  db_->ReleaseSnapshot(snapshot2);
}
// Fixture parameterized on (disable block cache, option config). The second
// tuple element is stored into the inherited option_config_ member.
class PerConfigMergeOperatorPinningTest
    : public DBMergeOperatorTest,
      public testing::WithParamInterface<std::tuple<bool, int>> {
 public:
  PerConfigMergeOperatorPinningTest() {
    const auto& param = GetParam();
    disable_block_cache_ = std::get<0>(param);
    option_config_ = std::get<1>(param);
  }

  // Copied into BlockBasedTableOptions::no_block_cache by the test.
  bool disable_block_cache_;
};

// Cross product of both block-cache settings with every option config in
// [DBTestBase::kDefault, DBTestBase::kEnd).
INSTANTIATE_TEST_CASE_P(
    MergeOperatorPinningTest, PerConfigMergeOperatorPinningTest,
    ::testing::Combine(::testing::Bool(),
                       ::testing::Range(static_cast<int>(DBTestBase::kDefault),
                                        static_cast<int>(DBTestBase::kEnd))));
// Applies a fixed-seed pseudo-random mix of puts, merges (max operator),
// overwriting puts and deletes to a small key range while mirroring the
// expected outcome in a map, then verifies the DB against that map.
TEST_P(PerConfigMergeOperatorPinningTest, Randomized) {
  if (ShouldSkipOptions(option_config_, kSkipMergePut)) {
    return;
  }

  Options options = CurrentOptions();
  options.merge_operator = MergeOperators::CreateMaxOperator();
  BlockBasedTableOptions table_options;
  table_options.no_block_cache = disable_block_cache_;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  DestroyAndReopen(options);

  const int kTotalMerges = 5000;
  // Every key gets ~10 operands
  const int kKeyRange = kTotalMerges / 10;
  const int kOperandSize = 20;
  const int kNumPutBefore = kKeyRange / 10;  // 10% value
  const int kNumPutAfter = kKeyRange / 10;   // 10% overwrite
  const int kNumDelete = kKeyRange / 10;     // 10% delete

  Random rnd(301);
  std::map<std::string, std::string> true_data;

  // Picks the next pseudo-random key inside the key range.
  auto random_key = [&]() { return Key(rnd.Next() % kKeyRange); };

  // kNumPutBefore keys will have base values
  for (int n = 0; n < kNumPutBefore; ++n) {
    const std::string key = random_key();
    const std::string value = rnd.RandomString(kOperandSize);
    ASSERT_OK(db_->Put(WriteOptions(), key, value));
    true_data[key] = value;
  }

  // Do kTotalMerges merges
  for (int n = 0; n < kTotalMerges; ++n) {
    const std::string key = random_key();
    const std::string value = rnd.RandomString(kOperandSize);
    ASSERT_OK(db_->Merge(WriteOptions(), key, value));
    // The max operator keeps the larger of the current value and the operand.
    std::string& expected = true_data[key];
    if (expected < value) {
      expected = value;
    }
  }

  // Overwrite random kNumPutAfter keys
  for (int n = 0; n < kNumPutAfter; ++n) {
    const std::string key = random_key();
    const std::string value = rnd.RandomString(kOperandSize);
    ASSERT_OK(db_->Put(WriteOptions(), key, value));
    true_data[key] = value;
  }

  // Delete random kNumDelete keys
  for (int n = 0; n < kNumDelete; ++n) {
    const std::string key = random_key();
    ASSERT_OK(db_->Delete(WriteOptions(), key));
    true_data.erase(key);
  }

  VerifyDBFromMap(true_data);
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack trace handler, lets GoogleTest
// consume its command-line flags, and returns the result of running all tests.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,436 +0,0 @@
// Copyright (c) 2022-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include <gtest/gtest.h>
#include <cstdint>
#include <string>
#include "db/db_test_util.h"
#include "port/stack_trace.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "test_util/testharness.h"
#include "util/file_checksum_helper.h"
namespace ROCKSDB_NAMESPACE {
// Fixture for checking that user reads are charged to the rate limiter.
// Parameterized on (use direct I/O, use block cache, use readahead).
class DBRateLimiterOnReadTest
    : public DBTestBase,
      public ::testing::WithParamInterface<std::tuple<bool, bool, bool>> {
 public:
  explicit DBRateLimiterOnReadTest()
      : DBTestBase("db_rate_limiter_on_read_test", /*env_do_fsync=*/false),
        use_direct_io_(std::get<0>(GetParam())),
        use_block_cache_(std::get<1>(GetParam())),
        use_readahead_(std::get<2>(GetParam())) {}

  // Reopens the DB with GetOptions(), writes kNumFiles files of
  // kNumKeysPerFile keys each (one flush per file) and moves them to level 1.
  void Init() {
    options_ = GetOptions();
    Reopen(options_);
    for (int file = 0; file < kNumFiles; ++file) {
      const int first_key = file * kNumKeysPerFile;
      for (int offset = 0; offset < kNumKeysPerFile; ++offset) {
        ASSERT_OK(Put(Key(first_key + offset), "val"));
      }
      ASSERT_OK(Flush());
    }
    MoveFilesToLevel(1);
  }

  // Table options with the block cache enabled or disabled per parameter.
  BlockBasedTableOptions GetTableOptions() {
    BlockBasedTableOptions table_options;
    table_options.no_block_cache = !use_block_cache_;
    return table_options;
  }

  // Read options charged at Env::IO_USER priority, with readahead set to
  // kReadaheadBytes or 0 per parameter.
  ReadOptions GetReadOptions() {
    ReadOptions read_options;
    read_options.rate_limiter_priority = Env::IO_USER;
    if (use_readahead_) {
      read_options.readahead_size = kReadaheadBytes;
    } else {
      read_options.readahead_size = 0;
    }
    return read_options;
  }

  // DB options: no auto compaction, CRC32c file checksums, a generic rate
  // limiter in kAllIo mode, and direct reads per parameter.
  Options GetOptions() {
    Options options = CurrentOptions();
    options.disable_auto_compactions = true;
    options.use_direct_reads = use_direct_io_;
    options.file_checksum_gen_factory.reset(new FileChecksumGenCrc32cFactory());
    options.table_factory.reset(NewBlockBasedTableFactory(GetTableOptions()));
    options.rate_limiter.reset(NewGenericRateLimiter(
        1 << 20 /* rate_bytes_per_sec */, 100 * 1000 /* refill_period_us */,
        10 /* fairness */, RateLimiter::Mode::kAllIo));
    return options;
  }

 protected:
  static const int kNumKeysPerFile = 1;
  static const int kNumFiles = 3;
  static const int kReadaheadBytes = 32 << 10;  // 32KB

  // Options used by the last Init(); tests query options_.rate_limiter.
  Options options_;
  const bool use_direct_io_;
  const bool use_block_cache_;
  const bool use_readahead_;
};
// Builds the readable suffix for one DBRateLimiterOnReadTest instantiation
// from its (direct I/O, block cache, readahead) parameter tuple, e.g.
// "DirectIO_NoBlockCache_Readahead".
std::string GetTestNameSuffix(
    ::testing::TestParamInfo<std::tuple<bool, bool, bool>> info) {
  const bool direct_io = std::get<0>(info.param);
  const bool block_cache = std::get<1>(info.param);
  const bool readahead = std::get<2>(info.param);

  std::string suffix = direct_io ? "DirectIO" : "BufferedIO";
  suffix += block_cache ? "_BlockCache" : "_NoBlockCache";
  suffix += readahead ? "_Readahead" : "_NoReadahead";
  return suffix;
}

// Instantiate for all eight parameter combinations, named by the suffix.
INSTANTIATE_TEST_CASE_P(DBRateLimiterOnReadTest, DBRateLimiterOnReadTest,
                        ::testing::Combine(::testing::Bool(), ::testing::Bool(),
                                           ::testing::Bool()),
                        GetTestNameSuffix);
// Reads the first key of every file twice. The first Get is expected to add
// one IO_USER rate-limiter request; the repeated Get is expected to add
// another one only when no block cache is in use.
TEST_P(DBRateLimiterOnReadTest, Get) {
  if (use_direct_io_ && !IsDirectIOSupported()) {
    return;
  }
  Init();

  ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));

  int expected = 0;
  for (int file = 0; file < kNumFiles; ++file) {
    const std::string key = Key(file * kNumKeysPerFile);

    // First read of this key.
    std::string first_value;
    ASSERT_OK(db_->Get(GetReadOptions(), key, &first_value));
    ++expected;
    ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER));

    // Repeated read of the same key.
    std::string second_value;
    ASSERT_OK(db_->Get(GetReadOptions(), key, &second_value));
    if (!use_block_cache_) {
      ++expected;
    }
    ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
  }
}
// Issues one batched MultiGet (the array-based overload) for every key and
// checks that (a) the total rate-limiter request count grew, (b) all of the
// growth was charged at Env::IO_USER priority, and (c) the IO_USER count
// equals the number of keys.
TEST_P(DBRateLimiterOnReadTest, NewMultiGet) {
  if (use_direct_io_ && !IsDirectIOSupported()) {
    return;
  }
  Init();

  ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));

  const int kNumKeys = kNumFiles * kNumKeysPerFile;
  int64_t expected = 0;
  {
    // key_bufs owns the key bytes; keys holds Slices pointing into it, so
    // key_bufs is reserved up front and must not reallocate afterwards.
    std::vector<std::string> key_bufs;
    key_bufs.reserve(kNumKeys);
    std::vector<Slice> keys;
    keys.reserve(kNumKeys);
    for (int i = 0; i < kNumKeys; ++i) {
      key_bufs.emplace_back(Key(i));
      keys.emplace_back(key_bufs[i]);
    }
    std::vector<Status> statuses(kNumKeys);
    std::vector<PinnableSlice> values(kNumKeys);
    const int64_t prev_total_rl_req = options_.rate_limiter->GetTotalRequests();
    db_->MultiGet(GetReadOptions(), dbfull()->DefaultColumnFamily(), kNumKeys,
                  keys.data(), values.data(), statuses.data());
    const int64_t cur_total_rl_req = options_.rate_limiter->GetTotalRequests();
    // ASSERT_OK (as used by the OldMultiGet test) rather than
    // ASSERT_TRUE(status.ok()), for consistency between the two tests.
    for (const Status& status : statuses) {
      ASSERT_OK(status);
    }
    ASSERT_GT(cur_total_rl_req, prev_total_rl_req);
    ASSERT_EQ(cur_total_rl_req - prev_total_rl_req,
              options_.rate_limiter->GetTotalRequests(Env::IO_USER));
  }
  expected += kNumKeys;
  ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
}
// Same check as NewMultiGet, but through the `vector<Status>`-returning
// overload: after reading every key once, the IO_USER request count must
// equal the number of keys.
TEST_P(DBRateLimiterOnReadTest, OldMultiGet) {
  // The old `vector<Status>`-returning `MultiGet()` APIs use `Read()`, which
  // supports rate limiting.
  if (use_direct_io_ && !IsDirectIOSupported()) {
    return;
  }
  Init();

  ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));

  const int kNumKeys = kNumFiles * kNumKeysPerFile;
  int expected = 0;
  {
    // key_storage owns the bytes that the Slices in `keys` point at.
    std::vector<std::string> key_storage;
    key_storage.reserve(kNumKeys);
    std::vector<Slice> keys;
    keys.reserve(kNumKeys);
    for (int idx = 0; idx < kNumKeys; ++idx) {
      key_storage.emplace_back(Key(idx));
      keys.emplace_back(key_storage.back());
    }

    std::vector<std::string> values;
    std::vector<Status> statuses =
        db_->MultiGet(GetReadOptions(), keys, &values);
    for (int idx = 0; idx < kNumKeys; ++idx) {
      ASSERT_OK(statuses[idx]);
    }
  }
  expected += kNumKeys;
  ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
}
// Scans the DB forward and then backward with one iterator and checks how
// many IO_USER rate-limiter requests the scans were charged.
TEST_P(DBRateLimiterOnReadTest, Iterator) {
  if (use_direct_io_ && !IsDirectIOSupported()) {
    return;
  }
  Init();

  std::unique_ptr<Iterator> iter(db_->NewIterator(GetReadOptions()));
  ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));

  int expected = 0;

  // Forward scan: the request count is checked after every position.
  iter->SeekToFirst();
  while (iter->Valid()) {
    ++expected;
    ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
    iter->Next();
  }

  iter->SeekToLast();
  while (iter->Valid()) {
    // When `use_block_cache_ == true`, the reverse scan will access the blocks
    // loaded to cache during the above forward scan, in which case no further
    // file reads are expected.
    if (!use_block_cache_) {
      ++expected;
    }
    iter->Prev();
  }
  // Reverse scan does not read evenly (one block per iteration) due to
  // descending seqno ordering, so wait until after the loop to check total.
  ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
}
// DB::VerifyChecksum() issued with IO_USER read options must be charged one
// rate-limiter request per file.
TEST_P(DBRateLimiterOnReadTest, VerifyChecksum) {
  if (use_direct_io_ && !IsDirectIOSupported()) {
    return;
  }
  Init();

  ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));

  const ReadOptions read_options = GetReadOptions();
  ASSERT_OK(db_->VerifyChecksum(read_options));

  // The files are tiny so there should have just been one read per file.
  int expected_requests = kNumFiles;
  ASSERT_EQ(expected_requests,
            options_.rate_limiter->GetTotalRequests(Env::IO_USER));
}
// DB::VerifyFileChecksums() issued with IO_USER read options must be charged
// one rate-limiter request per file.
TEST_P(DBRateLimiterOnReadTest, VerifyFileChecksums) {
  if (use_direct_io_ && !IsDirectIOSupported()) {
    return;
  }
  Init();

  ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));

  const ReadOptions read_options = GetReadOptions();
  ASSERT_OK(db_->VerifyFileChecksums(read_options));

  // The files are tiny so there should have just been one read per file.
  int expected_requests = kNumFiles;
  ASSERT_EQ(expected_requests,
            options_.rate_limiter->GetTotalRequests(Env::IO_USER));
}
// Fixture for checking that flush and compaction writes are charged to a
// rate limiter running in kWritesOnly mode.
class DBRateLimiterOnWriteTest : public DBTestBase {
 public:
  explicit DBRateLimiterOnWriteTest()
      : DBTestBase("db_rate_limiter_on_write_test", /*env_do_fsync=*/false) {}

  // Reopens the DB with GetOptions() and flushes kNumFiles files, each
  // holding kStartKey and kEndKey, so all files overlap on the same range.
  void Init() {
    options_ = GetOptions();
    ASSERT_OK(TryReopenWithColumnFamilies({"default"}, options_));

    Random rnd(301);
    for (int64_t file = 0; file < kNumFiles; ++file) {
      ASSERT_OK(Put(0, kStartKey, rnd.RandomString(2)));
      ASSERT_OK(Put(0, kEndKey, rnd.RandomString(2)));
      ASSERT_OK(Flush(0));
    }
  }

  // DB options: no auto compaction, default block-based table options, and a
  // generic rate limiter that limits writes only.
  Options GetOptions() {
    Options options = CurrentOptions();
    options.disable_auto_compactions = true;
    options.table_factory.reset(
        NewBlockBasedTableFactory(BlockBasedTableOptions()));
    options.rate_limiter.reset(NewGenericRateLimiter(
        1 << 20 /* rate_bytes_per_sec */, 100 * 1000 /* refill_period_us */,
        10 /* fairness */, RateLimiter::Mode::kWritesOnly));
    return options;
  }

 protected:
  inline static const int64_t kNumFiles = 3;
  inline static const std::string kStartKey = "a";
  inline static const std::string kEndKey = "b";

  // Options used by the last Init(); tests query options_.rate_limiter.
  Options options_;
};
// The kNumFiles flushes performed by Init() must show up as kNumFiles
// rate-limiter requests, all of them at Env::IO_HIGH priority.
TEST_F(DBRateLimiterOnWriteTest, Flush) {
  const std::int64_t requests_before = 0;

  Init();

  const std::int64_t flush_requests =
      options_.rate_limiter->GetTotalRequests(Env::IO_TOTAL) - requests_before;
  std::int64_t expected_flush_requests = kNumFiles;

  EXPECT_EQ(flush_requests, expected_flush_requests);
  EXPECT_EQ(flush_requests,
            options_.rate_limiter->GetTotalRequests(Env::IO_HIGH));
}
// Compacting the overlapping L0 files into a single L1 file must add exactly
// one rate-limiter request, charged at Env::IO_LOW priority.
TEST_F(DBRateLimiterOnWriteTest, Compact) {
  Init();

  // Pre-compaction:
  // level-0 : `kNumFiles` SST files overlapping on [kStartKey, kEndKey]
  const std::string layout_before = std::to_string(kNumFiles);
  ASSERT_EQ(layout_before, FilesPerLevel(0 /* cf */));

  const std::int64_t requests_before =
      options_.rate_limiter->GetTotalRequests(Env::IO_TOTAL);
  ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_LOW));

  Compact(kStartKey, kEndKey);

  const std::int64_t compaction_requests =
      options_.rate_limiter->GetTotalRequests(Env::IO_TOTAL) - requests_before;

  // Post-compaction:
  // level-0 : 0 SST file
  // level-1 : 1 SST file
  const std::string layout_after = "0,1";
  ASSERT_EQ(layout_after, FilesPerLevel(0 /* cf */));

  std::int64_t expected_compaction_requests = 1;
  EXPECT_EQ(compaction_requests, expected_compaction_requests);
  EXPECT_EQ(compaction_requests,
            options_.rate_limiter->GetTotalRequests(Env::IO_LOW));
}
// Fixture for rate limiting of automatic WAL flushes. Parameterized on
// (WriteOptions::disableWAL, Options::manual_wal_flush,
//  WriteOptions::rate_limiter_priority).
class DBRateLimiterOnWriteWALTest
    : public DBRateLimiterOnWriteTest,
      public ::testing::WithParamInterface<std::tuple<
          bool /* WriteOptions::disableWal */,
          bool /* Options::manual_wal_flush */,
          Env::IOPriority /* WriteOptions::rate_limiter_priority */>> {
 public:
  // Builds the readable test-name suffix for one parameter tuple.
  static std::string GetTestNameSuffix(
      ::testing::TestParamInfo<std::tuple<bool, bool, Env::IOPriority>> info) {
    const bool wal_disabled = std::get<0>(info.param);
    const bool manual_flush = std::get<1>(info.param);
    const Env::IOPriority priority = std::get<2>(info.param);

    std::string suffix = wal_disabled ? "DisableWAL" : "EnableWAL";
    suffix += manual_flush ? "_ManualWALFlush" : "_AutoWALFlush";
    if (priority == Env::IO_USER) {
      suffix += "_RateLimitAutoWALFlush";
    } else if (priority == Env::IO_TOTAL) {
      suffix += "_NoRateLimitAutoWALFlush";
    } else {
      suffix += "_RateLimitAutoWALFlushWithIncorrectPriority";
    }
    return suffix;
  }

  explicit DBRateLimiterOnWriteWALTest()
      : disable_wal_(std::get<0>(GetParam())),
        manual_wal_flush_(std::get<1>(GetParam())),
        rate_limiter_priority_(std::get<2>(GetParam())) {}

  // Reopens the DB with the base fixture's options plus the parameterized
  // manual_wal_flush setting. Unlike the base Init(), no data is written.
  void Init() {
    options_ = GetOptions();
    options_.manual_wal_flush = manual_wal_flush_;
    Reopen(options_);
  }

  // Write options carrying the parameterized disableWAL and priority.
  WriteOptions GetWriteOptions() {
    WriteOptions write_options;
    write_options.rate_limiter_priority = rate_limiter_priority_;
    write_options.disableWAL = disable_wal_;
    return write_options;
  }

 protected:
  bool disable_wal_;
  bool manual_wal_flush_;
  Env::IOPriority rate_limiter_priority_;
};

// Five hand-picked combinations rather than the full cross product.
INSTANTIATE_TEST_CASE_P(
    DBRateLimiterOnWriteWALTest, DBRateLimiterOnWriteWALTest,
    ::testing::Values(std::make_tuple(false, false, Env::IO_TOTAL),
                      std::make_tuple(false, false, Env::IO_USER),
                      std::make_tuple(false, false, Env::IO_HIGH),
                      std::make_tuple(false, true, Env::IO_USER),
                      std::make_tuple(true, false, Env::IO_USER)),
    DBRateLimiterOnWriteWALTest::GetTestNameSuffix);
// Performs one Put with the parameterized write options and checks both the
// returned status and the number of rate-limiter requests it produced:
//  * IO_TOTAL priority: the Put succeeds and nothing is charged;
//  * IO_USER with WAL enabled and automatic WAL flush: the Put succeeds and
//    exactly one IO_USER request is charged;
//  * anything else: the Put fails with InvalidArgument naming
//    WriteOptions::rate_limiter_priority, and nothing is charged.
TEST_P(DBRateLimiterOnWriteWALTest, AutoWalFlush) {
  Init();

  const bool rate_limit_disabled = (rate_limiter_priority_ == Env::IO_TOTAL);
  const bool supported_combination =
      (rate_limiter_priority_ == Env::IO_USER && !disable_wal_ &&
       !manual_wal_flush_);

  const std::int64_t requests_before =
      options_.rate_limiter->GetTotalRequests(Env::IO_TOTAL);
  ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));

  const Status put_status = Put("foo", "v1", GetWriteOptions());
  if (rate_limit_disabled || supported_combination) {
    EXPECT_TRUE(put_status.ok());
  } else {
    EXPECT_TRUE(put_status.IsInvalidArgument());
    EXPECT_TRUE(put_status.ToString().find(
                    "WriteOptions::rate_limiter_priority") !=
                std::string::npos);
  }

  const std::int64_t wal_flush_requests =
      options_.rate_limiter->GetTotalRequests(Env::IO_TOTAL) - requests_before;
  std::int64_t expected_wal_flush_requests = 0;
  if (supported_combination) {
    expected_wal_flush_requests = 1;
  }

  EXPECT_EQ(wal_flush_requests, expected_wal_flush_requests);
  EXPECT_EQ(wal_flush_requests,
            options_.rate_limiter->GetTotalRequests(Env::IO_USER));
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack trace handler, lets GoogleTest
// consume its command-line flags, and returns the result of running all tests.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

@ -1,956 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "db/db_with_timestamp_test_util.h"
#include "test_util/testutil.h"
namespace ROCKSDB_NAMESPACE {
class DBReadOnlyTestWithTimestamp : public DBBasicTestWithTimestampBase {
public:
DBReadOnlyTestWithTimestamp()
: DBBasicTestWithTimestampBase("db_readonly_test_with_timestamp") {}
protected:
void CheckDBOpenedAsCompactedDBWithOneLevel0File() {
VersionSet* const versions = dbfull()->GetVersionSet();
ASSERT_NE(versions, nullptr);
ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
ASSERT_NE(cfd, nullptr);
Version* const current = cfd->current();
ASSERT_NE(current, nullptr);
const VersionStorageInfo* const storage_info = current->storage_info();
ASSERT_NE(storage_info, nullptr);
// Only 1 L0 file.
ASSERT_EQ(1, NumTableFilesAtLevel(0));
// L0 is the max level.
ASSERT_EQ(storage_info->num_non_empty_levels(), 1);
}
void CheckDBOpenedAsCompactedDBWithOnlyHighestNonEmptyLevelFiles() {
VersionSet* const versions = dbfull()->GetVersionSet();
ASSERT_NE(versions, nullptr);
ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
ASSERT_NE(cfd, nullptr);
Version* const current = cfd->current();
ASSERT_NE(current, nullptr);
const VersionStorageInfo* const storage_info = current->storage_info();
ASSERT_NE(storage_info, nullptr);
// L0 has no files.
ASSERT_EQ(0, NumTableFilesAtLevel(0));
// All other levels have no files except the highest level with files.
for (int i = 1; i < storage_info->num_non_empty_levels() - 1; ++i) {
ASSERT_FALSE(storage_info->LevelFilesBrief(i).num_files > 0);
}
// The highest level with files have some files.
int highest_non_empty_level = storage_info->num_non_empty_levels() - 1;
ASSERT_TRUE(
storage_info->LevelFilesBrief(highest_non_empty_level).num_files > 0);
}
};
// Writes keys with timestamps, reopens the DB read-only, and checks that
// reading with a timestamp built by PutFixed32 (a different size than the
// comparator's) makes both NewIterator and Get report InvalidArgument.
TEST_F(DBReadOnlyTestWithTimestamp, IteratorAndGetReadTimestampSizeMismatch) {
  const int kNumKeysPerFile = 128;
  const uint64_t kMaxKey = 1024;

  Options options = CurrentOptions();
  options.env = env_;
  options.create_if_missing = true;
  const size_t kTimestampSize = Timestamp(0, 0).size();
  // The comparator is referenced by `options`, so it lives for the whole test.
  TestComparator test_cmp(kTimestampSize);
  options.comparator = &test_cmp;
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  DestroyAndReopen(options);

  const std::string write_timestamp = Timestamp(1, 0);
  WriteOptions write_opts;
  for (uint64_t key = 0; key <= kMaxKey; ++key) {
    ASSERT_OK(db_->Put(write_opts, Key1(key), write_timestamp,
                       "value" + std::to_string(key)));
  }

  // Reopen the database in read only mode to test its timestamp support.
  Close();
  ASSERT_OK(ReadOnlyReopen(options));

  // Read timestamp encoded as a fixed 32-bit value.
  std::string different_size_read_timestamp;
  PutFixed32(&different_size_read_timestamp, 2);
  Slice different_size_read_ts = different_size_read_timestamp;
  ReadOptions read_opts;
  read_opts.timestamp = &different_size_read_ts;

  {
    std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts));
    ASSERT_FALSE(iter->Valid());
    ASSERT_TRUE(iter->status().IsInvalidArgument());
  }

  for (uint64_t key = 0; key <= kMaxKey; ++key) {
    std::string value_from_get;
    std::string timestamp;
    const Status get_status =
        db_->Get(read_opts, Key1(key), &value_from_get, &timestamp);
    ASSERT_TRUE(get_status.IsInvalidArgument());
  }

  Close();
}
// Writes keys without timestamps (no timestamp-aware comparator is set),
// reopens the DB read-only, and checks that supplying a read timestamp makes
// both NewIterator and Get report InvalidArgument.
TEST_F(DBReadOnlyTestWithTimestamp,
       IteratorAndGetReadTimestampSpecifiedWithoutWriteTimestamp) {
  const int kNumKeysPerFile = 128;
  const uint64_t kMaxKey = 1024;

  Options options = CurrentOptions();
  options.env = env_;
  options.create_if_missing = true;
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  DestroyAndReopen(options);

  WriteOptions write_opts;
  for (uint64_t key = 0; key <= kMaxKey; ++key) {
    ASSERT_OK(
        db_->Put(write_opts, Key1(key), "value" + std::to_string(key)));
  }

  // Reopen the database in read only mode to test its timestamp support.
  Close();
  ASSERT_OK(ReadOnlyReopen(options));

  const std::string read_timestamp = Timestamp(2, 0);
  Slice read_ts = read_timestamp;
  ReadOptions read_opts;
  read_opts.timestamp = &read_ts;

  {
    std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts));
    ASSERT_FALSE(iter->Valid());
    ASSERT_TRUE(iter->status().IsInvalidArgument());
  }

  for (uint64_t key = 0; key <= kMaxKey; ++key) {
    std::string value_from_get;
    std::string timestamp;
    const Status get_status =
        db_->Get(read_opts, Key1(key), &value_from_get, &timestamp);
    ASSERT_TRUE(get_status.IsInvalidArgument());
  }

  Close();
}
// Writes keys with timestamps, reopens the DB read-only, and checks that
// reading without any read timestamp makes both NewIterator and Get report
// InvalidArgument.
TEST_F(DBReadOnlyTestWithTimestamp,
       IteratorAndGetWriteWithTimestampReadWithoutTimestamp) {
  const int kNumKeysPerFile = 128;
  const uint64_t kMaxKey = 1024;

  Options options = CurrentOptions();
  options.env = env_;
  options.create_if_missing = true;
  const size_t kTimestampSize = Timestamp(0, 0).size();
  // The comparator is referenced by `options`, so it lives for the whole test.
  TestComparator test_cmp(kTimestampSize);
  options.comparator = &test_cmp;
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  DestroyAndReopen(options);

  const std::string write_timestamp = Timestamp(1, 0);
  WriteOptions write_opts;
  for (uint64_t key = 0; key <= kMaxKey; ++key) {
    ASSERT_OK(db_->Put(write_opts, Key1(key), write_timestamp,
                       "value" + std::to_string(key)));
  }

  // Reopen the database in read only mode to test its timestamp support.
  Close();
  ASSERT_OK(ReadOnlyReopen(options));

  // Deliberately left without a timestamp.
  ReadOptions read_opts;

  {
    std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts));
    ASSERT_FALSE(iter->Valid());
    ASSERT_TRUE(iter->status().IsInvalidArgument());
  }

  for (uint64_t key = 0; key <= kMaxKey; ++key) {
    std::string value_from_get;
    const Status get_status = db_->Get(read_opts, Key1(key), &value_from_get);
    ASSERT_TRUE(get_status.IsInvalidArgument());
  }

  Close();
}
// Writes the key range twice -- first with Timestamp(1, 0) starting at key 1,
// then with Timestamp(3, 0) starting at key 0 -- reopens the DB read-only, and
// checks forward, backward and bounded iteration plus Get() at the read
// timestamps Timestamp(2, 0) and Timestamp(4, 0).
TEST_F(DBReadOnlyTestWithTimestamp, IteratorAndGet) {
const int kNumKeysPerFile = 128;
const uint64_t kMaxKey = 1024;
Options options = CurrentOptions();
options.env = env_;
options.create_if_missing = true;
const size_t kTimestampSize = Timestamp(0, 0).size();
TestComparator test_cmp(kTimestampSize);
options.comparator = &test_cmp;
options.memtable_factory.reset(
test::NewSpecialSkipListFactory(kNumKeysPerFile));
DestroyAndReopen(options);
// Index i selects one write pass: its first key, its write timestamp, and the
// read timestamp later used to observe that pass.
const std::vector<uint64_t> start_keys = {1, 0};
const std::vector<std::string> write_timestamps = {Timestamp(1, 0),
Timestamp(3, 0)};
const std::vector<std::string> read_timestamps = {Timestamp(2, 0),
Timestamp(4, 0)};
for (size_t i = 0; i < write_timestamps.size(); ++i) {
WriteOptions write_opts;
for (uint64_t key = start_keys[i]; key <= kMaxKey; ++key) {
Status s = db_->Put(write_opts, Key1(key), write_timestamps[i],
"value" + std::to_string(i));
ASSERT_OK(s);
}
}
// Reopen the database in read only mode to test its timestamp support.
Close();
ASSERT_OK(ReadOnlyReopen(options));
// Looks `key` up with Get() and compares both the value and the returned
// timestamp. The ASSERT_* macros in here leave only the lambda on failure,
// not the enclosing test body.
auto get_value_and_check = [](DB* db, ReadOptions read_opts, Slice key,
Slice expected_value, std::string expected_ts) {
std::string value_from_get;
std::string timestamp;
ASSERT_OK(db->Get(read_opts, key.ToString(), &value_from_get, &timestamp));
ASSERT_EQ(expected_value, value_from_get);
ASSERT_EQ(expected_ts, timestamp);
};
for (size_t i = 0; i < read_timestamps.size(); ++i) {
ReadOptions read_opts;
Slice read_ts = read_timestamps[i];
read_opts.timestamp = &read_ts;
std::unique_ptr<Iterator> it(db_->NewIterator(read_opts));
int count = 0;
uint64_t key = 0;
// Forward iterate.
for (it->Seek(Key1(0)), key = start_keys[i]; it->Valid();
it->Next(), ++count, ++key) {
CheckIterUserEntry(it.get(), Key1(key), kTypeValue,
"value" + std::to_string(i), write_timestamps[i]);
get_value_and_check(db_, read_opts, it->key(), it->value(),
write_timestamps[i]);
}
// Pass i wrote keys start_keys[i]..kMaxKey inclusive.
size_t expected_count = kMaxKey - start_keys[i] + 1;
ASSERT_EQ(expected_count, count);
// Backward iterate.
count = 0;
for (it->SeekForPrev(Key1(kMaxKey)), key = kMaxKey; it->Valid();
it->Prev(), ++count, --key) {
CheckIterUserEntry(it.get(), Key1(key), kTypeValue,
"value" + std::to_string(i), write_timestamps[i]);
get_value_and_check(db_, read_opts, it->key(), it->value(),
write_timestamps[i]);
}
ASSERT_EQ(static_cast<size_t>(kMaxKey) - start_keys[i] + 1, count);
// SeekToFirst()/SeekToLast() with lower/upper bounds.
// Then iter with lower and upper bounds.
// The window [l, r) shrinks by kMaxKey / 100 at each end per round.
uint64_t l = 0;
uint64_t r = kMaxKey + 1;
while (l < r) {
// NOTE(review): lb/ub live for one while-iteration only, whereas `it` keeps
// the ReadOptions pointing at them until it is reset or destroyed later --
// confirm the iterator never touches its bounds after this scope ends.
std::string lb_str = Key1(l);
Slice lb = lb_str;
std::string ub_str = Key1(r);
Slice ub = ub_str;
read_opts.iterate_lower_bound = &lb;
read_opts.iterate_upper_bound = &ub;
it.reset(db_->NewIterator(read_opts));
// Forward scan inside the bounds; the first expected key is the larger of
// the lower bound and the first key this pass wrote.
for (it->SeekToFirst(), key = std::max(l, start_keys[i]), count = 0;
it->Valid(); it->Next(), ++key, ++count) {
CheckIterUserEntry(it.get(), Key1(key), kTypeValue,
"value" + std::to_string(i), write_timestamps[i]);
get_value_and_check(db_, read_opts, it->key(), it->value(),
write_timestamps[i]);
}
ASSERT_EQ(r - std::max(l, start_keys[i]), count);
// Backward scan inside the bounds; `key` starts one past the last expected
// key, hence the Key1(key - 1) below.
// NOTE(review): unlike the forward scan, no count assertion follows this
// loop.
for (it->SeekToLast(), key = std::min(r, kMaxKey + 1), count = 0;
it->Valid(); it->Prev(), --key, ++count) {
CheckIterUserEntry(it.get(), Key1(key - 1), kTypeValue,
"value" + std::to_string(i), write_timestamps[i]);
get_value_and_check(db_, read_opts, it->key(), it->value(),
write_timestamps[i]);
}
l += (kMaxKey / 100);
r -= (kMaxKey / 100);
}
}
Close();
}
// Exercises DB::NewIterators() on a read-only DB written with timestamps:
// a forward scan at read timestamp Timestamp(2, 0) must return every key
// written at Timestamp(1, 0), in order.
TEST_F(DBReadOnlyTestWithTimestamp, Iterators) {
  const int kNumKeysPerFile = 128;
  const uint64_t kMaxKey = 1024;
  Options options = CurrentOptions();
  options.env = env_;
  options.create_if_missing = true;
  const size_t kTimestampSize = Timestamp(0, 0).size();
  TestComparator test_cmp(kTimestampSize);
  options.comparator = &test_cmp;
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  DestroyAndReopen(options);
  const std::string write_timestamp = Timestamp(1, 0);
  const std::string read_timestamp = Timestamp(2, 0);
  WriteOptions write_opts;
  for (uint64_t key = 0; key <= kMaxKey; ++key) {
    Status s = db_->Put(write_opts, Key1(key), write_timestamp,
                        "value" + std::to_string(key));
    ASSERT_OK(s);
  }

  // Reopen the database in read only mode to test its timestamp support.
  Close();
  ASSERT_OK(ReadOnlyReopen(options));
  ReadOptions read_opts;
  Slice read_ts = read_timestamp;
  read_opts.timestamp = &read_ts;

  std::vector<Iterator*> raw_iters;
  ASSERT_OK(
      db_->NewIterators(read_opts, {db_->DefaultColumnFamily()}, &raw_iters));
  // Take ownership right away: any ASSERT_* below returns from the test body,
  // and a trailing `delete` would then be skipped, leaking the iterator.
  std::vector<std::unique_ptr<Iterator>> iters;
  for (Iterator* raw : raw_iters) {
    iters.emplace_back(raw);
  }
  raw_iters.clear();
  ASSERT_EQ(static_cast<size_t>(1), iters.size());
  Iterator* const iter = iters[0].get();

  // Forward iterate.
  size_t count = 0;
  uint64_t key = 0;
  for (iter->Seek(Key1(0)); iter->Valid(); iter->Next(), ++count, ++key) {
    CheckIterUserEntry(iter, Key1(key), kTypeValue,
                       "value" + std::to_string(key), write_timestamp);
  }
  // Distinguish "ran off the end" from "stopped on an error".
  ASSERT_OK(iter->status());
  const size_t expected_count = kMaxKey - 0 + 1;
  ASSERT_EQ(expected_count, count);

  // Release the iterator before closing the DB, as the original test did.
  iters.clear();
  Close();
}
// NewIterators() on a read-only DB is expected to return InvalidArgument when
// the read timestamp is built with PutFixed32() instead of Timestamp().
TEST_F(DBReadOnlyTestWithTimestamp, IteratorsReadTimestampSizeMismatch) {
  const int kNumKeysPerFile = 128;
  const uint64_t kMaxKey = 1024;
  Options opts = CurrentOptions();
  opts.env = env_;
  opts.create_if_missing = true;
  const size_t kTimestampSize = Timestamp(0, 0).size();
  TestComparator ts_cmp(kTimestampSize);
  opts.comparator = &ts_cmp;
  opts.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  DestroyAndReopen(opts);

  const std::string write_ts = Timestamp(1, 0);
  WriteOptions wopts;
  for (uint64_t k = 0; k <= kMaxKey; ++k) {
    ASSERT_OK(db_->Put(wopts, Key1(k), write_ts, "value" + std::to_string(k)));
  }

  // Reopen the database in read only mode to test its timestamp support.
  Close();
  ASSERT_OK(ReadOnlyReopen(opts));

  // Read timestamp encoded differently from the ones used for writing.
  ReadOptions ropts;
  std::string mismatched_ts_buf;
  PutFixed32(&mismatched_ts_buf, 2);
  Slice mismatched_ts = mismatched_ts_buf;
  ropts.timestamp = &mismatched_ts;
  {
    std::vector<Iterator*> iters;
    const Status s =
        db_->NewIterators(ropts, {db_->DefaultColumnFamily()}, &iters);
    ASSERT_TRUE(s.IsInvalidArgument());
  }
  Close();
}
// The DB is written without a timestamp-aware comparator and without write
// timestamps; NewIterators() with a read timestamp set is then expected to
// return InvalidArgument.
TEST_F(DBReadOnlyTestWithTimestamp,
       IteratorsReadTimestampSpecifiedWithoutWriteTimestamp) {
  const int kNumKeysPerFile = 128;
  const uint64_t kMaxKey = 1024;
  Options opts = CurrentOptions();
  opts.env = env_;
  opts.create_if_missing = true;
  opts.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  DestroyAndReopen(opts);

  WriteOptions wopts;
  for (uint64_t k = 0; k <= kMaxKey; ++k) {
    ASSERT_OK(db_->Put(wopts, Key1(k), "value" + std::to_string(k)));
  }

  // Reopen the database in read only mode to test its timestamp support.
  Close();
  ASSERT_OK(ReadOnlyReopen(opts));

  ReadOptions ropts;
  const std::string read_ts_buf = Timestamp(2, 0);
  Slice read_ts = read_ts_buf;
  ropts.timestamp = &read_ts;
  {
    std::vector<Iterator*> iters;
    const Status s =
        db_->NewIterators(ropts, {db_->DefaultColumnFamily()}, &iters);
    ASSERT_TRUE(s.IsInvalidArgument());
  }
  Close();
}
// The DB is written with timestamps; NewIterators() with ReadOptions that
// carry no timestamp is then expected to return InvalidArgument.
TEST_F(DBReadOnlyTestWithTimestamp,
       IteratorsWriteWithTimestampReadWithoutTimestamp) {
  const int kNumKeysPerFile = 128;
  const uint64_t kMaxKey = 1024;
  Options opts = CurrentOptions();
  opts.env = env_;
  opts.create_if_missing = true;
  const size_t kTimestampSize = Timestamp(0, 0).size();
  TestComparator ts_cmp(kTimestampSize);
  opts.comparator = &ts_cmp;
  opts.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  DestroyAndReopen(opts);

  const std::string write_ts = Timestamp(1, 0);
  WriteOptions wopts;
  for (uint64_t k = 0; k <= kMaxKey; ++k) {
    ASSERT_OK(db_->Put(wopts, Key1(k), write_ts, "value" + std::to_string(k)));
  }

  // Reopen the database in read only mode to test its timestamp support.
  Close();
  ASSERT_OK(ReadOnlyReopen(opts));

  // No read timestamp is ever assigned to these ReadOptions.
  ReadOptions ropts;
  {
    std::vector<Iterator*> iters;
    const Status s =
        db_->NewIterators(ropts, {db_->DefaultColumnFamily()}, &iters);
    ASSERT_TRUE(s.IsInvalidArgument());
  }
  Close();
}
// Get() on a DB reopened read-only as a CompactedDB (one L0 file) is expected
// to return InvalidArgument when the read timestamp is built with
// PutFixed32() instead of Timestamp().
TEST_F(DBReadOnlyTestWithTimestamp, CompactedDBGetReadTimestampSizeMismatch) {
  const int kNumKeysPerFile = 1026;
  const uint64_t kMaxKey = 1024;
  Options opts = CurrentOptions();
  opts.env = env_;
  opts.create_if_missing = true;
  opts.disable_auto_compactions = true;
  const size_t kTimestampSize = Timestamp(0, 0).size();
  TestComparator ts_cmp(kTimestampSize);
  opts.comparator = &ts_cmp;
  opts.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  DestroyAndReopen(opts);

  // Every key gets the same value and the same write timestamp.
  std::string write_ts = Timestamp(1, 0);
  WriteOptions wopts;
  for (uint64_t k = 0; k <= kMaxKey; ++k) {
    ASSERT_OK(db_->Put(wopts, Key1(k), write_ts, "value" + std::to_string(0)));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  Close();

  // Reopen the database in read only mode as a Compacted DB to test its
  // timestamp support.
  opts.max_open_files = -1;
  ASSERT_OK(ReadOnlyReopen(opts));
  CheckDBOpenedAsCompactedDBWithOneLevel0File();

  ReadOptions ropts;
  std::string mismatched_ts_buf;
  PutFixed32(&mismatched_ts_buf, 2);
  Slice mismatched_ts = mismatched_ts_buf;
  ropts.timestamp = &mismatched_ts;
  for (uint64_t k = 0; k <= kMaxKey; ++k) {
    std::string value;
    std::string ts_out;
    const Status get_status = db_->Get(ropts, Key1(k), &value, &ts_out);
    ASSERT_TRUE(get_status.IsInvalidArgument());
  }
  Close();
}
// The DB is written without timestamps and reopened read-only as a
// CompactedDB; Get() with a read timestamp set is then expected to return
// InvalidArgument.
TEST_F(DBReadOnlyTestWithTimestamp,
       CompactedDBGetReadTimestampSpecifiedWithoutWriteTimestamp) {
  const int kNumKeysPerFile = 1026;
  const uint64_t kMaxKey = 1024;
  Options opts = CurrentOptions();
  opts.env = env_;
  opts.create_if_missing = true;
  opts.disable_auto_compactions = true;
  opts.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  DestroyAndReopen(opts);

  WriteOptions wopts;
  for (uint64_t k = 0; k <= kMaxKey; ++k) {
    ASSERT_OK(db_->Put(wopts, Key1(k), "value" + std::to_string(0)));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  Close();

  // Reopen the database in read only mode as a Compacted DB to test its
  // timestamp support.
  opts.max_open_files = -1;
  ASSERT_OK(ReadOnlyReopen(opts));
  CheckDBOpenedAsCompactedDBWithOneLevel0File();

  ReadOptions ropts;
  const std::string read_ts_buf = Timestamp(2, 0);
  Slice read_ts = read_ts_buf;
  ropts.timestamp = &read_ts;
  for (uint64_t k = 0; k <= kMaxKey; ++k) {
    std::string value;
    std::string ts_out;
    const Status get_status = db_->Get(ropts, Key1(k), &value, &ts_out);
    ASSERT_TRUE(get_status.IsInvalidArgument());
  }
  Close();
}
// The DB is written with timestamps and reopened read-only as a CompactedDB;
// Get() with ReadOptions that carry no timestamp is then expected to return
// InvalidArgument.
TEST_F(DBReadOnlyTestWithTimestamp,
       CompactedDBGetWriteWithTimestampReadWithoutTimestamp) {
  const int kNumKeysPerFile = 1026;
  const uint64_t kMaxKey = 1024;
  Options opts = CurrentOptions();
  opts.env = env_;
  opts.create_if_missing = true;
  opts.disable_auto_compactions = true;
  const size_t kTimestampSize = Timestamp(0, 0).size();
  TestComparator ts_cmp(kTimestampSize);
  opts.comparator = &ts_cmp;
  opts.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  DestroyAndReopen(opts);

  std::string write_ts = Timestamp(1, 0);
  WriteOptions wopts;
  for (uint64_t k = 0; k <= kMaxKey; ++k) {
    ASSERT_OK(db_->Put(wopts, Key1(k), write_ts, "value" + std::to_string(0)));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  Close();

  // Reopen the database in read only mode as a Compacted DB to test its
  // timestamp support.
  opts.max_open_files = -1;
  ASSERT_OK(ReadOnlyReopen(opts));
  CheckDBOpenedAsCompactedDBWithOneLevel0File();

  // No read timestamp is ever assigned to these ReadOptions.
  ReadOptions ropts;
  for (uint64_t k = 0; k <= kMaxKey; ++k) {
    std::string value;
    const Status get_status = db_->Get(ropts, Key1(k), &value);
    ASSERT_TRUE(get_status.IsInvalidArgument());
  }
  Close();
}
// Two write passes (Timestamp(1, 0) from key 1, Timestamp(3, 0) from key 0)
// are flushed, the DB is reopened read-only as a CompactedDB with one L0 file,
// and Get() at read timestamps Timestamp(2, 0) / Timestamp(4, 0) must return
// the value and write timestamp of the matching pass.
TEST_F(DBReadOnlyTestWithTimestamp, CompactedDBGetWithOnlyOneL0File) {
  const int kNumKeysPerFile = 1026 * 2;
  const uint64_t kMaxKey = 1024;
  Options opts = CurrentOptions();
  opts.env = env_;
  opts.create_if_missing = true;
  opts.disable_auto_compactions = true;
  const size_t kTimestampSize = Timestamp(0, 0).size();
  TestComparator ts_cmp(kTimestampSize);
  opts.comparator = &ts_cmp;
  opts.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  DestroyAndReopen(opts);

  const std::vector<uint64_t> first_keys = {1, 0};
  const std::vector<std::string> write_timestamps = {Timestamp(1, 0),
                                                     Timestamp(3, 0)};
  const std::vector<std::string> read_timestamps = {Timestamp(2, 0),
                                                    Timestamp(4, 0)};
  for (size_t pass = 0; pass < write_timestamps.size(); ++pass) {
    WriteOptions wopts;
    for (uint64_t k = first_keys[pass]; k <= kMaxKey; ++k) {
      ASSERT_OK(db_->Put(wopts, Key1(k), write_timestamps[pass],
                         "value" + std::to_string(pass)));
    }
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  Close();

  // Reopen the database in read only mode as a Compacted DB to test its
  // timestamp support.
  opts.max_open_files = -1;
  ASSERT_OK(ReadOnlyReopen(opts));
  CheckDBOpenedAsCompactedDBWithOneLevel0File();

  for (size_t pass = 0; pass < read_timestamps.size(); ++pass) {
    ReadOptions ropts;
    Slice read_ts = read_timestamps[pass];
    ropts.timestamp = &read_ts;
    int found = 0;
    for (uint64_t k = first_keys[pass]; k <= kMaxKey; ++k) {
      std::string value;
      std::string ts_out;
      ASSERT_OK(db_->Get(ropts, Key1(k), &value, &ts_out));
      ASSERT_EQ("value" + std::to_string(pass), value);
      ASSERT_EQ(write_timestamps[pass], ts_out);
      ++found;
    }
    // Keys first_keys[pass]..kMaxKey inclusive.
    size_t want_found = kMaxKey - first_keys[pass] + 1;
    ASSERT_EQ(want_found, found);
  }
  Close();
}
// Same checks as the single-L0-file Get() test, but the data is additionally
// compacted with CompactRange() before the read-only reopen, and the fixture
// helper verifies the "only highest non-empty level files" CompactedDB shape.
TEST_F(DBReadOnlyTestWithTimestamp,
       CompactedDBGetWithOnlyHighestNonEmptyLevelFiles) {
  const int kNumKeysPerFile = 128;
  const uint64_t kMaxKey = 1024;
  Options opts = CurrentOptions();
  opts.env = env_;
  opts.create_if_missing = true;
  opts.disable_auto_compactions = true;
  const size_t kTimestampSize = Timestamp(0, 0).size();
  TestComparator ts_cmp(kTimestampSize);
  opts.comparator = &ts_cmp;
  opts.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  DestroyAndReopen(opts);

  const std::vector<uint64_t> first_keys = {1, 0};
  const std::vector<std::string> write_timestamps = {Timestamp(1, 0),
                                                     Timestamp(3, 0)};
  const std::vector<std::string> read_timestamps = {Timestamp(2, 0),
                                                    Timestamp(4, 0)};
  for (size_t pass = 0; pass < write_timestamps.size(); ++pass) {
    WriteOptions wopts;
    for (uint64_t k = first_keys[pass]; k <= kMaxKey; ++k) {
      ASSERT_OK(db_->Put(wopts, Key1(k), write_timestamps[pass],
                         "value" + std::to_string(pass)));
    }
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  Close();

  // Reopen the database in read only mode as a Compacted DB to test its
  // timestamp support.
  opts.max_open_files = -1;
  ASSERT_OK(ReadOnlyReopen(opts));
  CheckDBOpenedAsCompactedDBWithOnlyHighestNonEmptyLevelFiles();

  for (size_t pass = 0; pass < read_timestamps.size(); ++pass) {
    ReadOptions ropts;
    Slice read_ts = read_timestamps[pass];
    ropts.timestamp = &read_ts;
    int found = 0;
    for (uint64_t k = first_keys[pass]; k <= kMaxKey; ++k) {
      std::string value;
      std::string ts_out;
      ASSERT_OK(db_->Get(ropts, Key1(k), &value, &ts_out));
      ASSERT_EQ("value" + std::to_string(pass), value);
      ASSERT_EQ(write_timestamps[pass], ts_out);
      ++found;
    }
    // Keys first_keys[pass]..kMaxKey inclusive.
    size_t want_found = kMaxKey - first_keys[pass] + 1;
    ASSERT_EQ(want_found, found);
  }
  Close();
}
// MultiGet() on a DB reopened read-only as a CompactedDB is expected to
// return InvalidArgument for every key when the read timestamp is built with
// PutFixed32() instead of Timestamp().
TEST_F(DBReadOnlyTestWithTimestamp,
       CompactedDBMultiGetReadTimestampSizeMismatch) {
  const int kNumKeysPerFile = 1026;
  const uint64_t kMaxKey = 1024;
  Options opts = CurrentOptions();
  opts.env = env_;
  opts.create_if_missing = true;
  opts.disable_auto_compactions = true;
  const size_t kTimestampSize = Timestamp(0, 0).size();
  TestComparator ts_cmp(kTimestampSize);
  opts.comparator = &ts_cmp;
  opts.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  DestroyAndReopen(opts);

  std::string write_ts = Timestamp(1, 0);
  WriteOptions wopts;
  for (uint64_t k = 0; k <= kMaxKey; ++k) {
    ASSERT_OK(db_->Put(wopts, Key1(k), write_ts, "value" + std::to_string(0)));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  Close();

  // Reopen the database in read only mode as a Compacted DB to test its
  // timestamp support.
  opts.max_open_files = -1;
  ASSERT_OK(ReadOnlyReopen(opts));
  CheckDBOpenedAsCompactedDBWithOneLevel0File();

  ReadOptions ropts;
  std::string mismatched_ts_buf;
  PutFixed32(&mismatched_ts_buf, 2);
  Slice mismatched_ts = mismatched_ts_buf;
  ropts.timestamp = &mismatched_ts;

  // Build the owning strings first; the Slices below point into them, so
  // key_strs must not change once `keys` has been created.
  std::vector<std::string> key_strs;
  for (uint64_t k = 0; k <= kMaxKey; ++k) {
    key_strs.push_back(Key1(k));
  }
  std::vector<Slice> keys(key_strs.begin(), key_strs.end());

  std::vector<std::string> values;
  std::vector<std::string> timestamps;
  std::vector<Status> statuses =
      db_->MultiGet(ropts, keys, &values, &timestamps);
  for (size_t idx = 0; idx < statuses.size(); ++idx) {
    ASSERT_TRUE(statuses[idx].IsInvalidArgument());
  }
  Close();
}
// The DB is written without timestamps and reopened read-only as a
// CompactedDB; MultiGet() with a read timestamp set is then expected to
// return InvalidArgument for every key.
TEST_F(DBReadOnlyTestWithTimestamp,
       CompactedDBMultiGetReadTimestampSpecifiedWithoutWriteTimestamp) {
  const int kNumKeysPerFile = 1026;
  const uint64_t kMaxKey = 1024;
  Options opts = CurrentOptions();
  opts.env = env_;
  opts.create_if_missing = true;
  opts.disable_auto_compactions = true;
  opts.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  DestroyAndReopen(opts);

  WriteOptions wopts;
  for (uint64_t k = 0; k <= kMaxKey; ++k) {
    ASSERT_OK(db_->Put(wopts, Key1(k), "value" + std::to_string(0)));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  Close();

  // Reopen the database in read only mode as a Compacted DB to test its
  // timestamp support.
  opts.max_open_files = -1;
  ASSERT_OK(ReadOnlyReopen(opts));
  CheckDBOpenedAsCompactedDBWithOneLevel0File();

  ReadOptions ropts;
  std::string read_ts_buf = Timestamp(2, 0);
  Slice read_ts = read_ts_buf;
  ropts.timestamp = &read_ts;

  // Build the owning strings first; the Slices below point into them, so
  // key_strs must not change once `keys` has been created.
  std::vector<std::string> key_strs;
  for (uint64_t k = 0; k <= kMaxKey; ++k) {
    key_strs.push_back(Key1(k));
  }
  std::vector<Slice> keys(key_strs.begin(), key_strs.end());

  std::vector<std::string> values;
  std::vector<std::string> timestamps;
  std::vector<Status> statuses =
      db_->MultiGet(ropts, keys, &values, &timestamps);
  for (size_t idx = 0; idx < statuses.size(); ++idx) {
    ASSERT_TRUE(statuses[idx].IsInvalidArgument());
  }
  Close();
}
// The DB is written with timestamps and reopened read-only as a CompactedDB;
// MultiGet() with ReadOptions that carry no timestamp is then expected to
// return InvalidArgument for every key.
TEST_F(DBReadOnlyTestWithTimestamp,
       CompactedDBMultiGetWriteWithTimestampReadWithoutTimestamp) {
  const int kNumKeysPerFile = 1026;
  const uint64_t kMaxKey = 1024;
  Options opts = CurrentOptions();
  opts.env = env_;
  opts.create_if_missing = true;
  opts.disable_auto_compactions = true;
  const size_t kTimestampSize = Timestamp(0, 0).size();
  TestComparator ts_cmp(kTimestampSize);
  opts.comparator = &ts_cmp;
  opts.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  DestroyAndReopen(opts);

  std::string write_ts = Timestamp(1, 0);
  WriteOptions wopts;
  for (uint64_t k = 0; k <= kMaxKey; ++k) {
    ASSERT_OK(db_->Put(wopts, Key1(k), write_ts, "value" + std::to_string(0)));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  Close();

  // Reopen the database in read only mode as a Compacted DB to test its
  // timestamp support.
  opts.max_open_files = -1;
  ASSERT_OK(ReadOnlyReopen(opts));
  CheckDBOpenedAsCompactedDBWithOneLevel0File();

  // No read timestamp is ever assigned to these ReadOptions.
  ReadOptions ropts;

  // Build the owning strings first; the Slices below point into them, so
  // key_strs must not change once `keys` has been created.
  std::vector<std::string> key_strs;
  for (uint64_t k = 0; k <= kMaxKey; ++k) {
    key_strs.push_back(Key1(k));
  }
  std::vector<Slice> keys(key_strs.begin(), key_strs.end());

  std::vector<std::string> values;
  std::vector<Status> statuses = db_->MultiGet(ropts, keys, &values);
  for (size_t idx = 0; idx < statuses.size(); ++idx) {
    ASSERT_TRUE(statuses[idx].IsInvalidArgument());
  }
  Close();
}
// Two write passes (Timestamp(1, 0) from key 1, Timestamp(3, 0) from key 0)
// are flushed, the DB is reopened read-only as a CompactedDB with one L0 file,
// and a batched MultiGet() at read timestamps Timestamp(2, 0) /
// Timestamp(4, 0) must return the value and write timestamp of the matching
// pass for every key.
TEST_F(DBReadOnlyTestWithTimestamp, CompactedDBMultiGetWithOnlyOneL0File) {
  const int kNumKeysPerFile = 1026 * 2;
  const uint64_t kMaxKey = 1024;
  Options options = CurrentOptions();
  options.env = env_;
  options.create_if_missing = true;
  options.disable_auto_compactions = true;
  const size_t kTimestampSize = Timestamp(0, 0).size();
  TestComparator test_cmp(kTimestampSize);
  options.comparator = &test_cmp;
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  DestroyAndReopen(options);
  const std::vector<uint64_t> start_keys = {1, 0};
  const std::vector<std::string> write_timestamps = {Timestamp(1, 0),
                                                     Timestamp(3, 0)};
  const std::vector<std::string> read_timestamps = {Timestamp(2, 0),
                                                    Timestamp(4, 0)};
  for (size_t i = 0; i < write_timestamps.size(); ++i) {
    WriteOptions write_opts;
    for (uint64_t key = start_keys[i]; key <= kMaxKey; ++key) {
      Status s = db_->Put(write_opts, Key1(key), write_timestamps[i],
                          "value" + std::to_string(i));
      ASSERT_OK(s);
    }
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  Close();

  // Reopen the database in read only mode as a Compacted DB to test its
  // timestamp support.
  options.max_open_files = -1;
  ASSERT_OK(ReadOnlyReopen(options));
  CheckDBOpenedAsCompactedDBWithOneLevel0File();

  for (size_t i = 0; i < write_timestamps.size(); ++i) {
    ReadOptions read_opts;
    Slice read_ts = read_timestamps[i];
    read_opts.timestamp = &read_ts;
    // key_strs owns the bytes; `keys` holds Slices into it and is filled only
    // after key_strs is complete.
    std::vector<std::string> key_strs;
    std::vector<Slice> keys;
    for (uint64_t key = start_keys[i]; key <= kMaxKey; ++key) {
      key_strs.push_back(Key1(key));
    }
    for (const auto& key_str : key_strs) {
      keys.emplace_back(key_str);
    }
    size_t batch_size = kMaxKey - start_keys[i] + 1;
    std::vector<std::string> values;
    std::vector<std::string> timestamps;
    std::vector<Status> status_list =
        db_->MultiGet(read_opts, keys, &values, &timestamps);
    // Validate all three result vectors before indexing any of them.
    ASSERT_EQ(batch_size, status_list.size());
    ASSERT_EQ(batch_size, values.size());
    ASSERT_EQ(batch_size, timestamps.size());
    for (size_t idx = 0; idx < batch_size; ++idx) {
      // Check the status first: a failed lookup should be reported as such,
      // not as a value/timestamp mismatch.
      ASSERT_OK(status_list[idx]);
      ASSERT_EQ("value" + std::to_string(i), values[idx]);
      ASSERT_EQ(write_timestamps[i], timestamps[idx]);
    }
  }
  Close();
}
// Same checks as the single-L0-file MultiGet() test, but the data is
// additionally compacted with CompactRange() before the read-only reopen, and
// the fixture helper verifies the "only highest non-empty level files"
// CompactedDB shape.
TEST_F(DBReadOnlyTestWithTimestamp,
       CompactedDBMultiGetWithOnlyHighestNonEmptyLevelFiles) {
  const int kNumKeysPerFile = 128;
  const uint64_t kMaxKey = 1024;
  Options options = CurrentOptions();
  options.env = env_;
  options.create_if_missing = true;
  options.disable_auto_compactions = true;
  const size_t kTimestampSize = Timestamp(0, 0).size();
  TestComparator test_cmp(kTimestampSize);
  options.comparator = &test_cmp;
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  DestroyAndReopen(options);
  const std::vector<uint64_t> start_keys = {1, 0};
  const std::vector<std::string> write_timestamps = {Timestamp(1, 0),
                                                     Timestamp(3, 0)};
  const std::vector<std::string> read_timestamps = {Timestamp(2, 0),
                                                    Timestamp(4, 0)};
  for (size_t i = 0; i < write_timestamps.size(); ++i) {
    WriteOptions write_opts;
    for (uint64_t key = start_keys[i]; key <= kMaxKey; ++key) {
      Status s = db_->Put(write_opts, Key1(key), write_timestamps[i],
                          "value" + std::to_string(i));
      ASSERT_OK(s);
    }
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  Close();

  // Reopen the database in read only mode as a Compacted DB to test its
  // timestamp support.
  options.max_open_files = -1;
  ASSERT_OK(ReadOnlyReopen(options));
  CheckDBOpenedAsCompactedDBWithOnlyHighestNonEmptyLevelFiles();

  for (size_t i = 0; i < write_timestamps.size(); ++i) {
    ReadOptions read_opts;
    Slice read_ts = read_timestamps[i];
    read_opts.timestamp = &read_ts;
    // key_strs owns the bytes; `keys` holds Slices into it and is filled only
    // after key_strs is complete.
    std::vector<std::string> key_strs;
    std::vector<Slice> keys;
    for (uint64_t key = start_keys[i]; key <= kMaxKey; ++key) {
      key_strs.push_back(Key1(key));
    }
    for (const auto& key_str : key_strs) {
      keys.emplace_back(key_str);
    }
    size_t batch_size = kMaxKey - start_keys[i] + 1;
    std::vector<std::string> values;
    std::vector<std::string> timestamps;
    std::vector<Status> status_list =
        db_->MultiGet(read_opts, keys, &values, &timestamps);
    // Validate all three result vectors before indexing any of them.
    ASSERT_EQ(batch_size, status_list.size());
    ASSERT_EQ(batch_size, values.size());
    ASSERT_EQ(batch_size, timestamps.size());
    for (size_t idx = 0; idx < batch_size; ++idx) {
      // Check the status first: a failed lookup should be reported as such,
      // not as a value/timestamp mismatch.
      ASSERT_OK(status_list[idx]);
      ASSERT_EQ("value" + std::to_string(i), values[idx]);
      ASSERT_EQ(write_timestamps[i], timestamps[idx]);
    }
  }
  Close();
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack trace handler, initializes
// GoogleTest from the command line, registers custom objects, then runs every
// test and returns the result as the process exit code.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  RegisterCustomObjects(argc, argv);
  const int test_result = RUN_ALL_TESTS();
  return test_result;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,213 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include <string>
#include "db/db_test_util.h"
#include "monitoring/thread_status_util.h"
#include "port/stack_trace.h"
#include "rocksdb/statistics.h"
#include "util/random.h"
namespace ROCKSDB_NAMESPACE {
// Fixture for the statistics tests below. It only forwards to DBTestBase,
// passing "db_statistics_test" as the first argument and enabling
// env_do_fsync.
class DBStatisticsTest : public DBTestBase {
public:
DBStatisticsTest()
: DBTestBase("db_statistics_test", /*env_do_fsync=*/true) {}
};
// Checks the block (de)compression tickers: with a supported compression type
// they must grow after a flush and a read pass; with kNoCompression they must
// not change across the same workload.
TEST_F(DBStatisticsTest, CompressionStatsTest) {
  // Pick the first compression library reported as supported; return early
  // when none is.
  CompressionType compression;
  if (Snappy_Supported()) {
    compression = kSnappyCompression;
    fprintf(stderr, "using snappy\n");
  } else if (Zlib_Supported()) {
    compression = kZlibCompression;
    fprintf(stderr, "using zlib\n");
  } else if (BZip2_Supported()) {
    compression = kBZip2Compression;
    fprintf(stderr, "using bzip2\n");
  } else if (LZ4_Supported()) {
    compression = kLZ4Compression;
    fprintf(stderr, "using lz4\n");
  } else if (XPRESS_Supported()) {
    compression = kXpressCompression;
    fprintf(stderr, "using xpress\n");
  } else if (ZSTD_Supported()) {
    compression = kZSTD;
    fprintf(stderr, "using ZSTD\n");
  } else {
    fprintf(stderr, "skipping test, compression disabled\n");
    return;
  }

  Options opts = CurrentOptions();
  opts.compression = compression;
  opts.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  opts.statistics->set_stats_level(StatsLevel::kExceptTimeForMutex);
  DestroyAndReopen(opts);

  int num_keys = 100000;

  // Check that compressions occur and are counted when compression is turned on
  Random rng(301);
  for (int n = 0; n < num_keys; ++n) {
    // compressible string
    ASSERT_OK(Put(Key(n), rng.RandomString(128) + std::string(128, 'a')));
  }
  ASSERT_OK(Flush());
  ASSERT_GT(opts.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED), 0);

  // Read everything back; only the ticker matters, the values are discarded.
  for (int n = 0; n < num_keys; ++n) {
    auto ignored = Get(Key(n));
  }
  ASSERT_GT(opts.statistics->getTickerCount(NUMBER_BLOCK_DECOMPRESSED), 0);

  opts.compression = kNoCompression;
  DestroyAndReopen(opts);
  // Baselines taken after the reopen; the same statistics object is reused.
  uint64_t compressed_before =
      opts.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED);
  uint64_t decompressed_before =
      opts.statistics->getTickerCount(NUMBER_BLOCK_DECOMPRESSED);

  // Check that compressions do not occur when turned off
  for (int n = 0; n < num_keys; ++n) {
    // compressible string
    ASSERT_OK(Put(Key(n), rng.RandomString(128) + std::string(128, 'a')));
  }
  ASSERT_OK(Flush());
  ASSERT_EQ(opts.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED) -
                compressed_before,
            0);

  for (int n = 0; n < num_keys; ++n) {
    auto ignored = Get(Key(n));
  }
  ASSERT_EQ(opts.statistics->getTickerCount(NUMBER_BLOCK_DECOMPRESSED) -
                decompressed_before,
            0);
}
// With statistics created at their default level, a Put() performed while a
// mutex-wait delay is injected must leave DB_MUTEX_WAIT_MICROS at zero.
TEST_F(DBStatisticsTest, MutexWaitStatsDisabledByDefault) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  CreateAndReopenWithCF({"pikachu"}, options);
  const uint64_t kMutexWaitDelay = 100;

  // Clears the injected delay whenever the test body exits. ASSERT_* returns
  // early on failure, which would otherwise skip a trailing reset and leave
  // the delay set for the tests that run afterwards.
  struct MutexWaitDelayReset {
    ~MutexWaitDelayReset() {
      ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, 0);
    }
  } reset_delay_on_exit;

  ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT,
                                       kMutexWaitDelay);
  ASSERT_OK(Put("hello", "rocksdb"));
  ASSERT_EQ(TestGetTickerCount(options, DB_MUTEX_WAIT_MICROS), 0);
}
// With the stats level raised to StatsLevel::kAll, a Put() performed while a
// mutex-wait delay is injected must record at least that delay in
// DB_MUTEX_WAIT_MICROS.
TEST_F(DBStatisticsTest, MutexWaitStats) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  options.statistics->set_stats_level(StatsLevel::kAll);
  CreateAndReopenWithCF({"pikachu"}, options);
  const uint64_t kMutexWaitDelay = 100;

  // Clears the injected delay whenever the test body exits. ASSERT_* returns
  // early on failure, which would otherwise skip a trailing reset and leave
  // the delay set for the tests that run afterwards.
  struct MutexWaitDelayReset {
    ~MutexWaitDelayReset() {
      ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, 0);
    }
  } reset_delay_on_exit;

  ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT,
                                       kMutexWaitDelay);
  ASSERT_OK(Put("hello", "rocksdb"));
  ASSERT_GE(TestGetTickerCount(options, DB_MUTEX_WAIT_MICROS), kMutexWaitDelay);
}
// Statistics::Reset() must bring tickers and histograms back to zero after
// they have been made non-zero by a write.
TEST_F(DBStatisticsTest, ResetStats) {
  Options opts = CurrentOptions();
  opts.create_if_missing = true;
  opts.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  DestroyAndReopen(opts);
  for (int pass = 0; pass < 2; ++pass) {
    // pick arbitrary ticker and histogram. On first iteration they're zero
    // because db is unused. On second iteration they're zero due to Reset().
    ASSERT_EQ(0, TestGetTickerCount(opts, NUMBER_KEYS_WRITTEN));
    HistogramData write_hist;
    opts.statistics->histogramData(DB_WRITE, &write_hist);
    ASSERT_EQ(0.0, write_hist.max);
    if (pass != 0) {
      // The second pass only re-checks the zeroed state.
      continue;
    }
    // The Put() makes some of the ticker/histogram stats nonzero until we
    // Reset().
    ASSERT_OK(Put("hello", "rocksdb"));
    ASSERT_EQ(1, TestGetTickerCount(opts, NUMBER_KEYS_WRITTEN));
    opts.statistics->histogramData(DB_WRITE, &write_hist);
    ASSERT_GT(write_hist.max, 0.0);
    ASSERT_OK(opts.statistics->Reset());
  }
}
// At StatsLevel::kExceptTickers a write must not bump BYTES_WRITTEN; after
// switching to StatsLevel::kExceptHistogramOrTimers a read must bump
// BYTES_READ.
TEST_F(DBStatisticsTest, ExcludeTickers) {
  Options opts = CurrentOptions();
  opts.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  DestroyAndReopen(opts);
  auto* const stats = opts.statistics.get();

  // Tickers excluded: the write is expected to go uncounted.
  stats->set_stats_level(StatsLevel::kExceptTickers);
  ASSERT_OK(Put("foo", "value"));
  ASSERT_EQ(0, stats->getTickerCount(BYTES_WRITTEN));

  // Tickers included again: the read is expected to be counted.
  stats->set_stats_level(StatsLevel::kExceptHistogramOrTimers);
  Reopen(opts);
  ASSERT_EQ("value", Get("foo"));
  ASSERT_GT(stats->getTickerCount(BYTES_READ), 0);
}
// Checks the VERIFY_CHECKSUM_READ_BYTES ticker in three situations: WAL-only
// data, VerifyFileChecksums() over one SST, and VerifyChecksum() over one SST.
TEST_F(DBStatisticsTest, VerifyChecksumReadStat) {
  Options opts = CurrentOptions();
  opts.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
  opts.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  Reopen(opts);
  auto* const stats = opts.statistics.get();

  // Expected to be populated regardless of `PerfLevel` in user thread
  SetPerfLevel(kDisable);

  // Scenario 0: only WAL data. Not verified so require ticker to be zero.
  ASSERT_OK(Put("foo", "value"));
  ASSERT_OK(db_->VerifyFileChecksums(ReadOptions()));
  ASSERT_OK(db_->VerifyChecksum());
  ASSERT_EQ(0, stats->getTickerCount(VERIFY_CHECKSUM_READ_BYTES));

  // Create one SST.
  ASSERT_OK(Flush());
  std::unordered_map<std::string, uint64_t> sst_files;
  uint64_t sst_total_bytes = 0;
  GetAllDataFiles(kTableFile, &sst_files, &sst_total_bytes);

  // Scenario 1: Table verified in `VerifyFileChecksums()`. This should read
  // the whole file so we require the ticker stat exactly matches the file
  // size.
  ASSERT_OK(stats->Reset());
  ASSERT_OK(db_->VerifyFileChecksums(ReadOptions()));
  ASSERT_EQ(sst_total_bytes, stats->getTickerCount(VERIFY_CHECKSUM_READ_BYTES));

  // Scenario 2: Table verified in `VerifyChecksum()`. This opens a
  // `TableReader` to verify each block. It can involve duplicate reads of the
  // same data so we set a lower-bound only.
  ASSERT_OK(stats->Reset());
  ASSERT_OK(db_->VerifyChecksum());
  ASSERT_GE(stats->getTickerCount(VERIFY_CHECKSUM_READ_BYTES), sst_total_bytes);
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack trace handler, initializes
// GoogleTest from the command line, then runs every test and returns the
// result as the process exit code.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int test_result = RUN_ALL_TESTS();
  return test_result;
}

@ -1,623 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <memory>
#include <unordered_set>
#include <vector>
#include "db/db_test_util.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "rocksdb/db.h"
#include "rocksdb/types.h"
#include "rocksdb/utilities/table_properties_collectors.h"
#include "table/format.h"
#include "table/meta_blocks.h"
#include "table/table_properties_internal.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/random.h"
namespace ROCKSDB_NAMESPACE {
// A helper function that verifies that the table properties returned in
// `GetPropertiesOfAllTablesTest` are correct.
// This check assumes the number of entries is different for each table.
namespace {
// Fetches the properties of every table in `db` and checks that:
//   * exactly four tables are reported,
//   * each table reports a distinct `num_entries`, and
//   * the `num_entries` values add up to `expected_entries_size`.
// Finally runs `VerifySstUniqueIds()` on the collected properties.
void VerifyTableProperties(DB* db, uint64_t expected_entries_size) {
  TablePropertiesCollection all_props;
  ASSERT_OK(db->GetPropertiesOfAllTables(&all_props));
  ASSERT_EQ(4U, all_props.size());

  // Indirect test: relies on every table having a different entry count.
  std::unordered_set<uint64_t> distinct_counts;
  uint64_t total_entries = 0;
  for (const auto& name_and_props : all_props) {
    const uint64_t entries = name_and_props.second->num_entries;
    distinct_counts.insert(entries);
    total_entries += entries;
  }

  ASSERT_EQ(all_props.size(), distinct_counts.size());
  ASSERT_EQ(expected_entries_size, total_entries);
  VerifySstUniqueIds(all_props);
}
} // anonymous namespace
// Fixture for the table-properties tests. It is parameterized on a string so
// that the same class can back both TEST_F and TEST_P cases.
class DBTablePropertiesTest : public DBTestBase,
public testing::WithParamInterface<std::string> {
public:
DBTablePropertiesTest()
: DBTestBase("db_table_properties_test", /*env_do_fsync=*/false) {}
// Calls `GetPropertiesOfTablesInRange()` for `ranges` and cross-checks the
// result against the live file metadata. When non-null, `num_properties`
// receives the number of returned properties and `num_files` the number of
// live files.
TablePropertiesCollection TestGetPropertiesOfTablesInRange(
std::vector<Range> ranges, std::size_t* num_properties = nullptr,
std::size_t* num_files = nullptr);
};
// Builds four table files with 10, 11, 12 and 13 entries (the last one written
// with the old properties-block meta name) and verifies
// `GetPropertiesOfAllTables()` in several table-cache states:
//   1. no table in the table cache,
//   2. two tables in the table cache,
//   3. all tables in the table cache,
//   4. one file's properties corrupted, read (a) directly and (b) via Get().
TEST_F(DBTablePropertiesTest, GetPropertiesOfAllTablesTest) {
  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = 8;
  // Part of strategy to prevent pinning table files
  options.max_open_files = 42;
  Reopen(options);

  // Create 4 tables
  for (int table = 0; table < 4; ++table) {
    // Use old meta name for table properties for one file
    if (table == 3) {
      SyncPoint::GetInstance()->SetCallBack(
          "BlockBasedTableBuilder::WritePropertiesBlock:Meta", [&](void* meta) {
            *reinterpret_cast<const std::string**>(meta) =
                &kPropertiesBlockOldName;
          });
      SyncPoint::GetInstance()->EnableProcessing();
    }
    // Build file; table `t` gets 10 + t entries so every file has a distinct
    // entry count.
    for (int i = 0; i < 10 + table; ++i) {
      ASSERT_OK(
          db_->Put(WriteOptions(), std::to_string(table * 100 + i), "val"));
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
  }
  SyncPoint::GetInstance()->DisableProcessing();

  // The session id is stored in the table properties; it is used below to
  // locate the bytes to corrupt.
  std::string original_session_id;
  ASSERT_OK(db_->GetDbSessionId(original_session_id));

  // Part of strategy to prevent pinning table files
  SyncPoint::GetInstance()->SetCallBack(
      "VersionEditHandler::LoadTables:skip_load_table_files",
      [&](void* skip_load) { *reinterpret_cast<bool*>(skip_load) = true; });
  SyncPoint::GetInstance()->EnableProcessing();

  // 1. Read table properties directly from file
  Reopen(options);
  // Clear out auto-opened files
  dbfull()->TEST_table_cache()->EraseUnRefEntries();
  ASSERT_EQ(dbfull()->TEST_table_cache()->GetUsage(), 0U);
  VerifyTableProperties(db_, 10 + 11 + 12 + 13);

  // 2. Put two tables to table cache and read the properties again
  Reopen(options);
  // Clear out auto-opened files
  dbfull()->TEST_table_cache()->EraseUnRefEntries();
  ASSERT_EQ(dbfull()->TEST_table_cache()->GetUsage(), 0U);
  // fetch key from 1st and 2nd table, which will internally place that table to
  // the table cache.
  for (int i = 0; i < 2; ++i) {
    Get(std::to_string(i * 100 + 0));
  }
  VerifyTableProperties(db_, 10 + 11 + 12 + 13);

  // 3. Put all tables to table cache
  Reopen(options);
  // fetch key from all tables, which will place them in table cache.
  for (int i = 0; i < 4; ++i) {
    Get(std::to_string(i * 100 + 0));
  }
  VerifyTableProperties(db_, 10 + 11 + 12 + 13);

  // 4. Try to read CORRUPT properties (a) directly from file, and (b)
  // through reader on Get
  // It's not practical to prevent table file read on Open, so we
  // corrupt after open and after purging table cache.
  for (bool direct : {true, false}) {
    Reopen(options);
    // Clear out auto-opened files
    dbfull()->TEST_table_cache()->EraseUnRefEntries();
    ASSERT_EQ(dbfull()->TEST_table_cache()->GetUsage(), 0U);

    TablePropertiesCollection props;
    ASSERT_OK(db_->GetPropertiesOfAllTables(&props));
    std::string sst_file = props.begin()->first;

    // Corrupt the file's TableProperties using session id
    std::string contents;
    ASSERT_OK(
        ReadFileToString(env_->GetFileSystem().get(), sst_file, &contents));
    size_t pos = contents.find(original_session_id);
    ASSERT_NE(pos, std::string::npos);
    ASSERT_OK(test::CorruptFile(env_, sst_file, static_cast<int>(pos), 1,
                                /*verify checksum fails*/ false));

    // Try to read CORRUPT properties
    if (direct) {
      ASSERT_TRUE(db_->GetPropertiesOfAllTables(&props).IsCorruption());
    } else {
      bool found_corruption = false;
      for (int i = 0; i < 4; ++i) {
        std::string result = Get(std::to_string(i * 100 + 0));
        // Substring search: `find_first_of` would match any single character
        // of the message and therefore accept almost any non-empty result.
        if (result.find("Corruption: block checksum mismatch") !=
            std::string::npos) {
          found_corruption = true;
        }
      }
      ASSERT_TRUE(found_corruption);
    }

    // UN-corrupt file for next iteration
    ASSERT_OK(test::CorruptFile(env_, sst_file, static_cast<int>(pos), 1,
                                /*verify checksum fails*/ false));
  }

  SyncPoint::GetInstance()->DisableProcessing();
}
// Writes a table file whose properties block is replaced by data that Block
// considers invalid, then checks that reading the table properties still
// succeeds.
TEST_F(DBTablePropertiesTest, InvalidIgnored) {
  // RocksDB versions 2.5 - 2.7 generate some properties that Block considers
  // invalid in some way. This approximates that.
  auto* sync_point = SyncPoint::GetInstance();

  // Inject properties block data that Block considers invalid
  sync_point->SetCallBack(
      "BlockBasedTableBuilder::WritePropertiesBlock:BlockData",
      [](void* block_data) {
        auto* injected = reinterpret_cast<Slice*>(block_data);
        *injected = Slice("X");
      });
  sync_point->EnableProcessing();

  // Corrupting the table properties corrupts the unique id.
  // Ignore the unique id recorded in the manifest.
  Options opts = CurrentOptions();
  opts.verify_sst_unique_id_in_manifest = false;
  Reopen(opts);

  // Build file
  for (int key = 0; key < 10; ++key) {
    ASSERT_OK(db_->Put(WriteOptions(), std::to_string(key), "val"));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));

  sync_point->DisableProcessing();

  // Not crashing is good enough
  TablePropertiesCollection all_props;
  ASSERT_OK(db_->GetPropertiesOfAllTables(&all_props));
}
// Creates a CompactOnDeletionCollectorFactory from option strings and checks
// that window size, deletion trigger and deletion ratio are parsed, and that
// values not mentioned in the string read back as zero.
TEST_F(DBTablePropertiesTest, CreateOnDeletionCollectorFactory) {
  ConfigOptions config;
  config.ignore_unsupported_options = false;
  std::shared_ptr<TablePropertiesCollectorFactory> created;
  const std::string class_id = CompactOnDeletionCollectorFactory::kClassName();

  // Bare class name: every setting reads back as zero.
  ASSERT_OK(TablePropertiesCollectorFactory::CreateFromString(config, class_id,
                                                              &created));
  auto on_del = created->CheckedCast<CompactOnDeletionCollectorFactory>();
  ASSERT_NE(on_del, nullptr);
  ASSERT_EQ(0U, on_del->GetWindowSize());
  ASSERT_EQ(0U, on_del->GetDeletionTrigger());
  ASSERT_EQ(0.0, on_del->GetDeletionRatio());

  // Window size and deletion trigger given, ratio omitted.
  const std::string without_ratio =
      "window_size=100; deletion_trigger=90; id=" + class_id;
  ASSERT_OK(TablePropertiesCollectorFactory::CreateFromString(
      config, without_ratio, &created));
  on_del = created->CheckedCast<CompactOnDeletionCollectorFactory>();
  ASSERT_NE(on_del, nullptr);
  ASSERT_EQ(100U, on_del->GetWindowSize());
  ASSERT_EQ(90U, on_del->GetDeletionTrigger());
  ASSERT_EQ(0.0, on_del->GetDeletionRatio());

  // All three settings given.
  const std::string with_ratio =
      "window_size=100; deletion_trigger=90; deletion_ratio=0.5; id=" +
      class_id;
  ASSERT_OK(TablePropertiesCollectorFactory::CreateFromString(
      config, with_ratio, &created));
  on_del = created->CheckedCast<CompactOnDeletionCollectorFactory>();
  ASSERT_NE(on_del, nullptr);
  ASSERT_EQ(100U, on_del->GetWindowSize());
  ASSERT_EQ(90U, on_del->GetDeletionTrigger());
  ASSERT_EQ(0.5, on_del->GetDeletionRatio());
}
// Queries the table properties of the default column family for `ranges` and
// cross-checks the answer against the live file metadata: every live file
// whose key span overlaps at least one requested range must appear in the
// result, and every file that overlaps none must be absent.
//
// `ranges` is expected to be non-empty (reported through EXPECT_GT).
// `num_properties`, if non-null, receives the number of returned properties.
// `num_files`, if non-null, receives the number of live files.
// Returns the collected properties.
TablePropertiesCollection
DBTablePropertiesTest::TestGetPropertiesOfTablesInRange(
    std::vector<Range> ranges, std::size_t* num_properties,
    std::size_t* num_files) {
  // Callers must pass at least one range. EXPECT_GT does not abort, so the
  // query below uses `data()`, which unlike `&ranges[0]` is well-defined even
  // when the vector is empty.
  EXPECT_GT(ranges.size(), 0U);

  // run the query
  TablePropertiesCollection props;
  EXPECT_OK(db_->GetPropertiesOfTablesInRange(
      db_->DefaultColumnFamily(), ranges.data(), ranges.size(), &props));

  // Make sure that we've received properties for those and for those files
  // only which fall within requested ranges
  std::vector<LiveFileMetaData> vmd;
  db_->GetLiveFilesMetaData(&vmd);
  for (const auto& md : vmd) {
    const std::string fn = md.db_path + md.name;
    bool in_range = false;
    for (const auto& r : ranges) {
      // Overlap check: smallestkey <= limit && largestkey >= start
      if (r.limit.compare(md.smallestkey) >= 0 &&
          r.start.compare(md.largestkey) <= 0) {
        in_range = true;
        EXPECT_GT(props.count(fn), 0U);
      }
    }
    if (!in_range) {
      EXPECT_EQ(props.count(fn), 0U);
    }
  }

  if (num_properties) {
    *num_properties = props.size();
  }
  if (num_files) {
    *num_files = vmd.size();
  }
  return props;
}
// Builds an LSM tree with files on at least levels 0, 1 and 2, then checks
// `GetPropertiesOfTablesInRange()` for the largest range, an empty range, a
// range starting in the middle, and 100 batches of random ranges.
TEST_F(DBTablePropertiesTest, GetPropertiesOfTablesInRange) {
// Fixed random seed
Random rnd(301);
// Small buffers, file sizes and level limits so that 10000 writes spread over
// several levels.
Options options;
options.create_if_missing = true;
options.write_buffer_size = 4096;
options.max_write_buffer_number = 2;
options.level0_file_num_compaction_trigger = 2;
options.level0_slowdown_writes_trigger = 2;
options.level0_stop_writes_trigger = 2;
options.target_file_size_base = 2048;
options.max_bytes_for_level_base = 40960;
options.max_bytes_for_level_multiplier = 4;
options.hard_pending_compaction_bytes_limit = 16 * 1024;
options.num_levels = 8;
options.env = env_;
DestroyAndReopen(options);
// build a decent LSM
for (int i = 0; i < 10000; i++) {
ASSERT_OK(Put(test::RandomKey(&rnd, 5), rnd.RandomString(102)));
}
ASSERT_OK(Flush());
ASSERT_OK(dbfull()->TEST_WaitForCompact());
// If compaction emptied L0, write and flush one more key so L0 has a file.
if (NumTableFilesAtLevel(0) == 0) {
ASSERT_OK(Put(test::RandomKey(&rnd, 5), rnd.RandomString(102)));
ASSERT_OK(Flush());
}
// Pause background work before inspecting the per-level file counts.
ASSERT_OK(db_->PauseBackgroundWork());
// Ensure that we have at least L0, L1 and L2
ASSERT_GT(NumTableFilesAtLevel(0), 0);
ASSERT_GT(NumTableFilesAtLevel(1), 0);
ASSERT_GT(NumTableFilesAtLevel(2), 0);
// Query the largest range
std::size_t num_properties, num_files;
TestGetPropertiesOfTablesInRange(
{Range(test::RandomKey(&rnd, 5, test::RandomKeyType::SMALLEST),
test::RandomKey(&rnd, 5, test::RandomKeyType::LARGEST))},
&num_properties, &num_files);
ASSERT_EQ(num_properties, num_files);
// Query the empty range (start is the largest key, limit the smallest)
TestGetPropertiesOfTablesInRange(
{Range(test::RandomKey(&rnd, 5, test::RandomKeyType::LARGEST),
test::RandomKey(&rnd, 5, test::RandomKeyType::SMALLEST))},
&num_properties, &num_files);
ASSERT_GT(num_files, 0);
ASSERT_EQ(num_properties, 0);
// Query the middle range
TestGetPropertiesOfTablesInRange(
{Range(test::RandomKey(&rnd, 5, test::RandomKeyType::MIDDLE),
test::RandomKey(&rnd, 5, test::RandomKeyType::LARGEST))},
&num_properties, &num_files);
ASSERT_GT(num_files, 0);
ASSERT_GT(num_files, num_properties);
ASSERT_GT(num_properties, 0);
// Query a bunch of random ranges
for (int j = 0; j < 100; j++) {
// create a bunch of ranges
std::vector<std::string> random_keys;
// Random returns numbers with zero included
// when we pass empty ranges TestGetPropertiesOfTablesInRange()
// derefs random memory in the empty ranges[0]
// so want to be greater than zero and even since
// the below loop requires that random_keys.size() to be even.
auto n = 2 * (rnd.Uniform(50) + 1);
for (uint32_t i = 0; i < n; ++i) {
random_keys.push_back(test::RandomKey(&rnd, 5));
}
ASSERT_GT(random_keys.size(), 0U);
ASSERT_EQ((random_keys.size() % 2), 0U);
// Pair up consecutive keys into ranges: (k0, k1), (k2, k3), ...
std::vector<Range> ranges;
auto it = random_keys.begin();
while (it != random_keys.end()) {
ranges.push_back(Range(*it, *(it + 1)));
it += 2;
}
TestGetPropertiesOfTablesInRange(std::move(ranges));
}
}
// Flushes one table per column family and checks that each table's properties
// record the name and id of the column family it belongs to.
TEST_F(DBTablePropertiesTest, GetColumnFamilyNameProperty) {
  const std::string extra_cf_name = "pikachu";
  CreateAndReopenWithCF({extra_cf_name}, CurrentOptions());

  // Create one table per CF, then verify it was created with the column family
  // name property.
  for (uint32_t cf = 0; cf < 2; ++cf) {
    ASSERT_OK(Put(cf, "key", "val"));
    ASSERT_OK(Flush(cf));

    TablePropertiesCollection props_by_file;
    ASSERT_OK(db_->GetPropertiesOfAllTables(handles_[cf], &props_by_file));
    ASSERT_EQ(1U, props_by_file.size());

    // Column family 0 is the default one; column family 1 is the extra one.
    const std::string expected_cf_name =
        (cf > 0) ? extra_cf_name : kDefaultColumnFamilyName;
    const auto& table_props = props_by_file.begin()->second;
    ASSERT_EQ(expected_cf_name, table_props->column_family_name);
    ASSERT_EQ(cf, static_cast<uint32_t>(table_props->column_family_id));
  }
}
// Flushes one table per column family and checks that each table's properties
// carry the DB identity and DB session id reported by the DB itself.
TEST_F(DBTablePropertiesTest, GetDbIdentifiersProperty) {
  CreateAndReopenWithCF({"goku"}, CurrentOptions());

  for (uint32_t cf = 0; cf < 2; ++cf) {
    ASSERT_OK(Put(cf, "key", "val"));
    ASSERT_OK(Put(cf, "foo", "bar"));
    ASSERT_OK(Flush(cf));

    TablePropertiesCollection props_by_file;
    ASSERT_OK(db_->GetPropertiesOfAllTables(handles_[cf], &props_by_file));
    ASSERT_EQ(1U, props_by_file.size());

    std::string db_identity;
    std::string session_id;
    ASSERT_OK(db_->GetDbIdentity(db_identity));
    ASSERT_OK(db_->GetDbSessionId(session_id));

    const auto& table_props = props_by_file.begin()->second;
    ASSERT_EQ(db_identity, table_props->db_id);
    ASSERT_EQ(session_id, table_props->db_session_id);
  }
}
// Fixture for the `db_host_id` table-property test. The parameter tuple holds
// an option-config value (int) and the host id string to test with.
class DBTableHostnamePropertyTest
: public DBTestBase,
public ::testing::WithParamInterface<std::tuple<int, std::string>> {
public:
DBTableHostnamePropertyTest()
: DBTestBase("db_table_hostname_property_test",
/*env_do_fsync=*/false) {}
};
// Checks that the `db_host_id` table property matches the expected host id.
// When the parameter equals `kHostnameForDbHostId` the expected value is the
// host name reported by the Env; otherwise the parameter is installed as
// `Options::db_host_id` and expected verbatim.
TEST_P(DBTableHostnamePropertyTest, DbHostLocationProperty) {
  const auto& param = GetParam();
  option_config_ = std::get<0>(param);
  Options opts = CurrentOptions();

  std::string expected_host_id = std::get<1>(param);
  const bool use_env_hostname = (expected_host_id == kHostnameForDbHostId);
  if (!use_env_hostname) {
    opts.db_host_id = expected_host_id;
  } else {
    ASSERT_OK(env_->GetHostNameString(&expected_host_id));
  }

  CreateAndReopenWithCF({"goku"}, opts);

  for (uint32_t cf = 0; cf < 2; ++cf) {
    ASSERT_OK(Put(cf, "key", "val"));
    ASSERT_OK(Put(cf, "foo", "bar"));
    ASSERT_OK(Flush(cf));

    TablePropertiesCollection props_by_file;
    ASSERT_OK(db_->GetPropertiesOfAllTables(handles_[cf], &props_by_file));
    ASSERT_EQ(1U, props_by_file.size());
    ASSERT_EQ(props_by_file.begin()->second->db_host_id, expected_host_id);
  }
}
// Instantiates DBTableHostnamePropertyTest for two option configs (kDefault
// and kPlainTableFirstBytePrefix), each with three host id settings:
// `kHostnameForDbHostId`, "foobar" and the empty string.
INSTANTIATE_TEST_CASE_P(
DBTableHostnamePropertyTest, DBTableHostnamePropertyTest,
::testing::Values(
// OptionConfig, override db_host_location
std::make_tuple(DBTestBase::OptionConfig::kDefault,
kHostnameForDbHostId),
std::make_tuple(DBTestBase::OptionConfig::kDefault, "foobar"),
std::make_tuple(DBTestBase::OptionConfig::kDefault, ""),
std::make_tuple(DBTestBase::OptionConfig::kPlainTableFirstBytePrefix,
kHostnameForDbHostId),
std::make_tuple(DBTestBase::OptionConfig::kPlainTableFirstBytePrefix,
"foobar"),
std::make_tuple(DBTestBase::OptionConfig::kPlainTableFirstBytePrefix,
"")));
class DeletionTriggeredCompactionTestListener : public EventListener {
public:
void OnCompactionBegin(DB*, const CompactionJobInfo& ci) override {
ASSERT_EQ(ci.compaction_reason,
CompactionReason::kFilesMarkedForCompaction);
}
void OnCompactionCompleted(DB*, const CompactionJobInfo& ci) override {
ASSERT_EQ(ci.compaction_reason,
CompactionReason::kFilesMarkedForCompaction);
}
};
// Exercises the compact-on-deletion collector in three phases:
//   1. window 100 / trigger 90: the flushed file is compacted out of L0,
//   2. window 50 / trigger 40 (changed at runtime): same outcome,
//   3. window 0: no deletes are written and the flushed file stays in L0.
// Finally checks that the "marked" compaction read/write tickers are non-zero.
TEST_P(DBTablePropertiesTest, DeletionTriggeredCompactionMarking) {
  const int num_keys = 1000;
  int window_size = 100;
  int dels_trigger = 90;

  std::shared_ptr<TablePropertiesCollectorFactory> compact_on_del =
      NewCompactOnDeletionCollectorFactory(window_size, dels_trigger);
  // Typed view of the same factory, used to change its settings later on.
  auto* del_factory =
      static_cast<CompactOnDeletionCollectorFactory*>(compact_on_del.get());

  Options opts = CurrentOptions();
  opts.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  opts.table_properties_collector_factories.emplace_back(compact_on_del);
  if (GetParam() == "kCompactionStyleUniversal") {
    opts.compaction_style = kCompactionStyleUniversal;
  }
  Reopen(opts);

  // add an L1 file to prevent tombstones from dropping due to obsolescence
  // during flush
  ASSERT_OK(Put(Key(0), "val"));
  ASSERT_OK(Flush());
  MoveFilesToLevel(1);

  opts.listeners.emplace_back(new DeletionTriggeredCompactionTestListener());
  Reopen(opts);

  // Phase 1: deletes fill the first `dels_trigger` slots of the last
  // `window_size` keys; every other key is a put.
  for (int i = 0; i < num_keys; ++i) {
    const bool is_delete = i >= num_keys - window_size &&
                           i < num_keys - window_size + dels_trigger;
    if (is_delete) {
      ASSERT_OK(Delete(Key(i)));
    } else {
      ASSERT_OK(Put(Key(i), "val"));
    }
  }
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(0, NumTableFilesAtLevel(0));

  // Phase 2: Change the window size and deletion trigger and ensure new values
  // take effect
  window_size = 50;
  dels_trigger = 40;
  del_factory->SetWindowSize(window_size);
  del_factory->SetDeletionTrigger(dels_trigger);
  for (int i = 0; i < num_keys; ++i) {
    const bool is_delete = i >= num_keys - window_size &&
                           i < num_keys - window_size + dels_trigger;
    if (is_delete) {
      ASSERT_OK(Delete(Key(i)));
    } else {
      ASSERT_OK(Put(Key(i), "val"));
    }
  }
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(0, NumTableFilesAtLevel(0));

  // Phase 3: Change the window size to disable delete triggered compaction.
  // With a zero window the delete condition never holds for i < num_keys, so
  // this loop only writes puts.
  window_size = 0;
  del_factory->SetWindowSize(window_size);
  del_factory->SetDeletionTrigger(dels_trigger);
  for (int i = 0; i < num_keys; ++i) {
    const bool is_delete = i >= num_keys - window_size &&
                           i < num_keys - window_size + dels_trigger;
    if (is_delete) {
      ASSERT_OK(Delete(Key(i)));
    } else {
      ASSERT_OK(Put(Key(i), "val"));
    }
  }
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  ASSERT_LT(0, opts.statistics->getTickerCount(COMPACT_WRITE_BYTES_MARKED));
  ASSERT_LT(0, opts.statistics->getTickerCount(COMPACT_READ_BYTES_MARKED));
}
// Uses a compact-on-deletion collector configured only with a deletion ratio
// (window size and trigger are zero) and checks that, after writing and then
// deleting every key, levels 0 through 2 end up with no files.
TEST_P(DBTablePropertiesTest, RatioBasedDeletionTriggeredCompactionMarking) {
  constexpr int kKeyCount = 1000;
  constexpr int kWindow = 0;
  constexpr int kDelsTrigger = 0;
  constexpr double kRatio = 0.1;

  std::shared_ptr<TablePropertiesCollectorFactory> compact_on_del =
      NewCompactOnDeletionCollectorFactory(kWindow, kDelsTrigger, kRatio);

  Options opts = CurrentOptions();
  opts.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  opts.table_properties_collector_factories.emplace_back(compact_on_del);
  Reopen(opts);

  // Add an L2 file to prevent tombstones from dropping due to obsolescence
  // during flush
  ASSERT_OK(Put(Key(0), "val"));
  ASSERT_OK(Flush());
  MoveFilesToLevel(2);

  opts.listeners.emplace_back(new DeletionTriggeredCompactionTestListener());
  Reopen(opts);

  // Generate one L0 with kKeyCount Put.
  for (int k = 0; k < kKeyCount; ++k) {
    ASSERT_OK(Put(Key(k), "not important"));
  }
  ASSERT_OK(Flush());

  // Generate another L0 with kKeyCount Delete.
  // This file, due to deletion ratio, will trigger compaction: 2@0 files to L1.
  // The resulting L1 file has only one tombstone for user key 'Key(0)'.
  // Again, due to deletion ratio, a compaction will be triggered: 1@1 + 1@2
  // files to L2. However, the resulting file is empty because the tombstone
  // and value are both dropped.
  for (int k = 0; k < kKeyCount; ++k) {
    ASSERT_OK(Delete(Key(k)));
  }
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  for (int level = 0; level < 3; ++level) {
    ASSERT_EQ(0, NumTableFilesAtLevel(level));
  }
}
// Instantiates the TEST_P cases of DBTablePropertiesTest once per compaction
// style name; tests compare GetParam() against these strings.
INSTANTIATE_TEST_CASE_P(DBTablePropertiesTest, DBTablePropertiesTest,
::testing::Values("kCompactionStyleLevel",
"kCompactionStyleUniversal"));
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack-trace handler, hands the
// command-line arguments to GoogleTest, and returns the result of running all
// registered tests.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

@ -1,595 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
// The introduction of SyncPoint effectively disabled building and running this
// test in Release builds, which is a pity because it is a good test.
#include "db/db_test_util.h"
#include "db/forward_iterator.h"
#include "port/stack_trace.h"
namespace ROCKSDB_NAMESPACE {
// Fixture for the tailing-iterator tests. The bool parameter is read by each
// test to decide whether to set `ReadOptions::async_io`.
class DBTestTailingIterator : public DBTestBase,
public ::testing::WithParamInterface<bool> {
public:
DBTestTailingIterator()
: DBTestBase("db_tailing_iterator_test", /*env_do_fsync=*/true) {}
};
// Runs every DBTestTailingIterator test twice: with the parameter false and
// with it true.
INSTANTIATE_TEST_CASE_P(DBTestTailingIterator, DBTestTailingIterator,
::testing::Bool());
// A tailing iterator created on an empty DB must see a record that is written
// after the iterator was created.
TEST_P(DBTestTailingIterator, TailingIteratorSingle) {
  ReadOptions tail_opts;
  tail_opts.tailing = true;
  if (GetParam()) {
    tail_opts.async_io = true;
  }

  std::unique_ptr<Iterator> tail(db_->NewIterator(tail_opts));

  // Nothing has been written yet.
  tail->SeekToFirst();
  ASSERT_TRUE(!tail->Valid());
  ASSERT_OK(tail->status());

  // add a record and check that iter can see it
  ASSERT_OK(db_->Put(WriteOptions(), "mirko", "fodor"));
  tail->SeekToFirst();
  ASSERT_TRUE(tail->Valid());
  ASSERT_EQ(tail->key().ToString(), "mirko");

  // That record is the only one.
  tail->Next();
  ASSERT_TRUE(!tail->Valid());
}
// Writes 10000 records one at a time and, after each write, checks that an
// existing tailing iterator can seek straight to the key just written.
TEST_P(DBTestTailingIterator, TailingIteratorKeepAdding) {
  CreateAndReopenWithCF({"pikachu"}, CurrentOptions());

  ReadOptions tail_opts;
  tail_opts.tailing = true;
  if (GetParam()) {
    tail_opts.async_io = true;
  }

  std::unique_ptr<Iterator> tail(db_->NewIterator(tail_opts, handles_[1]));
  ASSERT_OK(tail->status());

  const std::string payload(1024, 'a');
  const int record_count = 10000;
  for (int n = 0; n < record_count; ++n) {
    // Keys are the record number, zero-padded to 16 digits.
    char key_buf[32];
    snprintf(key_buf, sizeof(key_buf), "%016d", n);
    const Slice key(key_buf, 16);

    ASSERT_OK(Put(1, key, payload));

    tail->Seek(key);
    ASSERT_TRUE(tail->Valid());
    ASSERT_EQ(tail->key().compare(key), 0);
  }
}
// Uses two tailing iterators over the same column family while keys are being
// added: `iter` seeks to a target just below each new key and must land on
// that key; `itern` advances with Next() and must land on the same key. A
// second phase inserts keys in descending order and repeats the Seek() check.
TEST_P(DBTestTailingIterator, TailingIteratorSeekToNext) {
CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
ReadOptions read_options;
read_options.tailing = true;
if (GetParam()) {
read_options.async_io = true;
}
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options, handles_[1]));
ASSERT_OK(iter->status());
std::unique_ptr<Iterator> itern(db_->NewIterator(read_options, handles_[1]));
ASSERT_OK(itern->status());
std::string value(1024, 'a');
const int num_records = 1000;
// Phase 1: ascending keys "00a0" + zero-padded (i * 5).
for (int i = 1; i < num_records; ++i) {
char buf1[32];
char buf2[32];
snprintf(buf1, sizeof(buf1), "00a0%016d", i * 5);
Slice key(buf1, 20);
ASSERT_OK(Put(1, key, value));
// Flush once per 100 records so data is spread over memtable and files.
if (i % 100 == 99) {
ASSERT_OK(Flush(1));
}
// Seek target is 2 below the key just written, so Seek() must land on it.
snprintf(buf2, sizeof(buf2), "00a0%016d", i * 5 - 2);
Slice target(buf2, 20);
iter->Seek(target);
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(iter->key().compare(key), 0);
// `itern` walks the keys in order: positioned once, then advanced by Next().
if (i == 1) {
itern->SeekToFirst();
} else {
itern->Next();
}
ASSERT_TRUE(itern->Valid());
ASSERT_EQ(itern->key().compare(key), 0);
}
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
// Phase 2: descending keys, starting above the range written in phase 1.
for (int i = 2 * num_records; i > 0; --i) {
char buf1[32];
char buf2[32];
snprintf(buf1, sizeof(buf1), "00a0%016d", i * 5);
Slice key(buf1, 20);
ASSERT_OK(Put(1, key, value));
if (i % 100 == 99) {
ASSERT_OK(Flush(1));
}
snprintf(buf2, sizeof(buf2), "00a0%016d", i * 5 - 2);
Slice target(buf2, 20);
iter->Seek(target);
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(iter->key().compare(key), 0);
}
}
// Tailing iterators with an upper bound ("00b0" + 16 zeros) are exercised
// while keys are written on both sides of that bound. Sync-point callbacks
// call `ForwardIterator::TEST_CheckDeletedIters()` while `file_iters_deleted`
// is set, and record whether the RenewIterators:Null and RenewIterators:Copy
// sync points were reached. Afterwards the DB is reopened without a block
// cache to check that a block-cache-tier read reports Incomplete, and then
// reopened with the default table factory for a descending-insert phase.
TEST_P(DBTestTailingIterator, TailingIteratorTrimSeekToNext) {
  const uint64_t k150KB = 150 * 1024;
  Options options;
  options.write_buffer_size = k150KB;
  options.max_write_buffer_number = 3;
  options.min_write_buffer_number_to_merge = 2;
  options.env = env_;
  CreateAndReopenWithCF({"pikachu"}, options);
  ReadOptions read_options;
  read_options.tailing = true;
  if (GetParam()) {
    read_options.async_io = true;
  }
  // Out-parameters of TEST_CheckDeletedIters(). Initialized so the
  // ASSERT_LE(num_iters, 1) checks below never read an indeterminate value.
  int num_iters = 0;
  int deleted_iters = 0;

  // Upper bound for all iterators: every "00a0..." key is below it, every
  // "00b0..." key written in the loop is above it.
  char bufe[32];
  snprintf(bufe, sizeof(bufe), "00b0%016d", 0);
  Slice keyu(bufe, 20);
  read_options.iterate_upper_bound = &keyu;
  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options, handles_[1]));
  ASSERT_OK(iter->status());
  std::unique_ptr<Iterator> itern(db_->NewIterator(read_options, handles_[1]));
  ASSERT_OK(itern->status());
  std::unique_ptr<Iterator> iterh(db_->NewIterator(read_options, handles_[1]));
  ASSERT_OK(iterh->status());
  std::string value(1024, 'a');
  bool file_iters_deleted = false;
  bool file_iters_renewed_null = false;
  bool file_iters_renewed_copy = false;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "ForwardIterator::SeekInternal:Return", [&](void* arg) {
        ForwardIterator* fiter = reinterpret_cast<ForwardIterator*>(arg);
        ASSERT_TRUE(!file_iters_deleted ||
                    fiter->TEST_CheckDeletedIters(&deleted_iters, &num_iters));
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "ForwardIterator::Next:Return", [&](void* arg) {
        ForwardIterator* fiter = reinterpret_cast<ForwardIterator*>(arg);
        ASSERT_TRUE(!file_iters_deleted ||
                    fiter->TEST_CheckDeletedIters(&deleted_iters, &num_iters));
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "ForwardIterator::RenewIterators:Null",
      [&](void* /*arg*/) { file_iters_renewed_null = true; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "ForwardIterator::RenewIterators:Copy",
      [&](void* /*arg*/) { file_iters_renewed_copy = true; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  const int num_records = 1000;
  for (int i = 1; i < num_records; ++i) {
    char buf1[32];
    char buf2[32];
    char buf3[32];
    char buf4[32];
    snprintf(buf1, sizeof(buf1), "00a0%016d", i * 5);
    snprintf(buf3, sizeof(buf3), "00b0%016d", i * 5);
    Slice key(buf1, 20);
    ASSERT_OK(Put(1, key, value));
    Slice keyn(buf3, 20);
    ASSERT_OK(Put(1, keyn, value));

    if (i % 100 == 99) {
      ASSERT_OK(Flush(1));
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
      if (i == 299) {
        file_iters_deleted = true;
      }
      // Position `iterh` in the middle of the keys written so far and walk
      // it forward to just below the newest key.
      snprintf(buf4, sizeof(buf4), "00a0%016d", i * 5 / 2);
      Slice target(buf4, 20);
      iterh->Seek(target);
      // Check the iterator that was just positioned (the original asserted on
      // `iter`, which left the result of this Seek() unverified).
      ASSERT_TRUE(iterh->Valid());
      for (int j = (i + 1) * 5 / 2; j < i * 5; j += 5) {
        iterh->Next();
        ASSERT_TRUE(iterh->Valid());
      }
      if (i == 299) {
        file_iters_deleted = false;
      }
    }

    file_iters_deleted = true;
    snprintf(buf2, sizeof(buf2), "00a0%016d", i * 5 - 2);
    Slice target(buf2, 20);
    iter->Seek(target);
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(key), 0);
    ASSERT_LE(num_iters, 1);
    if (i == 1) {
      itern->SeekToFirst();
    } else {
      itern->Next();
    }
    ASSERT_TRUE(itern->Valid());
    ASSERT_EQ(itern->key().compare(key), 0);
    ASSERT_LE(num_iters, 1);
    file_iters_deleted = false;
  }
  ASSERT_TRUE(file_iters_renewed_null);
  ASSERT_TRUE(file_iters_renewed_copy);
  // Release the iterators before reopening the DB.
  iter = nullptr;
  itern = nullptr;
  iterh = nullptr;

  // Reopen without a block cache: a block-cache-tier read must then come back
  // Incomplete.
  BlockBasedTableOptions table_options;
  table_options.no_block_cache = true;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  ReopenWithColumnFamilies({"default", "pikachu"}, options);
  read_options.read_tier = kBlockCacheTier;
  std::unique_ptr<Iterator> iteri(db_->NewIterator(read_options, handles_[1]));
  ASSERT_OK(iteri->status());
  char buf5[32];
  snprintf(buf5, sizeof(buf5), "00a0%016d", (num_records / 2) * 5 - 2);
  Slice target1(buf5, 20);
  iteri->Seek(target1);
  ASSERT_TRUE(iteri->status().IsIncomplete());
  iteri = nullptr;

  // Reopen with the default table factory and all read tiers, then insert
  // keys in descending order and check Seek() after each insert.
  read_options.read_tier = kReadAllTier;
  options.table_factory.reset(NewBlockBasedTableFactory());
  ReopenWithColumnFamilies({"default", "pikachu"}, options);
  iter.reset(db_->NewIterator(read_options, handles_[1]));
  ASSERT_OK(iter->status());
  for (int i = 2 * num_records; i > 0; --i) {
    char buf1[32];
    char buf2[32];
    snprintf(buf1, sizeof(buf1), "00a0%016d", i * 5);
    Slice key(buf1, 20);
    ASSERT_OK(Put(1, key, value));

    if (i % 100 == 99) {
      ASSERT_OK(Flush(1));
    }

    snprintf(buf2, sizeof(buf2), "00a0%016d", i * 5 - 2);
    Slice target(buf2, 20);
    iter->Seek(target);
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(key), 0);
  }
}
// Reads a record through a tailing iterator, deletes it, writes and flushes
// 10000 further records, and checks that the same iterator then yields exactly
// those 10000 records.
TEST_P(DBTestTailingIterator, TailingIteratorDeletes) {
  CreateAndReopenWithCF({"pikachu"}, CurrentOptions());

  ReadOptions tail_opts;
  tail_opts.tailing = true;
  if (GetParam()) {
    tail_opts.async_io = true;
  }

  std::unique_ptr<Iterator> tail(db_->NewIterator(tail_opts, handles_[1]));
  ASSERT_OK(tail->status());

  // write a single record, read it using the iterator, then delete it
  ASSERT_OK(Put(1, "0test", "test"));
  tail->SeekToFirst();
  ASSERT_TRUE(tail->Valid());
  ASSERT_EQ(tail->key().ToString(), "0test");
  ASSERT_OK(Delete(1, "0test"));

  // write many more records
  const int record_count = 10000;
  const std::string payload(1024, 'A');
  for (int n = 0; n < record_count; ++n) {
    char key_buf[32];
    snprintf(key_buf, sizeof(key_buf), "1%015d", n);
    const Slice key(key_buf, 16);
    ASSERT_OK(Put(1, key, payload));
  }

  // force a flush to make sure that no records are read from memtable
  ASSERT_OK(Flush(1));

  // skip "0test"
  tail->Next();

  // make sure we can read all new records using the existing iterator
  int seen = 0;
  while (tail->Valid()) {
    tail->Next();
    ++seen;
  }
  ASSERT_EQ(seen, record_count);
}
// With a 2-byte fixed prefix extractor and a hash-skiplist memtable, a tailing
// iterator seeking to "0102" must not surface "0202", which has a different
// prefix.
TEST_P(DBTestTailingIterator, TailingIteratorPrefixSeek) {
  ReadOptions tail_opts;
  tail_opts.tailing = true;
  if (GetParam()) {
    tail_opts.async_io = true;
  }

  Options opts = CurrentOptions();
  opts.create_if_missing = true;
  opts.disable_auto_compactions = true;
  opts.prefix_extractor.reset(NewFixedPrefixTransform(2));
  opts.memtable_factory.reset(NewHashSkipListRepFactory(16));
  opts.allow_concurrent_memtable_write = false;
  DestroyAndReopen(opts);
  CreateAndReopenWithCF({"pikachu"}, opts);

  std::unique_ptr<Iterator> tail(db_->NewIterator(tail_opts, handles_[1]));
  ASSERT_OK(tail->status());

  // "0101" is flushed to a file; "0202" is written afterwards.
  ASSERT_OK(Put(1, "0101", "test"));
  ASSERT_OK(Flush(1));
  ASSERT_OK(Put(1, "0202", "test"));

  // Seek(0102) shouldn't find any records since 0202 has a different prefix
  tail->Seek("0102");
  ASSERT_TRUE(!tail->Valid());

  tail->Seek("0202");
  ASSERT_TRUE(tail->Valid());
  ASSERT_EQ(tail->key().ToString(), "0202");

  tail->Next();
  ASSERT_TRUE(!tail->Valid());
}
// With reads restricted to the block-cache tier, a tailing iterator must
// either be positioned on the entry or report Incomplete, both before and
// after a full compaction.
TEST_P(DBTestTailingIterator, TailingIteratorIncomplete) {
  CreateAndReopenWithCF({"pikachu"}, CurrentOptions());

  ReadOptions tail_opts;
  tail_opts.tailing = true;
  if (GetParam()) {
    tail_opts.async_io = true;
  }
  tail_opts.read_tier = kBlockCacheTier;

  const std::string k("key");
  const std::string v("value");
  ASSERT_OK(db_->Put(WriteOptions(), k, v));

  std::unique_ptr<Iterator> tail(db_->NewIterator(tail_opts));
  ASSERT_OK(tail->status());

  tail->SeekToFirst();
  // we either see the entry or it's not in cache
  ASSERT_TRUE(tail->Valid() || tail->status().IsIncomplete());

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  tail->SeekToFirst();
  // should still be true after compaction
  ASSERT_TRUE(tail->Valid() || tail->status().IsIncomplete());
}
// After seeking between two keys, seeking again to the key the iterator
// landed on must leave it on that same key.
TEST_P(DBTestTailingIterator, TailingIteratorSeekToSame) {
  Options opts = CurrentOptions();
  opts.compaction_style = kCompactionStyleUniversal;
  opts.write_buffer_size = 1000;
  CreateAndReopenWithCF({"pikachu"}, opts);

  ReadOptions tail_opts;
  tail_opts.tailing = true;
  if (GetParam()) {
    tail_opts.async_io = true;
  }

  const int kRowCount = 10000;

  // Write rows with keys 00000, 00002, 00004 etc.
  for (int row = 0; row < kRowCount; ++row) {
    char key_buf[100];
    snprintf(key_buf, sizeof(key_buf), "%05d", 2 * row);
    const std::string row_key(key_buf);
    const std::string row_value("value");
    ASSERT_OK(db_->Put(WriteOptions(), row_key, row_value));
  }

  std::unique_ptr<Iterator> tail(db_->NewIterator(tail_opts));
  ASSERT_OK(tail->status());

  // Seek to 00001. We expect to find 00002.
  const std::string start_key = "00001";
  tail->Seek(start_key);
  ASSERT_TRUE(tail->Valid());

  const std::string landed_on = tail->key().ToString();
  ASSERT_EQ("00002", landed_on);

  // Now seek to the same key. The iterator should remain in the same
  // position.
  tail->Seek(landed_on);
  ASSERT_TRUE(tail->Valid());
  ASSERT_EQ(landed_on, tail->key().ToString());
}
// Sets iterate_upper_bound and verifies that ForwardIterator doesn't call
// Seek() on immutable iterators when target key is >= prev_key and all
// iterators, including the memtable iterator, are over the upper bound.
// With the test parameter set (async_io enabled) exactly one immutable seek is
// expected instead of zero.
TEST_P(DBTestTailingIterator, TailingIteratorUpperBound) {
CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
// NOTE(review): a length of 3 for the 2-character literal "20" also takes in
// the terminating NUL byte -- confirm this is intended.
const Slice upper_bound("20", 3);
ReadOptions read_options;
read_options.tailing = true;
read_options.iterate_upper_bound = &upper_bound;
if (GetParam()) {
read_options.async_io = true;
}
ASSERT_OK(Put(1, "11", "11"));
ASSERT_OK(Put(1, "12", "12"));
ASSERT_OK(Put(1, "22", "22"));
ASSERT_OK(Flush(1)); // flush all those keys to an immutable SST file
// Add another key to the memtable.
ASSERT_OK(Put(1, "21", "21"));
std::unique_ptr<Iterator> it(db_->NewIterator(read_options, handles_[1]));
ASSERT_OK(it->status());
it->Seek("12");
ASSERT_TRUE(it->Valid());
ASSERT_EQ("12", it->key().ToString());
it->Next();
// Not valid since "21" is over the upper bound.
ASSERT_FALSE(it->Valid());
ASSERT_OK(it->status());
// This keeps track of the number of times NeedToSeekImmutable() was true.
int immutable_seeks = 0;
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"ForwardIterator::SeekInternal:Immutable",
[&](void* /*arg*/) { ++immutable_seeks; });
// Seek to 13. This should not require any immutable seeks.
// Sync-point processing is enabled only around this one Seek() so that the
// counter reflects this call alone.
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
it->Seek("13");
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
ASSERT_FALSE(it->Valid());
ASSERT_OK(it->status());
// The expected count depends on the test parameter (async_io on or off).
if (GetParam()) {
ASSERT_EQ(1, immutable_seeks);
} else {
ASSERT_EQ(0, immutable_seeks);
}
}
// Regression test for https://github.com/facebook/rocksdb/issues/1372.
//
// Target layout:
//   level 1: [20, 25] [35, 40]
//   level 2: [10 - 15] [45 - 50]
//   level 3: [20, 30, 40]
//
// The tailing iterator used to skip a key when a lower level had a gap: a key
// lying between the largest key of file n and the smallest key of file n+1
// was lost if both files fit in that gap. Here that range is 25 < key < 35.
TEST_P(DBTestTailingIterator, TailingIteratorGap) {
  constexpr int kCf = 1;  // index of the "pikachu" column family

  CreateAndReopenWithCF({"pikachu"}, CurrentOptions());

  ReadOptions read_options;
  read_options.tailing = true;
  if (GetParam()) {
    read_options.async_io = true;
  }

  // Level 3: one file with 20, 30, 40.
  ASSERT_OK(Put(kCf, "20", "20"));
  ASSERT_OK(Put(kCf, "30", "30"));
  ASSERT_OK(Put(kCf, "40", "40"));
  ASSERT_OK(Flush(kCf));
  MoveFilesToLevel(3, kCf);

  // Level 2: two files, [10, 15] and [45, 50].
  ASSERT_OK(Put(kCf, "10", "10"));
  ASSERT_OK(Put(kCf, "15", "15"));
  ASSERT_OK(Flush(kCf));
  ASSERT_OK(Put(kCf, "45", "45"));
  ASSERT_OK(Put(kCf, "50", "50"));
  ASSERT_OK(Flush(kCf));
  MoveFilesToLevel(2, kCf);

  // Level 1: two files, [20, 25] and [35, 40].
  ASSERT_OK(Put(kCf, "20", "20"));
  ASSERT_OK(Put(kCf, "25", "25"));
  ASSERT_OK(Flush(kCf));
  ASSERT_OK(Put(kCf, "35", "35"));
  ASSERT_OK(Put(kCf, "40", "40"));
  ASSERT_OK(Flush(kCf));
  MoveFilesToLevel(1, kCf);

  // The metadata is fetched but not inspected by any assertion.
  ColumnFamilyMetaData cf_meta;
  db_->GetColumnFamilyMetaData(handles_[kCf], &cf_meta);

  std::unique_ptr<Iterator> tailing_iter(
      db_->NewIterator(read_options, handles_[kCf]));

  // "30" sits in the level-1 gap and must still be found, followed by the
  // keys after it.
  tailing_iter->Seek("30");
  ASSERT_TRUE(tailing_iter->Valid());
  ASSERT_EQ("30", tailing_iter->key().ToString());

  tailing_iter->Next();
  ASSERT_TRUE(tailing_iter->Valid());
  ASSERT_EQ("35", tailing_iter->key().ToString());

  tailing_iter->Next();
  ASSERT_TRUE(tailing_iter->Valid());
  ASSERT_EQ("40", tailing_iter->key().ToString());

  ASSERT_OK(tailing_iter->status());
}
// Seeks a tailing iterator with an upper bound when one L0 file lies below
// the bound and another lies entirely above it; the in-bound key must be
// returned.
TEST_P(DBTestTailingIterator, SeekWithUpperBoundBug) {
  // The explicit length of 3 includes the literal's terminating NUL byte.
  const Slice upper_bound("cc", 3);

  ReadOptions read_options;
  read_options.tailing = true;
  read_options.iterate_upper_bound = &upper_bound;
  if (GetParam()) {
    read_options.async_io = true;
  }

  // First L0 file: a key below the upper bound.
  ASSERT_OK(db_->Put(WriteOptions(), "aa", "SEEN"));
  ASSERT_OK(Flush());

  // Second L0 file: a key above the upper bound.
  ASSERT_OK(db_->Put(WriteOptions(), "zz", "NOT-SEEN"));
  ASSERT_OK(Flush());

  std::unique_ptr<Iterator> tailing_iter(db_->NewIterator(read_options));
  ASSERT_OK(tailing_iter->status());

  tailing_iter->Seek("aa");
  ASSERT_TRUE(tailing_iter->Valid());
  ASSERT_EQ(tailing_iter->key().ToString(), "aa");
}
// Same setup as the Seek() variant, driven through SeekToFirst(): the
// iterator must find "aa", become invalid after Next(), and find "aa" again
// on a second SeekToFirst().
TEST_P(DBTestTailingIterator, SeekToFirstWithUpperBoundBug) {
  // The explicit length of 3 includes the literal's terminating NUL byte.
  const Slice upper_bound("cc", 3);

  ReadOptions read_options;
  read_options.tailing = true;
  read_options.iterate_upper_bound = &upper_bound;
  if (GetParam()) {
    read_options.async_io = true;
  }

  // First L0 file: a key below the upper bound.
  ASSERT_OK(db_->Put(WriteOptions(), "aa", "SEEN"));
  ASSERT_OK(Flush());

  // Second L0 file: a key above the upper bound.
  ASSERT_OK(db_->Put(WriteOptions(), "zz", "NOT-SEEN"));
  ASSERT_OK(Flush());

  std::unique_ptr<Iterator> tailing_iter(db_->NewIterator(read_options));
  ASSERT_OK(tailing_iter->status());

  tailing_iter->SeekToFirst();
  ASSERT_TRUE(tailing_iter->Valid());
  ASSERT_EQ(tailing_iter->key().ToString(), "aa");

  // Nothing else lies below the bound.
  tailing_iter->Next();
  ASSERT_FALSE(tailing_iter->Valid());

  // Re-seeking after running off the end must find the first key again.
  tailing_iter->SeekToFirst();
  ASSERT_TRUE(tailing_iter->Valid());
  ASSERT_EQ(tailing_iter->key().ToString(), "aa");
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack-trace handler, passes the
// command line to GoogleTest, and returns the result of running all tests.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,353 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "db/compaction/compaction.h"
#include "db/db_test_util.h"
#include "port/stack_trace.h"
#include "test_util/testutil.h"
namespace ROCKSDB_NAMESPACE {
namespace {
// Builds a user key for `key`: the value is encoded with PutFixed64 and the
// encoded bytes are returned in reverse order.
// NOTE(review): presumably the reversal makes bytewise key order follow
// numeric order — confirm against PutFixed64's byte order.
std::string Key1(uint64_t key) {
  std::string encoded;
  PutFixed64(&encoded, key);
  return std::string(encoded.rbegin(), encoded.rend());
}
// Returns `ts` encoded with PutFixed64; the tests pass the result wherever a
// user timestamp is required.
std::string Timestamp(uint64_t ts) {
  std::string encoded_ts;
  PutFixed64(&encoded_ts, ts);
  return encoded_ts;
}
} // anonymous namespace
class TimestampCompatibleCompactionTest : public DBTestBase {
public:
TimestampCompatibleCompactionTest()
: DBTestBase("ts_compatible_compaction_test", /*env_do_fsync=*/true) {}
std::string Get(const std::string& key, uint64_t ts) {
ReadOptions read_opts;
std::string ts_str = Timestamp(ts);
Slice ts_slice = ts_str;
read_opts.timestamp = &ts_slice;
std::string value;
Status s = db_->Get(read_opts, key, &value);
if (s.IsNotFound()) {
value.assign("NOT_FOUND");
} else if (!s.ok()) {
value.assign(s.ToString());
}
return value;
}
};
// Writes three L0 files that all contain versions of user key 99, lets level
// compaction pick them together, and then checks that reads at three saved
// timestamps return the version written at or before each timestamp.
TEST_F(TimestampCompatibleCompactionTest, UserKeyCrossFileBoundary) {
  constexpr size_t kNumKeysPerFile = 101;

  Options options = CurrentOptions();
  options.env = env_;
  options.compaction_style = kCompactionStyleLevel;
  options.comparator = test::BytewiseComparatorWithU64TsWrapper();
  options.level0_file_num_compaction_trigger = 3;
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  DestroyAndReopen(options);

  // Inspect the compaction picked by the level compaction picker.
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
  SyncPoint::GetInstance()->SetCallBack(
      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
        const auto* picked = static_cast<Compaction*>(arg);
        ASSERT_NE(nullptr, picked);
        ASSERT_EQ(0, picked->start_level());
        ASSERT_EQ(1, picked->num_input_levels());
        // Check that all 3 L0 ssts are picked for level compaction.
        ASSERT_EQ(3, picked->num_input_files(0));
      });
  SyncPoint::GetInstance()->EnableProcessing();

  WriteOptions write_opts;
  uint64_t ts = 100;
  uint64_t key = 0;

  // First L0 file: keys 0, 1, ..., 99 with ts from 100 to 199.
  for (; key < kNumKeysPerFile - 1; ++key, ++ts) {
    const std::string ts_str = Timestamp(ts);
    ASSERT_OK(
        db_->Put(write_opts, Key1(key), ts_str, "foo_" + std::to_string(key)));
  }
  ASSERT_OK(Flush());
  const uint64_t saved_read_ts1 = ts++;

  // Second L0 file: four newer versions of key 99 only.
  key = 99;
  for (int version = 0; version < 4; ++version, ++ts) {
    const std::string ts_str = Timestamp(ts);
    ASSERT_OK(
        db_->Put(write_opts, Key1(key), ts_str, "bar_" + std::to_string(key)));
  }
  ASSERT_OK(Flush());
  const uint64_t saved_read_ts2 = ts++;

  // Third L0 file: keys 99, 100, 101, ..., 150.
  for (; key <= 150; ++key, ++ts) {
    const std::string ts_str = Timestamp(ts);
    ASSERT_OK(
        db_->Put(write_opts, Key1(key), ts_str, "foo1_" + std::to_string(key)));
  }
  ASSERT_OK(Flush());

  // Wait for compaction to finish.
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // Each saved timestamp must still see the version that was current when it
  // was taken; the latest timestamp sees the newest version.
  const uint64_t read_ts = ts;
  ASSERT_EQ("foo_99", Get(Key1(99), saved_read_ts1));
  ASSERT_EQ("bar_99", Get(Key1(99), saved_read_ts2));
  ASSERT_EQ("foo1_99", Get(Key1(99), read_ts));

  SyncPoint::GetInstance()->ClearAllCallBacks();
  SyncPoint::GetInstance()->DisableProcessing();
}
// Writes 1000 timestamped keys, runs a manual L0 -> L1 compaction with
// max_subcompactions = 3, checks via statistics that more than one
// subcompaction was scheduled, and verifies every key is still readable.
TEST_F(TimestampCompatibleCompactionTest, MultipleSubCompactions) {
  Options options = CurrentOptions();
  options.env = env_;
  options.compaction_style = kCompactionStyleUniversal;
  options.comparator = test::BytewiseComparatorWithU64TsWrapper();
  options.level0_file_num_compaction_trigger = 3;
  options.max_subcompactions = 3;
  options.target_file_size_base = 1024;
  options.statistics = CreateDBStatistics();
  DestroyAndReopen(options);

  WriteOptions write_opts;
  uint64_t ts = 100;
  uint64_t key = 0;

  // Write keys 0, 1, ..., 499 with ts from 100 to 599.
  while (key <= 499) {
    const std::string ts_str = Timestamp(ts);
    ASSERT_OK(db_->Put(write_opts, Key1(key), ts_str,
                       "foo_" + std::to_string(key)));
    ++key;
    ++ts;
  }

  // Write keys 500, ..., 999 with ts from 600 to 1099, then flush.
  while (key <= 999) {
    const std::string ts_str = Timestamp(ts);
    ASSERT_OK(db_->Put(write_opts, Key1(key), ts_str,
                       "foo_" + std::to_string(key)));
    ++key;
    ++ts;
  }
  ASSERT_OK(Flush());

  // Run a manual compaction from level 0 to level 1 over the whole key range.
  ASSERT_OK(dbfull()->RunManualCompaction(
      static_cast_with_check<ColumnFamilyHandleImpl>(
          db_->DefaultColumnFamily())
          ->cfd(),
      0 /* input_level */, 1 /* output_level */, CompactRangeOptions(),
      nullptr /* begin */, nullptr /* end */, true /* exclusive */,
      true /* disallow_trivial_move */,
      std::numeric_limits<uint64_t>::max() /* max_file_num_to_ignore */,
      "" /*trim_ts*/));

  // Check stats to make sure multiple subcompactions were scheduled for
  // boundaries not to be nullptr.
  HistogramData num_sub_compactions;
  options.statistics->histogramData(NUM_SUBCOMPACTIONS_SCHEDULED,
                                    &num_sub_compactions);
  ASSERT_GT(num_sub_compactions.sum, 1);

  // Every key must be readable at the latest timestamp.
  for (key = 0; key <= 999; ++key) {
    ASSERT_EQ("foo_" + std::to_string(key), Get(Key1(key), ts));
  }
}
// SstPartitioner used by the tests below: it requests a partition on every
// ShouldPartition() call and never permits a trivial move.
class TestFilePartitioner : public SstPartitioner {
 public:
  explicit TestFilePartitioner() = default;
  ~TestFilePartitioner() override = default;

  const char* Name() const override { return "TestFilePartitioner"; }

  // Always asks for a partition, regardless of the request.
  PartitionerResult ShouldPartition(
      const PartitionerRequest& /*request*/) override {
    return PartitionerResult::kRequired;
  }

  // Trivial moves are always refused, whatever the key range.
  bool CanDoTrivialMove(const Slice& /*smallest_user_key*/,
                        const Slice& /*largest_user_key*/) override {
    return false;
  }
};
// Factory that hands out a fresh TestFilePartitioner for every request.
class TestFilePartitionerFactory : public SstPartitionerFactory {
 public:
  explicit TestFilePartitionerFactory() = default;

  // The partitioning context is ignored; every partitioner behaves the same.
  std::unique_ptr<SstPartitioner> CreatePartitioner(
      const SstPartitioner::Context& /*context*/) const override {
    return std::make_unique<TestFilePartitioner>();
  }

  const char* Name() const override { return "TestFilePartitionerFactory"; }
};
// Creates kNumFiles L0 files that all hold versions of one user key, asks
// CompactFiles() to compact only the middle file, and checks that the older
// L0 files were included in the compaction's inputs as well.
TEST_F(TimestampCompatibleCompactionTest, CompactFilesRangeCheckL0) {
  Options options = CurrentOptions();
  options.env = env_;
  options.sst_partitioner_factory =
      std::make_shared<TestFilePartitionerFactory>();
  options.comparator = test::BytewiseComparatorWithU64TsWrapper();
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);

  constexpr int kNumFiles = 10;
  constexpr int kKeysPerFile = 2;
  const std::string user_key = "foo";
  constexpr uint64_t start_ts = 10000;
  uint64_t cur_ts = start_ts;

  // Each flush produces one L0 file with kKeysPerFile versions of user_key,
  // each at a strictly increasing timestamp.
  for (int k = 0; k < kNumFiles; ++k) {
    for (int i = 0; i < kKeysPerFile; ++i) {
      ASSERT_OK(db_->Put(WriteOptions(), user_key, Timestamp(cur_ts),
                         "v" + std::to_string(i)));
      ++cur_ts;
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
  }

  std::vector<std::string> input_files{};
  {
    // Collect the names of all table files in the DB directory.
    std::vector<std::string> files;
    ASSERT_OK(env_->GetChildren(dbname_, &files));
    for (const auto& f : files) {
      uint64_t file_num = 0;
      FileType file_type = FileType::kWalFile;
      if (!ParseFileName(f, &file_num, &file_type) ||
          file_type != FileType::kTableFile) {
        continue;
      }
      input_files.emplace_back(f);
    }
    // sorting here by name, which also happens to sort by generation date.
    std::sort(input_files.begin(), input_files.end());
    // Use a test assertion rather than assert(): it is still checked when
    // NDEBUG is defined, and it guards the indexing below against an
    // unexpectedly empty list.
    ASSERT_EQ(static_cast<size_t>(kNumFiles), input_files.size());
    // Keep only the middle file as the explicit compaction input.
    std::vector<std::string> tmp;
    tmp.emplace_back(input_files[input_files.size() / 2]);
    input_files.swap(tmp);
  }

  {
    std::vector<std::string> output_file_names;
    CompactionJobInfo compaction_job_info;
    ASSERT_OK(db_->CompactFiles(CompactionOptions(), input_files,
                                /*output_level=*/1, /*output_path_id=*/-1,
                                &output_file_names, &compaction_job_info));
    // We expect the L0 files older than the original provided input were all
    // included in the compaction.
    ASSERT_EQ(static_cast<size_t>(kNumFiles / 2 + 1),
              compaction_job_info.input_files.size());
  }
}
// Fills L1 through automatic compaction, adds two more L0 files, then calls
// CompactFiles() with both L0 files plus a single L1 file and checks the
// number of output files and of input files recorded for the job.
TEST_F(TimestampCompatibleCompactionTest, CompactFilesRangeCheckL1) {
  constexpr int kNumFiles = 4;
  constexpr int kKeysPerFile = 2;
  constexpr uint64_t start_ts = 10000;
  const std::string user_key = "foo";

  Options options = CurrentOptions();
  options.env = env_;
  options.sst_partitioner_factory =
      std::make_shared<TestFilePartitionerFactory>();
  options.comparator = test::BytewiseComparatorWithU64TsWrapper();
  options.level0_file_num_compaction_trigger = kNumFiles;
  DestroyAndReopen(options);

  uint64_t cur_ts = start_ts;

  // Generate the initial files; the kNumFiles-th flush reaches the L0
  // compaction trigger.
  for (int file_idx = 0; file_idx < kNumFiles; ++file_idx) {
    for (int key_idx = 0; key_idx < kKeysPerFile; ++key_idx) {
      ASSERT_OK(db_->Put(WriteOptions(), user_key, Timestamp(cur_ts),
                         "v" + std::to_string(key_idx)));
      ++cur_ts;
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // After the compaction L0 is empty and L1 holds one file per written entry.
  ASSERT_EQ(0, NumTableFilesAtLevel(/*level=*/0, /*cf=*/0));
  ASSERT_EQ(kNumFiles * kKeysPerFile,
            NumTableFilesAtLevel(/*level=*/1, /*cf=*/0));

  // Add two fresh L0 files on top.
  constexpr int additional_l0s = 2;
  for (int l0_idx = 0; l0_idx < additional_l0s; ++l0_idx, ++cur_ts) {
    ASSERT_OK(db_->Put(WriteOptions(), user_key, Timestamp(cur_ts), "v"));
    ASSERT_OK(db_->Flush(FlushOptions()));
  }
  ASSERT_EQ(additional_l0s, NumTableFilesAtLevel(/*level=*/0, /*cf=*/0));

  // Pick every L0 file and only the first non-L0 file reported.
  std::vector<std::string> inputs;
  {
    std::vector<LiveFileMetaData> live_files;
    db_->GetLiveFilesMetaData(&live_files);
    bool included_one_l1 = false;
    for (const auto& file_meta : live_files) {
      if (file_meta.level == 0) {
        inputs.emplace_back(file_meta.relative_filename);
        continue;
      }
      if (!included_one_l1) {
        inputs.emplace_back(file_meta.relative_filename);
        included_one_l1 = true;
      }
    }
  }
  ASSERT_EQ(static_cast<size_t>(3), inputs.size());

  {
    std::vector<std::string> output_file_names;
    CompactionJobInfo compaction_job_info;
    ASSERT_OK(db_->CompactFiles(CompactionOptions(), inputs, /*output_level=*/1,
                                /*output_path_id=*/-1, &output_file_names,
                                &compaction_job_info));
    // The job is expected to have pulled in all L1 files plus both L0 files.
    ASSERT_EQ(kNumFiles * kKeysPerFile + 2, output_file_names.size());
    ASSERT_EQ(kNumFiles * kKeysPerFile + 2,
              static_cast<int>(compaction_job_info.input_files.size()));
  }
}
// Flushes a file that contains only a range tombstone and then forces a
// bottommost-level compaction with full_history_ts_low above the tombstone's
// timestamp; the compaction must succeed.
TEST_F(TimestampCompatibleCompactionTest, EmptyCompactionOutput) {
  Options options = CurrentOptions();
  options.env = env_;
  options.comparator = test::BytewiseComparatorWithU64TsWrapper();
  DestroyAndReopen(options);

  // Range tombstone from "k1" to "k3" written at timestamp 1.
  const std::string tombstone_ts = Timestamp(1);
  WriteOptions wopts;
  ASSERT_OK(db_->DeleteRange(wopts, db_->DefaultColumnFamily(), "k1", "k3",
                             tombstone_ts));
  ASSERT_OK(Flush());

  // With the history cutoff at timestamp 3 the range tombstone will be
  // dropped during compaction.
  const std::string history_low_ts = Timestamp(3);
  Slice history_low = history_low_ts;
  CompactRangeOptions cro;
  cro.full_history_ts_low = &history_low;
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack-trace handler, passes the
// command line to GoogleTest, and returns the result of running all tests.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

@ -1,860 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "db/db_test_util.h"
#include "db/write_thread.h"
#include "port/stack_trace.h"
namespace ROCKSDB_NAMESPACE {
// Fixture for WriteBufferManager tests, parameterized on a bool that each
// test copies into cost_cache_ to decide whether the WriteBufferManager is
// constructed with a cache.
class DBWriteBufferManagerTest : public DBTestBase,
                                 public testing::WithParamInterface<bool> {
 public:
  DBWriteBufferManagerTest()
      : DBTestBase("db_write_buffer_manager_test", /*env_do_fsync=*/false) {}

  // Set from GetParam() at the start of each test. Initialized so that a test
  // which forgets to assign it reads a defined value.
  bool cost_cache_ = false;
};
// Writes to several column families that share one WriteBufferManager until
// its buffer size has been exceeded, then checks that a further write still
// completes.
TEST_P(DBWriteBufferManagerTest, SharedBufferAcrossCFs1) {
  Options options = CurrentOptions();
  options.arena_block_size = 4096;
  options.write_buffer_size = 500000;  // this is never hit
  std::shared_ptr<Cache> cache = NewLRUCache(4 * 1024 * 1024, 2);
  ASSERT_LT(cache->GetUsage(), 256 * 1024);

  // The test parameter selects whether the manager is given the cache.
  cost_cache_ = GetParam();
  if (cost_cache_) {
    options.write_buffer_manager.reset(
        new WriteBufferManager(100000, cache, true));
  } else {
    options.write_buffer_manager.reset(
        new WriteBufferManager(100000, nullptr, true));
  }

  WriteOptions wo;
  wo.disableWAL = true;

  CreateAndReopenWithCF({"cf1", "cf2", "cf3"}, options);

  // Flush results are asserted so that a failed flush is reported where it
  // happens rather than being silently dropped.
  ASSERT_OK(Put(3, Key(1), DummyString(1), wo));
  ASSERT_OK(Flush(3));
  ASSERT_OK(Put(3, Key(1), DummyString(1), wo));
  ASSERT_OK(Put(0, Key(1), DummyString(1), wo));
  ASSERT_OK(Flush(0));

  // Write to "Default", "cf2" and "cf3".
  ASSERT_OK(Put(3, Key(1), DummyString(30000), wo));
  ASSERT_OK(Put(0, Key(1), DummyString(40000), wo));
  ASSERT_OK(Put(2, Key(1), DummyString(1), wo));
  ASSERT_OK(Put(3, Key(2), DummyString(40000), wo));

  // WriteBufferManager::buffer_size_ has been exceeded once the previous
  // write completes. This write makes sure writes still go through and, if a
  // stall was in effect, that it ends.
  ASSERT_OK(Put(0, Key(2), DummyString(1), wo));
}
// Tests that, for a single DB, multiple writer threads get blocked when the
// WriteBufferManager exceeds buffer_size_ and a flush is waiting to be
// finished.
TEST_P(DBWriteBufferManagerTest, SharedWriteBufferAcrossCFs2) {
  Options options = CurrentOptions();
  options.arena_block_size = 4096;
  options.write_buffer_size = 500000;  // this is never hit
  std::shared_ptr<Cache> cache = NewLRUCache(4 * 1024 * 1024, 2);
  ASSERT_LT(cache->GetUsage(), 256 * 1024);

  // The test parameter selects whether the manager is given the cache.
  cost_cache_ = GetParam();
  if (cost_cache_) {
    options.write_buffer_manager.reset(
        new WriteBufferManager(100000, cache, true));
  } else {
    options.write_buffer_manager.reset(
        new WriteBufferManager(100000, nullptr, true));
  }

  WriteOptions wo;
  wo.disableWAL = true;

  CreateAndReopenWithCF({"cf1", "cf2", "cf3"}, options);

  // Flush results are asserted so that a failed flush is not silently lost.
  ASSERT_OK(Put(3, Key(1), DummyString(1), wo));
  ASSERT_OK(Flush(3));
  ASSERT_OK(Put(3, Key(1), DummyString(1), wo));
  ASSERT_OK(Put(0, Key(1), DummyString(1), wo));
  ASSERT_OK(Flush(0));

  // Write to "Default", "cf2" and "cf3". No flush will be triggered.
  ASSERT_OK(Put(3, Key(1), DummyString(30000), wo));
  ASSERT_OK(Put(0, Key(1), DummyString(40000), wo));
  ASSERT_OK(Put(2, Key(1), DummyString(1), wo));
  ASSERT_OK(Put(3, Key(2), DummyString(40000), wo));
  // WriteBufferManager::buffer_size_ has been exceeded once the previous
  // write completes.

  std::unordered_set<WriteThread::Writer*> w_set;
  std::vector<port::Thread> threads;
  int wait_count_db = 0;
  int num_writers = 4;
  InstrumentedMutex mutex;
  InstrumentedCondVar cv(&mutex);
  std::atomic<int> thread_num(0);

  // The background flush may only start after the test's own sync point has
  // been reached.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0",
        "DBImpl::BackgroundCallFlush:start"}});

  // Count each time the "WBMStallInterface::BlockDB" sync point fires and
  // wake the main thread waiting on `cv`.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "WBMStallInterface::BlockDB", [&](void*) {
        InstrumentedMutexLock lock(&mutex);
        wait_count_db++;
        cv.SignalAll();
      });

  // Record every writer that reaches the write-stall wait.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "WriteThread::WriteStall::Wait", [&](void* arg) {
        InstrumentedMutexLock lock(&mutex);
        WriteThread::Writer* w = reinterpret_cast<WriteThread::Writer*>(arg);
        w_set.insert(w);
        // Allow the flush to continue if all writer threads are blocked.
        if (w_set.size() == static_cast<size_t>(num_writers)) {
          TEST_SYNC_POINT(
              "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0");
        }
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // Accumulated success of all writer threads; guarded by `mutex`.
  bool s = true;

  // Each writer puts a distinct key into column family `cf`.
  std::function<void(int)> writer = [&](int cf) {
    int a = thread_num.fetch_add(1);
    std::string key = "foo" + std::to_string(a);
    Status tmp = Put(cf, Slice(key), DummyString(1), wo);
    InstrumentedMutexLock lock(&mutex);
    s = s && tmp.ok();
  };

  // Flow:
  // main_writer thread will write but will be blocked (as Flush will be on
  // hold, buffer_size_ has been exceeded, thus a stall will be in effect).
  //  |
  //  |
  //  multiple writer threads will be created to write across multiple columns
  //  and they will be blocked.
  //  |
  //  |
  //  Last writer thread will write and when it is blocked it will signal Flush
  //  to continue to clear the stall.
  threads.emplace_back(writer, 1);

  // Wait until the first thread (main_writer) writing to the DB is blocked and
  // then create the multiple writers which will be blocked from getting added
  // to the queue because the stall is in effect.
  {
    InstrumentedMutexLock lock(&mutex);
    while (wait_count_db != 1) {
      cv.Wait();
    }
  }
  for (int i = 0; i < num_writers; i++) {
    threads.emplace_back(writer, i % 4);
  }
  for (auto& t : threads) {
    t.join();
  }

  ASSERT_TRUE(s);
  // Number of DBs blocked.
  ASSERT_EQ(wait_count_db, 1);
  // Number of Writer threads blocked.
  ASSERT_EQ(w_set.size(), static_cast<size_t>(num_writers));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
// Tests that multiple DBs sharing one WriteBufferManager get blocked when the
// manager's limit is exceeded and a flush is waiting to be finished while the
// DBs try to write in the meantime.
TEST_P(DBWriteBufferManagerTest, SharedWriteBufferLimitAcrossDB) {
std::vector<std::string> dbnames;
std::vector<DB*> dbs;
int num_dbs = 3;
// Reserve one (still unopened) slot and one path per additional DB.
for (int i = 0; i < num_dbs; i++) {
dbs.push_back(nullptr);
dbnames.push_back(
test::PerThreadDBPath("db_shared_wb_db" + std::to_string(i)));
}
Options options = CurrentOptions();
options.arena_block_size = 4096;
options.write_buffer_size = 500000; // this is never hit
std::shared_ptr<Cache> cache = NewLRUCache(4 * 1024 * 1024, 2);
ASSERT_LT(cache->GetUsage(), 256 * 1024);
// The test parameter selects whether the manager is given the cache.
cost_cache_ = GetParam();
if (cost_cache_) {
options.write_buffer_manager.reset(
new WriteBufferManager(100000, cache, true));
} else {
options.write_buffer_manager.reset(
new WriteBufferManager(100000, nullptr, true));
}
CreateAndReopenWithCF({"cf1", "cf2"}, options);
// Open the additional DBs with the same options, so they share the
// write_buffer_manager configured above with db_.
for (int i = 0; i < num_dbs; i++) {
ASSERT_OK(DestroyDB(dbnames[i], options));
ASSERT_OK(DB::Open(options, dbnames[i], &(dbs[i])));
}
WriteOptions wo;
wo.disableWAL = true;
for (int i = 0; i < num_dbs; i++) {
ASSERT_OK(dbs[i]->Put(wo, Key(1), DummyString(20000)));
}
// Insert to db_.
ASSERT_OK(Put(0, Key(1), DummyString(30000), wo));
// WriteBufferManager Limit exceeded.
std::vector<port::Thread> threads;
int wait_count_db = 0;
InstrumentedMutex mutex;
InstrumentedCondVar cv(&mutex);
// The background flush may only start after the test's own sync point has
// been reached.
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
{{"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0",
"DBImpl::BackgroundCallFlush:start"}});
// Count each time the "WBMStallInterface::BlockDB" sync point fires and wake
// the main thread waiting on `cv`.
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"WBMStallInterface::BlockDB", [&](void*) {
{
InstrumentedMutexLock lock(&mutex);
wait_count_db++;
cv.Signal();
// Since this is the last DB, signal Flush to continue.
if (wait_count_db == num_dbs + 1) {
TEST_SYNC_POINT(
"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0");
}
}
});
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
// Accumulated success of all writer threads; guarded by `mutex`.
bool s = true;
// Write to DB.
std::function<void(DB*)> write_db = [&](DB* db) {
Status tmp = db->Put(wo, Key(3), DummyString(1));
InstrumentedMutexLock lock(&mutex);
s = s && tmp.ok();
};
// Flow:
// db_ will write and will be blocked (as Flush will be on hold and will
// create a stall in effect).
// |
// multiple dbs writers will be created to write to that db and they will be
// blocked.
// |
// |
// Last writer will write and when it is blocked it will signal Flush to
// continue to clear the stall.
threads.emplace_back(write_db, db_);
// Wait until the first DB is blocked and then create the multiple writers for
// different DBs which will be blocked from getting added to the queue because
// the stall is in effect.
{
InstrumentedMutexLock lock(&mutex);
while (wait_count_db != 1) {
cv.Wait();
}
}
for (int i = 0; i < num_dbs; i++) {
threads.emplace_back(write_db, dbs[i]);
}
for (auto& t : threads) {
t.join();
}
ASSERT_TRUE(s);
// db_ plus each of the additional DBs must have been counted exactly once.
ASSERT_EQ(num_dbs + 1, wait_count_db);
// Clean up DBs.
for (int i = 0; i < num_dbs; i++) {
ASSERT_OK(dbs[i]->Close());
ASSERT_OK(DestroyDB(dbnames[i], options));
delete dbs[i];
}
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
// Test multiple threads writing across multiple DBs and multiple columns get
// blocked when stall by WriteBufferManager is in effect.
TEST_P(DBWriteBufferManagerTest, SharedWriteBufferLimitAcrossDB1) {
std::vector<std::string> dbnames;
std::vector<DB*> dbs;
int num_dbs = 3;
for (int i = 0; i < num_dbs; i++) {
dbs.push_back(nullptr);
dbnames.push_back(
test::PerThreadDBPath("db_shared_wb_db" + std::to_string(i)));
}
Options options = CurrentOptions();
options.arena_block_size = 4096;
options.write_buffer_size = 500000; // this is never hit
std::shared_ptr<Cache> cache = NewLRUCache(4 * 1024 * 1024, 2);
ASSERT_LT(cache->GetUsage(), 256 * 1024);
cost_cache_ = GetParam();
if (cost_cache_) {
options.write_buffer_manager.reset(
new WriteBufferManager(100000, cache, true));
} else {
options.write_buffer_manager.reset(
new WriteBufferManager(100000, nullptr, true));
}
CreateAndReopenWithCF({"cf1", "cf2"}, options);
for (int i = 0; i < num_dbs; i++) {
ASSERT_OK(DestroyDB(dbnames[i], options));
ASSERT_OK(DB::Open(options, dbnames[i], &(dbs[i])));
}
WriteOptions wo;
wo.disableWAL = true;
for (int i = 0; i < num_dbs; i++) {
ASSERT_OK(dbs[i]->Put(wo, Key(1), DummyString(20000)));
}
// Insert to db_.
ASSERT_OK(Put(0, Key(1), DummyString(30000), wo));
// WriteBufferManager::buffer_size_ has exceeded after the previous write to
// dbs[0] is completed.
std::vector<port::Thread> threads;
int wait_count_db = 0;
InstrumentedMutex mutex;
InstrumentedCondVar cv(&mutex);
std::unordered_set<WriteThread::Writer*> w_set;
std::vector<port::Thread> writer_threads;
std::atomic<int> thread_num(0);
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
{{"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0",
"DBImpl::BackgroundCallFlush:start"}});
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"WBMStallInterface::BlockDB", [&](void*) {
{
InstrumentedMutexLock lock(&mutex);
wait_count_db++;
thread_num.fetch_add(1);
cv.Signal();
// Allow the flush to continue if all writer threads are blocked.
if (thread_num.load(std::memory_order_relaxed) == 2 * num_dbs + 1) {
TEST_SYNC_POINT(
"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0");
}
}
});
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"WriteThread::WriteStall::Wait", [&](void* arg) {
WriteThread::Writer* w = reinterpret_cast<WriteThread::Writer*>(arg);
{
InstrumentedMutexLock lock(&mutex);
w_set.insert(w);
thread_num.fetch_add(1);
// Allow the flush continue if all writer threads are blocked.
if (thread_num.load(std::memory_order_relaxed) == 2 * num_dbs + 1) {
TEST_SYNC_POINT(
"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0");
}
}
});
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
bool s1 = true, s2 = true;
// Write to multiple columns of db_.
std::function<void(int)> write_cf = [&](int cf) {
Status tmp = Put(cf, Key(3), DummyString(1), wo);
InstrumentedMutexLock lock(&mutex);
s1 = s1 && tmp.ok();
};
// Write to multiple DBs.
std::function<void(DB*)> write_db = [&](DB* db) {
Status tmp = db->Put(wo, Key(3), DummyString(1));
InstrumentedMutexLock lock(&mutex);
s2 = s2 && tmp.ok();
};
// Flow:
// thread will write to db_ will be blocked (as Flush will on hold,
// buffer_size_ has exceeded and will create stall in effect).
// |
// |
// multiple writers threads writing to different DBs and to db_ across
// multiple columns will be created and they will be blocked due to stall.
// |
// |
// Last writer thread will write and when its blocked it will signal Flush to
// continue to clear the stall.
threads.emplace_back(write_db, db_);
// Wait untill first thread is blocked and then create the multiple writer
// threads.
{
InstrumentedMutexLock lock(&mutex);
while (wait_count_db != 1) {
cv.Wait();
}
}
for (int i = 0; i < num_dbs; i++) {
// Write to multiple columns of db_.
writer_threads.emplace_back(write_cf, i % 3);
// Write to different dbs.
threads.emplace_back(write_db, dbs[i]);
}
for (auto& t : threads) {
t.join();
}
for (auto& t : writer_threads) {
t.join();
}
ASSERT_TRUE(s1);
ASSERT_TRUE(s2);
// Number of DBs blocked.
ASSERT_EQ(num_dbs + 1, wait_count_db);
// Number of Writer threads blocked.
ASSERT_EQ(w_set.size(), num_dbs);
// Clean up DBs.
for (int i = 0; i < num_dbs; i++) {
ASSERT_OK(dbs[i]->Close());
ASSERT_OK(DestroyDB(dbnames[i], options));
delete dbs[i];
}
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
// Tests multiple threads writing across multiple column families of db_ with
// different values of WriteOptions.no_slowdown while a WriteBufferManager
// stall is in effect.
TEST_P(DBWriteBufferManagerTest, MixedSlowDownOptionsSingleDB) {
  Options options = CurrentOptions();
  options.arena_block_size = 4096;
  options.write_buffer_size = 500000;  // this is never hit
  std::shared_ptr<Cache> cache = NewLRUCache(4 * 1024 * 1024, 2);
  ASSERT_LT(cache->GetUsage(), 256 * 1024);

  // The test parameter selects whether the manager is given the cache.
  cost_cache_ = GetParam();
  if (cost_cache_) {
    options.write_buffer_manager.reset(
        new WriteBufferManager(100000, cache, true));
  } else {
    options.write_buffer_manager.reset(
        new WriteBufferManager(100000, nullptr, true));
  }

  WriteOptions wo;
  wo.disableWAL = true;

  CreateAndReopenWithCF({"cf1", "cf2", "cf3"}, options);

  // Flush results are asserted so that a failed flush is not silently lost.
  ASSERT_OK(Put(3, Key(1), DummyString(1), wo));
  ASSERT_OK(Flush(3));
  ASSERT_OK(Put(3, Key(1), DummyString(1), wo));
  ASSERT_OK(Put(0, Key(1), DummyString(1), wo));
  ASSERT_OK(Flush(0));

  // Write to "Default", "cf2" and "cf3". No flush will be triggered.
  ASSERT_OK(Put(3, Key(1), DummyString(30000), wo));
  ASSERT_OK(Put(0, Key(1), DummyString(40000), wo));
  ASSERT_OK(Put(2, Key(1), DummyString(1), wo));
  ASSERT_OK(Put(3, Key(2), DummyString(40000), wo));
  // WriteBufferManager::buffer_size_ has been exceeded once the previous
  // write to db_ is completed.

  std::unordered_set<WriteThread::Writer*> w_slowdown_set;
  std::vector<port::Thread> threads;
  int wait_count_db = 0;
  int num_writers = 4;
  InstrumentedMutex mutex;
  InstrumentedCondVar cv(&mutex);
  std::atomic<int> thread_num(0);
  std::atomic<int> w_no_slowdown(0);

  // True once every writer is either recorded as stalled or has returned
  // from a no_slowdown write. Call with `mutex` held, since it reads
  // w_slowdown_set.
  const auto all_writers_accounted_for = [&]() {
    return w_slowdown_set.size() +
               static_cast<size_t>(
                   w_no_slowdown.load(std::memory_order_relaxed)) ==
           static_cast<size_t>(num_writers);
  };

  // The background flush may only start after the test's own sync point has
  // been reached.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0",
        "DBImpl::BackgroundCallFlush:start"}});

  // Count each time the "WBMStallInterface::BlockDB" sync point fires and
  // wake the main thread waiting on `cv`.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "WBMStallInterface::BlockDB", [&](void*) {
        {
          InstrumentedMutexLock lock(&mutex);
          wait_count_db++;
          cv.SignalAll();
        }
      });

  // Record every writer that reaches the write-stall wait.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "WriteThread::WriteStall::Wait", [&](void* arg) {
        {
          InstrumentedMutexLock lock(&mutex);
          WriteThread::Writer* w = reinterpret_cast<WriteThread::Writer*>(arg);
          w_slowdown_set.insert(w);
          // Allow the flush to continue if all writer threads are blocked.
          if (all_writers_accounted_for()) {
            TEST_SYNC_POINT(
                "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0");
          }
        }
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // s1: every no_slowdown = false write succeeded.
  // s2: every no_slowdown = true write failed. Both are guarded by `mutex`.
  bool s1 = true, s2 = true;

  // Writer with no_slowdown = false; its write is expected to succeed.
  std::function<void(int)> write_slow_down = [&](int cf) {
    int a = thread_num.fetch_add(1);
    std::string key = "foo" + std::to_string(a);
    WriteOptions write_op;
    write_op.no_slowdown = false;
    Status tmp = Put(cf, Slice(key), DummyString(1), write_op);
    InstrumentedMutexLock lock(&mutex);
    s1 = s1 && tmp.ok();
  };

  // Writer with no_slowdown = true; its write is expected to fail rather
  // than wait for the stall to clear.
  std::function<void(int)> write_no_slow_down = [&](int cf) {
    int a = thread_num.fetch_add(1);
    std::string key = "foo" + std::to_string(a);
    WriteOptions write_op;
    write_op.no_slowdown = true;
    Status tmp = Put(cf, Slice(key), DummyString(1), write_op);
    {
      InstrumentedMutexLock lock(&mutex);
      s2 = s2 && !tmp.ok();
      w_no_slowdown.fetch_add(1);
      // Allow the flush to continue if all writer threads are blocked.
      if (all_writers_accounted_for()) {
        TEST_SYNC_POINT(
            "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0");
      }
    }
  };

  // Flow:
  // main_writer thread will write but will be blocked (as Flush will be on
  // hold, buffer_size_ has been exceeded, thus a stall will be in effect).
  //  |
  //  |
  //  multiple writer threads will be created to write across multiple columns
  //  with different values of WriteOptions.no_slowdown. Some of them will
  //  be blocked and some of them will return with Incomplete status.
  //  |
  //  |
  //  Last writer thread will write and when it is blocked/returns it will
  //  signal Flush to continue to clear the stall.
  threads.emplace_back(write_slow_down, 1);

  // Wait until the first thread (main_writer) writing to the DB is blocked and
  // then create the multiple writers which will be blocked from getting added
  // to the queue because the stall is in effect.
  {
    InstrumentedMutexLock lock(&mutex);
    while (wait_count_db != 1) {
      cv.Wait();
    }
  }
  for (int i = 0; i < num_writers; i += 2) {
    threads.emplace_back(write_no_slow_down, (i) % 4);
    threads.emplace_back(write_slow_down, (i + 1) % 4);
  }
  for (auto& t : threads) {
    t.join();
  }

  ASSERT_TRUE(s1);
  ASSERT_TRUE(s2);
  // Number of DBs blocked.
  ASSERT_EQ(wait_count_db, 1);
  // Number of Writer threads blocked.
  ASSERT_EQ(w_slowdown_set.size(), static_cast<size_t>(num_writers / 2));
  // Number of Writer threads with WriteOptions.no_slowdown = true.
  ASSERT_EQ(w_no_slowdown.load(std::memory_order_relaxed), num_writers / 2);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
// Test multiple threads writing across multiple columns of db_ and different
// dbs by passing different values to WriteOptions.no_slowdown.
//
// db_ and four additional DBs are opened with the same `options`, and thus the
// same WriteBufferManager (buffer size 100000). Writers with
// no_slowdown == false are expected to succeed (tracked by s1); writers with
// no_slowdown == true are expected to return a non-OK status (tracked by s2).
TEST_P(DBWriteBufferManagerTest, MixedSlowDownOptionsMultipleDB) {
std::vector<std::string> dbnames;
std::vector<DB*> dbs;
int num_dbs = 4;
for (int i = 0; i < num_dbs; i++) {
dbs.push_back(nullptr);
dbnames.push_back(
test::PerThreadDBPath("db_shared_wb_db" + std::to_string(i)));
}
Options options = CurrentOptions();
options.arena_block_size = 4096;
options.write_buffer_size = 500000; // this is never hit
std::shared_ptr<Cache> cache = NewLRUCache(4 * 1024 * 1024, 2);
ASSERT_LT(cache->GetUsage(), 256 * 1024);
// The test parameter selects whether the WriteBufferManager is given the
// cache or a null cache.
cost_cache_ = GetParam();
if (cost_cache_) {
options.write_buffer_manager.reset(
new WriteBufferManager(100000, cache, true));
} else {
options.write_buffer_manager.reset(
new WriteBufferManager(100000, nullptr, true));
}
CreateAndReopenWithCF({"cf1", "cf2"}, options);
for (int i = 0; i < num_dbs; i++) {
ASSERT_OK(DestroyDB(dbnames[i], options));
ASSERT_OK(DB::Open(options, dbnames[i], &(dbs[i])));
}
WriteOptions wo;
wo.disableWAL = true;
// 4 * 20000 bytes into the extra DBs plus 30000 bytes into db_ below add up
// to more than the 100000-byte WriteBufferManager budget.
for (int i = 0; i < num_dbs; i++) {
ASSERT_OK(dbs[i]->Put(wo, Key(1), DummyString(20000)));
}
// Insert to db_.
ASSERT_OK(Put(0, Key(1), DummyString(30000), wo));
// WriteBufferManager::buffer_size_ has exceeded after the previous write to
// dbs[0] is completed.
std::vector<port::Thread> threads;
// Number of "WBMStallInterface::BlockDB" callbacks seen; guarded by mutex.
int wait_count_db = 0;
InstrumentedMutex mutex;
InstrumentedCondVar cv(&mutex);
// Writers seen at "WriteThread::WriteStall::Wait"; guarded by mutex.
std::unordered_set<WriteThread::Writer*> w_slowdown_set;
std::vector<port::Thread> writer_threads;
// Source of unique key suffixes for the writer threads.
std::atomic<int> thread_num(0);
// Number of no_slowdown writers whose Put() has returned.
std::atomic<int> w_no_slowdown(0);
// The background flush may only start once the sync point below has been
// processed. NOTE: the sync point name is the one used by the
// SharedWriteBufferAcrossCFs test; it is only a label here.
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
{{"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0",
"DBImpl::BackgroundCallFlush:start"}});
// Each of the three places below (two callbacks and the no_slowdown writer)
// checks whether blocked DBs + blocked writers + returned no_slowdown writers
// has reached 2 * num_dbs + 1, i.e. the initial writer plus the four threads
// started per iteration of the num_dbs / 2 launch loop. Whichever one observes
// that total releases the flush.
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"WBMStallInterface::BlockDB", [&](void*) {
InstrumentedMutexLock lock(&mutex);
wait_count_db++;
cv.Signal();
// Allow the flush continue if all writer threads are blocked.
if (w_slowdown_set.size() +
(unsigned long)(w_no_slowdown.load(std::memory_order_relaxed) +
wait_count_db) ==
(unsigned long)(2 * num_dbs + 1)) {
TEST_SYNC_POINT(
"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0");
}
});
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"WriteThread::WriteStall::Wait", [&](void* arg) {
WriteThread::Writer* w = reinterpret_cast<WriteThread::Writer*>(arg);
InstrumentedMutexLock lock(&mutex);
w_slowdown_set.insert(w);
// Allow the flush continue if all writer threads are blocked.
if (w_slowdown_set.size() +
(unsigned long)(w_no_slowdown.load(std::memory_order_relaxed) +
wait_count_db) ==
(unsigned long)(2 * num_dbs + 1)) {
TEST_SYNC_POINT(
"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0");
}
});
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
// s1: every no_slowdown == false write succeeded.
// s2: every no_slowdown == true write returned a non-OK status.
// Both are updated under mutex.
bool s1 = true, s2 = true;
// Writer that is allowed to block: Put() is expected to succeed eventually.
std::function<void(DB*)> write_slow_down = [&](DB* db) {
int a = thread_num.fetch_add(1);
std::string key = "foo" + std::to_string(a);
WriteOptions write_op;
write_op.no_slowdown = false;
Status tmp = db->Put(write_op, Slice(key), DummyString(1));
InstrumentedMutexLock lock(&mutex);
s1 = s1 && tmp.ok();
};
// Writer that refuses to block: Put() is expected to fail while the stall is
// in effect. After returning it is counted and may release the flush.
std::function<void(DB*)> write_no_slow_down = [&](DB* db) {
int a = thread_num.fetch_add(1);
std::string key = "foo" + std::to_string(a);
WriteOptions write_op;
write_op.no_slowdown = true;
Status tmp = db->Put(write_op, Slice(key), DummyString(1));
{
InstrumentedMutexLock lock(&mutex);
s2 = s2 && !tmp.ok();
w_no_slowdown.fetch_add(1);
if (w_slowdown_set.size() +
(unsigned long)(w_no_slowdown.load(std::memory_order_relaxed) +
wait_count_db) ==
(unsigned long)(2 * num_dbs + 1)) {
TEST_SYNC_POINT(
"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0");
}
}
};
// Flow:
// first thread will write but will be blocked (as Flush will on hold,
// buffer_size_ has exceeded, thus will create stall in effect).
// |
// |
// multiple writer threads will be created to write across multiple columns
// of db_ and different DBs with different values of
// WriteOptions.no_slowdown. Some of them will be blocked and some of them
// will return with Incomplete status.
// |
// |
// Last writer thread will write and when its blocked/return it will signal
// Flush to continue to clear the stall.
threads.emplace_back(write_slow_down, db_);
// Wait until first thread writing to DB is blocked and then
// create the multiple writers.
{
InstrumentedMutexLock lock(&mutex);
while (wait_count_db != 1) {
cv.Wait();
}
}
// Each iteration starts four threads and uses dbs[i] and dbs[i + 1], so
// num_dbs must be even.
for (int i = 0; i < num_dbs; i += 2) {
// Write to multiple columns of db_.
writer_threads.emplace_back(write_slow_down, db_);
writer_threads.emplace_back(write_no_slow_down, db_);
// Write to different DBs.
threads.emplace_back(write_slow_down, dbs[i]);
threads.emplace_back(write_no_slow_down, dbs[i + 1]);
}
for (auto& t : threads) {
t.join();
}
for (auto& t : writer_threads) {
t.join();
}
ASSERT_TRUE(s1);
ASSERT_TRUE(s2);
// Number of DBs blocked.
ASSERT_EQ((num_dbs / 2) + 1, wait_count_db);
// Number of writer threads writing to db_ blocked from getting added to the
// queue.
ASSERT_EQ(w_slowdown_set.size(), num_dbs / 2);
// Number of threads with WriteOptions.no_slowdown = true.
ASSERT_EQ(w_no_slowdown.load(std::memory_order_relaxed), num_dbs);
// Clean up DBs.
for (int i = 0; i < num_dbs; i++) {
ASSERT_OK(dbs[i]->Close());
ASSERT_OK(DestroyDB(dbnames[i], options));
delete dbs[i];
}
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
// Checks that a `WriteBufferManager` built with `allow_stall == false` does
// not keep switching memtables once it is full and one CF keeps receiving
// writes. A CF's memtable should be switched for flush only while that CF has
// no pending or running flush.
//
// Two DBs with one CF each share the manager (rather than one DB with several
// CFs), so the CF considered for a switch is chosen simply by picking which DB
// to write to.
//
// Not supported in LITE mode due to `GetProperty()` unavailable.
TEST_P(DBWriteBufferManagerTest, StopSwitchingMemTablesOnceFlushing) {
  Options options = CurrentOptions();
  options.arena_block_size = 4 << 10;   // 4KB
  options.write_buffer_size = 1 << 20;  // 1MB
  std::shared_ptr<Cache> cache =
      NewLRUCache(4 << 20 /* capacity (4MB) */, 2 /* num_shard_bits */);
  ASSERT_LT(cache->GetUsage(), 256 << 10 /* 256KB */);

  // The test parameter decides whether the manager is handed the cache; the
  // pointer stays null otherwise.
  cost_cache_ = GetParam();
  std::shared_ptr<Cache> wbm_cache;
  if (cost_cache_) {
    wbm_cache = cache;
  }
  options.write_buffer_manager.reset(
      new WriteBufferManager(512 << 10 /* buffer_size (512KB) */, wbm_cache,
                             false /* allow_stall */));
  Reopen(options);

  // Second DB sharing the same manager through `options`.
  const std::string shared_db_path = test::PerThreadDBPath("db_shared_wbm_db");
  DB* shared_wbm_db = nullptr;
  ASSERT_OK(DestroyDB(shared_db_path, options));
  ASSERT_OK(DB::Open(options, shared_db_path, &shared_wbm_db));

  // Two 256KB writes to db_: only after the second one does the manager
  // report that a flush is needed.
  ASSERT_OK(Put(Key(1), DummyString(256 << 10 /* 256KB */), WriteOptions()));
  ASSERT_FALSE(options.write_buffer_manager->ShouldFlush());
  ASSERT_OK(Put(Key(1), DummyString(256 << 10 /* 256KB */), WriteOptions()));
  ASSERT_TRUE(options.write_buffer_manager->ShouldFlush());

  // Occupy the HIGH priority pool so flushes stay pending instead of running.
  test::SleepingBackgroundTask sleeping_task_high;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
                 &sleeping_task_high, Env::Priority::HIGH);

  // The first write to the shared DB sees no immutable memtable; every later
  // write must see exactly one, with its flush still pending.
  for (int round = 0; round < 3; ++round) {
    ASSERT_OK(
        shared_wbm_db->Put(WriteOptions(), Key(1), DummyString(1 /* len */)));
    const std::string expected = std::to_string(round > 0 ? 1 : 0);
    std::string prop;
    ASSERT_TRUE(
        shared_wbm_db->GetProperty("rocksdb.num-immutable-mem-table", &prop));
    ASSERT_EQ(expected, prop);
    ASSERT_TRUE(
        shared_wbm_db->GetProperty("rocksdb.mem-table-flush-pending", &prop));
    ASSERT_EQ(expected, prop);
  }

  // Release the blocked pool, then tear down the extra DB.
  sleeping_task_high.WakeUp();
  sleeping_task_high.WaitUntilDone();
  ASSERT_OK(shared_wbm_db->Close());
  ASSERT_OK(DestroyDB(shared_db_path, options));
  delete shared_wbm_db;
}
// Run every DBWriteBufferManagerTest case with the bool parameter set to both
// false and true; the tests read it via GetParam() into cost_cache_.
INSTANTIATE_TEST_CASE_P(DBWriteBufferManagerTest, DBWriteBufferManagerTest,
testing::Bool());
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: install the stack trace handler, let GoogleTest
// consume its command-line flags, register custom objects, then run all tests.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  RegisterCustomObjects(argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

@ -1,790 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include <atomic>
#include <cstdint>
#include <fstream>
#include <memory>
#include <thread>
#include <vector>
#include "db/db_test_util.h"
#include "db/write_batch_internal.h"
#include "db/write_thread.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "test_util/sync_point.h"
#include "util/random.h"
#include "util/string_util.h"
#include "utilities/fault_injection_env.h"
#include "utilities/fault_injection_fs.h"
namespace ROCKSDB_NAMESPACE {
// Fixture for testing variations of WriteImpl. The int test parameter is
// forwarded to DBTestBase::GetOptions() to pick the option configuration.
class DBWriteTest : public DBTestBase, public testing::WithParamInterface<int> {
 public:
  DBWriteTest() : DBTestBase("db_write_test", /*env_do_fsync=*/true) {}

  // Returns the Options selected by the current test parameter.
  Options GetOptions() {
    const int option_config = GetParam();
    return DBTestBase::GetOptions(option_config);
  }

  // Reopens the DB using the parameter-selected Options.
  void Open() {
    const Options selected = GetOptions();
    DBTestBase::Reopen(selected);
  }
};
// Non-parameterized fixture (used with TEST_F). Constructs DBTestBase with the
// "pipelined_write_test" path and env_do_fsync disabled.
class DBWriteTestUnparameterized : public DBTestBase {
public:
explicit DBWriteTestUnparameterized()
: DBTestBase("pipelined_write_test", /*env_do_fsync=*/false) {}
};
// Requesting a synced write while the WAL is disabled is invalid: both Put()
// and Write() must reject it with InvalidArgument.
TEST_P(DBWriteTest, SyncAndDisableWAL) {
  WriteOptions sync_without_wal;
  sync_without_wal.disableWAL = true;
  sync_without_wal.sync = true;

  // Single-key path.
  Status put_status = dbfull()->Put(sync_without_wal, "foo", "bar");
  ASSERT_TRUE(put_status.IsInvalidArgument());

  // Batch path.
  WriteBatch batch;
  ASSERT_OK(batch.Put("foo", "bar"));
  Status write_status = dbfull()->Write(sync_without_wal, &batch);
  ASSERT_TRUE(write_status.IsInvalidArgument());
}
// Queues a slowdown / no_slowdown / slowdown sequence of writers behind a
// first leader while a fourth L0 file is about to be created (the L0 slowdown
// and stop triggers are both 4), then releases flush, leader and compaction in
// that order via sync points. The test passes when every writer thread
// returns: slowdown writers must succeed, the no_slowdown writer may succeed
// or return Incomplete.
TEST_P(DBWriteTest, WriteStallRemoveNoSlowdownWrite) {
Options options = GetOptions();
options.level0_stop_writes_trigger = options.level0_slowdown_writes_trigger =
4;
std::vector<port::Thread> threads;
// Source of unique key suffixes.
std::atomic<int> thread_num(0);
port::Mutex mutex;
port::CondVar cv(&mutex);
// Guarded by mutex
int writers = 0;
Reopen(options);
// Writer that accepts being delayed; its Put() must succeed.
std::function<void()> write_slowdown_func = [&]() {
int a = thread_num.fetch_add(1);
std::string key = "foo" + std::to_string(a);
WriteOptions wo;
wo.no_slowdown = false;
ASSERT_OK(dbfull()->Put(wo, key, "bar"));
};
// Writer that refuses to be delayed; its Put() may return Incomplete.
std::function<void()> write_no_slowdown_func = [&]() {
int a = thread_num.fetch_add(1);
std::string key = "foo" + std::to_string(a);
WriteOptions wo;
wo.no_slowdown = true;
Status s = dbfull()->Put(wo, key, "bar");
ASSERT_TRUE(s.ok() || s.IsIncomplete());
};
// Sync point callback: counts a writer that reached
// "WriteThread::JoinBatchGroup:Start" and wakes the main thread.
std::function<void(void*)> unblock_main_thread_func = [&](void*) {
mutex.Lock();
++writers;
cv.SignalAll();
mutex.Unlock();
};
// Create 3 L0 files and schedule 4th without waiting
ASSERT_OK(Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar"));
ASSERT_OK(Flush());
ASSERT_OK(Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar"));
ASSERT_OK(Flush());
ASSERT_OK(Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar"));
ASSERT_OK(Flush());
ASSERT_OK(Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar"));
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"WriteThread::JoinBatchGroup:Start", unblock_main_thread_func);
// The test-side sync points :1, :2 and :3 gate, respectively, the start of
// the background flush, the point after JoinBatchGroup in the pipelined write
// path, and the start of background compaction.
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
{{"DBWriteTest::WriteStallRemoveNoSlowdownWrite:1",
"DBImpl::BackgroundCallFlush:start"},
{"DBWriteTest::WriteStallRemoveNoSlowdownWrite:2",
"DBImplWrite::PipelinedWriteImpl:AfterJoinBatchGroup"},
// Make compaction start wait for the write stall to be detected and
// implemented by a write group leader
{"DBWriteTest::WriteStallRemoveNoSlowdownWrite:3",
"BackgroundCallCompaction:0"}});
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
// Schedule creation of 4th L0 file without waiting. This will seal the
// memtable and then wait for a sync point before writing the file. We need
// to do it this way because SwitchMemtable() needs to enter the
// write_thread
FlushOptions fopt;
fopt.wait = false;
ASSERT_OK(dbfull()->Flush(fopt));
// Create a mix of slowdown/no_slowdown write threads
// The mutex is held while launching; cv.Wait() is what lets the callback
// update `writers`. Waiting after each launch fixes the order in which the
// writers reach JoinBatchGroup.
mutex.Lock();
// First leader
threads.emplace_back(write_slowdown_func);
while (writers != 1) {
cv.Wait();
}
// Second leader. Will stall writes
// Build a writers list with no slowdown in the middle:
// +-------------+
// | slowdown +<----+ newest
// +--+----------+
// |
// v
// +--+----------+
// | no slowdown |
// +--+----------+
// |
// v
// +--+----------+
// | slowdown +
// +-------------+
threads.emplace_back(write_slowdown_func);
while (writers != 2) {
cv.Wait();
}
threads.emplace_back(write_no_slowdown_func);
while (writers != 3) {
cv.Wait();
}
threads.emplace_back(write_slowdown_func);
while (writers != 4) {
cv.Wait();
}
mutex.Unlock();
// Let the pending flush run and wait for it to finish.
TEST_SYNC_POINT("DBWriteTest::WriteStallRemoveNoSlowdownWrite:1");
ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(nullptr));
// This would have triggered a write stall. Unblock the write group leader
TEST_SYNC_POINT("DBWriteTest::WriteStallRemoveNoSlowdownWrite:2");
// The leader is going to create missing newer links. When the leader
// finishes, the next leader is going to delay writes and fail writers with
// no_slowdown
TEST_SYNC_POINT("DBWriteTest::WriteStallRemoveNoSlowdownWrite:3");
for (auto& t : threads) {
t.join();
}
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
}
// Starts one leader writer and then five more writers (alternating slowdown
// and no_slowdown) all at once while a fourth L0 file is about to be created
// (the L0 slowdown and stop triggers are both 4). Flush, leader and compaction
// are then released in that order via sync points. The test passes when every
// writer thread returns, i.e. no writer is left hanging.
TEST_P(DBWriteTest, WriteThreadHangOnWriteStall) {
Options options = GetOptions();
options.level0_stop_writes_trigger = options.level0_slowdown_writes_trigger =
4;
std::vector<port::Thread> threads;
// Source of unique key suffixes.
std::atomic<int> thread_num(0);
port::Mutex mutex;
port::CondVar cv(&mutex);
// Guarded by mutex
int writers = 0;
Reopen(options);
// Writer that accepts being delayed; its Put() must succeed.
std::function<void()> write_slowdown_func = [&]() {
int a = thread_num.fetch_add(1);
std::string key = "foo" + std::to_string(a);
WriteOptions wo;
wo.no_slowdown = false;
ASSERT_OK(dbfull()->Put(wo, key, "bar"));
};
// Writer that refuses to be delayed; its Put() may return Incomplete.
std::function<void()> write_no_slowdown_func = [&]() {
int a = thread_num.fetch_add(1);
std::string key = "foo" + std::to_string(a);
WriteOptions wo;
wo.no_slowdown = true;
Status s = dbfull()->Put(wo, key, "bar");
ASSERT_TRUE(s.ok() || s.IsIncomplete());
};
// Sync point callback: counts a writer that reached
// "WriteThread::JoinBatchGroup:Start" and wakes the main thread.
std::function<void(void*)> unblock_main_thread_func = [&](void*) {
mutex.Lock();
++writers;
cv.SignalAll();
mutex.Unlock();
};
// Create 3 L0 files and schedule 4th without waiting
ASSERT_OK(Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar"));
ASSERT_OK(Flush());
ASSERT_OK(Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar"));
ASSERT_OK(Flush());
ASSERT_OK(Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar"));
ASSERT_OK(Flush());
ASSERT_OK(Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar"));
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"WriteThread::JoinBatchGroup:Start", unblock_main_thread_func);
// The test-side sync points :1, :2 and :3 gate, respectively, the start of
// the background flush, the point before the leader enters in WriteImpl, and
// the start of background compaction.
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
{{"DBWriteTest::WriteThreadHangOnWriteStall:1",
"DBImpl::BackgroundCallFlush:start"},
{"DBWriteTest::WriteThreadHangOnWriteStall:2",
"DBImpl::WriteImpl:BeforeLeaderEnters"},
// Make compaction start wait for the write stall to be detected and
// implemented by a write group leader
{"DBWriteTest::WriteThreadHangOnWriteStall:3",
"BackgroundCallCompaction:0"}});
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
// Schedule creation of 4th L0 file without waiting. This will seal the
// memtable and then wait for a sync point before writing the file. We need
// to do it this way because SwitchMemtable() needs to enter the
// write_thread
FlushOptions fopt;
fopt.wait = false;
ASSERT_OK(dbfull()->Flush(fopt));
// Create a mix of slowdown/no_slowdown write threads
// The mutex is held while launching; cv.Wait() is what lets the callback
// update `writers`.
mutex.Lock();
// First leader
threads.emplace_back(write_slowdown_func);
while (writers != 1) {
cv.Wait();
}
// Second leader. Will stall writes
// The remaining five writers are started together, so their relative order
// in the writer queue is not fixed; only the total of six is awaited.
threads.emplace_back(write_slowdown_func);
threads.emplace_back(write_no_slowdown_func);
threads.emplace_back(write_slowdown_func);
threads.emplace_back(write_no_slowdown_func);
threads.emplace_back(write_slowdown_func);
while (writers != 6) {
cv.Wait();
}
mutex.Unlock();
// Let the pending flush run and wait for it to finish.
TEST_SYNC_POINT("DBWriteTest::WriteThreadHangOnWriteStall:1");
ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(nullptr));
// This would have triggered a write stall. Unblock the write group leader
TEST_SYNC_POINT("DBWriteTest::WriteThreadHangOnWriteStall:2");
// The leader is going to create missing newer links. When the leader
// finishes, the next leader is going to delay writes and fail writers with
// no_slowdown
TEST_SYNC_POINT("DBWriteTest::WriteThreadHangOnWriteStall:3");
for (auto& t : threads) {
t.join();
}
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
}
// With the filesystem deactivated, several threads write concurrently and are
// held until they are all in one batch group. Every Put must then fail, unless
// manual_wal_flush is on, in which case the Put itself succeeds. Exactly one
// group leader must have been observed.
TEST_P(DBWriteTest, IOErrorOnWALWritePropagateToWriteThreadFollower) {
  constexpr int kNumThreads = 5;
  std::unique_ptr<FaultInjectionTestEnv> mock_env(
      new FaultInjectionTestEnv(env_));
  Options options = GetOptions();
  options.env = mock_env.get();
  Reopen(options);

  std::atomic<int> ready_count{0};
  std::atomic<int> leader_count{0};
  std::vector<port::Thread> threads;
  mock_env->SetFilesystemActive(false);

  // Hold the leader until every thread has linked itself into the write
  // thread, so that all of them end up in the same batch group.
  SyncPoint::GetInstance()->SetCallBack(
      "WriteThread::JoinBatchGroup:Wait", [&](void* arg) {
        ready_count++;
        auto* writer = reinterpret_cast<WriteThread::Writer*>(arg);
        if (writer->state != WriteThread::STATE_GROUP_LEADER) {
          return;
        }
        leader_count++;
        while (ready_count < kNumThreads) {
          // busy waiting
        }
      });
  SyncPoint::GetInstance()->EnableProcessing();

  // Body of each writer thread.
  auto put_and_check = [&](int index) {
    // All threads should fail.
    auto res = Put("key" + std::to_string(index), "value");
    if (options.manual_wal_flush) {
      ASSERT_TRUE(res.ok());
      // we should see fs error when we do the flush
      // TSAN reports a false alarm for lock-order-inversion but Open and
      // FlushWAL are not run concurrently. Disabling this until TSAN is
      // fixed.
      // res = dbfull()->FlushWAL(false);
      // ASSERT_FALSE(res.ok());
    } else {
      ASSERT_FALSE(res.ok());
    }
  };
  for (int i = 0; i < kNumThreads; i++) {
    threads.emplace_back(put_and_check, i);
  }
  for (auto& writer_thread : threads) {
    writer_thread.join();
  }
  ASSERT_EQ(1, leader_count);

  // The Failed PUT operations can cause a BG error to be set.
  // Mark it as Checked for the ASSERT_STATUS_CHECKED
  dbfull()->Resume().PermitUncheckedError();
  // Close before mock_env destruct.
  Close();
}
// Reproduces a race in the pipelined write path by steering three writer
// threads through sync point callbacks that spin on atomics (see the detailed
// description at the top of the body).
TEST_F(DBWriteTestUnparameterized, PipelinedWriteRace) {
// This test was written to trigger a race in ExitAsBatchGroupLeader in case
// enable_pipelined_write_ was true.
// Writers for which ShouldWriteToMemtable() evaluates to false are removed
// from the write_group via CompleteFollower/ CompleteLeader. Writers in the
// middle of the group are fully unlinked, but if that writers is the
// last_writer, then we did not update the predecessor's link_older, i.e.,
// this writer was still reachable via newest_writer_.
//
// But the problem was, that CompleteFollower already wakes up the thread
// owning that writer before the writer has been removed. This resulted in a
// race - if the leader thread was fast enough, then everything was fine.
// However, if the woken up thread finished the current write operation and
// then performed yet another write, then a new writer instance was added
// to newest_writer_. It is possible that the new writer is located on the
// same address on stack, and if this happened, then we had a problem,
// because the old code tried to find the last_writer in the list to unlink
// it, which in this case produced a cycle in the list.
// Whether two invocations of PipelinedWriteImpl() by the same thread actually
// allocate the writer on the same address depends on the OS and/or compiler,
// so it is rather hard to create a deterministic test for this.
Options options = GetDefaultOptions();
options.create_if_missing = true;
options.enable_pipelined_write = true;
std::vector<port::Thread> threads;
// Source of unique key suffixes.
std::atomic<int> write_counter{0};
// Incremented in the JoinBatchGroup:Wait callback, decremented when a Put
// returns.
std::atomic<int> active_writers{0};
// Set once the two-write thread has finished its first write.
std::atomic<bool> second_write_starting{false};
// Set once a writer reaches JoinBatchGroup:Wait after second_write_starting.
std::atomic<bool> second_write_in_progress{false};
// The first writer observed as group leader.
std::atomic<WriteThread::Writer*> leader{nullptr};
// Set when that leader's group reaches ExitAsBatchGroupLeader:Start.
std::atomic<bool> finished_WAL_write{false};
DestroyAndReopen(options);
auto write_one_doc = [&]() {
int a = write_counter.fetch_add(1);
std::string key = "foo" + std::to_string(a);
WriteOptions wo;
ASSERT_OK(dbfull()->Put(wo, key, "bar"));
--active_writers;
};
auto write_two_docs = [&]() {
write_one_doc();
second_write_starting = true;
write_one_doc();
};
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"WriteThread::JoinBatchGroup:Wait", [&](void* arg) {
// Once the second write has started, only record that fact; the
// bookkeeping below applies to the first round of writers.
if (second_write_starting.load()) {
second_write_in_progress = true;
return;
}
auto* w = reinterpret_cast<WriteThread::Writer*>(arg);
if (w->state == WriteThread::STATE_GROUP_LEADER) {
active_writers++;
if (leader.load() == nullptr) {
leader.store(w);
while (active_writers.load() < 2) {
// wait for another thread to join the write_group
}
}
} else {
// we disable the memtable for all followers so that they are
// removed from the write_group before enqueuing it for the memtable
// write
w->disable_memtable = true;
active_writers++;
}
});
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"WriteThread::ExitAsBatchGroupLeader:Start", [&](void* arg) {
auto* wg = reinterpret_cast<WriteThread::WriteGroup*>(arg);
// Only the first time the recorded leader's group gets here: publish
// that fact and hold the leader until a third writer is active.
if (wg->leader == leader && !finished_WAL_write) {
finished_WAL_write = true;
while (active_writers.load() < 3) {
// wait for the new writer to be enqueued
}
}
});
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"WriteThread::ExitAsBatchGroupLeader:AfterCompleteWriters",
[&](void* arg) {
auto* wg = reinterpret_cast<WriteThread::WriteGroup*>(arg);
if (wg->leader == leader) {
while (!second_write_in_progress.load()) {
// wait for the old follower thread to start the next write
}
}
});
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
// start leader + one follower
threads.emplace_back(write_one_doc);
while (leader.load() == nullptr) {
// wait for leader
}
// we perform two writes in the follower, so that for the second write
// the thread reinserts a Writer with the same address
threads.emplace_back(write_two_docs);
// wait for the leader to enter ExitAsBatchGroupLeader
while (!finished_WAL_write.load()) {
// wait for write_group to have finished the WAL writes
}
// start another writer thread to be enqueued before the leader can
// complete the writers from its write_group
threads.emplace_back(write_one_doc);
for (auto& t : threads) {
t.join();
}
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
}
// Checks that `manual_wal_flush` decides whether a Put leaves data in the WAL
// buffer: after a Put the buffer must be non-empty exactly when
// manual_wal_flush is set, and FlushWAL() must always leave it empty. The
// check is done for the WAL created at open and for one created by
// TEST_SwitchWAL().
//
// Uses ASSERT_OK / ASSERT_NE rather than ASSERT_TRUE(x.ok()) /
// ASSERT_TRUE(a != b) so that a failure reports the Status and both operand
// values instead of just "false".
TEST_P(DBWriteTest, ManualWalFlushInEffect) {
  Options options = GetOptions();
  Reopen(options);
  // try the 1st WAL created during open
  ASSERT_OK(Put("key0", "value"));
  ASSERT_NE(options.manual_wal_flush, dbfull()->WALBufferIsEmpty());
  ASSERT_OK(dbfull()->FlushWAL(false));
  ASSERT_TRUE(dbfull()->WALBufferIsEmpty());
  // try the 2nd wal created during SwitchWAL
  ASSERT_OK(dbfull()->TEST_SwitchWAL());
  ASSERT_OK(Put("key0", "value"));
  ASSERT_NE(options.manual_wal_flush, dbfull()->WALBufferIsEmpty());
  ASSERT_OK(dbfull()->FlushWAL(false));
  ASSERT_TRUE(dbfull()->WALBufferIsEmpty());
}
// Writes "key2" from inside the "DBImpl::SyncWAL:Begin" sync point while
// FlushWAL(true) is running, then drops all unsynced data and verifies the DB
// can still be reopened.
TEST_P(DBWriteTest, UnflushedPutRaceWithTrackedWalSync) {
  // Repro race condition bug where unflushed WAL data extended the synced size
  // recorded to MANIFEST despite being unrecoverable.
  Options options = GetOptions();
  std::unique_ptr<FaultInjectionTestEnv> fault_env(
      new FaultInjectionTestEnv(env_));
  options.env = fault_env.get();
  options.manual_wal_flush = true;
  options.track_and_verify_wals_in_manifest = true;
  Reopen(options);
  ASSERT_OK(Put("key1", "val1"));
  SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::SyncWAL:Begin",
      [this](void* /* arg */) { ASSERT_OK(Put("key2", "val2")); });
  SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_OK(db_->FlushWAL(true /* sync */));
  // The callback captures `this` and has done its job; switch sync point
  // processing off and drop it so it cannot fire again during the
  // Close()/Reopen() steps below.
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
  // Ensure callback ran.
  ASSERT_EQ("val2", Get("key2"));
  Close();
  // Simulate full loss of unsynced data. This drops "key2" -> "val2" from the
  // DB WAL.
  fault_env->DropUnsyncedFileData();
  Reopen(options);
  // Need to close before `fault_env` goes out of scope.
  Close();
}
// Regression test: a WAL that is appended to and then switched out between the
// end of `FlushWAL(true /* sync */)`'s sync and the untracking of fully synced
// inactive logs used to be untracked by mistake, so its last append was never
// synced.
TEST_P(DBWriteTest, InactiveWalFullySyncedBeforeUntracked) {
  Options options = GetOptions();
  std::unique_ptr<FaultInjectionTestEnv> fault_env(
      new FaultInjectionTestEnv(env_));
  options.env = fault_env.get();
  Reopen(options);
  ASSERT_OK(Put("key1", "val1"));

  // Inject the append + memtable switch right before logs are marked synced.
  auto append_and_switch = [this](void* /* arg */) {
    ASSERT_OK(Put("key2", "val2"));
    ASSERT_OK(dbfull()->TEST_SwitchMemtable());
  };
  SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::SyncWAL:BeforeMarkLogsSynced:1", append_and_switch);
  SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_OK(db_->FlushWAL(true /* sync */));
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();

  ASSERT_OK(Put("key3", "val3"));
  ASSERT_OK(db_->FlushWAL(true /* sync */));
  Close();

  // Simulate full loss of unsynced data. Nothing should be lost because
  // `FlushWAL(true /* sync */)` ran before `Close()`.
  fault_env->DropUnsyncedFileData();
  Reopen(options);
  ASSERT_EQ("val1", Get("key1"));
  ASSERT_EQ("val2", Get("key2"));
  ASSERT_EQ("val3", Get("key3"));

  // `fault_env` must outlive the open DB, so close first.
  Close();
}
// Issues two Puts: the filesystem is inactive for the first and active again
// for the second. Without manual_wal_flush both Puts must fail (the second
// because of read-only mode); with manual_wal_flush both must succeed.
TEST_P(DBWriteTest, IOErrorOnWALWriteTriggersReadOnlyMode) {
  std::unique_ptr<FaultInjectionTestEnv> mock_env(
      new FaultInjectionTestEnv(env_));
  Options options = GetOptions();
  options.env = mock_env.get();
  Reopen(options);

  for (int attempt = 0; attempt < 2; attempt++) {
    // Forcibly fail WAL write for the first Put only. Subsequent Puts should
    // fail due to read-only mode
    const bool fs_active = (attempt != 0);
    mock_env->SetFilesystemActive(fs_active);
    auto put_status = Put("key" + std::to_string(attempt), "value");
    // TSAN reports a false alarm for lock-order-inversion but Open and
    // FlushWAL are not run concurrently. Disabling this until TSAN is
    // fixed.
    /*
    if (options.manual_wal_flush && i == 0) {
      // even with manual_wal_flush the 2nd Put should return error because of
      // the read-only mode
      ASSERT_TRUE(res.ok());
      // we should see fs error when we do the flush
      res = dbfull()->FlushWAL(false);
    }
    */
    if (options.manual_wal_flush) {
      ASSERT_OK(put_status);
    } else {
      ASSERT_NOK(put_status);
    }
  }
  // Close before mock_env destruct.
  Close();
}
// With the filesystem deactivated and manual_wal_flush on, keeps writing 1KB
// values (up to 2048 of them, against a 1.5MB write buffer) until a Put fails,
// and expects that failure to carry fatal severity.
TEST_P(DBWriteTest, IOErrorOnSwitchMemtable) {
  Random rnd(301);
  std::unique_ptr<FaultInjectionTestEnv> mock_env(
      new FaultInjectionTestEnv(env_));
  Options options = GetOptions();
  options.env = mock_env.get();
  options.writable_file_max_buffer_size = 4 * 1024 * 1024;
  options.write_buffer_size = 3 * 512 * 1024;
  options.wal_bytes_per_sync = 256 * 1024;
  options.manual_wal_flush = true;
  Reopen(options);
  mock_env->SetFilesystemActive(false, Status::IOError("Not active"));

  // Stop at the first failing Put; `last_status` then holds that failure.
  Status last_status;
  for (int i = 0; last_status.ok() && i < 4 * 512; ++i) {
    last_status = Put(Key(i), rnd.RandomString(1024));
  }
  ASSERT_EQ(last_status.severity(), Status::Severity::kFatalError);

  mock_env->SetFilesystemActive(true);
  // Close before mock_env destruct.
  Close();
}
// Verifies that db->LockWAL() flushes the WAL buffer after taking the lock,
// and that this flush (and therefore LockWAL itself) can fail on I/O error.
TEST_P(DBWriteTest, LockWALInEffect) {
  if (mem_env_ || encrypted_env_) {
    ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment");
    return;
  }
  Options options = GetOptions();
  std::shared_ptr<FaultInjectionTestFS> fault_fs(
      new FaultInjectionTestFS(FileSystem::Default()));
  std::unique_ptr<Env> fault_fs_env(NewCompositeEnv(fault_fs));
  options.env = fault_fs_env.get();
  options.disable_auto_compactions = true;
  options.paranoid_checks = false;
  options.max_bgerror_resume_count = 0;  // manual Resume()
  Reopen(options);

  // Round 1: the WAL created at open. The buffer holds data after the Put only
  // when manual_wal_flush is on; LockWAL must leave it empty either way.
  ASSERT_OK(Put("key0", "value"));
  ASSERT_NE(options.manual_wal_flush, dbfull()->WALBufferIsEmpty());
  ASSERT_OK(db_->LockWAL());
  ASSERT_TRUE(dbfull()->WALBufferIsEmpty());
  ASSERT_OK(db_->UnlockWAL());

  // Round 2: same checks on the WAL created by SwitchWAL.
  ASSERT_OK(dbfull()->TEST_SwitchWAL());
  ASSERT_OK(Put("key1", "value"));
  ASSERT_NE(options.manual_wal_flush, dbfull()->WALBufferIsEmpty());
  ASSERT_OK(db_->LockWAL());
  ASSERT_TRUE(dbfull()->WALBufferIsEmpty());
  ASSERT_OK(db_->UnlockWAL());

  // Fail the WAL flush if applicable
  fault_fs->SetFilesystemActive(false);
  Status put_while_inactive = Put("key2", "value");
  if (!options.manual_wal_flush) {
    // The Put itself hits the I/O failure; LockWAL has nothing to flush.
    ASSERT_NOK(put_while_inactive);
    ASSERT_OK(db_->LockWAL());
    ASSERT_OK(db_->UnlockWAL());
  } else {
    // The Put is only buffered; the failure surfaces in LockWAL's flush.
    ASSERT_OK(put_while_inactive);
    // I/O failure
    ASSERT_NOK(db_->LockWAL());
    // Should not need UnlockWAL after LockWAL fails
  }

  fault_fs->SetFilesystemActive(true);
  ASSERT_OK(db_->Resume());
  // Writes should work again
  ASSERT_OK(Put("key3", "value"));
  ASSERT_EQ(Get("key3"), "value");
  // An UnlockWAL with no outstanding lock is tolerated but reports non-OK.
  ASSERT_NOK(db_->UnlockWAL());
  // Close before fault_fs_env is destroyed.
  Close();
}
// Exercises nested LockWAL()/UnlockWAL() calls issued from several threads.
// While at least one lock is outstanding, the writer thread t1 must not
// complete, reads must keep returning the old value, and the latest sequence
// number must stay unchanged. One UnlockWAL() more than there were locks must
// return a non-OK status. The trailing "a -> b" comments track the expected
// number of outstanding locks.
TEST_P(DBWriteTest, LockWALConcurrentRecursive) {
Options options = GetOptions();
Reopen(options);
ASSERT_OK(Put("k1", "val"));
ASSERT_OK(db_->LockWAL()); // 0 -> 1
// Sequence number at the time of the first lock; compared again further down.
auto frozen_seqno = db_->GetLatestSequenceNumber();
std::atomic<bool> t1_completed{false};
port::Thread t1{[&]() {
// Won't finish until WAL unlocked
ASSERT_OK(Put("k1", "val2"));
t1_completed = true;
}};
ASSERT_OK(db_->LockWAL()); // 1 -> 2
// Read-only ops are OK
ASSERT_EQ(Get("k1"), "val");
{
std::vector<LiveFileStorageInfo> files;
LiveFilesStorageInfoOptions lf_opts;
// A DB flush could deadlock
lf_opts.wal_size_for_flush = UINT64_MAX;
ASSERT_OK(db_->GetLiveFilesStorageInfo({lf_opts}, &files));
}
// t2's LockWAL races with the UnlockWAL below; either order leaves two
// outstanding locks once t2 has been joined.
port::Thread t2{[&]() {
ASSERT_OK(db_->LockWAL()); // 2 -> 3 or 1 -> 2
}};
ASSERT_OK(db_->UnlockWAL()); // 2 -> 1 or 3 -> 2
// Give t1 an extra chance to jump in case of bug
std::this_thread::yield();
t2.join();
ASSERT_FALSE(t1_completed.load());
// Should now have 2 outstanding LockWAL
ASSERT_EQ(Get("k1"), "val");
ASSERT_OK(db_->UnlockWAL()); // 2 -> 1
ASSERT_FALSE(t1_completed.load());
ASSERT_EQ(Get("k1"), "val");
ASSERT_EQ(frozen_seqno, db_->GetLatestSequenceNumber());
// Ensure final Unlock is concurrency safe and extra Unlock is safe but
// non-OK
// One lock is outstanding here and t3 takes one more, so of the three
// UnlockWAL calls (two in t3, one on this thread) exactly two can succeed.
std::atomic<int> unlock_ok{0};
port::Thread t3{[&]() {
if (db_->UnlockWAL().ok()) {
unlock_ok++;
}
ASSERT_OK(db_->LockWAL());
if (db_->UnlockWAL().ok()) {
unlock_ok++;
}
}};
if (db_->UnlockWAL().ok()) {
unlock_ok++;
}
t3.join();
// There was one extra unlock, so just one non-ok
ASSERT_EQ(unlock_ok.load(), 2);
// Write can proceed
t1.join();
ASSERT_TRUE(t1_completed.load());
ASSERT_EQ(Get("k1"), "val2");
// And new writes
ASSERT_OK(Put("k2", "val"));
ASSERT_EQ(Get("k2"), "val");
}
// Ten threads each interleave large writes issued with disableWAL = true and
// tiny writes issued with default options, syncing the WAL after every pair.
// The WAL_FILE_BYTES ticker must stay below the size of a single large value,
// i.e. the WAL-disabled payloads must not be counted as WAL bytes.
TEST_P(DBWriteTest, ConcurrentlyDisabledWAL) {
Options options = GetOptions();
options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
options.statistics->set_stats_level(StatsLevel::kAll);
Reopen(options);
std::string wal_key_prefix = "WAL_KEY_";
std::string no_wal_key_prefix = "K_";
// 100 KB value each for NO-WAL operation
std::string no_wal_value(1024 * 100, 'X');
// 1B value each for WAL operation
std::string wal_value = "0";
std::thread threads[10];
for (int t = 0; t < 10; t++) {
// Prefixes and values are captured by copy, so each thread works on its own
// strings.
threads[t] = std::thread([t, wal_key_prefix, wal_value, no_wal_key_prefix,
no_wal_value, this] {
for (int i = 0; i < 10; i++) {
ROCKSDB_NAMESPACE::WriteOptions write_option_disable;
write_option_disable.disableWAL = true;
ROCKSDB_NAMESPACE::WriteOptions write_option_default;
std::string no_wal_key =
no_wal_key_prefix + std::to_string(t) + "_" + std::to_string(i);
ASSERT_OK(this->Put(no_wal_key, no_wal_value, write_option_disable));
// NOTE(review): the WAL key is built from `i` twice (not `t` and `i`), so
// all threads write the same ten WAL keys -- confirm whether that is
// intended.
std::string wal_key =
wal_key_prefix + std::to_string(i) + "_" + std::to_string(i);
ASSERT_OK(this->Put(wal_key, wal_value, write_option_default));
ASSERT_OK(dbfull()->SyncWAL());
}
return;
});
}
for (auto& t : threads) {
t.join();
}
uint64_t bytes_num = options.statistics->getTickerCount(
ROCKSDB_NAMESPACE::Tickers::WAL_FILE_BYTES);
// The written WAL size should be less than 100KB (even including header and
// footer overhead)
ASSERT_LE(bytes_num, 1024 * 100);
}
// Instantiates every DBWriteTest case once per listed DBTestBase option
// configuration.
INSTANTIATE_TEST_CASE_P(DBWriteTestInstance, DBWriteTest,
testing::Values(DBTestBase::kDefault,
DBTestBase::kConcurrentWALWrites,
DBTestBase::kPipelinedWrite));
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack-trace handler, initializes
// GoogleTest from the command line, registers custom objects, and returns the
// result of running all tests.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  RegisterCustomObjects(argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

@ -1,214 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "db/dbformat.h"
#include "table/block_based/index_builder.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
namespace ROCKSDB_NAMESPACE {
static std::string IKey(const std::string& user_key, uint64_t seq,
ValueType vt) {
std::string encoded;
AppendInternalKey(&encoded, ParsedInternalKey(user_key, seq, vt));
return encoded;
}
// Returns a copy of internal key `s` after running
// FindShortestInternalKeySeparator on it with `l` as the limit, using the
// bytewise comparator.
static std::string Shorten(const std::string& s, const std::string& l) {
  std::string separator(s);
  ShortenedIndexBuilder::FindShortestInternalKeySeparator(
      *BytewiseComparator(), &separator, l);
  return separator;
}
// Returns a copy of internal key `s` after running
// FindShortInternalKeySuccessor on it with the bytewise comparator.
static std::string ShortSuccessor(const std::string& s) {
  std::string successor(s);
  ShortenedIndexBuilder::FindShortInternalKeySuccessor(*BytewiseComparator(),
                                                       &successor);
  return successor;
}
static void TestKey(const std::string& key, uint64_t seq, ValueType vt) {
std::string encoded = IKey(key, seq, vt);
Slice in(encoded);
ParsedInternalKey decoded("", 0, kTypeValue);
ASSERT_OK(ParseInternalKey(in, &decoded, true /* log_err_key */));
ASSERT_EQ(key, decoded.user_key.ToString());
ASSERT_EQ(seq, decoded.sequence);
ASSERT_EQ(vt, decoded.type);
ASSERT_NOK(ParseInternalKey(Slice("bar"), &decoded, true /* log_err_key */));
}
// Empty fixture used only to group the internal-key format tests below.
class FormatTest : public testing::Test {};
// Round-trips internal keys through encode/parse for several user keys
// (including the empty key) and for sequence numbers chosen around the 8-,
// 16- and 32-bit boundaries.
TEST_F(FormatTest, InternalKey_EncodeDecode) {
  const char* keys[] = {"", "k", "hello", "longggggggggggggggggggggg"};
  const uint64_t seq[] = {1,
                          2,
                          3,
                          (1ull << 8) - 1,
                          1ull << 8,
                          (1ull << 8) + 1,
                          (1ull << 16) - 1,
                          1ull << 16,
                          (1ull << 16) + 1,
                          (1ull << 32) - 1,
                          1ull << 32,
                          (1ull << 32) + 1};
  for (const char* key : keys) {
    for (const uint64_t s : seq) {
      TestKey(key, s, kTypeValue);
    }
  }
  // The deletion-type round trip does not depend on the loop variables, so it
  // is checked once instead of once per (key, seq) combination.
  TestKey("hello", 1, kTypeDeletion);
}
// Checks the output of Shorten(start, limit) -- i.e.
// ShortenedIndexBuilder::FindShortestInternalKeySeparator with the bytewise
// comparator -- for pairs of internal keys. In the cases where a shorter
// separator is expected, it carries kMaxSequenceNumber and kValueTypeForSeek;
// in the remaining cases the start key is expected to come back unchanged.
TEST_F(FormatTest, InternalKeyShortSeparator) {
// When user keys are same
ASSERT_EQ(IKey("foo", 100, kTypeValue),
Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 99, kTypeValue)));
ASSERT_EQ(
IKey("foo", 100, kTypeValue),
Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 101, kTypeValue)));
ASSERT_EQ(
IKey("foo", 100, kTypeValue),
Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 100, kTypeValue)));
ASSERT_EQ(
IKey("foo", 100, kTypeValue),
Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 100, kTypeDeletion)));
// When user keys are misordered
ASSERT_EQ(IKey("foo", 100, kTypeValue),
Shorten(IKey("foo", 100, kTypeValue), IKey("bar", 99, kTypeValue)));
// When user keys are different, but correctly ordered
ASSERT_EQ(
IKey("g", kMaxSequenceNumber, kValueTypeForSeek),
Shorten(IKey("foo", 100, kTypeValue), IKey("hello", 200, kTypeValue)));
ASSERT_EQ(IKey("ABC2", kMaxSequenceNumber, kValueTypeForSeek),
Shorten(IKey("ABC1AAAAA", 100, kTypeValue),
IKey("ABC2ABB", 200, kTypeValue)));
ASSERT_EQ(IKey("AAA2", kMaxSequenceNumber, kValueTypeForSeek),
Shorten(IKey("AAA1AAA", 100, kTypeValue),
IKey("AAA2AA", 200, kTypeValue)));
ASSERT_EQ(
IKey("AAA2", kMaxSequenceNumber, kValueTypeForSeek),
Shorten(IKey("AAA1AAA", 100, kTypeValue), IKey("AAA4", 200, kTypeValue)));
// The limit user key is exactly "AAA2" here, and the expected separator is
// "AAA1B" rather than "AAA2".
ASSERT_EQ(
IKey("AAA1B", kMaxSequenceNumber, kValueTypeForSeek),
Shorten(IKey("AAA1AAA", 100, kTypeValue), IKey("AAA2", 200, kTypeValue)));
ASSERT_EQ(IKey("AAA2", kMaxSequenceNumber, kValueTypeForSeek),
Shorten(IKey("AAA1AAA", 100, kTypeValue),
IKey("AAA2A", 200, kTypeValue)));
// Start and limit user keys have the same length and differ only in the last
// byte: the start key is expected back unchanged.
ASSERT_EQ(
IKey("AAA1", 100, kTypeValue),
Shorten(IKey("AAA1", 100, kTypeValue), IKey("AAA2", 200, kTypeValue)));
// When start user key is prefix of limit user key
ASSERT_EQ(
IKey("foo", 100, kTypeValue),
Shorten(IKey("foo", 100, kTypeValue), IKey("foobar", 200, kTypeValue)));
// When limit user key is prefix of start user key
ASSERT_EQ(
IKey("foobar", 100, kTypeValue),
Shorten(IKey("foobar", 100, kTypeValue), IKey("foo", 200, kTypeValue)));
}
// Checks ShortSuccessor() on two internal keys.
TEST_F(FormatTest, InternalKeyShortestSuccessor) {
// The expected successor of user key "foo" is "g", tagged with
// kMaxSequenceNumber and kValueTypeForSeek.
ASSERT_EQ(IKey("g", kMaxSequenceNumber, kValueTypeForSeek),
ShortSuccessor(IKey("foo", 100, kTypeValue)));
// A user key made only of 0xff bytes is expected to come back unchanged.
ASSERT_EQ(IKey("\xff\xff", 100, kTypeValue),
ShortSuccessor(IKey("\xff\xff", 100, kTypeValue)));
}
// Exercises IterKey::TrimAppend(). As the expected strings below show, each
// call keeps the first `shared_len` bytes (first argument) of the current key
// and appends the given number of bytes from the supplied buffer.
TEST_F(FormatTest, IterKeyOperation) {
IterKey k;
const char p[] = "abcdefghijklmnopqrstuvwxyz";
const char q[] = "0123456789";
// A freshly constructed IterKey has an empty user key.
ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()),
std::string(""));
k.TrimAppend(0, p, 3);
ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()),
std::string("abc"));
// Keep 1 byte ("a"), then append "abc".
k.TrimAppend(1, p, 3);
ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()),
std::string("aabc"));
k.TrimAppend(0, p, 26);
ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()),
std::string("abcdefghijklmnopqrstuvwxyz"));
k.TrimAppend(26, q, 10);
ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()),
std::string("abcdefghijklmnopqrstuvwxyz0123456789"));
k.TrimAppend(36, q, 1);
ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()),
std::string("abcdefghijklmnopqrstuvwxyz01234567890"));
// Trimming back to 26 bytes drops the previously appended digits.
k.TrimAppend(26, q, 1);
ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()),
std::string("abcdefghijklmnopqrstuvwxyz0"));
// Size going up, memory allocation is triggered
k.TrimAppend(27, p, 26);
ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()),
std::string("abcdefghijklmnopqrstuvwxyz0"
"abcdefghijklmnopqrstuvwxyz"));
}
// UpdateInternalKey() must rewrite the sequence number and value type in
// place: the encoded size stays the same, the user key is untouched, and the
// new sequence/type are what parsing returns.
TEST_F(FormatTest, UpdateInternalKey) {
  const std::string user_key("abcdefghijklmnopqrstuvwxyz");
  const uint64_t new_seq = 0x123456;
  const ValueType new_val_type = kTypeDeletion;

  std::string ikey;
  AppendInternalKey(&ikey, ParsedInternalKey(user_key, 100U, kTypeValue));
  const size_t size_before_update = ikey.size();

  UpdateInternalKey(&ikey, new_seq, new_val_type);
  ASSERT_EQ(size_before_update, ikey.size());

  ParsedInternalKey decoded;
  Slice in(ikey);
  ASSERT_OK(ParseInternalKey(in, &decoded, true /* log_err_key */));
  ASSERT_EQ(user_key, decoded.user_key.ToString());
  ASSERT_EQ(new_seq, decoded.sequence);
  ASSERT_EQ(new_val_type, decoded.type);
}
// The serialized end key of the range tombstone ("a", "b", seq 2) must order
// strictly before the internal key ("b", seq 3, kTypeValue) under the bytewise
// internal-key comparator.
TEST_F(FormatTest, RangeTombstoneSerializeEndKey) {
  const InternalKeyComparator cmp(BytewiseComparator());
  RangeTombstone tombstone("a", "b", 2);
  InternalKey point_key("b", 3, kTypeValue);
  ASSERT_LT(cmp.Compare(tombstone.SerializeEndKey(), point_key), 0);
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack-trace handler, initializes
// GoogleTest from the command line, registers custom objects, and returns the
// result of running all tests.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  RegisterCustomObjects(argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

@ -1,603 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <stdlib.h>
#include <map>
#include <string>
#include <vector>
#include "db/db_impl/db_impl.h"
#include "db/db_test_util.h"
#include "db/version_set.h"
#include "db/write_batch_internal.h"
#include "file/filename.h"
#include "port/stack_trace.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/transaction_log.h"
#include "test_util/sync_point.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
class DeleteFileTest : public DBTestBase {
public:
const int numlevels_;
const std::string wal_dir_;
DeleteFileTest()
: DBTestBase("deletefile_test", /*env_do_fsync=*/true),
numlevels_(7),
wal_dir_(dbname_ + "/wal_files") {}
void SetOptions(Options* options) {
ASSERT_NE(options, nullptr);
options->delete_obsolete_files_period_micros = 0; // always do full purge
options->enable_thread_tracking = true;
options->write_buffer_size = 1024 * 1024 * 1000;
options->target_file_size_base = 1024 * 1024 * 1000;
options->max_bytes_for_level_base = 1024 * 1024 * 1000;
options->WAL_ttl_seconds = 300; // Used to test log files
options->WAL_size_limit_MB = 1024; // Used to test log files
options->wal_dir = wal_dir_;
}
void AddKeys(int numkeys, int startkey = 0) {
WriteOptions options;
options.sync = false;
ReadOptions roptions;
for (int i = startkey; i < (numkeys + startkey); i++) {
std::string temp = std::to_string(i);
Slice key(temp);
Slice value(temp);
ASSERT_OK(db_->Put(options, key, value));
}
}
int numKeysInLevels(std::vector<LiveFileMetaData>& metadata,
std::vector<int>* keysperlevel = nullptr) {
if (keysperlevel != nullptr) {
keysperlevel->resize(numlevels_);
}
int numKeys = 0;
for (size_t i = 0; i < metadata.size(); i++) {
int startkey = atoi(metadata[i].smallestkey.c_str());
int endkey = atoi(metadata[i].largestkey.c_str());
int numkeysinfile = (endkey - startkey + 1);
numKeys += numkeysinfile;
if (keysperlevel != nullptr) {
(*keysperlevel)[(int)metadata[i].level] += numkeysinfile;
}
fprintf(stderr, "level %d name %s smallest %s largest %s\n",
metadata[i].level, metadata[i].name.c_str(),
metadata[i].smallestkey.c_str(), metadata[i].largestkey.c_str());
}
return numKeys;
}
void CreateTwoLevels() {
AddKeys(50000, 10000);
ASSERT_OK(dbfull()->TEST_FlushMemTable());
ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
for (int i = 0; i < 2; ++i) {
ASSERT_OK(dbfull()->TEST_CompactRange(i, nullptr, nullptr));
}
AddKeys(50000, 10000);
ASSERT_OK(dbfull()->TEST_FlushMemTable());
ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr));
}
void CheckFileTypeCounts(const std::string& dir, int required_log,
int required_sst, int required_manifest) {
std::vector<std::string> filenames;
ASSERT_OK(env_->GetChildren(dir, &filenames));
int log_cnt = 0, sst_cnt = 0, manifest_cnt = 0;
for (auto file : filenames) {
uint64_t number;
FileType type;
if (ParseFileName(file, &number, &type)) {
log_cnt += (type == kWalFile);
sst_cnt += (type == kTableFile);
manifest_cnt += (type == kDescriptorFile);
}
}
if (required_log >= 0) {
ASSERT_EQ(required_log, log_cnt);
}
if (required_sst >= 0) {
ASSERT_EQ(required_sst, sst_cnt);
}
if (required_manifest >= 0) {
ASSERT_EQ(required_manifest, manifest_cnt);
}
}
static void DoSleep(void* arg) {
auto test = reinterpret_cast<DeleteFileTest*>(arg);
test->env_->SleepForMicroseconds(2 * 1000 * 1000);
}
// An empty job to guard all jobs are processed
static void GuardFinish(void* /*arg*/) {
TEST_SYNC_POINT("DeleteFileTest::GuardFinish");
}
};
// Builds two table files with CreateTwoLevels() and checks which DeleteFile()
// requests are accepted: an unknown file name and the level-1 file must be
// rejected with InvalidArgument, while the level-2 file can be deleted.
TEST_F(DeleteFileTest, AddKeysAndQueryLevels) {
Options options = CurrentOptions();
SetOptions(&options);
Destroy(options);
options.create_if_missing = true;
Reopen(options);
CreateTwoLevels();
std::vector<LiveFileMetaData> metadata;
db_->GetLiveFilesMetaData(&metadata);
std::string level1file = "";
int level1keycount = 0;
std::string level2file = "";
int level2keycount = 0;
int level1index = 0;
int level2index = 1;
ASSERT_EQ((int)metadata.size(), 2);
// The two files may be reported in either order; swap the indices when the
// level-2 file comes first.
if (metadata[0].level == 2) {
level1index = 1;
level2index = 0;
}
level1file = metadata[level1index].name;
int startkey = atoi(metadata[level1index].smallestkey.c_str());
int endkey = atoi(metadata[level1index].largestkey.c_str());
level1keycount = (endkey - startkey + 1);
level2file = metadata[level2index].name;
startkey = atoi(metadata[level2index].smallestkey.c_str());
endkey = atoi(metadata[level2index].largestkey.c_str());
level2keycount = (endkey - startkey + 1);
// Controlled setup. Levels 1 and 2 should both have 50K keys.
// This is a little fragile as it depends on the current
// compaction heuristics.
ASSERT_EQ(level1keycount, 50000);
ASSERT_EQ(level2keycount, 50000);
// A file name that does not belong to the DB is rejected.
Status status = db_->DeleteFile("0.sst");
ASSERT_TRUE(status.IsInvalidArgument());
// intermediate level files cannot be deleted.
status = db_->DeleteFile(level1file);
ASSERT_TRUE(status.IsInvalidArgument());
// Lowest level file deletion should succeed.
status = db_->DeleteFile(level2file);
ASSERT_OK(status);
}
// Checks that table files made obsolete by a compaction are purged right
// away when nothing references them, and only once the iterator is deleted
// when a live iterator still pins them.
TEST_F(DeleteFileTest, PurgeObsoleteFilesTest) {
Options options = CurrentOptions();
SetOptions(&options);
Destroy(options);
options.create_if_missing = true;
Reopen(options);
CreateTwoLevels();
// there should be only one (empty) log file because CreateTwoLevels()
// flushes the memtables to disk
CheckFileTypeCounts(wal_dir_, 1, 0, 0);
// 2 ssts, 1 manifest
CheckFileTypeCounts(dbname_, 0, 2, 1);
// Compaction range given as string keys "0".."999999", with the output
// moved to level 2.
std::string first("0"), last("999999");
CompactRangeOptions compact_options;
compact_options.change_level = true;
compact_options.target_level = 2;
Slice first_slice(first), last_slice(last);
ASSERT_OK(db_->CompactRange(compact_options, &first_slice, &last_slice));
// 1 sst after compaction
CheckFileTypeCounts(dbname_, 0, 1, 1);
// this time, we keep an iterator alive
Reopen(options);
Iterator* itr = nullptr;
CreateTwoLevels();
itr = db_->NewIterator(ReadOptions());
ASSERT_OK(itr->status());
ASSERT_OK(db_->CompactRange(compact_options, &first_slice, &last_slice));
ASSERT_OK(itr->status());
// 3 sst after compaction with live iterator
CheckFileTypeCounts(dbname_, 0, 3, 1);
// The iterator is deleted explicitly here (not at scope exit) because the
// file count is checked immediately afterwards.
delete itr;
// 1 sst after iterator deletion
CheckFileTypeCounts(dbname_, 0, 1, 1);
}
// With ReadOptions::background_purge_on_iterator_cleanup set, deleting an
// iterator must not purge obsolete files in the calling thread; the files
// are expected to disappear only after the HIGH-priority background jobs
// have run.
TEST_F(DeleteFileTest, BackgroundPurgeIteratorTest) {
Options options = CurrentOptions();
SetOptions(&options);
Destroy(options);
options.create_if_missing = true;
Reopen(options);
std::string first("0"), last("999999");
CompactRangeOptions compact_options;
compact_options.change_level = true;
compact_options.target_level = 2;
Slice first_slice(first), last_slice(last);
// We keep an iterator alive
Iterator* itr = nullptr;
CreateTwoLevels();
ReadOptions read_options;
read_options.background_purge_on_iterator_cleanup = true;
itr = db_->NewIterator(read_options);
ASSERT_OK(itr->status());
ASSERT_OK(db_->CompactRange(compact_options, &first_slice, &last_slice));
// 3 sst after compaction with live iterator
CheckFileTypeCounts(dbname_, 0, 3, 1);
// Schedule one sleeping task before and one after deleting the iterator, so
// the HIGH-priority queue is occupied while the iterator goes away.
test::SleepingBackgroundTask sleeping_task_before;
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
&sleeping_task_before, Env::Priority::HIGH);
delete itr;
test::SleepingBackgroundTask sleeping_task_after;
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
&sleeping_task_after, Env::Priority::HIGH);
// Make sure no purges are executed foreground
CheckFileTypeCounts(dbname_, 0, 3, 1);
sleeping_task_before.WakeUp();
sleeping_task_before.WaitUntilDone();
// Make sure all background purges are executed
sleeping_task_after.WakeUp();
sleeping_task_after.WaitUntilDone();
// 1 sst after iterator deletion
CheckFileTypeCounts(dbname_, 0, 1, 1);
}
// Places an empty, unreferenced "000002.sst" in the DB directory and checks
// that reopening the DB removes it: before Reopen() returns when
// avoid_unnecessary_blocking_io is false, and only after the background purge
// job has run when it is true.
TEST_F(DeleteFileTest, PurgeDuringOpen) {
Options options = CurrentOptions();
CheckFileTypeCounts(dbname_, -1, 0, -1);
Close();
std::unique_ptr<WritableFile> file;
ASSERT_OK(options.env->NewWritableFile(dbname_ + "/000002.sst", &file,
EnvOptions()));
ASSERT_OK(file->Close());
CheckFileTypeCounts(dbname_, -1, 1, -1);
options.avoid_unnecessary_blocking_io = false;
options.create_if_missing = false;
Reopen(options);
// The orphan file is already gone once Reopen() has returned.
CheckFileTypeCounts(dbname_, -1, 0, -1);
Close();
// test background purge
options.avoid_unnecessary_blocking_io = true;
options.create_if_missing = false;
ASSERT_OK(options.env->NewWritableFile(dbname_ + "/000002.sst", &file,
EnvOptions()));
ASSERT_OK(file->Close());
CheckFileTypeCounts(dbname_, -1, 1, -1);
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
// Hold "DBImpl::BGWorkPurge:start" back until this test has passed sync
// point "DeleteFileTest::PurgeDuringOpen:1".
SyncPoint::GetInstance()->LoadDependency(
{{"DeleteFileTest::PurgeDuringOpen:1", "DBImpl::BGWorkPurge:start"}});
SyncPoint::GetInstance()->EnableProcessing();
Reopen(options);
// the obsolete file is not deleted until the background purge job has run
CheckFileTypeCounts(dbname_, -1, 1, -1);
TEST_SYNC_POINT("DeleteFileTest::PurgeDuringOpen:1");
ASSERT_OK(dbfull()->TEST_WaitForPurge());
CheckFileTypeCounts(dbname_, -1, 0, -1);
}
// Drops a column family that owns one table file and checks when that file
// is removed after the handle is deleted. The scenario runs twice: with
// avoid_unnecessary_blocking_io = false and then with it set to true.
TEST_F(DeleteFileTest, BackgroundPurgeCFDropTest) {
Options options = CurrentOptions();
SetOptions(&options);
Destroy(options);
options.create_if_missing = true;
Reopen(options);
// Runs one create/flush/drop cycle. `bg_purge` states whether the file is
// expected to survive until the background jobs have run.
auto do_test = [&](bool bg_purge) {
ColumnFamilyOptions co;
co.max_write_buffer_size_to_maintain =
static_cast<int64_t>(co.write_buffer_size);
WriteOptions wo;
FlushOptions fo;
ColumnFamilyHandle* cfh = nullptr;
ASSERT_OK(db_->CreateColumnFamily(co, "dropme", &cfh));
ASSERT_OK(db_->Put(wo, cfh, "pika", "chu"));
ASSERT_OK(db_->Flush(fo, cfh));
// Expect 1 sst file.
CheckFileTypeCounts(dbname_, 0, 1, 1);
ASSERT_OK(db_->DropColumnFamily(cfh));
// Still 1 file, it won't be deleted while ColumnFamilyHandle is alive.
CheckFileTypeCounts(dbname_, 0, 1, 1);
delete cfh;
test::SleepingBackgroundTask sleeping_task_after;
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
&sleeping_task_after, Env::Priority::HIGH);
// If background purge is enabled, the file should still be there.
CheckFileTypeCounts(dbname_, 0, bg_purge ? 1 : 0, 1);
TEST_SYNC_POINT("DeleteFileTest::BackgroundPurgeCFDropTest:1");
// Execute background purges.
sleeping_task_after.WakeUp();
sleeping_task_after.WaitUntilDone();
// The file should have been deleted.
CheckFileTypeCounts(dbname_, 0, 0, 1);
};
{
SCOPED_TRACE("avoid_unnecessary_blocking_io = false");
do_test(false);
}
options.avoid_unnecessary_blocking_io = true;
options.create_if_missing = false;
Reopen(options);
ASSERT_OK(dbfull()->TEST_WaitForPurge());
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
// Hold "DBImpl::BGWorkPurge:start" back until do_test has passed its sync
// point.
SyncPoint::GetInstance()->LoadDependency(
{{"DeleteFileTest::BackgroundPurgeCFDropTest:1",
"DBImpl::BGWorkPurge:start"}});
SyncPoint::GetInstance()->EnableProcessing();
{
SCOPED_TRACE("avoid_unnecessary_blocking_io = true");
do_test(true);
}
}
// This test reproduces a bug in which the iterator cleanup function read an
// invalid (already destroyed) ReadOptions object.
TEST_F(DeleteFileTest, BackgroundPurgeCopyOptions) {
Options options = CurrentOptions();
SetOptions(&options);
Destroy(options);
options.create_if_missing = true;
Reopen(options);
std::string first("0"), last("999999");
CompactRangeOptions compact_options;
compact_options.change_level = true;
compact_options.target_level = 2;
Slice first_slice(first), last_slice(last);
// We keep an iterator alive
Iterator* itr = nullptr;
CreateTwoLevels();
{
// The ReadOptions object lives only inside this scope, while the iterator
// created from it outlives the scope.
ReadOptions read_options;
read_options.background_purge_on_iterator_cleanup = true;
itr = db_->NewIterator(read_options);
ASSERT_OK(itr->status());
// ReadOptions is deleted, but iterator cleanup function should not be
// affected
}
ASSERT_OK(db_->CompactRange(compact_options, &first_slice, &last_slice));
// 3 sst after compaction with live iterator
CheckFileTypeCounts(dbname_, 0, 3, 1);
delete itr;
test::SleepingBackgroundTask sleeping_task_after;
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
&sleeping_task_after, Env::Priority::HIGH);
// Make sure all background purges are executed
sleeping_task_after.WakeUp();
sleeping_task_after.WaitUntilDone();
// 1 sst after iterator deletion
CheckFileTypeCounts(dbname_, 0, 1, 1);
}
// Deletes two iterators that both request background purge, interleaved with
// other HIGH-priority jobs, then closes the DB. After Close() only one table
// file is expected to remain, i.e. closing must wait for the pending purge
// jobs.
TEST_F(DeleteFileTest, BackgroundPurgeTestMultipleJobs) {
Options options = CurrentOptions();
SetOptions(&options);
Destroy(options);
options.create_if_missing = true;
Reopen(options);
std::string first("0"), last("999999");
CompactRangeOptions compact_options;
compact_options.change_level = true;
compact_options.target_level = 2;
Slice first_slice(first), last_slice(last);
// We keep an iterator alive
CreateTwoLevels();
ReadOptions read_options;
read_options.background_purge_on_iterator_cleanup = true;
Iterator* itr1 = db_->NewIterator(read_options);
ASSERT_OK(itr1->status());
CreateTwoLevels();
Iterator* itr2 = db_->NewIterator(read_options);
ASSERT_OK(itr2->status());
ASSERT_OK(db_->CompactRange(compact_options, &first_slice, &last_slice));
// 5 sst files after 2 compactions with 2 live iterators
CheckFileTypeCounts(dbname_, 0, 5, 1);
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
// ~DBImpl should wait until all BGWorkPurge are finished
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
{{"DBImpl::~DBImpl:WaitJob", "DBImpl::BGWorkPurge"},
{"DeleteFileTest::GuardFinish",
"DeleteFileTest::BackgroundPurgeTestMultipleJobs:DBClose"}});
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
// Interleave the iterator deletions with a two-second sleep job and the
// GuardFinish marker job on the HIGH-priority queue.
delete itr1;
env_->Schedule(&DeleteFileTest::DoSleep, this, Env::Priority::HIGH);
delete itr2;
env_->Schedule(&DeleteFileTest::GuardFinish, nullptr, Env::Priority::HIGH);
Close();
TEST_SYNC_POINT("DeleteFileTest::BackgroundPurgeTestMultipleJobs:DBClose");
// 1 sst after iterator deletion
CheckFileTypeCounts(dbname_, 0, 1, 1);
}
// Deletes the level-2 file through DeleteFile() while an iterator created
// beforehand is still alive, then checks that the iterator still yields all
// 50000 keys.
TEST_F(DeleteFileTest, DeleteFileWithIterator) {
  Options options = CurrentOptions();
  SetOptions(&options);
  Destroy(options);
  options.create_if_missing = true;
  Reopen(options);
  CreateTwoLevels();
  ReadOptions read_options;
  // Owned by a unique_ptr so the iterator is released even when one of the
  // ASSERT_* checks below returns from the test early.
  std::unique_ptr<Iterator> it(db_->NewIterator(read_options));
  ASSERT_OK(it->status());
  std::vector<LiveFileMetaData> metadata;
  db_->GetLiveFilesMetaData(&metadata);
  std::string level2file;
  ASSERT_EQ(metadata.size(), static_cast<size_t>(2));
  // The two files may be reported in either order; pick the one that is not
  // on level 1.
  if (metadata[0].level == 1) {
    level2file = metadata[1].name;
  } else {
    level2file = metadata[0].name;
  }
  Status status = db_->DeleteFile(level2file);
  fprintf(stdout, "Deletion status %s: %s\n", level2file.c_str(),
          status.ToString().c_str());
  ASSERT_OK(status);
  it->SeekToFirst();
  int numKeysIterated = 0;
  while (it->Valid()) {
    numKeysIterated++;
    it->Next();
  }
  ASSERT_EQ(numKeysIterated, 50000);
}
// Checks DeleteFile() on WAL files: deleting the live log must fail and leave
// the file in place, while deleting an archived log must succeed and remove
// it.
TEST_F(DeleteFileTest, DeleteLogFiles) {
Options options = CurrentOptions();
SetOptions(&options);
Destroy(options);
options.create_if_missing = true;
Reopen(options);
AddKeys(10, 0);
VectorLogPtr logfiles;
ASSERT_OK(db_->GetSortedWalFiles(logfiles));
ASSERT_GT(logfiles.size(), 0UL);
// Take the last log file which is expected to be alive and try to delete it
// Should not succeed because live logs are not allowed to be deleted
std::unique_ptr<LogFile> alive_log = std::move(logfiles.back());
ASSERT_EQ(alive_log->Type(), kAliveLogFile);
ASSERT_OK(env_->FileExists(wal_dir_ + "/" + alive_log->PathName()));
fprintf(stdout, "Deleting alive log file %s\n",
alive_log->PathName().c_str());
ASSERT_NOK(db_->DeleteFile(alive_log->PathName()));
ASSERT_OK(env_->FileExists(wal_dir_ + "/" + alive_log->PathName()));
logfiles.clear();
// Call Flush to bring about a new working log file and add more keys
// Call Flush again to flush out memtable and move alive log to archived log
// and try to delete the archived log file
FlushOptions fopts;
ASSERT_OK(db_->Flush(fopts));
AddKeys(10, 0);
ASSERT_OK(db_->Flush(fopts));
ASSERT_OK(db_->GetSortedWalFiles(logfiles));
ASSERT_GT(logfiles.size(), 0UL);
// The first entry of the sorted list is expected to be an archived log.
std::unique_ptr<LogFile> archived_log = std::move(logfiles.front());
ASSERT_EQ(archived_log->Type(), kArchivedLogFile);
ASSERT_OK(env_->FileExists(wal_dir_ + "/" + archived_log->PathName()));
fprintf(stdout, "Deleting archived log file %s\n",
archived_log->PathName().c_str());
ASSERT_OK(db_->DeleteFile(archived_log->PathName()));
ASSERT_TRUE(
env_->FileExists(wal_dir_ + "/" + archived_log->PathName()).IsNotFound());
}
// Flushes two table files into the non-default column family "new_cf" and
// checks that DeleteFile() rejects the newer file with InvalidArgument but
// accepts the older one. Afterwards 1000 entries must be visible in that
// column family, both before and after reopening the DB.
TEST_F(DeleteFileTest, DeleteNonDefaultColumnFamily) {
Options options = CurrentOptions();
SetOptions(&options);
Destroy(options);
options.create_if_missing = true;
Reopen(options);
CreateAndReopenWithCF({"new_cf"}, options);
Random rnd(5);
// First batch of 1000 random key/value pairs, flushed into its own file.
for (int i = 0; i < 1000; ++i) {
ASSERT_OK(db_->Put(WriteOptions(), handles_[1], test::RandomKey(&rnd, 10),
test::RandomKey(&rnd, 10)));
}
ASSERT_OK(db_->Flush(FlushOptions(), handles_[1]));
// Second batch of 1000 pairs, flushed into a second file.
for (int i = 0; i < 1000; ++i) {
ASSERT_OK(db_->Put(WriteOptions(), handles_[1], test::RandomKey(&rnd, 10),
test::RandomKey(&rnd, 10)));
}
ASSERT_OK(db_->Flush(FlushOptions(), handles_[1]));
std::vector<LiveFileMetaData> metadata;
db_->GetLiveFilesMetaData(&metadata);
ASSERT_EQ(2U, metadata.size());
ASSERT_EQ("new_cf", metadata[0].column_family_name);
ASSERT_EQ("new_cf", metadata[1].column_family_name);
// Tell the two files apart by their smallest sequence number.
auto old_file = metadata[0].smallest_seqno < metadata[1].smallest_seqno
? metadata[0].name
: metadata[1].name;
auto new_file = metadata[0].smallest_seqno > metadata[1].smallest_seqno
? metadata[0].name
: metadata[1].name;
ASSERT_TRUE(db_->DeleteFile(new_file).IsInvalidArgument());
ASSERT_OK(db_->DeleteFile(old_file));
{
std::unique_ptr<Iterator> itr(db_->NewIterator(ReadOptions(), handles_[1]));
ASSERT_OK(itr->status());
int count = 0;
for (itr->SeekToFirst(); itr->Valid(); itr->Next()) {
ASSERT_OK(itr->status());
++count;
}
ASSERT_EQ(count, 1000);
}
Close();
ReopenWithColumnFamilies({kDefaultColumnFamilyName, "new_cf"}, options);
// Same check again after reopening the DB.
{
std::unique_ptr<Iterator> itr(db_->NewIterator(ReadOptions(), handles_[1]));
int count = 0;
for (itr->SeekToFirst(); itr->Valid(); itr->Next()) {
ASSERT_OK(itr->status());
++count;
}
ASSERT_EQ(count, 1000);
}
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack-trace handler, initializes
// GoogleTest from the command line, registers custom objects, and returns the
// result of running all tests.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  RegisterCustomObjects(argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,637 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright 2014 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
// This test uses a custom Env to keep track of the state of a filesystem as of
// the last "sync". It then checks for data loss errors by purposely dropping
// file data (or entire files) not protected by a "sync".
#include "db/db_impl/db_impl.h"
#include "db/log_format.h"
#include "db/version_set.h"
#include "env/mock_env.h"
#include "file/filename.h"
#include "rocksdb/cache.h"
#include "rocksdb/convenience.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/table.h"
#include "rocksdb/write_batch.h"
#include "test_util/sync_point.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/mutexlock.h"
#include "util/random.h"
#include "utilities/fault_injection_env.h"
#ifndef NDEBUG
#include "utilities/fault_injection_fs.h"
#endif
namespace ROCKSDB_NAMESPACE {
// Length of every value string generated by FaultInjectionTest::Value().
static const int kValueSize = 1000;
// NOTE(review): presumably the upper bound on the number of records a test
// writes -- no use is visible in this part of the file; confirm.
static const int kMaxNumValues = 2000;
// NOTE(review): presumably the number of rounds a test repeats -- no use is
// visible in this part of the file; confirm.
static const size_t kNumIterations = 3;
// Option configurations a FaultInjectionTest can run under; see
// FaultInjectionTest::CurrentOptions() for how each one is applied.
enum FaultInjectionOptionConfig {
// Default-constructed Options.
kDefault,
// Table files go to a separate "fault_test_data" db_path.
kDifferentDataDir,
// WAL files go to a separate "fault_test_wal" directory.
kWalDir,
// Persistence is ensured by syncing the WAL instead of compacting.
kSyncWal,
// Separate WAL directory combined with syncing the WAL.
kWalDirSyncWal,
// Small write buffers and low level-0 triggers, combined with syncing the
// WAL.
kMultiLevels,
// Sentinel used as a non-inclusive end of the configuration range.
kEnd,
};
class FaultInjectionTest
: public testing::Test,
public testing::WithParamInterface<std::tuple<
bool, FaultInjectionOptionConfig, FaultInjectionOptionConfig>> {
protected:
int option_config_;
int non_inclusive_end_range_; // kEnd or equivalent to that
// When need to make sure data is persistent, sync WAL
bool sync_use_wal_;
// When need to make sure data is persistent, call DB::CompactRange()
bool sync_use_compact_;
bool sequential_order_;
public:
enum ExpectedVerifResult { kValExpectFound, kValExpectNoError };
enum ResetMethod {
kResetDropUnsyncedData,
kResetDropRandomUnsyncedData,
kResetDeleteUnsyncedFiles,
kResetDropAndDeleteUnsynced
};
std::unique_ptr<Env> base_env_;
FaultInjectionTestEnv* env_;
std::string dbname_;
std::shared_ptr<Cache> tiny_cache_;
Options options_;
DB* db_;
// Reads the option-config range from the test parameter tuple (element 1 is
// the starting configuration, element 2 the non-inclusive end) and creates
// the system Env through test::CreateEnvFromSystem(). sequential_order_ is
// not set here; SetUp() assigns it from element 0 of the tuple.
FaultInjectionTest()
: option_config_(std::get<1>(GetParam())),
non_inclusive_end_range_(std::get<2>(GetParam())),
sync_use_wal_(false),
sync_use_compact_(true),
base_env_(nullptr),
env_(nullptr),
db_(nullptr) {
EXPECT_OK(
test::CreateEnvFromSystem(ConfigOptions(), &system_env_, &env_guard_));
EXPECT_NE(system_env_, nullptr);
}
// Turns sync-point processing off and drops every registered callback so
// that nothing set up by this test stays active afterwards.
~FaultInjectionTest() override {
  auto* sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sync_point->DisableProcessing();
  sync_point->ClearAllCallBacks();
}
// Advances to the next option configuration. Returns false once the
// non-inclusive end of the range has been reached, true otherwise. On
// entering kMultiLevels the base Env is replaced by a MockEnv built on top of
// the system Env.
bool ChangeOptions() {
  ++option_config_;
  if (option_config_ >= non_inclusive_end_range_) {
    return false;
  }
  if (option_config_ == kMultiLevels) {
    base_env_.reset(MockEnv::Create(system_env_));
  }
  return true;
}
// Return the current option configuration.
// Builds a fresh Options object for option_config_ and, as a side effect,
// resets sync_use_wal_ / sync_use_compact_ to match: the default is to
// persist data through compaction, while the *SyncWal and kMultiLevels
// configurations persist it by syncing the WAL instead.
Options CurrentOptions() {
sync_use_wal_ = false;
sync_use_compact_ = true;
Options options;
switch (option_config_) {
case kWalDir:
options.wal_dir = test::PerThreadDBPath(env_, "fault_test_wal");
break;
case kDifferentDataDir:
options.db_paths.emplace_back(
test::PerThreadDBPath(env_, "fault_test_data"), 1000000U);
break;
case kSyncWal:
sync_use_wal_ = true;
sync_use_compact_ = false;
break;
case kWalDirSyncWal:
// NOTE(review): this path has a leading '/' while the kWalDir case above
// passes "fault_test_wal" without one -- confirm the difference is
// intended.
options.wal_dir = test::PerThreadDBPath(env_, "/fault_test_wal");
sync_use_wal_ = true;
sync_use_compact_ = false;
break;
case kMultiLevels:
// Small buffers/files and low level-0 triggers.
options.write_buffer_size = 64 * 1024;
options.target_file_size_base = 64 * 1024;
options.level0_file_num_compaction_trigger = 2;
options.level0_slowdown_writes_trigger = 2;
options.level0_stop_writes_trigger = 4;
options.max_bytes_for_level_base = 128 * 1024;
options.max_write_buffer_number = 2;
options.max_background_compactions = 8;
options.max_background_flushes = 8;
sync_use_wal_ = true;
sync_use_compact_ = false;
break;
default:
// kDefault and any other value: plain default Options.
break;
}
return options;
}
// Creates the fault-injection Env, builds options_ for the current
// configuration, destroys any previous DB at the per-thread "fault_test" path
// and opens a new one. Must only be called while db_, tiny_cache_ and env_
// are all null. Returns the status of OpenDB().
Status NewDB() {
assert(db_ == nullptr);
assert(tiny_cache_ == nullptr);
assert(env_ == nullptr);
// Wrap base_env_ when one has been installed (see ChangeOptions()),
// otherwise the system Env.
env_ = new FaultInjectionTestEnv(base_env_ ? base_env_.get() : system_env_);
options_ = CurrentOptions();
options_.env = env_;
options_.paranoid_checks = true;
BlockBasedTableOptions table_options;
// Block cache created with a capacity argument of 100.
tiny_cache_ = NewLRUCache(100);
table_options.block_cache = tiny_cache_;
options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
dbname_ = test::PerThreadDBPath("fault_test");
EXPECT_OK(DestroyDB(dbname_, options_));
// create_if_missing is enabled only for this first open.
options_.create_if_missing = true;
Status s = OpenDB();
options_.create_if_missing = false;
return s;
}
// gtest fixture hook: reads sequential_order_ from the first element of the
// test parameter tuple, then creates a fresh DB.
void SetUp() override {
  const auto& test_params = GetParam();
  sequential_order_ = std::get<0>(test_params);
  ASSERT_OK(NewDB());
}
void TearDown() override {
CloseDB();
Status s = DestroyDB(dbname_, options_);
delete env_;
env_ = nullptr;
tiny_cache_.reset();
ASSERT_OK(s);
}
void Build(const WriteOptions& write_options, int start_idx, int num_vals) {
std::string key_space, value_space;
WriteBatch batch;
for (int i = start_idx; i < start_idx + num_vals; i++) {
Slice key = Key(i, &key_space);
batch.Clear();
ASSERT_OK(batch.Put(key, Value(i, &value_space)));
ASSERT_OK(db_->Write(write_options, &batch));
}
}
// Reads the value stored under the i-th key into *val.
// Returns the status of the underlying DB::Get call.
//
// The original also generated the expected value for i and then discarded
// it; that dead computation has been removed. Callers that need the
// expected value call Value() themselves.
Status ReadValue(int i, std::string* val) const {
  std::string key_space;
  const Slice key = Key(i, &key_space);
  return db_->Get(ReadOptions(), key, val);
}
// Reads back the records with indices [start_idx, start_idx + num_vals).
// Every record that is read successfully must match its generated value.
//  - With kValExpectFound, any failed read is logged and returned.
//  - Otherwise only errors other than NotFound are logged and returned.
// Note: the loop stops as soon as a read is not OK, so in the second mode
// the scan ends (successfully) at the first NotFound record.
Status Verify(int start_idx, int num_vals,
              ExpectedVerifResult expected) const {
  const bool must_exist = (expected == kValExpectFound);
  const int end_idx = start_idx + num_vals;
  std::string actual_value;
  std::string expected_value;
  Status read_status;
  for (int idx = start_idx; idx < end_idx && read_status.ok(); ++idx) {
    Value(idx, &expected_value);
    read_status = ReadValue(idx, &actual_value);
    if (read_status.ok()) {
      EXPECT_EQ(expected_value, actual_value);
      continue;
    }
    if (must_exist) {
      fprintf(stderr, "Error when read %dth record (expect found): %s\n", idx,
              read_status.ToString().c_str());
      return read_status;
    }
    if (!read_status.IsNotFound()) {
      fprintf(stderr, "Error when read %dth record: %s\n", idx,
              read_status.ToString().c_str());
      return read_status;
    }
  }
  return Status::OK();
}
// Return the ith key
// The key is the index formatted as a 16-character zero-padded decimal
// ("%016d"). When sequential_order_ is false the index is first scrambled
// (multiply, then xor with itself shifted left by 24) before formatting.
// The text is stored in *storage and the returned Slice refers to it.
Slice Key(int i, std::string* storage) const {
  unsigned long long scrambled = i;
  if (!sequential_order_) {
    // random transfer
    constexpr int kMultiplier = 0x5bd1e995;
    scrambled = scrambled * kMultiplier;
    scrambled = scrambled ^ (scrambled << 24);
  }
  char formatted[100];
  snprintf(formatted, sizeof(formatted), "%016d",
           static_cast<int>(scrambled));
  storage->assign(formatted);
  return Slice(*storage);
}
// Return the value to associate with the specified key
// The value is a kValueSize-long string drawn from a Random constructed
// from k. It is stored in *storage and the returned Slice refers to it.
Slice Value(int k, std::string* storage) const {
  Random value_rng(k);
  *storage = value_rng.RandomString(kValueSize);
  return Slice(*storage);
}
void CloseDB() {
delete db_;
db_ = nullptr;
}
// Closes any open DB handle, resets the fault-injection env state and
// (re)opens the database at dbname_ with options_.
// Returns the DB::Open status; asserts that a DB handle was produced.
Status OpenDB() {
  CloseDB();
  env_->ResetState();
  Status open_status = DB::Open(options_, dbname_, &db_);
  assert(db_ != nullptr);
  return open_status;
}
void DeleteAllData() {
Iterator* iter = db_->NewIterator(ReadOptions());
WriteOptions options;
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
ASSERT_OK(db_->Delete(WriteOptions(), iter->key()));
}
ASSERT_OK(iter->status());
delete iter;
FlushOptions flush_options;
flush_options.wait = true;
ASSERT_OK(db_->Flush(flush_options));
}
// Applies the requested reset to the fault-injection env. Requires that no
// file is open (checked via AssertNoOpenFile).
// rnd cannot be null for kResetDropRandomUnsyncedData
// An unknown reset_method trips an assert.
void ResetDBState(ResetMethod reset_method, Random* rnd = nullptr) {
  env_->AssertNoOpenFile();
  if (reset_method == kResetDropUnsyncedData) {
    ASSERT_OK(env_->DropUnsyncedFileData());
  } else if (reset_method == kResetDropRandomUnsyncedData) {
    ASSERT_OK(env_->DropRandomUnsyncedFileData(rnd));
  } else if (reset_method == kResetDeleteUnsyncedFiles) {
    ASSERT_OK(env_->DeleteFilesCreatedAfterLastDirSync());
  } else if (reset_method == kResetDropAndDeleteUnsynced) {
    ASSERT_OK(env_->DropUnsyncedFileData());
    ASSERT_OK(env_->DeleteFilesCreatedAfterLastDirSync());
  } else {
    assert(false);
  }
}
void PartialCompactTestPreFault(int num_pre_sync, int num_post_sync) {
DeleteAllData();
WriteOptions write_options;
write_options.sync = sync_use_wal_;
Build(write_options, 0, num_pre_sync);
if (sync_use_compact_) {
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
}
write_options.sync = false;
Build(write_options, num_pre_sync, num_post_sync);
}
// Deactivates the filesystem, closes the DB, applies reset_method and
// reopens. The first num_pre_sync records must then be found, and reading
// the following num_post_sync records must not produce an error. The same
// checks are repeated after WaitCompactionFinish().
void PartialCompactTestReopenWithFault(ResetMethod reset_method,
                                       int num_pre_sync, int num_post_sync,
                                       Random* rnd = nullptr) {
  env_->SetFilesystemActive(false);
  CloseDB();
  ResetDBState(reset_method, rnd);
  ASSERT_OK(OpenDB());
  for (int pass = 0; pass < 2; ++pass) {
    if (pass == 1) {
      // Second pass: verify again once compaction has finished.
      WaitCompactionFinish();
    }
    ASSERT_OK(Verify(0, num_pre_sync, FaultInjectionTest::kValExpectFound));
    ASSERT_OK(Verify(num_pre_sync, num_post_sync,
                     FaultInjectionTest::kValExpectNoError));
  }
}
// Pre-fault step of the "no write" scenario: intentionally does nothing, so
// the DB is reopened without any new writes since the previous step.
void NoWriteTestPreFault() {}
// Closes the DB, applies reset_method to the fault-injection env (without a
// Random, so kResetDropRandomUnsyncedData must not be used here) and asserts
// that the DB can be reopened.
void NoWriteTestReopenWithFault(ResetMethod reset_method) {
CloseDB();
ResetDBState(reset_method);
ASSERT_OK(OpenDB());
}
void WaitCompactionFinish() {
ASSERT_OK(static_cast<DBImpl*>(db_->GetRootDB())->TEST_WaitForCompact());
ASSERT_OK(db_->Put(WriteOptions(), "", ""));
}
private:
Env* system_env_;
std::shared_ptr<Env> env_guard_;
};
// Same fixture as FaultInjectionTest under a distinct name, so that it can
// be instantiated with its own set of test parameters.
class FaultInjectionTestSplitted : public FaultInjectionTest {};
// For every option configuration in the instantiated range, runs
// kNumIterations rounds. Each round picks random record counts and runs the
// write / fault / reopen cycle once per reset method, each followed by a
// reopen with no intervening writes.
TEST_P(FaultInjectionTestSplitted, FaultTest) {
  do {
    Random rnd(301);
    for (size_t round = 0; round < kNumIterations; ++round) {
      const int synced_count = rnd.Uniform(kMaxNumValues);
      const int unsynced_count = rnd.Uniform(kMaxNumValues);

      // Drop all unsynced data.
      PartialCompactTestPreFault(synced_count, unsynced_count);
      PartialCompactTestReopenWithFault(kResetDropUnsyncedData, synced_count,
                                        unsynced_count);
      NoWriteTestPreFault();
      NoWriteTestReopenWithFault(kResetDropUnsyncedData);

      // Drop a random portion of the unsynced data.
      PartialCompactTestPreFault(synced_count, unsynced_count);
      PartialCompactTestReopenWithFault(kResetDropRandomUnsyncedData,
                                        synced_count, unsynced_count, &rnd);
      NoWriteTestPreFault();
      NoWriteTestReopenWithFault(kResetDropUnsyncedData);

      // Setting a separate data path won't pass the test as we don't sync
      // it after creating new files,
      PartialCompactTestPreFault(synced_count, unsynced_count);
      PartialCompactTestReopenWithFault(kResetDropAndDeleteUnsynced,
                                        synced_count, unsynced_count);
      NoWriteTestPreFault();
      NoWriteTestReopenWithFault(kResetDropAndDeleteUnsynced);

      PartialCompactTestPreFault(synced_count, unsynced_count);
      // No new files created so we expect all values since no files will be
      // dropped.
      PartialCompactTestReopenWithFault(kResetDeleteUnsyncedFiles,
                                        synced_count, unsynced_count);
      NoWriteTestPreFault();
      NoWriteTestReopenWithFault(kResetDeleteUnsyncedFiles);
    }
  } while (ChangeOptions());
}
// Previous log file is not fsynced if sync is forced after log rolling.
// Scenario: an unsynced Put, a non-waiting Flush, then a synced Put. After
// unsynced data is dropped and the DB is reopened, both records must still
// be readable.
TEST_P(FaultInjectionTest, WriteOptionSyncTest) {
  // Block the job queue to prevent flush job from running.
  test::SleepingBackgroundTask flush_blocker;
  env_->SetBackgroundThreads(1, Env::HIGH);
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &flush_blocker,
                 Env::Priority::HIGH);
  flush_blocker.WaitUntilSleeping();

  std::string key_buf;
  std::string value_buf;

  WriteOptions put_options;
  put_options.sync = false;
  ASSERT_OK(db_->Put(put_options, Key(1, &key_buf), Value(1, &value_buf)));

  FlushOptions nonblocking_flush;
  nonblocking_flush.wait = false;
  ASSERT_OK(db_->Flush(nonblocking_flush));

  put_options.sync = true;
  ASSERT_OK(db_->Put(put_options, Key(2, &key_buf), Value(2, &value_buf)));
  ASSERT_OK(db_->FlushWAL(false));

  env_->SetFilesystemActive(false);
  NoWriteTestReopenWithFault(kResetDropAndDeleteUnsynced);
  flush_blocker.WakeUp();
  flush_blocker.WaitUntilDone();
  ASSERT_OK(OpenDB());

  // Record 2 is checked first, then record 1.
  std::string stored;
  for (int record : {2, 1}) {
    Value(record, &value_buf);
    ASSERT_OK(ReadValue(record, &stored));
    ASSERT_EQ(value_buf, stored);
  }
}
// Deactivates the filesystem between two sync points around a compaction
// run, drops unsynced data, and checks that all keys are still readable
// after reopening. Also checks that no background compaction starts before
// DBImpl::Open reports "Opened".
//
// Fix: the Status returned by the first OpenDB() used to be discarded; it is
// now asserted so that a failed reopen is reported at its source.
TEST_P(FaultInjectionTest, UninstalledCompaction) {
  options_.target_file_size_base = 32 * 1024;
  options_.write_buffer_size = 100 << 10;  // 100KB
  options_.level0_file_num_compaction_trigger = 6;
  options_.level0_stop_writes_trigger = 1 << 10;
  options_.level0_slowdown_writes_trigger = 1 << 10;
  options_.max_background_compactions = 1;
  ASSERT_OK(OpenDB());

  if (!sequential_order_) {
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
        {"FaultInjectionTest::FaultTest:0", "DBImpl::BGWorkCompaction"},
        {"CompactionJob::Run():End", "FaultInjectionTest::FaultTest:1"},
        {"FaultInjectionTest::FaultTest:2",
         "DBImpl::BackgroundCompaction:NonTrivial:AfterRun"},
    });
  }
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  constexpr int kNumKeys = 1000;
  Build(WriteOptions(), 0, kNumKeys);
  FlushOptions flush_options;
  flush_options.wait = true;
  ASSERT_OK(db_->Flush(flush_options));
  ASSERT_OK(db_->Put(WriteOptions(), "", ""));

  TEST_SYNC_POINT("FaultInjectionTest::FaultTest:0");
  TEST_SYNC_POINT("FaultInjectionTest::FaultTest:1");
  env_->SetFilesystemActive(false);
  TEST_SYNC_POINT("FaultInjectionTest::FaultTest:2");
  CloseDB();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  ResetDBState(kResetDropUnsyncedData);

  // From here on, a background compaction must not begin before the DB has
  // signalled that it is opened.
  std::atomic<bool> opened(false);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::Open:Opened", [&](void* /*arg*/) { opened.store(true); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BGWorkCompaction",
      [&](void* /*arg*/) { ASSERT_TRUE(opened.load()); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ASSERT_OK(OpenDB());
  ASSERT_OK(Verify(0, kNumKeys, FaultInjectionTest::kValExpectFound));
  WaitCompactionFinish();
  ASSERT_OK(Verify(0, kNumKeys, FaultInjectionTest::kValExpectFound));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
}
// Two unsynced Puts separated by a non-waiting Flush, followed by
// FlushWAL(true). After unsynced data is dropped and the DB is reopened,
// both records must still be readable.
TEST_P(FaultInjectionTest, ManualLogSyncTest) {
  // Block the job queue to prevent flush job from running.
  test::SleepingBackgroundTask flush_blocker;
  env_->SetBackgroundThreads(1, Env::HIGH);
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &flush_blocker,
                 Env::Priority::HIGH);
  flush_blocker.WaitUntilSleeping();

  std::string key_buf;
  std::string value_buf;

  WriteOptions unsynced_put;
  unsynced_put.sync = false;
  ASSERT_OK(db_->Put(unsynced_put, Key(1, &key_buf), Value(1, &value_buf)));

  FlushOptions nonblocking_flush;
  nonblocking_flush.wait = false;
  ASSERT_OK(db_->Flush(nonblocking_flush));

  ASSERT_OK(db_->Put(unsynced_put, Key(2, &key_buf), Value(2, &value_buf)));
  ASSERT_OK(db_->FlushWAL(true));

  env_->SetFilesystemActive(false);
  NoWriteTestReopenWithFault(kResetDropAndDeleteUnsynced);
  flush_blocker.WakeUp();
  flush_blocker.WaitUntilDone();
  ASSERT_OK(OpenDB());

  // Record 2 is checked first, then record 1.
  std::string stored;
  for (int record : {2, 1}) {
    Value(record, &value_buf);
    ASSERT_OK(ReadValue(record, &stored));
    ASSERT_EQ(value_buf, stored);
  }
}
// Writes a synced batch with a WAL termination point between two Puts,
// drops unsynced data and reopens. The entry added before the termination
// point ("cats") must be readable; the one added after it ("boys") must be
// reported as NotFound.
TEST_P(FaultInjectionTest, WriteBatchWalTerminationTest) {
  Options current_options = CurrentOptions();
  current_options.env = env_;

  WriteOptions synced_write;
  synced_write.sync = true;
  synced_write.disableWAL = false;

  WriteBatch two_part_batch;
  ASSERT_OK(two_part_batch.Put("cats", "dogs"));
  two_part_batch.MarkWalTerminationPoint();
  ASSERT_OK(two_part_batch.Put("boys", "girls"));
  ASSERT_OK(db_->Write(synced_write, &two_part_batch));

  env_->SetFilesystemActive(false);
  NoWriteTestReopenWithFault(kResetDropAndDeleteUnsynced);
  ASSERT_OK(OpenDB());

  ReadOptions ro;
  std::string stored;
  ASSERT_OK(db_->Get(ro, "cats", &stored));
  ASSERT_EQ("dogs", stored);
  ASSERT_EQ(db_->Get(ro, "boys", &stored), Status::NotFound());
}
// Appends one VersionEdit record to a log file while every write fails with
// an injected IOError, then disables the injection, closes the writer and
// reads the file back. Exactly one record must be present.
//
// Fix: the record used to be encoded inside assert(), so builds with NDEBUG
// never called EncodeTo() and appended an empty record. The call is now
// always executed and its result checked with ASSERT_TRUE.
TEST_P(FaultInjectionTest, NoDuplicateTrailingEntries) {
  auto fault_fs = std::make_shared<FaultInjectionTestFS>(FileSystem::Default());
  fault_fs->EnableWriteErrorInjection();
  fault_fs->SetFilesystemDirectWritable(false);
  const std::string file_name = NormalizePath(dbname_ + "/test_file");
  std::unique_ptr<log::Writer> log_writer = nullptr;
  constexpr uint64_t log_number = 0;
  {
    std::unique_ptr<FSWritableFile> file;
    const Status s =
        fault_fs->NewWritableFile(file_name, FileOptions(), &file, nullptr);
    ASSERT_OK(s);
    std::unique_ptr<WritableFileWriter> fwriter(
        new WritableFileWriter(std::move(file), file_name, FileOptions()));
    log_writer.reset(new log::Writer(std::move(fwriter), log_number,
                                     /*recycle_log_files=*/false));
  }
  // one_in=1: every write is failed with the injected error.
  fault_fs->SetRandomWriteError(
      0xdeadbeef, /*one_in=*/1, IOStatus::IOError("Injected IOError"),
      /*inject_for_all_file_types=*/true, /*types=*/{});
  {
    VersionEdit edit;
    edit.SetColumnFamily(0);
    std::string buf;
    ASSERT_TRUE(edit.EncodeTo(&buf));
    const Status s = log_writer->AddRecord(buf);
    ASSERT_NOK(s);
  }
  fault_fs->DisableWriteErrorInjection();
  // Closing the log writer will cause WritableFileWriter::Close() and flush
  // remaining data from its buffer to underlying file.
  log_writer.reset();
  {
    std::unique_ptr<FSSequentialFile> file;
    Status s =
        fault_fs->NewSequentialFile(file_name, FileOptions(), &file, nullptr);
    ASSERT_OK(s);
    std::unique_ptr<SequentialFileReader> freader(
        new SequentialFileReader(std::move(file), file_name));
    Status log_read_s;
    // Remembers the first corruption reported by the log reader.
    class LogReporter : public log::Reader::Reporter {
     public:
      Status* status_;
      explicit LogReporter(Status* _s) : status_(_s) {}
      void Corruption(size_t /*bytes*/, const Status& _s) override {
        if (status_->ok()) {
          *status_ = _s;
        }
      }
    } reporter(&log_read_s);
    std::unique_ptr<log::Reader> log_reader(new log::Reader(
        nullptr, std::move(freader), &reporter, /*checksum=*/true, log_number));
    Slice record;
    std::string data;
    size_t count = 0;
    while (log_reader->ReadRecord(&record, &data) && log_read_s.ok()) {
      VersionEdit edit;
      ASSERT_OK(edit.DecodeFrom(data));
      ++count;
    }
    // Verify that only one version edit exists in the file.
    ASSERT_EQ(1U, count);
  }
}
// Parameter tuples are (sequential_order, <second>, <third>); SetUp() reads
// element 0 into sequential_order_. NOTE(review): elements 1 and 2 look like
// the first option config and the non-inclusive end config of the range to
// iterate - confirm against the fixture constructor, which is not shown here.
//
// FaultInjectionTest: both key orders over kDefault..kEnd.
INSTANTIATE_TEST_CASE_P(
FaultTest, FaultInjectionTest,
::testing::Values(std::make_tuple(false, kDefault, kEnd),
std::make_tuple(true, kDefault, kEnd)));
// FaultInjectionTestSplitted: the same range split in two at kSyncWal, for
// both key orders.
INSTANTIATE_TEST_CASE_P(
FaultTest, FaultInjectionTestSplitted,
::testing::Values(std::make_tuple(false, kDefault, kSyncWal),
std::make_tuple(true, kDefault, kSyncWal),
std::make_tuple(false, kSyncWal, kEnd),
std::make_tuple(true, kSyncWal, kEnd)));
} // namespace ROCKSDB_NAMESPACE
// Test entry point: installs the stack trace handler, initializes
// GoogleTest, registers custom objects from the command line and runs all
// tests. Returns the RUN_ALL_TESTS() result.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  RegisterCustomObjects(argc, argv);
  const int test_result = RUN_ALL_TESTS();
  return test_result;
}

@ -1,352 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "db/file_indexer.h"
#include <cstring>
#include <string>
#include "db/dbformat.h"
#include "db/version_edit.h"
#include "port/stack_trace.h"
#include "rocksdb/comparator.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
namespace ROCKSDB_NAMESPACE {
class IntComparator : public Comparator {
public:
int Compare(const Slice& a, const Slice& b) const override {
assert(a.size() == 8);
assert(b.size() == 8);
int64_t diff = *reinterpret_cast<const int64_t*>(a.data()) -
*reinterpret_cast<const int64_t*>(b.data());
if (diff < 0) {
return -1;
} else if (diff == 0) {
return 0;
} else {
return 1;
}
}
const char* Name() const override { return "IntComparator"; }
void FindShortestSeparator(std::string* /*start*/,
const Slice& /*limit*/) const override {}
void FindShortSuccessor(std::string* /*key*/) const override {}
};
// Fixture for FileIndexer tests. Owns one vector of FileMetaData per level
// (kNumLevels levels) and provides helpers to populate them and to query
// the indexer under test.
//
// The `indexer` pointer is created and deleted by each test body; it is now
// initialized to nullptr so that it never holds an indeterminate value.
class FileIndexerTest : public testing::Test {
 public:
  FileIndexerTest()
      : kNumLevels(4), files(new std::vector<FileMetaData*>[kNumLevels]) {}
  ~FileIndexerTest() override {
    ClearFiles();
    delete[] files;
  }
  // Appends a file covering [smallest, largest] to the given level.
  void AddFile(int level, int64_t smallest, int64_t largest) {
    auto* f = new FileMetaData();
    f->smallest = IntKey(smallest);
    f->largest = IntKey(largest);
    files[level].push_back(f);
  }
  // Builds an internal key whose user key is the 8 raw bytes of v.
  InternalKey IntKey(int64_t v) {
    return InternalKey(Slice(reinterpret_cast<char*>(&v), 8), 0, kTypeValue);
  }
  // Deletes every FileMetaData added so far and empties all levels.
  void ClearFiles() {
    for (uint32_t i = 0; i < kNumLevels; ++i) {
      for (auto* f : files[i]) {
        delete f;
      }
      files[i].clear();
    }
  }
  // Forwards to FileIndexer::GetNextLevelIndex. Both outputs are preset to
  // the sentinel 100 so that an output left untouched is detectable.
  void GetNextLevelIndex(const uint32_t level, const uint32_t file_index,
                         const int cmp_smallest, const int cmp_largest,
                         int32_t* left_index, int32_t* right_index) {
    *left_index = 100;
    *right_index = 100;
    indexer->GetNextLevelIndex(level, file_index, cmp_smallest, cmp_largest,
                               left_index, right_index);
  }
  int32_t left = 100;
  int32_t right = 100;
  const uint32_t kNumLevels;
  IntComparator ucmp;
  FileIndexer* indexer = nullptr;
  std::vector<FileMetaData*>* files;
};
// Case 0: Empty
// Updating the index with zero levels must be handled without error.
TEST_F(FileIndexerTest, Empty) {
  Arena index_arena;
  indexer = new FileIndexer(&ucmp);
  indexer->UpdateIndex(&index_arena, 0, files);
  delete indexer;
}
// Case 1: no overlap, files are on the left of next level files
TEST_F(FileIndexerTest, no_overlap_left) {
  Arena arena;
  indexer = new FileIndexer(&ucmp);
  // level 1
  AddFile(1, 100, 200);
  AddFile(1, 300, 400);
  AddFile(1, 500, 600);
  // level 2
  AddFile(2, 1500, 1600);
  AddFile(2, 1601, 1699);
  AddFile(2, 1700, 1800);
  // level 3
  AddFile(3, 2500, 2600);
  AddFile(3, 2601, 2699);
  AddFile(3, 2700, 2800);
  indexer->UpdateIndex(&arena, kNumLevels, files);

  // One probe per (cmp_smallest, cmp_largest) combination, with the
  // expected [left, right] bounds. They are the same for every file of
  // levels 1 and 2.
  struct Probe {
    int cmp_smallest;
    int cmp_largest;
    int32_t want_left;
    int32_t want_right;
  };
  const Probe probes[] = {
      {-1, -1, 0, -1}, {0, -1, 0, -1}, {1, -1, 0, -1},
      {1, 0, 0, -1},   {1, 1, 0, 2},
  };
  for (uint32_t level = 1; level < 3; ++level) {
    for (uint32_t f = 0; f < 3; ++f) {
      for (const Probe& probe : probes) {
        GetNextLevelIndex(level, f, probe.cmp_smallest, probe.cmp_largest,
                          &left, &right);
        ASSERT_EQ(probe.want_left, left);
        ASSERT_EQ(probe.want_right, right);
      }
    }
  }
  delete indexer;
  ClearFiles();
}
// Case 2: no overlap, files are on the right of next level files
TEST_F(FileIndexerTest, no_overlap_right) {
  Arena arena;
  indexer = new FileIndexer(&ucmp);
  // level 1
  AddFile(1, 2100, 2200);
  AddFile(1, 2300, 2400);
  AddFile(1, 2500, 2600);
  // level 2
  AddFile(2, 1500, 1600);
  AddFile(2, 1501, 1699);
  AddFile(2, 1700, 1800);
  // level 3
  AddFile(3, 500, 600);
  AddFile(3, 501, 699);
  AddFile(3, 700, 800);
  indexer->UpdateIndex(&arena, kNumLevels, files);

  // Probes after the first one; each expects left == 3 and right == 2.
  // The (1, -1) combination is probed twice.
  struct Probe {
    int cmp_smallest;
    int cmp_largest;
  };
  const Probe later_probes[] = {{0, -1}, {1, -1}, {1, -1}, {1, 0}, {1, 1}};
  for (uint32_t level = 1; level < 3; ++level) {
    for (uint32_t f = 0; f < 3; ++f) {
      // Only the first file of a level starts its search at index 0.
      GetNextLevelIndex(level, f, -1, -1, &left, &right);
      ASSERT_EQ(f == 0 ? 0 : 3, left);
      ASSERT_EQ(2, right);
      for (const Probe& probe : later_probes) {
        GetNextLevelIndex(level, f, probe.cmp_smallest, probe.cmp_largest,
                          &left, &right);
        ASSERT_EQ(3, left);
        ASSERT_EQ(2, right);
      }
    }
  }
  delete indexer;
}
// Case 3: empty L2
TEST_F(FileIndexerTest, empty_L2) {
  Arena arena;
  indexer = new FileIndexer(&ucmp);
  // Before any update, every level index is empty.
  for (uint32_t level = 1; level < kNumLevels; ++level) {
    ASSERT_EQ(0U, indexer->LevelIndexSize(level));
  }
  // level 1
  AddFile(1, 2100, 2200);
  AddFile(1, 2300, 2400);
  AddFile(1, 2500, 2600);
  // level 3
  AddFile(3, 500, 600);
  AddFile(3, 501, 699);
  AddFile(3, 700, 800);
  indexer->UpdateIndex(&arena, kNumLevels, files);

  // With no files in level 2, every probe from level 1 expects left == 0
  // and right == -1. The (1, -1) combination is probed twice.
  struct Probe {
    int cmp_smallest;
    int cmp_largest;
  };
  const Probe probes[] = {{-1, -1}, {0, -1}, {1, -1},
                          {1, -1},  {1, 0},  {1, 1}};
  for (uint32_t f = 0; f < 3; ++f) {
    for (const Probe& probe : probes) {
      GetNextLevelIndex(1, f, probe.cmp_smallest, probe.cmp_largest, &left,
                        &right);
      ASSERT_EQ(0, left);
      ASSERT_EQ(-1, right);
    }
  }
  delete indexer;
  ClearFiles();
}
// Case 4: mixed
TEST_F(FileIndexerTest, mixed) {
  Arena arena;
  indexer = new FileIndexer(&ucmp);
  // level 1
  AddFile(1, 100, 200);
  AddFile(1, 250, 400);
  AddFile(1, 450, 500);
  // level 2
  AddFile(2, 100, 150);  // 0
  AddFile(2, 200, 250);  // 1
  AddFile(2, 251, 300);  // 2
  AddFile(2, 301, 350);  // 3
  AddFile(2, 500, 600);  // 4
  // level 3
  AddFile(3, 0, 50);
  AddFile(3, 100, 200);
  AddFile(3, 201, 250);
  indexer->UpdateIndex(&arena, kNumLevels, files);

  // Each row: the queried (level, file), the comparison results passed in,
  // and the expected [left, right] bounds in the next level.
  struct Probe {
    uint32_t level;
    uint32_t file;
    int cmp_smallest;
    int cmp_largest;
    int32_t want_left;
    int32_t want_right;
  };
  const Probe probes[] = {
      // level 1, 0
      {1, 0, -1, -1, 0, 0},
      {1, 0, 0, -1, 0, 0},
      {1, 0, 1, -1, 0, 1},
      {1, 0, 1, 0, 1, 1},
      {1, 0, 1, 1, 1, 4},
      // level 1, 1
      {1, 1, -1, -1, 1, 1},
      {1, 1, 0, -1, 1, 1},
      {1, 1, 1, -1, 1, 3},
      {1, 1, 1, 0, 4, 3},
      {1, 1, 1, 1, 4, 4},
      // level 1, 2
      {1, 2, -1, -1, 4, 3},
      {1, 2, 0, -1, 4, 3},
      {1, 2, 1, -1, 4, 4},
      {1, 2, 1, 0, 4, 4},
      {1, 2, 1, 1, 4, 4},
      // level 2, 0
      {2, 0, -1, -1, 0, 1},
      {2, 0, 0, -1, 1, 1},
      {2, 0, 1, -1, 1, 1},
      {2, 0, 1, 0, 1, 1},
      {2, 0, 1, 1, 1, 2},
      // level 2, 1
      {2, 1, -1, -1, 1, 1},
      {2, 1, 0, -1, 1, 1},
      {2, 1, 1, -1, 1, 2},
      {2, 1, 1, 0, 2, 2},
      {2, 1, 1, 1, 2, 2},
  };
  for (const Probe& probe : probes) {
    GetNextLevelIndex(probe.level, probe.file, probe.cmp_smallest,
                      probe.cmp_largest, &left, &right);
    ASSERT_EQ(probe.want_left, left);
    ASSERT_EQ(probe.want_right, right);
  }

  // level 2, [2 - 4], no overlap
  struct TailProbe {
    int cmp_smallest;
    int cmp_largest;
  };
  const TailProbe tail_probes[] = {{0, -1}, {1, -1}, {1, 0}, {1, 1}};
  for (uint32_t f = 2; f <= 4; ++f) {
    GetNextLevelIndex(2, f, -1, -1, &left, &right);
    ASSERT_EQ(f == 2 ? 2 : 3, left);
    ASSERT_EQ(2, right);
    for (const TailProbe& probe : tail_probes) {
      GetNextLevelIndex(2, f, probe.cmp_smallest, probe.cmp_largest, &left,
                        &right);
      ASSERT_EQ(3, left);
      ASSERT_EQ(2, right);
    }
  }
  delete indexer;
  ClearFiles();
}
} // namespace ROCKSDB_NAMESPACE
// Test entry point: installs the stack trace handler, initializes
// GoogleTest and runs all tests. Returns the RUN_ALL_TESTS() result.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int test_result = RUN_ALL_TESTS();
  return test_result;
}

@ -1,241 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "file/filename.h"
#include "db/dbformat.h"
#include "port/port.h"
#include "test_util/testharness.h"
namespace ROCKSDB_NAMESPACE {
// Stateless fixture that groups the file-name parsing/construction tests.
class FileNameTest : public testing::Test {};
// Checks ParseFileName against a table of valid names (with the expected
// number and type) and a list of names that must be rejected.
//
// Each valid case carries a bit mask of the modes it applies to:
//  - default info-log dir:   parsed with an InfoLogPrefix built with `false`;
//  - different info-log dir: parsed with an InfoLogPrefix built with `true`;
//  - no-check:               parsed with the overload taking no prefix.
//
// Cleanup relative to the original: removed an unused local, a stray ';'
// after the final loop and a duplicated "" entry in the error list; fixed
// the spelling of the default-dir flag and made the flags constants.
TEST_F(FileNameTest, Parse) {
  FileType type;
  uint64_t number;
  constexpr char kDefaultInfoLogDir = 1;
  constexpr char kDifferentInfoLogDir = 2;
  constexpr char kNoCheckLogDir = 4;
  constexpr char kAllMode = static_cast<char>(
      kDefaultInfoLogDir | kDifferentInfoLogDir | kNoCheckLogDir);
  // Successful parses
  static const struct {
    const char* fname;
    uint64_t number;
    FileType type;
    char mode;
  } cases[] = {
      {"100.log", 100, kWalFile, kAllMode},
      {"0.log", 0, kWalFile, kAllMode},
      {"0.sst", 0, kTableFile, kAllMode},
      {"CURRENT", 0, kCurrentFile, kAllMode},
      {"LOCK", 0, kDBLockFile, kAllMode},
      {"MANIFEST-2", 2, kDescriptorFile, kAllMode},
      {"MANIFEST-7", 7, kDescriptorFile, kAllMode},
      {"METADB-2", 2, kMetaDatabase, kAllMode},
      {"METADB-7", 7, kMetaDatabase, kAllMode},
      {"LOG", 0, kInfoLogFile, kDefaultInfoLogDir},
      {"LOG.old", 0, kInfoLogFile, kDefaultInfoLogDir},
      {"LOG.old.6688", 6688, kInfoLogFile, kDefaultInfoLogDir},
      {"rocksdb_dir_LOG", 0, kInfoLogFile, kDifferentInfoLogDir},
      {"rocksdb_dir_LOG.old", 0, kInfoLogFile, kDifferentInfoLogDir},
      {"rocksdb_dir_LOG.old.6688", 6688, kInfoLogFile, kDifferentInfoLogDir},
      {"18446744073709551615.log", 18446744073709551615ull, kWalFile, kAllMode},
  };
  for (const char mode :
       {kDifferentInfoLogDir, kDefaultInfoLogDir, kNoCheckLogDir}) {
    for (const auto& test_case : cases) {
      InfoLogPrefix info_log_prefix(mode != kDefaultInfoLogDir, "/rocksdb/dir");
      if ((test_case.mode & mode) == 0) {
        continue;  // This case does not apply to the current mode.
      }
      const std::string f = test_case.fname;
      if (mode == kNoCheckLogDir) {
        ASSERT_TRUE(ParseFileName(f, &number, &type)) << f;
      } else {
        ASSERT_TRUE(ParseFileName(f, &number, info_log_prefix.prefix, &type))
            << f;
      }
      ASSERT_EQ(test_case.type, type) << f;
      ASSERT_EQ(test_case.number, number) << f;
    }
  }
  // Errors
  static const char* const errors[] = {"",
                                       "foo",
                                       "foo-dx-100.log",
                                       ".log",
                                       "manifest",
                                       "CURREN",
                                       "CURRENTX",
                                       "MANIFES",
                                       "MANIFEST",
                                       "MANIFEST-",
                                       "XMANIFEST-3",
                                       "MANIFEST-3x",
                                       "META",
                                       "METADB",
                                       "METADB-",
                                       "XMETADB-3",
                                       "METADB-3x",
                                       "LOC",
                                       "LOCKx",
                                       "LO",
                                       "LOGx",
                                       "18446744073709551616.log",
                                       "184467440737095516150.log",
                                       "100",
                                       "100.",
                                       "100.lop"};
  for (const char* bad_name : errors) {
    const std::string f = bad_name;
    ASSERT_TRUE(!ParseFileName(f, &number, &type)) << f;
  }
}
// Checks the info-log file names produced for a DB at "/data/rocksdb", both
// with an empty log directory (logs live in the DB directory) and with a
// separate log directory (file name is prefixed with the flattened DB path).
TEST_F(FileNameTest, InfoLogFileName) {
  const std::string db_dir = "/data/rocksdb";
  const std::string separate_log_dir = "/data/rocksdb_log";
  std::string db_absolute_path;
  ASSERT_OK(Env::Default()->GetAbsolutePath(db_dir, &db_absolute_path));

  // No dedicated log directory.
  ASSERT_EQ("/data/rocksdb/LOG", InfoLogFileName(db_dir, db_absolute_path, ""));
  ASSERT_EQ("/data/rocksdb/LOG.old.666",
            OldInfoLogFileName(db_dir, 666u, db_absolute_path, ""));

  // Dedicated log directory.
  ASSERT_EQ("/data/rocksdb_log/data_rocksdb_LOG",
            InfoLogFileName(db_dir, db_absolute_path, separate_log_dir));
  ASSERT_EQ(
      "/data/rocksdb_log/data_rocksdb_LOG.old.666",
      OldInfoLogFileName(db_dir, 666u, db_absolute_path, separate_log_dir));
}
// For each file-name builder: the result must start with "<dir>/", and the
// remainder must parse back to the expected number and file type.
TEST_F(FileNameTest, Construction) {
  uint64_t parsed_number;
  FileType parsed_type;
  {
    const std::string path = CurrentFileName("foo");
    ASSERT_EQ("foo/", std::string(path.data(), 4));
    ASSERT_TRUE(ParseFileName(path.c_str() + 4, &parsed_number, &parsed_type));
    ASSERT_EQ(0U, parsed_number);
    ASSERT_EQ(kCurrentFile, parsed_type);
  }
  {
    const std::string path = LockFileName("foo");
    ASSERT_EQ("foo/", std::string(path.data(), 4));
    ASSERT_TRUE(ParseFileName(path.c_str() + 4, &parsed_number, &parsed_type));
    ASSERT_EQ(0U, parsed_number);
    ASSERT_EQ(kDBLockFile, parsed_type);
  }
  {
    const std::string path = LogFileName("foo", 192);
    ASSERT_EQ("foo/", std::string(path.data(), 4));
    ASSERT_TRUE(ParseFileName(path.c_str() + 4, &parsed_number, &parsed_type));
    ASSERT_EQ(192U, parsed_number);
    ASSERT_EQ(kWalFile, parsed_type);
  }
  {
    // Selecting path index 1 of {"foo", "bar"} must give the same name as
    // path index 0 of {"bar"}.
    const std::string path = TableFileName({DbPath("bar", 0)}, 200, 0);
    const std::string same_path =
        TableFileName({DbPath("foo", 0), DbPath("bar", 0)}, 200, 1);
    ASSERT_EQ(path, same_path);
    ASSERT_EQ("bar/", std::string(path.data(), 4));
    ASSERT_TRUE(ParseFileName(path.c_str() + 4, &parsed_number, &parsed_type));
    ASSERT_EQ(200U, parsed_number);
    ASSERT_EQ(kTableFile, parsed_type);
  }
  {
    const std::string path = DescriptorFileName("bar", 100);
    ASSERT_EQ("bar/", std::string(path.data(), 4));
    ASSERT_TRUE(ParseFileName(path.c_str() + 4, &parsed_number, &parsed_type));
    ASSERT_EQ(100U, parsed_number);
    ASSERT_EQ(kDescriptorFile, parsed_type);
  }
  {
    const std::string path = TempFileName("tmp", 999);
    ASSERT_EQ("tmp/", std::string(path.data(), 4));
    ASSERT_TRUE(ParseFileName(path.c_str() + 4, &parsed_number, &parsed_type));
    ASSERT_EQ(999U, parsed_number);
    ASSERT_EQ(kTempFile, parsed_type);
  }
  {
    const std::string path = MetaDatabaseName("met", 100);
    ASSERT_EQ("met/", std::string(path.data(), 4));
    ASSERT_TRUE(ParseFileName(path.c_str() + 4, &parsed_number, &parsed_type));
    ASSERT_EQ(100U, parsed_number);
    ASSERT_EQ(kMetaDatabase, parsed_type);
  }
}
// Table-driven check of NormalizePath: each row pairs an input path with the
// normalized form it must produce, built with the platform path separator.
TEST_F(FileNameTest, NormalizePath) {
  const std::string sep = std::string(1, kFilePathSeparator);
  struct PathCase {
    std::string given;
    std::string expected;
  };
  const PathCase path_cases[] = {
      // No leading slash
      {"FOLDER" + sep + "filename.ext", "FOLDER" + sep + "filename.ext"},
      // Two chars /a
      {sep + "a", sep + "a"},
      // Two chars a/
      {"a" + sep, "a" + sep},
      // Server only
      {sep + sep + "a", sep + sep + "a"},
      // Two slashes after character
      {"a" + sep + sep, "a" + sep},
      // slash only /
      {sep, sep},
      // UNC only //
      {sep + sep, sep},
      // 3 slashes ///
      {sep + sep + sep, sep + sep},
      // 3 slashes followed by a path component
      {sep + sep + sep + "a" + sep, sep + sep + "a" + sep},
      // 2 separators in the middle
      {"a" + sep + sep + "b", "a" + sep + "b"},
      // UNC with duplicate slashes
      {sep + sep + "SERVER" + sep + "a" + sep + sep + "b" + sep + "c",
       sep + sep + "SERVER" + sep + "a" + sep + "b" + sep + "c"},
  };
  for (const PathCase& path_case : path_cases) {
    ASSERT_EQ(path_case.expected, NormalizePath(path_case.given));
  }
}
} // namespace ROCKSDB_NAMESPACE
// Test entry point: installs the stack trace handler, initializes
// GoogleTest and runs all tests. Returns the RUN_ALL_TESTS() result.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int test_result = RUN_ALL_TESTS();
  return test_result;
}

@ -1,743 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/flush_job.h"
#include <algorithm>
#include <array>
#include <map>
#include <string>
#include "db/blob/blob_index.h"
#include "db/column_family.h"
#include "db/db_impl/db_impl.h"
#include "db/version_set.h"
#include "file/writable_file_writer.h"
#include "rocksdb/cache.h"
#include "rocksdb/file_system.h"
#include "rocksdb/write_buffer_manager.h"
#include "table/mock_table.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/random.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
// TODO(icanadi) Mock out everything else:
// 1. VersionSet
// 2. Memtable
// Shared fixture for the FlushJob tests.
//
// SetUp() creates the DB directory, hand-writes a MANIFEST describing three
// column families (the default one plus "foo" and "bar") and recovers a
// VersionSet from it. Tests then add memtables to a column family and run a
// FlushJob against that VersionSet. Output tables go through
// mock::MockTableFactory so tests can inspect what was written.
class FlushJobTestBase : public testing::Test {
protected:
// dbname: directory used for the test database.
// ucmp:   user comparator installed in cf_options_ and recorded in the
//         MANIFEST for the non-default column families.
FlushJobTestBase(std::string dbname, const Comparator* ucmp)
: env_(Env::Default()),
fs_(env_->GetFileSystem()),
dbname_(std::move(dbname)),
ucmp_(ucmp),
options_(),
db_options_(options_),
column_family_names_({kDefaultColumnFamilyName, "foo", "bar"}),
table_cache_(NewLRUCache(50000, 16)),
write_buffer_manager_(db_options_.db_write_buffer_size),
shutting_down_(false),
mock_table_factory_(new mock::MockTableFactory()) {}
// Removes the DB directory, unless the KEEP_DB environment variable is set,
// in which case the directory is kept and its location is printed.
virtual ~FlushJobTestBase() {
if (getenv("KEEP_DB")) {
fprintf(stdout, "db is still in %s\n", dbname_.c_str());
} else {
// destroy versions_ to release all file handles
versions_.reset();
EXPECT_OK(DestroyDir(env_, dbname_));
}
}
// Writes the on-disk state that VersionSet::Recover() is later run against:
// the identity file, a MANIFEST (descriptor file number 1) and the CURRENT
// file pointing at that MANIFEST.
void NewDB() {
ASSERT_OK(SetIdentityFile(env_, dbname_));
// First MANIFEST record: the DB-wide edit.
VersionEdit new_db;
new_db.SetLogNumber(0);
new_db.SetNextFile(2);
new_db.SetLastSequence(0);
// One "add column family" edit per non-default column family. Index 0 of
// column_family_names_ is the default column family, so the loop starts at 1.
autovector<VersionEdit> new_cfs;
SequenceNumber last_seq = 1;
uint32_t cf_id = 1;
for (size_t i = 1; i != column_family_names_.size(); ++i) {
VersionEdit new_cf;
new_cf.AddColumnFamily(column_family_names_[i]);
new_cf.SetColumnFamily(cf_id++);
new_cf.SetComparatorName(ucmp_->Name());
new_cf.SetLogNumber(0);
new_cf.SetNextFile(2);
new_cf.SetLastSequence(last_seq++);
new_cfs.emplace_back(new_cf);
}
const std::string manifest = DescriptorFileName(dbname_, 1);
const auto& fs = env_->GetFileSystem();
std::unique_ptr<WritableFileWriter> file_writer;
Status s = WritableFileWriter::Create(
fs, manifest, fs->OptimizeForManifestWrite(env_options_), &file_writer,
nullptr);
ASSERT_OK(s);
{
// The log writer takes ownership of file_writer; it goes out of scope at the
// end of this block, before CURRENT is written.
log::Writer log(std::move(file_writer), 0, false);
std::string record;
new_db.EncodeTo(&record);
s = log.AddRecord(record);
ASSERT_OK(s);
for (const auto& e : new_cfs) {
record.clear();
e.EncodeTo(&record);
s = log.AddRecord(record);
ASSERT_OK(s);
}
}
ASSERT_OK(s);
// Make "CURRENT" file that points to the new manifest file.
s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr);
ASSERT_OK(s);
}
// Creates the DB directory and MANIFEST, fills in the DB and column family
// options, then builds versions_ and recovers all column families.
void SetUp() override {
EXPECT_OK(env_->CreateDirIfMissing(dbname_));
// TODO(icanadi) Remove this once we mock out VersionSet
NewDB();
db_options_.env = env_;
db_options_.fs = fs_;
db_options_.db_paths.emplace_back(dbname_,
std::numeric_limits<uint64_t>::max());
// Tests read the FLUSH_TIME histogram from these statistics.
db_options_.statistics = CreateDBStatistics();
cf_options_.comparator = ucmp_;
std::vector<ColumnFamilyDescriptor> column_families;
// Route table output through the mock factory so tests can inspect it.
cf_options_.table_factory = mock_table_factory_;
for (const auto& cf_name : column_family_names_) {
column_families.emplace_back(cf_name, cf_options_);
}
versions_.reset(
new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
&write_buffer_manager_, &write_controller_,
/*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
/*db_id*/ "", /*db_session_id*/ ""));
EXPECT_OK(versions_->Recover(column_families, false));
}
// NOTE: the constructor's initializer list relies on this declaration order
// (e.g. db_options_ is built from options_, write_buffer_manager_ from
// db_options_).
Env* env_;
std::shared_ptr<FileSystem> fs_;
std::string dbname_;
const Comparator* const ucmp_;
EnvOptions env_options_;
Options options_;
ImmutableDBOptions db_options_;
const std::vector<std::string> column_family_names_;
std::shared_ptr<Cache> table_cache_;
WriteController write_controller_;
WriteBufferManager write_buffer_manager_;
ColumnFamilyOptions cf_options_;
std::unique_ptr<VersionSet> versions_;
// Passed to every FlushJob as its DB mutex.
InstrumentedMutex mutex_;
std::atomic<bool> shutting_down_;
std::shared_ptr<mock::MockTableFactory> mock_table_factory_;
SeqnoToTimeMapping empty_seqno_to_time_mapping_;
};
// FlushJob fixture that uses the plain bytewise comparator. The database
// directory is the path returned by test::PerThreadDBPath("flush_job_test").
class FlushJobTest : public FlushJobTestBase {
 public:
  FlushJobTest()
      : FlushJobTestBase(test::PerThreadDBPath("flush_job_test"),
                         BytewiseComparator()) {}
};
// Running a flush on the default column family when no memtable has been
// added to its immutable list must still succeed.
TEST_F(FlushJobTest, Empty) {
  JobContext job_context(0);
  ColumnFamilyData* const default_cfd =
      versions_->GetColumnFamilySet()->GetDefault();
  EventLogger event_logger(db_options_.info_log.get());
  SnapshotChecker* const snapshot_checker = nullptr;  // not relevant

  FlushJob flush_job(
      dbname_, default_cfd, db_options_,
      *default_cfd->GetLatestMutableCFOptions(),
      std::numeric_limits<uint64_t>::max() /* memtable_id */, env_options_,
      versions_.get(), &mutex_, &shutting_down_, {}, kMaxSequenceNumber,
      snapshot_checker, &job_context, FlushReason::kTest, nullptr, nullptr,
      nullptr, kNoCompression, nullptr, &event_logger, false,
      true /* sync_output_directory */, true /* write_manifest */,
      Env::Priority::USER, nullptr /*IOTracer*/, empty_seqno_to_time_mapping_);

  {
    // PickMemTable() and Run() are called with the DB mutex held.
    InstrumentedMutexLock guard(&mutex_);
    flush_job.PickMemTable();
    ASSERT_OK(flush_job.Run());
  }
  job_context.Clean();
}
// Flushes one memtable containing point values, a range deletion and several
// blob references, then checks the output file's key range, sequence number
// range, oldest referenced blob file and exact contents.
TEST_F(FlushJobTest, NonEmpty) {
  JobContext job_context(0);
  auto cfd = versions_->GetColumnFamilySet()->GetDefault();
  auto new_mem = cfd->ConstructNewMemtable(*cfd->GetLatestMutableCFOptions(),
                                           kMaxSequenceNumber);
  new_mem->Ref();
  auto inserted_keys = mock::MakeMockFile();
  // Test data:
  //   seqno [ 1,    2 ... 8998, 8999, 9000, 9001, 9002 ... 9999 ]
  //   key   [ 1001, 1002 ... 9998, 9999, 0, 1, 2 ... 999 ]
  //   range-delete ["9995", "9999a") at seqno 10000
  //   blob references with seqnos 10001..10006
  for (int i = 1; i < 10000; ++i) {
    std::string key(std::to_string((i + 1000) % 10000));
    std::string value("value" + key);
    ASSERT_OK(new_mem->Add(SequenceNumber(i), kTypeValue, key, value,
                           nullptr /* kv_prot_info */));
    // Keys 9995..9999 fall inside the range deletion added below, so they are
    // left out of the expected file contents.
    if ((i + 1000) % 10000 < 9995) {
      InternalKey internal_key(key, SequenceNumber(i), kTypeValue);
      inserted_keys.push_back({internal_key.Encode().ToString(), value});
    }
  }
  {
    ASSERT_OK(new_mem->Add(SequenceNumber(10000), kTypeRangeDeletion, "9995",
                           "9999a", nullptr /* kv_prot_info */));
    InternalKey internal_key("9995", SequenceNumber(10000), kTypeRangeDeletion);
    inserted_keys.push_back({internal_key.Encode().ToString(), "9999a"});
  }
  // Note: the first two blob references will not be considered when resolving
  // the oldest blob file referenced (the first one is inlined TTL, while the
  // second one is TTL and thus points to a TTL blob file).
  constexpr std::array<uint64_t, 6> blob_file_numbers{
      {kInvalidBlobFileNumber, 5, 103, 17, 102, 101}};
  for (size_t i = 0; i < blob_file_numbers.size(); ++i) {
    std::string key(std::to_string(i + 10001));
    std::string blob_index;
    if (i == 0) {
      BlobIndex::EncodeInlinedTTL(&blob_index, /* expiration */ 1234567890ULL,
                                  "foo");
    } else if (i == 1) {
      BlobIndex::EncodeBlobTTL(&blob_index, /* expiration */ 1234567890ULL,
                               blob_file_numbers[i], /* offset */ i << 10,
                               /* size */ i << 20, kNoCompression);
    } else {
      BlobIndex::EncodeBlob(&blob_index, blob_file_numbers[i],
                            /* offset */ i << 10, /* size */ i << 20,
                            kNoCompression);
    }
    const SequenceNumber seq(i + 10001);
    ASSERT_OK(new_mem->Add(seq, kTypeBlobIndex, key, blob_index,
                           nullptr /* kv_prot_info */));
    InternalKey internal_key(key, seq, kTypeBlobIndex);
    inserted_keys.push_back({internal_key.Encode().ToString(), blob_index});
  }
  mock::SortKVVector(&inserted_keys);

  autovector<MemTable*> to_delete;
  new_mem->ConstructFragmentedRangeTombstones();
  cfd->imm()->Add(new_mem, &to_delete);
  for (auto& m : to_delete) {
    delete m;
  }

  EventLogger event_logger(db_options_.info_log.get());
  SnapshotChecker* snapshot_checker = nullptr;  // not relevant
  FlushJob flush_job(
      dbname_, cfd, db_options_, *cfd->GetLatestMutableCFOptions(),
      std::numeric_limits<uint64_t>::max() /* memtable_id */, env_options_,
      versions_.get(), &mutex_, &shutting_down_, {}, kMaxSequenceNumber,
      snapshot_checker, &job_context, FlushReason::kTest, nullptr, nullptr,
      nullptr, kNoCompression, db_options_.statistics.get(), &event_logger,
      true, true /* sync_output_directory */, true /* write_manifest */,
      Env::Priority::USER, nullptr /*IOTracer*/, empty_seqno_to_time_mapping_);

  HistogramData hist;
  FileMetaData file_meta;
  {
    // Scoped lock: if ASSERT_OK fails and returns early, the mutex is still
    // released before the fixture is torn down.
    InstrumentedMutexLock l(&mutex_);
    flush_job.PickMemTable();
    ASSERT_OK(flush_job.Run(nullptr /* prep_tracker */, &file_meta));
  }

  db_options_.statistics->histogramData(FLUSH_TIME, &hist);
  ASSERT_GT(hist.average, 0.0);
  ASSERT_EQ(std::to_string(0), file_meta.smallest.user_key().ToString());
  // The range deletion's end key is the largest key in the file.
  ASSERT_EQ("9999a", file_meta.largest.user_key().ToString());
  ASSERT_EQ(1, file_meta.fd.smallest_seqno);
  ASSERT_EQ(10006, file_meta.fd.largest_seqno);
  // Smallest file number among the non-TTL blob references (103, 17, 102, 101).
  ASSERT_EQ(17, file_meta.oldest_blob_file_number);
  mock_table_factory_->AssertSingleFile(inserted_keys);
  job_context.Clean();
}
// Adds two memtables to the default column family but asks the FlushJob to
// flush only up to the first memtable id, then checks that the output file
// covers exactly the first memtable's keys and sequence numbers.
TEST_F(FlushJobTest, FlushMemTablesSingleColumnFamily) {
  const size_t num_mems = 2;
  const size_t num_mems_to_flush = 1;
  const size_t num_keys_per_table = 100;
  JobContext job_context(0);
  ColumnFamilyData* cfd = versions_->GetColumnFamilySet()->GetDefault();
  std::vector<uint64_t> memtable_ids;
  std::vector<MemTable*> new_mems;
  for (size_t i = 0; i != num_mems; ++i) {
    MemTable* mem = cfd->ConstructNewMemtable(*cfd->GetLatestMutableCFOptions(),
                                              kMaxSequenceNumber);
    mem->SetID(i);
    mem->Ref();
    new_mems.emplace_back(mem);
    memtable_ids.push_back(mem->GetID());
    // Memtable i holds keys and seqnos [i * 100, i * 100 + 99].
    for (size_t j = 0; j < num_keys_per_table; ++j) {
      std::string key(std::to_string(j + i * num_keys_per_table));
      std::string value("value" + key);
      ASSERT_OK(mem->Add(SequenceNumber(j + i * num_keys_per_table), kTypeValue,
                         key, value, nullptr /* kv_prot_info */));
    }
  }

  autovector<MemTable*> to_delete;
  for (auto mem : new_mems) {
    mem->ConstructFragmentedRangeTombstones();
    cfd->imm()->Add(mem, &to_delete);
  }

  EventLogger event_logger(db_options_.info_log.get());
  SnapshotChecker* snapshot_checker = nullptr;  // not relevant

  // ASSERT_EQ rather than assert() so the check also runs in NDEBUG builds.
  ASSERT_EQ(num_mems, memtable_ids.size());
  uint64_t smallest_memtable_id = memtable_ids.front();
  uint64_t flush_memtable_id = smallest_memtable_id + num_mems_to_flush - 1;
  FlushJob flush_job(
      dbname_, cfd, db_options_, *cfd->GetLatestMutableCFOptions(),
      flush_memtable_id, env_options_, versions_.get(), &mutex_,
      &shutting_down_, {}, kMaxSequenceNumber, snapshot_checker, &job_context,
      FlushReason::kTest, nullptr, nullptr, nullptr, kNoCompression,
      db_options_.statistics.get(), &event_logger, true,
      true /* sync_output_directory */, true /* write_manifest */,
      Env::Priority::USER, nullptr /*IOTracer*/, empty_seqno_to_time_mapping_);

  HistogramData hist;
  FileMetaData file_meta;
  {
    // Scoped lock: if ASSERT_OK fails and returns early, the mutex is still
    // released before the fixture is torn down.
    InstrumentedMutexLock l(&mutex_);
    flush_job.PickMemTable();
    ASSERT_OK(flush_job.Run(nullptr /* prep_tracker */, &file_meta));
  }

  db_options_.statistics->histogramData(FLUSH_TIME, &hist);
  ASSERT_GT(hist.average, 0.0);
  ASSERT_EQ(std::to_string(0), file_meta.smallest.user_key().ToString());
  // Keys are decimal strings "0".."99"; "99" is the expected largest key.
  ASSERT_EQ("99", file_meta.largest.user_key().ToString());
  ASSERT_EQ(0, file_meta.fd.smallest_seqno);
  ASSERT_EQ(SequenceNumber(num_mems_to_flush * num_keys_per_table - 1),
            file_meta.fd.largest_seqno);
  ASSERT_EQ(kInvalidBlobFileNumber, file_meta.oldest_blob_file_number);

  for (auto m : to_delete) {
    delete m;
  }
  to_delete.clear();
  job_context.Clean();
}
// Flushes every memtable of all three column families with one FlushJob per
// column family (none of which writes the MANIFEST itself), installs the
// results together via InstallMemtableAtomicFlushResults() and checks each
// output file's key range and sequence number range.
TEST_F(FlushJobTest, FlushMemtablesMultipleColumnFamilies) {
  autovector<ColumnFamilyData*> all_cfds;
  for (auto cfd : *versions_->GetColumnFamilySet()) {
    all_cfds.push_back(cfd);
  }
  // Number of memtables to create per column family, by position in all_cfds.
  const std::vector<size_t> num_memtables = {2, 1, 3};
  // ASSERT_EQ rather than assert() so the check also runs in NDEBUG builds.
  ASSERT_EQ(num_memtables.size(), column_family_names_.size());
  const size_t num_keys_per_memtable = 1000;
  JobContext job_context(0);
  std::vector<uint64_t> memtable_ids;
  std::vector<SequenceNumber> smallest_seqs;
  std::vector<SequenceNumber> largest_seqs;
  autovector<MemTable*> to_delete;
  SequenceNumber curr_seqno = 0;
  size_t k = 0;
  for (auto cfd : all_cfds) {
    smallest_seqs.push_back(curr_seqno);
    for (size_t i = 0; i != num_memtables[k]; ++i) {
      MemTable* mem = cfd->ConstructNewMemtable(
          *cfd->GetLatestMutableCFOptions(), kMaxSequenceNumber);
      mem->SetID(i);
      mem->Ref();
      for (size_t j = 0; j != num_keys_per_memtable; ++j) {
        std::string key(std::to_string(j + i * num_keys_per_memtable));
        std::string value("value" + key);
        ASSERT_OK(mem->Add(curr_seqno++, kTypeValue, key, value,
                           nullptr /* kv_prot_info */));
      }
      mem->ConstructFragmentedRangeTombstones();
      cfd->imm()->Add(mem, &to_delete);
    }
    largest_seqs.push_back(curr_seqno - 1);
    // Flush up to (and including) the last memtable id of this column family.
    memtable_ids.push_back(num_memtables[k++] - 1);
  }

  EventLogger event_logger(db_options_.info_log.get());
  SnapshotChecker* snapshot_checker = nullptr;  // not relevant
  std::vector<std::unique_ptr<FlushJob>> flush_jobs;
  k = 0;
  for (auto cfd : all_cfds) {
    std::vector<SequenceNumber> snapshot_seqs;
    flush_jobs.emplace_back(new FlushJob(
        dbname_, cfd, db_options_, *cfd->GetLatestMutableCFOptions(),
        memtable_ids[k], env_options_, versions_.get(), &mutex_,
        &shutting_down_, snapshot_seqs, kMaxSequenceNumber, snapshot_checker,
        &job_context, FlushReason::kTest, nullptr, nullptr, nullptr,
        kNoCompression, db_options_.statistics.get(), &event_logger, true,
        false /* sync_output_directory */, false /* write_manifest */,
        Env::Priority::USER, nullptr /*IOTracer*/,
        empty_seqno_to_time_mapping_));
    k++;
  }

  HistogramData hist;
  std::vector<FileMetaData> file_metas;
  // Call reserve to avoid auto-resizing
  file_metas.reserve(flush_jobs.size());
  {
    // Scoped lock: any ASSERT_OK below that fails and returns early still
    // releases the mutex before the fixture is torn down.
    InstrumentedMutexLock l(&mutex_);
    for (auto& job : flush_jobs) {
      job->PickMemTable();
    }
    for (auto& job : flush_jobs) {
      FileMetaData meta;
      // Run will release and re-acquire mutex
      ASSERT_OK(job->Run(nullptr /* prep_tracker */, &meta));
      file_metas.emplace_back(meta);
    }
    autovector<FileMetaData*> file_meta_ptrs;
    for (auto& meta : file_metas) {
      file_meta_ptrs.push_back(&meta);
    }
    autovector<const autovector<MemTable*>*> mems_list;
    for (size_t i = 0; i != all_cfds.size(); ++i) {
      const auto& mems = flush_jobs[i]->GetMemTables();
      mems_list.push_back(&mems);
    }
    autovector<const MutableCFOptions*> mutable_cf_options_list;
    for (auto cfd : all_cfds) {
      mutable_cf_options_list.push_back(cfd->GetLatestMutableCFOptions());
    }
    autovector<std::list<std::unique_ptr<FlushJobInfo>>*>
        committed_flush_jobs_info;
    for (auto& job : flush_jobs) {
      committed_flush_jobs_info.push_back(job->GetCommittedFlushJobsInfo());
    }
    Status s = InstallMemtableAtomicFlushResults(
        nullptr /* imm_lists */, all_cfds, mutable_cf_options_list, mems_list,
        versions_.get(), nullptr /* prep_tracker */, &mutex_, file_meta_ptrs,
        committed_flush_jobs_info, &job_context.memtables_to_free,
        nullptr /* db_directory */, nullptr /* log_buffer */);
    ASSERT_OK(s);
  }

  db_options_.statistics->histogramData(FLUSH_TIME, &hist);
  ASSERT_GT(hist.average, 0.0);
  k = 0;
  for (const auto& file_meta : file_metas) {
    ASSERT_EQ(std::to_string(0), file_meta.smallest.user_key().ToString());
    ASSERT_EQ("999", file_meta.largest.user_key()
                         .ToString());  // max key by bytewise comparator
    ASSERT_EQ(smallest_seqs[k], file_meta.fd.smallest_seqno);
    ASSERT_EQ(largest_seqs[k], file_meta.fd.largest_seqno);
    // Verify that imm is empty
    ASSERT_EQ(std::numeric_limits<uint64_t>::max(),
              all_cfds[k]->imm()->GetEarliestMemTableID());
    ASSERT_EQ(0, all_cfds[k]->imm()->GetLatestMemTableID());
    ++k;
  }

  for (auto m : to_delete) {
    delete m;
  }
  to_delete.clear();
  job_context.Clean();
}
// Writes several versions of each key while a random set of snapshot sequence
// numbers is live, flushes, and checks that the output file holds exactly the
// versions that are either the newest for their key or pinned by a snapshot.
TEST_F(FlushJobTest, Snapshots) {
  JobContext job_context(0);
  auto cfd = versions_->GetColumnFamilySet()->GetDefault();
  auto new_mem = cfd->ConstructNewMemtable(*cfd->GetLatestMutableCFOptions(),
                                           kMaxSequenceNumber);
  std::set<SequenceNumber> snapshots_set;
  int keys = 10000;
  int max_inserts_per_keys = 8;
  Random rnd(301);
  for (int i = 0; i < keys / 2; ++i) {
    snapshots_set.insert(rnd.Uniform(keys * (max_inserts_per_keys / 2)) + 1);
  }
  // set has already removed the duplicate snapshots
  std::vector<SequenceNumber> snapshots(snapshots_set.begin(),
                                        snapshots_set.end());
  new_mem->Ref();
  SequenceNumber current_seqno = 0;
  auto inserted_keys = mock::MakeMockFile();
  for (int i = 1; i < keys; ++i) {
    std::string key(std::to_string(i));
    int insertions = rnd.Uniform(max_inserts_per_keys);
    for (int j = 0; j < insertions; ++j) {
      std::string value(rnd.HumanReadableString(10));
      auto seqno = ++current_seqno;
      ASSERT_OK(new_mem->Add(SequenceNumber(seqno), kTypeValue, key, value,
                             nullptr /* kv_prot_info */));
      // a key is visible only if:
      // 1. it's the last one written (j == insertions - 1)
      // 2. there's a snapshot pointing at it
      bool visible = (j == insertions - 1) ||
                     (snapshots_set.find(seqno) != snapshots_set.end());
      if (visible) {
        InternalKey internal_key(key, seqno, kTypeValue);
        inserted_keys.push_back({internal_key.Encode().ToString(), value});
      }
    }
  }
  mock::SortKVVector(&inserted_keys);

  autovector<MemTable*> to_delete;
  new_mem->ConstructFragmentedRangeTombstones();
  cfd->imm()->Add(new_mem, &to_delete);
  for (auto& m : to_delete) {
    delete m;
  }

  EventLogger event_logger(db_options_.info_log.get());
  SnapshotChecker* snapshot_checker = nullptr;  // not relevant
  FlushJob flush_job(
      dbname_, cfd, db_options_, *cfd->GetLatestMutableCFOptions(),
      std::numeric_limits<uint64_t>::max() /* memtable_id */, env_options_,
      versions_.get(), &mutex_, &shutting_down_, snapshots, kMaxSequenceNumber,
      snapshot_checker, &job_context, FlushReason::kTest, nullptr, nullptr,
      nullptr, kNoCompression, db_options_.statistics.get(), &event_logger,
      true, true /* sync_output_directory */, true /* write_manifest */,
      Env::Priority::USER, nullptr /*IOTracer*/, empty_seqno_to_time_mapping_);
  {
    // Scoped lock: if ASSERT_OK fails and returns early, the mutex is still
    // released before the fixture is torn down.
    InstrumentedMutexLock l(&mutex_);
    flush_job.PickMemTable();
    ASSERT_OK(flush_job.Run());
  }
  mock_table_factory_->AssertSingleFile(inserted_keys);

  HistogramData hist;
  db_options_.statistics->histogramData(FLUSH_TIME, &hist);
  ASSERT_GT(hist.average, 0.0);
  job_context.Clean();
}
// Checks the rate limiter priority a FlushJob reports for its writes in each
// WriteController state: IO_HIGH when normal, IO_USER while a delay token or
// a stop token is outstanding.
TEST_F(FlushJobTest, GetRateLimiterPriorityForWrite) {
  // Prepare a FlushJob that flush MemTables of Single Column Family.
  const size_t num_mems = 2;
  const size_t num_mems_to_flush = 1;
  const size_t num_keys_per_table = 100;

  JobContext job_context(0);
  ColumnFamilyData* cfd = versions_->GetColumnFamilySet()->GetDefault();

  std::vector<uint64_t> ids;
  std::vector<MemTable*> memtables;
  for (size_t mem_idx = 0; mem_idx != num_mems; ++mem_idx) {
    MemTable* table = cfd->ConstructNewMemtable(
        *cfd->GetLatestMutableCFOptions(), kMaxSequenceNumber);
    table->SetID(mem_idx);
    table->Ref();
    memtables.emplace_back(table);
    ids.push_back(table->GetID());

    for (size_t key_idx = 0; key_idx < num_keys_per_table; ++key_idx) {
      const size_t ordinal = key_idx + mem_idx * num_keys_per_table;
      std::string key(std::to_string(ordinal));
      std::string value("value" + key);
      ASSERT_OK(table->Add(SequenceNumber(ordinal), kTypeValue, key, value,
                           nullptr /* kv_prot_info */));
    }
  }

  autovector<MemTable*> to_delete;
  for (MemTable* table : memtables) {
    table->ConstructFragmentedRangeTombstones();
    cfd->imm()->Add(table, &to_delete);
  }

  EventLogger event_logger(db_options_.info_log.get());
  SnapshotChecker* snapshot_checker = nullptr;  // not relevant

  assert(ids.size() == num_mems);
  const uint64_t first_id = ids.front();
  const uint64_t flush_memtable_id = first_id + num_mems_to_flush - 1;
  FlushJob flush_job(
      dbname_, cfd, db_options_, *cfd->GetLatestMutableCFOptions(),
      flush_memtable_id, env_options_, versions_.get(), &mutex_,
      &shutting_down_, {}, kMaxSequenceNumber, snapshot_checker, &job_context,
      FlushReason::kTest, nullptr, nullptr, nullptr, kNoCompression,
      db_options_.statistics.get(), &event_logger, true,
      true /* sync_output_directory */, true /* write_manifest */,
      Env::Priority::USER, nullptr /*IOTracer*/, empty_seqno_to_time_mapping_);

  // When the state from WriteController is normal.
  ASSERT_EQ(flush_job.GetRateLimiterPriorityForWrite(), Env::IO_HIGH);

  WriteController* const controller =
      flush_job.versions_->GetColumnFamilySet()->write_controller();

  {
    // When the state from WriteController is Delayed. The token is held only
    // for the duration of this scope.
    std::unique_ptr<WriteControllerToken> delay_token =
        controller->GetDelayToken(1000000);
    ASSERT_EQ(flush_job.GetRateLimiterPriorityForWrite(), Env::IO_USER);
  }

  {
    // When the state from WriteController is Stopped.
    std::unique_ptr<WriteControllerToken> stop_token =
        controller->GetStopToken();
    ASSERT_EQ(flush_job.GetRateLimiterPriorityForWrite(), Env::IO_USER);
  }
}
class FlushJobTimestampTest : public FlushJobTestBase {
public:
FlushJobTimestampTest()
: FlushJobTestBase(test::PerThreadDBPath("flush_job_ts_gc_test"),
test::BytewiseComparatorWithU64TsWrapper()) {}
void AddKeyValueToMemtable(MemTable* memtable, std::string key, uint64_t ts,
SequenceNumber seq, ValueType value_type,
Slice value) {
std::string key_str(std::move(key));
PutFixed64(&key_str, ts);
ASSERT_OK(memtable->Add(seq, value_type, key_str, value,
nullptr /* kv_prot_info */));
}
protected:
static constexpr uint64_t kStartTs = 10;
static constexpr SequenceNumber kStartSeq = 0;
SequenceNumber curr_seq_{kStartSeq};
std::atomic<uint64_t> curr_ts_{kStartTs};
};
// Writes 100 versions of one user key followed by a timestamped deletion, then
// flushes with full_history_ts_low set to the maximum timestamp. The flushed
// file is expected to span only the deletion entry.
TEST_F(FlushJobTimestampTest, AllKeysExpired) {
  ColumnFamilyData* cfd = versions_->GetColumnFamilySet()->GetDefault();
  autovector<MemTable*> to_delete;

  {
    MemTable* new_mem = cfd->ConstructNewMemtable(
        *cfd->GetLatestMutableCFOptions(), kMaxSequenceNumber);
    new_mem->Ref();
    for (int i = 0; i < 100; ++i) {
      uint64_t ts = curr_ts_.fetch_add(1);
      SequenceNumber seq = (curr_seq_++);
      AddKeyValueToMemtable(new_mem, test::EncodeInt(0), ts, seq,
                            ValueType::kTypeValue, "0_value");
    }
    uint64_t ts = curr_ts_.fetch_add(1);
    SequenceNumber seq = (curr_seq_++);
    AddKeyValueToMemtable(new_mem, test::EncodeInt(0), ts, seq,
                          ValueType::kTypeDeletionWithTimestamp, "");
    new_mem->ConstructFragmentedRangeTombstones();
    cfd->imm()->Add(new_mem, &to_delete);
  }

  std::vector<SequenceNumber> snapshots;
  constexpr SnapshotChecker* const snapshot_checker = nullptr;
  JobContext job_context(0);
  EventLogger event_logger(db_options_.info_log.get());
  std::string full_history_ts_low;
  PutFixed64(&full_history_ts_low, std::numeric_limits<uint64_t>::max());
  FlushJob flush_job(
      dbname_, cfd, db_options_, *cfd->GetLatestMutableCFOptions(),
      std::numeric_limits<uint64_t>::max() /* memtable_id */, env_options_,
      versions_.get(), &mutex_, &shutting_down_, snapshots, kMaxSequenceNumber,
      snapshot_checker, &job_context, FlushReason::kTest, nullptr, nullptr,
      nullptr, kNoCompression, db_options_.statistics.get(), &event_logger,
      true, true /* sync_output_directory */, true /* write_manifest */,
      Env::Priority::USER, nullptr /*IOTracer*/, empty_seqno_to_time_mapping_,
      /*db_id=*/"",
      /*db_session_id=*/"", full_history_ts_low);

  FileMetaData fmeta;
  {
    // Scoped lock: if ASSERT_OK fails and returns early, the mutex is still
    // released before the fixture is torn down.
    InstrumentedMutexLock l(&mutex_);
    flush_job.PickMemTable();
    ASSERT_OK(flush_job.Run(/*prep_tracker=*/nullptr, &fmeta));
  }

  {
    // The last timestamp and sequence number handed out belong to the
    // deletion; both file boundaries must equal that entry.
    std::string key = test::EncodeInt(0);
    key.append(test::EncodeInt(curr_ts_.load(std::memory_order_relaxed) - 1));
    InternalKey ikey(key, curr_seq_ - 1, ValueType::kTypeDeletionWithTimestamp);
    ASSERT_EQ(ikey.Encode(), fmeta.smallest.Encode());
    ASSERT_EQ(ikey.Encode(), fmeta.largest.Encode());
  }

  job_context.Clean();
  ASSERT_TRUE(to_delete.empty());
}
// Writes 100 versions of one user key and flushes with full_history_ts_low set
// to 0. The flushed file is expected to span from the newest version (last
// timestamp/seqno) to the oldest one (kStartTs/kStartSeq).
TEST_F(FlushJobTimestampTest, NoKeyExpired) {
  ColumnFamilyData* cfd = versions_->GetColumnFamilySet()->GetDefault();
  autovector<MemTable*> to_delete;

  {
    MemTable* new_mem = cfd->ConstructNewMemtable(
        *cfd->GetLatestMutableCFOptions(), kMaxSequenceNumber);
    new_mem->Ref();
    for (int i = 0; i < 100; ++i) {
      uint64_t ts = curr_ts_.fetch_add(1);
      SequenceNumber seq = (curr_seq_++);
      AddKeyValueToMemtable(new_mem, test::EncodeInt(0), ts, seq,
                            ValueType::kTypeValue, "0_value");
    }
    new_mem->ConstructFragmentedRangeTombstones();
    cfd->imm()->Add(new_mem, &to_delete);
  }

  std::vector<SequenceNumber> snapshots;
  SnapshotChecker* const snapshot_checker = nullptr;
  JobContext job_context(0);
  EventLogger event_logger(db_options_.info_log.get());
  std::string full_history_ts_low;
  PutFixed64(&full_history_ts_low, 0);
  FlushJob flush_job(
      dbname_, cfd, db_options_, *cfd->GetLatestMutableCFOptions(),
      std::numeric_limits<uint64_t>::max() /* memtable_id */, env_options_,
      versions_.get(), &mutex_, &shutting_down_, snapshots, kMaxSequenceNumber,
      snapshot_checker, &job_context, FlushReason::kTest, nullptr, nullptr,
      nullptr, kNoCompression, db_options_.statistics.get(), &event_logger,
      true, true /* sync_output_directory */, true /* write_manifest */,
      Env::Priority::USER, nullptr /*IOTracer*/, empty_seqno_to_time_mapping_,
      /*db_id=*/"",
      /*db_session_id=*/"", full_history_ts_low);

  FileMetaData fmeta;
  {
    // Scoped lock: if ASSERT_OK fails and returns early, the mutex is still
    // released before the fixture is torn down.
    InstrumentedMutexLock l(&mutex_);
    flush_job.PickMemTable();
    ASSERT_OK(flush_job.Run(/*prep_tracker=*/nullptr, &fmeta));
  }

  {
    std::string ukey = test::EncodeInt(0);
    // The file's smallest internal key is the most recently written version;
    // its largest is the first version written.
    std::string smallest_key =
        ukey + test::EncodeInt(curr_ts_.load(std::memory_order_relaxed) - 1);
    std::string largest_key = ukey + test::EncodeInt(kStartTs);
    InternalKey smallest(smallest_key, curr_seq_ - 1, ValueType::kTypeValue);
    InternalKey largest(largest_key, kStartSeq, ValueType::kTypeValue);
    ASSERT_EQ(smallest.Encode(), fmeta.smallest.Encode());
    ASSERT_EQ(largest.Encode(), fmeta.largest.Encode());
  }

  job_context.Clean();
  ASSERT_TRUE(to_delete.empty());
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack trace handler, lets GoogleTest
// consume its command-line flags, then runs every registered test and returns
// the result of RUN_ALL_TESTS() as the process exit code.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

@@ -1,746 +0,0 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
//
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include <functional>
#include <utility>
#include "db/db_test_util.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "rocksdb/sst_file_writer.h"
#include "test_util/testutil.h"
#include "util/random.h"
namespace ROCKSDB_NAMESPACE {
class ImportColumnFamilyTest : public DBTestBase {
public:
ImportColumnFamilyTest()
: DBTestBase("import_column_family_test", /*env_do_fsync=*/true) {
sst_files_dir_ = dbname_ + "/sst_files/";
export_files_dir_ = test::PerThreadDBPath(env_, "export");
DestroyAndRecreateExternalSSTFilesDir();
import_cfh_ = nullptr;
import_cfh2_ = nullptr;
metadata_ptr_ = nullptr;
}
~ImportColumnFamilyTest() {
if (import_cfh_) {
EXPECT_OK(db_->DropColumnFamily(import_cfh_));
EXPECT_OK(db_->DestroyColumnFamilyHandle(import_cfh_));
import_cfh_ = nullptr;
}
if (import_cfh2_) {
EXPECT_OK(db_->DropColumnFamily(import_cfh2_));
EXPECT_OK(db_->DestroyColumnFamilyHandle(import_cfh2_));
import_cfh2_ = nullptr;
}
if (metadata_ptr_) {
delete metadata_ptr_;
metadata_ptr_ = nullptr;
}
EXPECT_OK(DestroyDir(env_, sst_files_dir_));
EXPECT_OK(DestroyDir(env_, export_files_dir_));
}
void DestroyAndRecreateExternalSSTFilesDir() {
EXPECT_OK(DestroyDir(env_, sst_files_dir_));
EXPECT_OK(env_->CreateDir(sst_files_dir_));
EXPECT_OK(DestroyDir(env_, export_files_dir_));
}
LiveFileMetaData LiveFileMetaDataInit(std::string name, std::string path,
int level,
SequenceNumber smallest_seqno,
SequenceNumber largest_seqno) {
LiveFileMetaData metadata;
metadata.name = name;
metadata.db_path = path;
metadata.smallest_seqno = smallest_seqno;
metadata.largest_seqno = largest_seqno;
metadata.level = level;
return metadata;
}
protected:
std::string sst_files_dir_;
std::string export_files_dir_;
ColumnFamilyHandle* import_cfh_;
ColumnFamilyHandle* import_cfh2_;
ExportImportFilesMetaData* metadata_ptr_;
};
// Imports SST files produced by SstFileWriter into new column families: one
// file written with a column family handle and one written without, then
// reopens the DB with SST unique id verification enabled.
TEST_F(ImportColumnFamilyTest, ImportSSTFileWriterFiles) {
  Options options = CurrentOptions();
  CreateAndReopenWithCF({"koko"}, options);

  SstFileWriter sfw_cf1(EnvOptions(), options, handles_[1]);
  SstFileWriter sfw_unknown(EnvOptions(), options);

  // cf1.sst
  const std::string cf1_sst_name = "cf1.sst";
  const std::string cf1_sst_path = sst_files_dir_ + cf1_sst_name;
  ASSERT_OK(sfw_cf1.Open(cf1_sst_path));
  ASSERT_OK(sfw_cf1.Put("K1", "V1"));
  ASSERT_OK(sfw_cf1.Put("K2", "V2"));
  ASSERT_OK(sfw_cf1.Finish());

  // cf_unknown.sst
  const std::string unknown_sst_name = "cf_unknown.sst";
  const std::string unknown_sst_path = sst_files_dir_ + unknown_sst_name;
  ASSERT_OK(sfw_unknown.Open(unknown_sst_path));
  ASSERT_OK(sfw_unknown.Put("K3", "V1"));
  ASSERT_OK(sfw_unknown.Put("K4", "V2"));
  ASSERT_OK(sfw_unknown.Finish());

  {
    // Import sst file corresponding to cf1 onto a new cf and verify
    ExportImportFilesMetaData import_meta;
    import_meta.files.push_back(
        LiveFileMetaDataInit(cf1_sst_name, sst_files_dir_, 0, 10, 19));
    import_meta.db_comparator_name = options.comparator->Name();

    ASSERT_OK(db_->CreateColumnFamilyWithImport(options, "toto",
                                                ImportColumnFamilyOptions(),
                                                import_meta, &import_cfh_));
    ASSERT_NE(import_cfh_, nullptr);

    std::string fetched;
    ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, "K1", &fetched));
    ASSERT_EQ(fetched, "V1");
    ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, "K2", &fetched));
    ASSERT_EQ(fetched, "V2");

    // "toto" is dropped here so import_cfh_ can be reused below.
    ASSERT_OK(db_->DropColumnFamily(import_cfh_));
    ASSERT_OK(db_->DestroyColumnFamilyHandle(import_cfh_));
    import_cfh_ = nullptr;
  }

  {
    // Import sst file corresponding to unknown cf onto a new cf and verify
    ExportImportFilesMetaData import_meta;
    import_meta.files.push_back(
        LiveFileMetaDataInit(unknown_sst_name, sst_files_dir_, 0, 20, 29));
    import_meta.db_comparator_name = options.comparator->Name();

    ASSERT_OK(db_->CreateColumnFamilyWithImport(options, "yoyo",
                                                ImportColumnFamilyOptions(),
                                                import_meta, &import_cfh_));
    ASSERT_NE(import_cfh_, nullptr);

    std::string fetched;
    ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, "K3", &fetched));
    ASSERT_EQ(fetched, "V1");
    ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, "K4", &fetched));
    ASSERT_EQ(fetched, "V2");
  }

  // Only the handle is destroyed; "yoyo" is not dropped because the reopen
  // below lists it among the column families.
  EXPECT_OK(db_->DestroyColumnFamilyHandle(import_cfh_));
  import_cfh_ = nullptr;

  // verify sst unique id during reopen
  options.verify_sst_unique_id_in_manifest = true;
  ReopenWithColumnFamilies({"default", "koko", "yoyo"}, options);
}
// Imports six SST files with overlapping key ranges placed on levels 3..0 and
// checks that reads return the value from the topmost file containing each
// key: right after the import, after further overwrites plus a flush, and
// after a full compaction.
TEST_F(ImportColumnFamilyTest, ImportSSTFileWriterFilesWithOverlap) {
  Options options = CurrentOptions();
  CreateAndReopenWithCF({"koko"}, options);

  SstFileWriter sfw_cf1(EnvOptions(), options, handles_[1]);

  // Value expected for Key(i). `after_overwrite5` selects whether the
  // post-import Put() of every fifth key is taken into account.
  const auto expected_value = [&](int i,
                                  bool after_overwrite5) -> std::string {
    if (after_overwrite5 && i % 5 == 0) {
      return Key(i) + "_overwrite5";
    }
    if (i % 16 == 0) {
      return Key(i) + "_overwrite4";
    }
    if (i % 4 == 0) {
      return Key(i) + "_overwrite2";
    }
    if (i % 2 == 0) {
      return Key(i) + "_overwrite1";
    }
    return Key(i) + "_val";
  };

  // file3.sst
  const std::string file3_sst_name = "file3.sst";
  const std::string file3_sst = sst_files_dir_ + file3_sst_name;
  ASSERT_OK(sfw_cf1.Open(file3_sst));
  for (int i = 0; i < 100; ++i) {
    ASSERT_OK(sfw_cf1.Put(Key(i), Key(i) + "_val"));
  }
  ASSERT_OK(sfw_cf1.Finish());

  // file2.sst
  const std::string file2_sst_name = "file2.sst";
  const std::string file2_sst = sst_files_dir_ + file2_sst_name;
  ASSERT_OK(sfw_cf1.Open(file2_sst));
  for (int i = 0; i < 100; i += 2) {
    ASSERT_OK(sfw_cf1.Put(Key(i), Key(i) + "_overwrite1"));
  }
  ASSERT_OK(sfw_cf1.Finish());

  // file1a.sst
  const std::string file1a_sst_name = "file1a.sst";
  const std::string file1a_sst = sst_files_dir_ + file1a_sst_name;
  ASSERT_OK(sfw_cf1.Open(file1a_sst));
  for (int i = 0; i < 52; i += 4) {
    ASSERT_OK(sfw_cf1.Put(Key(i), Key(i) + "_overwrite2"));
  }
  ASSERT_OK(sfw_cf1.Finish());

  // file1b.sst
  const std::string file1b_sst_name = "file1b.sst";
  const std::string file1b_sst = sst_files_dir_ + file1b_sst_name;
  ASSERT_OK(sfw_cf1.Open(file1b_sst));
  for (int i = 52; i < 100; i += 4) {
    ASSERT_OK(sfw_cf1.Put(Key(i), Key(i) + "_overwrite2"));
  }
  ASSERT_OK(sfw_cf1.Finish());

  // file0a.sst
  const std::string file0a_sst_name = "file0a.sst";
  const std::string file0a_sst = sst_files_dir_ + file0a_sst_name;
  ASSERT_OK(sfw_cf1.Open(file0a_sst));
  for (int i = 0; i < 100; i += 16) {
    ASSERT_OK(sfw_cf1.Put(Key(i), Key(i) + "_overwrite3"));
  }
  ASSERT_OK(sfw_cf1.Finish());

  // file0b.sst
  const std::string file0b_sst_name = "file0b.sst";
  const std::string file0b_sst = sst_files_dir_ + file0b_sst_name;
  ASSERT_OK(sfw_cf1.Open(file0b_sst));
  for (int i = 0; i < 100; i += 16) {
    ASSERT_OK(sfw_cf1.Put(Key(i), Key(i) + "_overwrite4"));
  }
  ASSERT_OK(sfw_cf1.Finish());

  // Import sst files and verify
  ExportImportFilesMetaData metadata;
  metadata.files.push_back(
      LiveFileMetaDataInit(file3_sst_name, sst_files_dir_, 3, 10, 19));
  metadata.files.push_back(
      LiveFileMetaDataInit(file2_sst_name, sst_files_dir_, 2, 20, 29));
  metadata.files.push_back(
      LiveFileMetaDataInit(file1a_sst_name, sst_files_dir_, 1, 30, 34));
  metadata.files.push_back(
      LiveFileMetaDataInit(file1b_sst_name, sst_files_dir_, 1, 35, 39));
  metadata.files.push_back(
      LiveFileMetaDataInit(file0a_sst_name, sst_files_dir_, 0, 40, 49));
  metadata.files.push_back(
      LiveFileMetaDataInit(file0b_sst_name, sst_files_dir_, 0, 50, 59));
  metadata.db_comparator_name = options.comparator->Name();

  ASSERT_OK(db_->CreateColumnFamilyWithImport(
      options, "toto", ImportColumnFamilyOptions(), metadata, &import_cfh_));
  ASSERT_NE(import_cfh_, nullptr);

  for (int i = 0; i < 100; i++) {
    std::string value;
    ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, Key(i), &value));
    ASSERT_EQ(value, expected_value(i, /*after_overwrite5=*/false));
  }

  for (int i = 0; i < 100; i += 5) {
    ASSERT_OK(
        db_->Put(WriteOptions(), import_cfh_, Key(i), Key(i) + "_overwrite5"));
  }

  // Flush and check again
  ASSERT_OK(db_->Flush(FlushOptions(), import_cfh_));
  for (int i = 0; i < 100; i++) {
    std::string value;
    ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, Key(i), &value));
    ASSERT_EQ(value, expected_value(i, /*after_overwrite5=*/true));
  }

  // Compact and check again.
  ASSERT_OK(
      db_->CompactRange(CompactRangeOptions(), import_cfh_, nullptr, nullptr));
  for (int i = 0; i < 100; i++) {
    std::string value;
    ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, Key(i), &value));
    ASSERT_EQ(value, expected_value(i, /*after_overwrite5=*/true));
  }
}
// Imports an SST file that contains a range tombstone extending past its last
// point key and checks that the imported file's largest key reflects the
// tombstone's end key.
TEST_F(ImportColumnFamilyTest, ImportSSTFileWriterFilesWithRangeTombstone) {
  // Test for a bug where import file's smallest and largest key did not
  // consider range tombstone.
  Options options = CurrentOptions();
  CreateAndReopenWithCF({"koko"}, options);

  SstFileWriter sfw_cf1(EnvOptions(), options, handles_[1]);

  // cf1.sst
  const std::string cf1_sst_name = "cf1.sst";
  const std::string cf1_sst_path = sst_files_dir_ + cf1_sst_name;
  ASSERT_OK(sfw_cf1.Open(cf1_sst_path));
  ASSERT_OK(sfw_cf1.Put("K1", "V1"));
  ASSERT_OK(sfw_cf1.Put("K2", "V2"));
  ASSERT_OK(sfw_cf1.DeleteRange("K3", "K4"));
  ASSERT_OK(sfw_cf1.Finish());

  // Import sst file corresponding to cf1 onto a new cf and verify
  ExportImportFilesMetaData import_meta;
  import_meta.files.push_back(
      LiveFileMetaDataInit(cf1_sst_name, sst_files_dir_, 0, 0, 19));
  import_meta.db_comparator_name = options.comparator->Name();

  ASSERT_OK(db_->CreateColumnFamilyWithImport(options, "toto",
                                              ImportColumnFamilyOptions(),
                                              import_meta, &import_cfh_));
  ASSERT_NE(import_cfh_, nullptr);

  ColumnFamilyMetaData import_cf_meta;
  db_->GetColumnFamilyMetaData(import_cfh_, &import_cf_meta);
  ASSERT_EQ(import_cf_meta.file_count, 1);

  // Locate the single imported file: the first file of the first non-empty
  // level.
  const SstFileMetaData* imported_file = nullptr;
  for (const auto& level_meta : import_cf_meta.levels) {
    if (level_meta.files.empty()) {
      continue;
    }
    imported_file = &level_meta.files.front();
    break;
  }
  ASSERT_NE(imported_file, nullptr);

  InternalKey largest_key;
  largest_key.DecodeFrom(imported_file->largest);
  ASSERT_EQ(largest_key.user_key(), "K4");

  std::string fetched;
  ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, "K1", &fetched));
  ASSERT_EQ(fetched, "V1");
  ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, "K2", &fetched));
  ASSERT_EQ(fetched, "V2");

  ASSERT_OK(db_->DropColumnFamily(import_cfh_));
  ASSERT_OK(db_->DestroyColumnFamilyHandle(import_cfh_));
  import_cfh_ = nullptr;
}
// Exports column family "koko" through a checkpoint and imports the exported
// files twice into the same DB: into "toto" with move_files = false and into
// "yoyo" with move_files = true. Both imports must read back the same values
// as the source column family, and later writes to "toto" must leave "yoyo"
// matching the source.
TEST_F(ImportColumnFamilyTest, ImportExportedSSTFromAnotherCF) {
  Options options = CurrentOptions();
  CreateAndReopenWithCF({"koko"}, options);

  for (int i = 0; i < 100; ++i) {
    ASSERT_OK(Put(1, Key(i), Key(i) + "_val"));
  }
  ASSERT_OK(Flush(1));

  ASSERT_OK(
      db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr));

  // Overwrite the value in the same set of keys.
  for (int i = 0; i < 100; ++i) {
    ASSERT_OK(Put(1, Key(i), Key(i) + "_overwrite"));
  }

  // Flush to create L0 file.
  ASSERT_OK(Flush(1));
  for (int i = 0; i < 100; ++i) {
    ASSERT_OK(Put(1, Key(i), Key(i) + "_overwrite2"));
  }

  // Flush again to create another L0 file. It should have a higher sequence
  // number.
  ASSERT_OK(Flush(1));

  // Export the source column family; the export fills metadata_ptr_.
  Checkpoint* checkpoint;
  ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
  ASSERT_OK(checkpoint->ExportColumnFamily(handles_[1], export_files_dir_,
                                           &metadata_ptr_));
  ASSERT_NE(metadata_ptr_, nullptr);
  delete checkpoint;

  // First import keeps the exported files in place (move_files = false) so
  // that the second import can consume the same metadata with
  // move_files = true.
  ImportColumnFamilyOptions import_options;
  import_options.move_files = false;
  ASSERT_OK(db_->CreateColumnFamilyWithImport(options, "toto", import_options,
                                              *metadata_ptr_, &import_cfh_));
  ASSERT_NE(import_cfh_, nullptr);

  import_options.move_files = true;
  ASSERT_OK(db_->CreateColumnFamilyWithImport(options, "yoyo", import_options,
                                              *metadata_ptr_, &import_cfh2_));
  ASSERT_NE(import_cfh2_, nullptr);
  delete metadata_ptr_;
  // Use nullptr (not NULL), matching the rest of this file.
  metadata_ptr_ = nullptr;

  // Both imported column families must mirror the source column family.
  std::string value1, value2;

  for (int i = 0; i < 100; ++i) {
    ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, Key(i), &value1));
    ASSERT_EQ(Get(1, Key(i)), value1);
  }

  for (int i = 0; i < 100; ++i) {
    ASSERT_OK(db_->Get(ReadOptions(), import_cfh2_, Key(i), &value2));
    ASSERT_EQ(Get(1, Key(i)), value2);
  }

  // Modify keys in cf1 and verify.
  for (int i = 0; i < 25; i++) {
    ASSERT_OK(db_->Delete(WriteOptions(), import_cfh_, Key(i)));
  }
  for (int i = 25; i < 50; i++) {
    ASSERT_OK(
        db_->Put(WriteOptions(), import_cfh_, Key(i), Key(i) + "_overwrite3"));
  }
  for (int i = 0; i < 25; ++i) {
    ASSERT_TRUE(
        db_->Get(ReadOptions(), import_cfh_, Key(i), &value1).IsNotFound());
  }
  for (int i = 25; i < 50; ++i) {
    ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, Key(i), &value1));
    ASSERT_EQ(Key(i) + "_overwrite3", value1);
  }
  for (int i = 50; i < 100; ++i) {
    ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, Key(i), &value1));
    ASSERT_EQ(Key(i) + "_overwrite2", value1);
  }

  // The second import must still match the source column family.
  for (int i = 0; i < 100; ++i) {
    ASSERT_OK(db_->Get(ReadOptions(), import_cfh2_, Key(i), &value2));
    ASSERT_EQ(Get(1, Key(i)), value2);
  }

  // Compact and check again.
  ASSERT_OK(db_->Flush(FlushOptions(), import_cfh_));
  ASSERT_OK(
      db_->CompactRange(CompactRangeOptions(), import_cfh_, nullptr, nullptr));

  for (int i = 0; i < 25; ++i) {
    ASSERT_TRUE(
        db_->Get(ReadOptions(), import_cfh_, Key(i), &value1).IsNotFound());
  }
  for (int i = 25; i < 50; ++i) {
    ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, Key(i), &value1));
    ASSERT_EQ(Key(i) + "_overwrite3", value1);
  }
  for (int i = 50; i < 100; ++i) {
    ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, Key(i), &value1));
    ASSERT_EQ(Key(i) + "_overwrite2", value1);
  }

  for (int i = 0; i < 100; ++i) {
    ASSERT_OK(db_->Get(ReadOptions(), import_cfh2_, Key(i), &value2));
    ASSERT_EQ(Get(1, Key(i)), value2);
  }
}
// Exports column family "koko" through a checkpoint, imports the exported
// files into a column family of a second, freshly opened DB, and checks that
// every key reads back with the same value as in the source column family.
TEST_F(ImportColumnFamilyTest, ImportExportedSSTFromAnotherDB) {
  Options options = CurrentOptions();
  CreateAndReopenWithCF({"koko"}, options);

  // Base data for all 100 keys.
  for (int k = 0; k < 100; ++k) {
    ASSERT_OK(Put(1, Key(k), Key(k) + "_val"));
  }
  ASSERT_OK(Flush(1));

  // Compact to create a L1 file.
  ASSERT_OK(
      db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr));

  // Overwrite the first half of the keys, then flush to create an L0 file.
  for (int k = 0; k < 50; ++k) {
    ASSERT_OK(Put(1, Key(k), Key(k) + "_overwrite"));
  }
  ASSERT_OK(Flush(1));

  // Overwrite the first quarter once more and flush again to create another
  // L0 file. It should have a higher sequence number.
  for (int k = 0; k < 25; ++k) {
    ASSERT_OK(Put(1, Key(k), Key(k) + "_overwrite2"));
  }
  ASSERT_OK(Flush(1));

  // Export the source column family; the export fills metadata_ptr_.
  Checkpoint* exporter;
  ASSERT_OK(Checkpoint::Create(db_, &exporter));
  ASSERT_OK(exporter->ExportColumnFamily(handles_[1], export_files_dir_,
                                         &metadata_ptr_));
  ASSERT_NE(metadata_ptr_, nullptr);
  delete exporter;

  // Create a new db and import the files.
  const std::string copy_path = dbname_ + "/db_copy";
  DB* copy_db;
  ASSERT_OK(DestroyDir(env_, copy_path));
  ASSERT_OK(DB::Open(options, copy_path, &copy_db));
  ColumnFamilyHandle* imported_cfh = nullptr;
  ASSERT_OK(copy_db->CreateColumnFamilyWithImport(
      ColumnFamilyOptions(), "yoyo", ImportColumnFamilyOptions(),
      *metadata_ptr_, &imported_cfh));
  ASSERT_NE(imported_cfh, nullptr);

  // Every key must read identically from the copy and from the source.
  for (int k = 0; k < 100; ++k) {
    std::string copied;
    ASSERT_OK(copy_db->Get(ReadOptions(), imported_cfh, Key(k), &copied));
    ASSERT_EQ(Get(1, Key(k)), copied);
  }

  // Tear down the second DB and remove its directory.
  ASSERT_OK(copy_db->DropColumnFamily(imported_cfh));
  ASSERT_OK(copy_db->DestroyColumnFamilyHandle(imported_cfh));
  delete copy_db;
  ASSERT_OK(DestroyDir(env_, copy_path));
}
// Exports a column family that contains range deletions and imports it into
// two new column families of the same DB ("toto" with move_files = false,
// "yoyo" with move_files = true), then checks reads both at the latest state
// and through a snapshot taken before the range deletions.
TEST_F(ImportColumnFamilyTest,
ImportExportedSSTFromAnotherCFWithRangeTombstone) {
// Test for a bug where import file's smallest and largest key did not
// consider range tombstone.
Options options = CurrentOptions();
options.disable_auto_compactions = true;
CreateAndReopenWithCF({"koko"}, options);
// Write keys 10..19, flush them, and move the resulting file to level 1.
for (int i = 10; i < 20; ++i) {
ASSERT_OK(Put(1, Key(i), Key(i) + "_val"));
}
ASSERT_OK(Flush(1 /* cf */));
MoveFilesToLevel(1 /* level */, 1 /* cf */);
// The snapshot is taken before any range deletion, so reads through it are
// expected to still see the values written above.
const Snapshot* snapshot = db_->GetSnapshot();
ASSERT_OK(db_->DeleteRange(WriteOptions(), handles_[1], Key(0), Key(25)));
ASSERT_OK(Put(1, Key(1), "t"));
ASSERT_OK(Flush(1));
// Tests importing a range tombstone only file
ASSERT_OK(db_->DeleteRange(WriteOptions(), handles_[1], Key(0), Key(2)));
// Export the source column family; the export fills metadata_ptr_.
Checkpoint* checkpoint;
ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
ASSERT_OK(checkpoint->ExportColumnFamily(handles_[1], export_files_dir_,
&metadata_ptr_));
ASSERT_NE(metadata_ptr_, nullptr);
delete checkpoint;
// Import the same exported metadata twice: first with move_files = false,
// then with move_files = true.
ImportColumnFamilyOptions import_options;
import_options.move_files = false;
ASSERT_OK(db_->CreateColumnFamilyWithImport(options, "toto", import_options,
*metadata_ptr_, &import_cfh_));
ASSERT_NE(import_cfh_, nullptr);
import_options.move_files = true;
ASSERT_OK(db_->CreateColumnFamilyWithImport(options, "yoyo", import_options,
*metadata_ptr_, &import_cfh2_));
ASSERT_NE(import_cfh2_, nullptr);
delete metadata_ptr_;
metadata_ptr_ = nullptr;
std::string value1, value2;
ReadOptions ro_latest;
ReadOptions ro_snapshot;
ro_snapshot.snapshot = snapshot;
// First import: keys 10..19 are not found at the latest state but are still
// readable through the snapshot, matching the source column family there.
for (int i = 10; i < 20; ++i) {
ASSERT_TRUE(db_->Get(ro_latest, import_cfh_, Key(i), &value1).IsNotFound());
ASSERT_OK(db_->Get(ro_snapshot, import_cfh_, Key(i), &value1));
ASSERT_EQ(Get(1, Key(i), snapshot), value1);
}
// Key(1) was re-written after the first range deletion and then covered by
// the second one, so it must not be found at the latest state.
ASSERT_TRUE(db_->Get(ro_latest, import_cfh_, Key(1), &value1).IsNotFound());
// Second import: same expectations as for the first one.
for (int i = 10; i < 20; ++i) {
ASSERT_TRUE(
db_->Get(ro_latest, import_cfh2_, Key(i), &value1).IsNotFound());
ASSERT_OK(db_->Get(ro_snapshot, import_cfh2_, Key(i), &value2));
ASSERT_EQ(Get(1, Key(i), snapshot), value2);
}
ASSERT_TRUE(db_->Get(ro_latest, import_cfh2_, Key(1), &value1).IsNotFound());
db_->ReleaseSnapshot(snapshot);
}
TEST_F(ImportColumnFamilyTest, LevelFilesOverlappingAtEndpoints) {
  // Imports a column family containing a level where two files overlap at
  // their endpoints. "Overlap" means the largest user key in one file is the
  // same as the smallest user key in the second file.
  const int kFileBytes = 128 << 10;  // 128KB
  const int kValueBytes = 1 << 10;   // 1KB
  const int kNumFiles = 4;
  // Number of writes issued before each flush.
  const int kPutsPerFile = kFileBytes / kValueBytes;

  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.num_levels = 2;
  CreateAndReopenWithCF({"koko"}, options);

  Random rnd(301);
  // Every key is snapshot protected to ensure older versions will not be
  // dropped during compaction.
  std::vector<const Snapshot*> held_snapshots;
  held_snapshots.reserve(kPutsPerFile * kNumFiles);
  for (int file_idx = 0; file_idx < kNumFiles; ++file_idx) {
    for (int put_idx = 0; put_idx < kPutsPerFile; ++put_idx) {
      const auto payload = rnd.RandomString(kValueBytes);
      ASSERT_OK(Put(1, "key", payload));
      held_snapshots.push_back(db_->GetSnapshot());
    }
    ASSERT_OK(Flush(1));
  }

  // Compact to create overlapping L1 files.
  ASSERT_OK(
      db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr));
  ASSERT_GT(NumTableFilesAtLevel(1, 1), 1);

  // Export the source column family; the export fills metadata_ptr_.
  Checkpoint* exporter;
  ASSERT_OK(Checkpoint::Create(db_, &exporter));
  ASSERT_OK(exporter->ExportColumnFamily(handles_[1], export_files_dir_,
                                         &metadata_ptr_));
  ASSERT_NE(metadata_ptr_, nullptr);
  delete exporter;

  // Create a new db and import the files.
  const std::string copy_path = dbname_ + "/db_copy";
  DB* copy_db;
  ASSERT_OK(DestroyDir(env_, copy_path));
  ASSERT_OK(DB::Open(options, copy_path, &copy_db));
  ColumnFamilyHandle* imported_cfh = nullptr;
  ASSERT_OK(copy_db->CreateColumnFamilyWithImport(
      ColumnFamilyOptions(), "yoyo", ImportColumnFamilyOptions(),
      *metadata_ptr_, &imported_cfh));
  ASSERT_NE(imported_cfh, nullptr);

  // The single user key must be readable from the imported column family.
  std::string latest;
  ASSERT_OK(copy_db->Get(ReadOptions(), imported_cfh, "key", &latest));

  // Tear down the second DB, then release the snapshots on the source DB.
  ASSERT_OK(copy_db->DropColumnFamily(imported_cfh));
  ASSERT_OK(copy_db->DestroyColumnFamilyHandle(imported_cfh));
  delete copy_db;
  ASSERT_OK(DestroyDir(env_, copy_path));
  for (const Snapshot* held : held_snapshots) {
    db_->ReleaseSnapshot(held);
  }
}
// Exercises failure paths of CreateColumnFamilyWithImport: duplicate column
// family name, empty file list, overlapping files, comparator mismatch, and a
// missing SST file. After each failure the output handle must remain null. The
// last case also checks that an import under the same name succeeds once the
// bad input is removed.
TEST_F(ImportColumnFamilyTest, ImportColumnFamilyNegativeTest) {
Options options = CurrentOptions();
CreateAndReopenWithCF({"koko"}, options);
{
// Create column family with existing cf name.
ExportImportFilesMetaData metadata;
ASSERT_EQ(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "koko",
ImportColumnFamilyOptions(),
metadata, &import_cfh_),
Status::InvalidArgument("Column family already exists"));
ASSERT_EQ(import_cfh_, nullptr);
}
{
// Import with no files specified.
ExportImportFilesMetaData metadata;
ASSERT_EQ(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo",
ImportColumnFamilyOptions(),
metadata, &import_cfh_),
Status::InvalidArgument("The list of files is empty"));
ASSERT_EQ(import_cfh_, nullptr);
}
{
// Import with overlapping keys in sst files.
ExportImportFilesMetaData metadata;
SstFileWriter sfw_cf1(EnvOptions(), options, handles_[1]);
// file1.sst holds K1..K2 and file2.sst holds K2..K3, so both contain "K2".
const std::string file1_sst_name = "file1.sst";
const std::string file1_sst = sst_files_dir_ + file1_sst_name;
ASSERT_OK(sfw_cf1.Open(file1_sst));
ASSERT_OK(sfw_cf1.Put("K1", "V1"));
ASSERT_OK(sfw_cf1.Put("K2", "V2"));
ASSERT_OK(sfw_cf1.Finish());
const std::string file2_sst_name = "file2.sst";
const std::string file2_sst = sst_files_dir_ + file2_sst_name;
ASSERT_OK(sfw_cf1.Open(file2_sst));
ASSERT_OK(sfw_cf1.Put("K2", "V2"));
ASSERT_OK(sfw_cf1.Put("K3", "V3"));
ASSERT_OK(sfw_cf1.Finish());
// Both files are registered with identical trailing arguments (1, 10, 19).
// NOTE(review): presumably (level, smallest seqno, largest seqno) -- confirm
// against LiveFileMetaDataInit.
metadata.files.push_back(
LiveFileMetaDataInit(file1_sst_name, sst_files_dir_, 1, 10, 19));
metadata.files.push_back(
LiveFileMetaDataInit(file2_sst_name, sst_files_dir_, 1, 10, 19));
metadata.db_comparator_name = options.comparator->Name();
// Only failure is asserted here, not a specific status or message.
ASSERT_NOK(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo",
ImportColumnFamilyOptions(),
metadata, &import_cfh_));
ASSERT_EQ(import_cfh_, nullptr);
}
{
// Import with a mismatching comparator, should fail with appropriate error.
ExportImportFilesMetaData metadata;
Options mismatch_options = CurrentOptions();
mismatch_options.comparator = ReverseBytewiseComparator();
SstFileWriter sfw_cf1(EnvOptions(), mismatch_options, handles_[1]);
const std::string file1_sst_name = "file1.sst";
const std::string file1_sst = sst_files_dir_ + file1_sst_name;
ASSERT_OK(sfw_cf1.Open(file1_sst));
// Keys are added in descending order ("K2" before "K1"); the writer was
// created with the reverse bytewise comparator.
ASSERT_OK(sfw_cf1.Put("K2", "V2"));
ASSERT_OK(sfw_cf1.Put("K1", "V1"));
ASSERT_OK(sfw_cf1.Finish());
metadata.files.push_back(
LiveFileMetaDataInit(file1_sst_name, sst_files_dir_, 1, 10, 19));
metadata.db_comparator_name = mismatch_options.comparator->Name();
ASSERT_EQ(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "coco",
ImportColumnFamilyOptions(),
metadata, &import_cfh_),
Status::InvalidArgument("Comparator name mismatch"));
ASSERT_EQ(import_cfh_, nullptr);
}
{
// Import with non existent sst file should fail with appropriate error
ExportImportFilesMetaData metadata;
SstFileWriter sfw_cf1(EnvOptions(), options, handles_[1]);
// Rewrite file1.sst with the default comparator; the previous case wrote it
// with the reverse comparator.
const std::string file1_sst_name = "file1.sst";
const std::string file1_sst = sst_files_dir_ + file1_sst_name;
ASSERT_OK(sfw_cf1.Open(file1_sst));
ASSERT_OK(sfw_cf1.Put("K1", "V1"));
ASSERT_OK(sfw_cf1.Put("K2", "V2"));
ASSERT_OK(sfw_cf1.Finish());
// file3.sst is listed in the metadata but is never written by this test.
const std::string file3_sst_name = "file3.sst";
metadata.files.push_back(
LiveFileMetaDataInit(file1_sst_name, sst_files_dir_, 1, 10, 19));
metadata.files.push_back(
LiveFileMetaDataInit(file3_sst_name, sst_files_dir_, 1, 10, 19));
metadata.db_comparator_name = options.comparator->Name();
ASSERT_EQ(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo",
ImportColumnFamilyOptions(),
metadata, &import_cfh_),
Status::IOError("No such file or directory"));
ASSERT_EQ(import_cfh_, nullptr);
// Test successful import after a failure with the same CF name. Ensures
// there is no side effect with CF when there is a failed import
metadata.files.pop_back();
metadata.db_comparator_name = options.comparator->Name();
ASSERT_OK(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo",
ImportColumnFamilyOptions(),
metadata, &import_cfh_));
ASSERT_NE(import_cfh_, nullptr);
}
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack trace handler, hands the command
// line to GoogleTest, and returns the result of running every test.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,308 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Test for issue 178: a manual compaction causes deleted data to reappear.
#include <cstdlib>
#include "port/port.h"
#include "rocksdb/compaction_filter.h"
#include "rocksdb/db.h"
#include "rocksdb/slice.h"
#include "rocksdb/write_batch.h"
#include "test_util/testharness.h"
using ROCKSDB_NAMESPACE::CompactionFilter;
using ROCKSDB_NAMESPACE::CompactionStyle;
using ROCKSDB_NAMESPACE::CompactRangeOptions;
using ROCKSDB_NAMESPACE::CompressionType;
using ROCKSDB_NAMESPACE::DB;
using ROCKSDB_NAMESPACE::DestroyDB;
using ROCKSDB_NAMESPACE::FlushOptions;
using ROCKSDB_NAMESPACE::Iterator;
using ROCKSDB_NAMESPACE::Options;
using ROCKSDB_NAMESPACE::ReadOptions;
using ROCKSDB_NAMESPACE::Slice;
using ROCKSDB_NAMESPACE::WriteBatch;
using ROCKSDB_NAMESPACE::WriteOptions;
namespace {
// Number of keys written per key range by ManualCompactionTest.Test.
// Reasoning: previously the number was 1100000. Since the keys are written to
// the batch in one write, each write will result in one SST file. We reduced
// the write_buffer_size to 1K to have basically the same effect with fewer
// keys, which results in a shorter test runtime.
const int kNumKeys = 1100;
// Returns the first-range key for index `i`: "my_key_" followed by `i` in
// decimal.
std::string Key1(int i) {
  return "my_key_" + std::to_string(i);
}
// Returns the second-range key for index `i`: the Key1 key with "_xxx"
// appended.
std::string Key2(int i) {
  std::string key = Key1(i);
  key += "_xxx";
  return key;
}
// Fixture that picks a per-thread database path and destroys any database
// left at that path by an earlier run.
class ManualCompactionTest : public testing::Test {
 public:
  ManualCompactionTest()
      : dbname_(ROCKSDB_NAMESPACE::test::PerThreadDBPath(
            "rocksdb_manual_compaction_test")) {
    // Get rid of any state from an old run.
    EXPECT_OK(DestroyDB(dbname_, Options()));
  }

  // Path of the database used by the tests in this fixture.
  std::string dbname_;
};
// Compaction filter whose Filter() returns true exactly when the existing
// value equals "destroy"; the key and level are ignored.
class DestroyAllCompactionFilter : public CompactionFilter {
 public:
  DestroyAllCompactionFilter() = default;

  const char* Name() const override { return "DestroyAllCompactionFilter"; }

  bool Filter(int /*level*/, const Slice& /*key*/, const Slice& existing_value,
              std::string* /*new_value*/,
              bool* /*value_changed*/) const override {
    const std::string current = existing_value.ToString();
    return current == "destroy";
  }
};
// Compaction filter that never filters anything. It records, for every key
// passed to Filter(), the level reported with the most recent call, so tests
// can check which keys a compaction touched and at which level.
class LogCompactionFilter : public CompactionFilter {
 public:
  const char* Name() const override { return "LogCompactionFilter"; }

  // Remembers `level` for `key` (overwriting any earlier entry) and keeps the
  // entry by returning false.
  bool Filter(int level, const Slice& key, const Slice& /*existing_value*/,
              std::string* /*new_value*/,
              bool* /*value_changed*/) const override {
    key_level_[key.ToString()] = level;
    return false;
  }

  // Forgets every recorded key.
  void Reset() { key_level_.clear(); }

  // Number of distinct keys recorded since the last Reset().
  size_t NumKeys() const { return key_level_.size(); }

  // Returns the level last recorded for `key`, or -1 if the key has not been
  // seen. Read-only, hence const-qualified.
  int KeyLevel(const Slice& key) const {
    const auto it = key_level_.find(key.ToString());
    return it == key_level_.end() ? -1 : it->second;
  }

 private:
  // Mutable because Filter() is a const member but has to record its calls.
  mutable std::map<std::string, int> key_level_;
};
// Runs once with level compaction and once with universal compaction: writes
// four keys, three of them with the value "destroy", manually compacts the
// range up to "key4" with DestroyAllCompactionFilter installed, and expects
// only "key3" to remain.
TEST_F(ManualCompactionTest, CompactTouchesAllKeys) {
  for (int iter = 0; iter < 2; ++iter) {
    DB* db = nullptr;
    Options options;
    if (iter == 0) {  // level compaction
      options.num_levels = 3;
      options.compaction_style = CompactionStyle::kCompactionStyleLevel;
    } else {  // universal compaction
      options.compaction_style = CompactionStyle::kCompactionStyleUniversal;
    }
    options.create_if_missing = true;
    options.compression = CompressionType::kNoCompression;
    options.compaction_filter = new DestroyAllCompactionFilter();
    ASSERT_OK(DB::Open(options, dbname_, &db));

    ASSERT_OK(db->Put(WriteOptions(), Slice("key1"), Slice("destroy")));
    ASSERT_OK(db->Put(WriteOptions(), Slice("key2"), Slice("destroy")));
    ASSERT_OK(db->Put(WriteOptions(), Slice("key3"), Slice("value3")));
    ASSERT_OK(db->Put(WriteOptions(), Slice("key4"), Slice("destroy")));

    Slice key4("key4");
    ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, &key4));

    // Only "key3" should be left after the compaction.
    Iterator* itr = db->NewIterator(ReadOptions());
    itr->SeekToFirst();
    ASSERT_TRUE(itr->Valid());
    ASSERT_EQ("key3", itr->key().ToString());
    itr->Next();
    ASSERT_TRUE(!itr->Valid());
    delete itr;

    // Close the DB before freeing the filter: the DB was opened with a raw
    // pointer to it, so the filter has to stay alive until the DB is gone.
    delete db;
    delete options.compaction_filter;
    options.compaction_filter = nullptr;
    ASSERT_OK(DestroyDB(dbname_, options));
  }
}
// Writes two key ranges, deletes the second one, manually compacts the first
// range, and checks that exactly kNumKeys keys are visible afterwards.
TEST_F(ManualCompactionTest, Test) {
  // Open database. Disable compression since it affects the creation
  // of layers and the code below is trying to test against a very
  // specific scenario.
  Options open_opts;
  open_opts.write_buffer_size = 1024;
  open_opts.create_if_missing = true;
  open_opts.compression = CompressionType::kNoCompression;
  DB* db;
  ASSERT_OK(DB::Open(open_opts, dbname_, &db));

  WriteBatch batch;

  // create first key range
  for (int k = 0; k < kNumKeys; ++k) {
    ASSERT_OK(batch.Put(Key1(k), "value for range 1 key"));
  }
  ASSERT_OK(db->Write(WriteOptions(), &batch));

  // create second key range
  batch.Clear();
  for (int k = 0; k < kNumKeys; ++k) {
    ASSERT_OK(batch.Put(Key2(k), "value for range 2 key"));
  }
  ASSERT_OK(db->Write(WriteOptions(), &batch));

  // delete second key range
  batch.Clear();
  for (int k = 0; k < kNumKeys; ++k) {
    ASSERT_OK(batch.Delete(Key2(k)));
  }
  ASSERT_OK(db->Write(WriteOptions(), &batch));

  // compact database over the first key range
  const std::string start_key = Key1(0);
  const std::string end_key = Key1(kNumKeys - 1);
  Slice least(start_key);
  Slice greatest(end_key);

  // commenting out the line below causes the example to work correctly
  ASSERT_OK(db->CompactRange(CompactRangeOptions(), &least, &greatest));

  // count the keys
  Iterator* scan = db->NewIterator(ReadOptions());
  int num_keys = 0;
  scan->SeekToFirst();
  while (scan->Valid()) {
    ++num_keys;
    scan->Next();
  }
  delete scan;
  ASSERT_EQ(kNumKeys, num_keys) << "Bad number of keys";

  // close database
  delete db;
  ASSERT_OK(DestroyDB(dbname_, Options()));
}
// Issues a sequence of manual compactions over different key ranges and uses
// LogCompactionFilter to check which keys each compaction passed to the
// filter and at which level. The "L0:/L1:" comments describe the file layout
// expected before each step.
TEST_F(ManualCompactionTest, SkipLevel) {
  DB* db = nullptr;
  Options options;
  options.num_levels = 3;
  // Initially, flushed L0 files won't exceed 100.
  options.level0_file_num_compaction_trigger = 100;
  options.compaction_style = CompactionStyle::kCompactionStyleLevel;
  options.create_if_missing = true;
  options.compression = CompressionType::kNoCompression;
  LogCompactionFilter* filter = new LogCompactionFilter();
  options.compaction_filter = filter;
  ASSERT_OK(DB::Open(options, dbname_, &db));

  // Three flushes: the first two hold one key each, the third holds "4" and
  // "8".
  WriteOptions wo;
  FlushOptions fo;
  ASSERT_OK(db->Put(wo, "1", ""));
  ASSERT_OK(db->Flush(fo));
  ASSERT_OK(db->Put(wo, "2", ""));
  ASSERT_OK(db->Flush(fo));
  ASSERT_OK(db->Put(wo, "4", ""));
  ASSERT_OK(db->Put(wo, "8", ""));
  ASSERT_OK(db->Flush(fo));

  {
    // L0: 1, 2, [4, 8]
    // no file has keys in range [5, 7]
    Slice start("5");
    Slice end("7");
    filter->Reset();
    ASSERT_OK(db->CompactRange(CompactRangeOptions(), &start, &end));
    ASSERT_EQ(0, filter->NumKeys());
  }

  {
    // L0: 1, 2, [4, 8]
    // [3, 7] overlaps with 4 in L0
    Slice start("3");
    Slice end("7");
    filter->Reset();
    ASSERT_OK(db->CompactRange(CompactRangeOptions(), &start, &end));
    ASSERT_EQ(2, filter->NumKeys());
    ASSERT_EQ(0, filter->KeyLevel("4"));
    ASSERT_EQ(0, filter->KeyLevel("8"));
  }

  {
    // L0: 1, 2
    // L1: [4, 8]
    // no file has keys in range (-inf, 0]
    Slice end("0");
    filter->Reset();
    ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, &end));
    ASSERT_EQ(0, filter->NumKeys());
  }

  {
    // L0: 1, 2
    // L1: [4, 8]
    // no file has keys in range [9, inf)
    Slice start("9");
    filter->Reset();
    ASSERT_OK(db->CompactRange(CompactRangeOptions(), &start, nullptr));
    ASSERT_EQ(0, filter->NumKeys());
  }

  {
    // L0: 1, 2
    // L1: [4, 8]
    // [2, 2] overlaps with 2 in L0
    Slice start("2");
    Slice end("2");
    filter->Reset();
    ASSERT_OK(db->CompactRange(CompactRangeOptions(), &start, &end));
    ASSERT_EQ(1, filter->NumKeys());
    ASSERT_EQ(0, filter->KeyLevel("2"));
  }

  {
    // L0: 1
    // L1: 2, [4, 8]
    // [2, 5] overlaps with 2 and [4, 8) in L1, skip L0
    Slice start("2");
    Slice end("5");
    filter->Reset();
    ASSERT_OK(db->CompactRange(CompactRangeOptions(), &start, &end));
    ASSERT_EQ(3, filter->NumKeys());
    ASSERT_EQ(1, filter->KeyLevel("2"));
    ASSERT_EQ(1, filter->KeyLevel("4"));
    ASSERT_EQ(1, filter->KeyLevel("8"));
  }

  {
    // L0: 1
    // L1: [2, 4, 8]
    // [0, inf) overlaps all files
    Slice start("0");
    filter->Reset();
    ASSERT_OK(db->CompactRange(CompactRangeOptions(), &start, nullptr));
    ASSERT_EQ(4, filter->NumKeys());
    // 1 is first compacted to L1 and then further compacted into [2, 4, 8],
    // so finally the logged level for 1 is L1.
    ASSERT_EQ(1, filter->KeyLevel("1"));
    ASSERT_EQ(1, filter->KeyLevel("2"));
    ASSERT_EQ(1, filter->KeyLevel("4"));
    ASSERT_EQ(1, filter->KeyLevel("8"));
  }

  // Close the DB before freeing the filter: the DB was opened with a raw
  // pointer to it, so the filter has to stay alive until the DB is gone.
  delete db;
  delete filter;
  options.compaction_filter = nullptr;
  ASSERT_OK(DestroyDB(dbname_, options));
}
} // anonymous namespace
// Test binary entry point: installs the stack trace handler, hands the command
// line to GoogleTest, and returns the result of running every test.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

File diff suppressed because it is too large Load Diff

@ -1,298 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/merge_helper.h"
#include <algorithm>
#include <string>
#include <vector>
#include "db/dbformat.h"
#include "rocksdb/comparator.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/coding.h"
#include "util/vector_iterator.h"
#include "utilities/merge_operators.h"
namespace ROCKSDB_NAMESPACE {
class MergeHelperTest : public testing::Test {
public:
MergeHelperTest() : icmp_(BytewiseComparator()) { env_ = Env::Default(); }
~MergeHelperTest() override = default;
Status Run(SequenceNumber stop_before, bool at_bottom,
SequenceNumber latest_snapshot = 0) {
iter_.reset(new VectorIterator(ks_, vs_, &icmp_));
iter_->SeekToFirst();
merge_helper_.reset(new MergeHelper(env_, icmp_.user_comparator(),
merge_op_.get(), filter_.get(), nullptr,
false, latest_snapshot));
return merge_helper_->MergeUntil(
iter_.get(), nullptr /* range_del_agg */, stop_before, at_bottom,
false /* allow_data_in_errors */, nullptr /* blob_fetcher */,
nullptr /* full_history_ts_low */, nullptr /* prefetch_buffers */,
nullptr /* c_iter_stats */);
}
void AddKeyVal(const std::string& user_key, const SequenceNumber& seq,
const ValueType& t, const std::string& val,
bool corrupt = false) {
InternalKey ikey(user_key, seq, t);
if (corrupt) {
test::CorruptKeyType(&ikey);
}
ks_.push_back(ikey.Encode().ToString());
vs_.push_back(val);
}
Env* env_;
InternalKeyComparator icmp_;
std::unique_ptr<VectorIterator> iter_;
std::shared_ptr<MergeOperator> merge_op_;
std::unique_ptr<MergeHelper> merge_helper_;
std::vector<std::string> ks_;
std::vector<std::string> vs_;
std::unique_ptr<test::FilterNumber> filter_;
};
// If MergeHelper encounters a new key on the last level, we know that
// the key has no more history and it can merge keys.
TEST_F(MergeHelperTest, MergeAtBottomSuccess) {
  merge_op_ = MergeOperators::CreateUInt64AddOperator();

  AddKeyVal("a", 20, kTypeMerge, test::EncodeInt(1U));
  AddKeyVal("a", 10, kTypeMerge, test::EncodeInt(3U));
  AddKeyVal("b", 10, kTypeMerge, test::EncodeInt(4U));  // <- iter_ after merge

  ASSERT_OK(Run(0, true));
  ASSERT_EQ(ks_[2], iter_->key());
  // Check the sizes first so that indexing [0] below cannot go out of bounds.
  ASSERT_EQ(1U, merge_helper_->keys().size());
  ASSERT_EQ(1U, merge_helper_->values().size());
  ASSERT_EQ(test::KeyStr("a", 20, kTypeValue), merge_helper_->keys()[0]);
  ASSERT_EQ(test::EncodeInt(4U), merge_helper_->values()[0]);
}
// Merging with a value results in a successful merge.
TEST_F(MergeHelperTest, MergeValue) {
  merge_op_ = MergeOperators::CreateUInt64AddOperator();

  AddKeyVal("a", 40, kTypeMerge, test::EncodeInt(1U));
  AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(3U));
  AddKeyVal("a", 20, kTypeValue, test::EncodeInt(4U));  // <- iter_ after merge
  AddKeyVal("a", 10, kTypeMerge, test::EncodeInt(1U));

  ASSERT_OK(Run(0, false));
  ASSERT_EQ(ks_[3], iter_->key());
  // Check the sizes first so that indexing [0] below cannot go out of bounds.
  ASSERT_EQ(1U, merge_helper_->keys().size());
  ASSERT_EQ(1U, merge_helper_->values().size());
  ASSERT_EQ(test::KeyStr("a", 40, kTypeValue), merge_helper_->keys()[0]);
  ASSERT_EQ(test::EncodeInt(8U), merge_helper_->values()[0]);
}
// Merging stops before a snapshot.
TEST_F(MergeHelperTest, SnapshotBeforeValue) {
  merge_op_ = MergeOperators::CreateUInt64AddOperator();

  AddKeyVal("a", 50, kTypeMerge, test::EncodeInt(1U));
  AddKeyVal("a", 40, kTypeMerge, test::EncodeInt(3U));  // <- iter_ after merge
  AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(1U));
  AddKeyVal("a", 20, kTypeValue, test::EncodeInt(4U));
  AddKeyVal("a", 10, kTypeMerge, test::EncodeInt(1U));

  ASSERT_TRUE(Run(31, true).IsMergeInProgress());
  ASSERT_EQ(ks_[2], iter_->key());
  // Check the sizes first so that indexing [0] below cannot go out of bounds.
  ASSERT_EQ(1U, merge_helper_->keys().size());
  ASSERT_EQ(1U, merge_helper_->values().size());
  ASSERT_EQ(test::KeyStr("a", 50, kTypeMerge), merge_helper_->keys()[0]);
  ASSERT_EQ(test::EncodeInt(4U), merge_helper_->values()[0]);
}
// MergeHelper preserves the operand stack for merge operators that
// cannot do a partial merge.
TEST_F(MergeHelperTest, NoPartialMerge) {
  merge_op_ = MergeOperators::CreateStringAppendTESTOperator();

  AddKeyVal("a", 50, kTypeMerge, "v2");
  AddKeyVal("a", 40, kTypeMerge, "v");  // <- iter_ after merge
  AddKeyVal("a", 30, kTypeMerge, "v");

  ASSERT_TRUE(Run(31, true).IsMergeInProgress());
  ASSERT_EQ(ks_[2], iter_->key());
  // Check the sizes first so that indexing [0] and [1] below cannot go out of
  // bounds.
  ASSERT_EQ(2U, merge_helper_->keys().size());
  ASSERT_EQ(2U, merge_helper_->values().size());
  ASSERT_EQ(test::KeyStr("a", 40, kTypeMerge), merge_helper_->keys()[0]);
  ASSERT_EQ("v", merge_helper_->values()[0]);
  ASSERT_EQ(test::KeyStr("a", 50, kTypeMerge), merge_helper_->keys()[1]);
  ASSERT_EQ("v2", merge_helper_->values()[1]);
}
// A single operand can not be merged.
TEST_F(MergeHelperTest, SingleOperand) {
  merge_op_ = MergeOperators::CreateUInt64AddOperator();

  AddKeyVal("a", 50, kTypeMerge, test::EncodeInt(1U));

  ASSERT_TRUE(Run(31, false).IsMergeInProgress());
  ASSERT_FALSE(iter_->Valid());
  // Check the sizes first so that indexing [0] below cannot go out of bounds.
  ASSERT_EQ(1U, merge_helper_->keys().size());
  ASSERT_EQ(1U, merge_helper_->values().size());
  ASSERT_EQ(test::KeyStr("a", 50, kTypeMerge), merge_helper_->keys()[0]);
  ASSERT_EQ(test::EncodeInt(1U), merge_helper_->values()[0]);
}
// Merging with a deletion turns the deletion into a value
TEST_F(MergeHelperTest, MergeDeletion) {
  merge_op_ = MergeOperators::CreateUInt64AddOperator();

  AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(3U));
  AddKeyVal("a", 20, kTypeDeletion, "");

  ASSERT_OK(Run(15, false));
  ASSERT_FALSE(iter_->Valid());
  // Check the sizes first so that indexing [0] below cannot go out of bounds.
  ASSERT_EQ(1U, merge_helper_->keys().size());
  ASSERT_EQ(1U, merge_helper_->values().size());
  ASSERT_EQ(test::KeyStr("a", 30, kTypeValue), merge_helper_->keys()[0]);
  ASSERT_EQ(test::EncodeInt(3U), merge_helper_->values()[0]);
}
// The merge helper stops upon encountering a corrupt key
TEST_F(MergeHelperTest, CorruptKey) {
  merge_op_ = MergeOperators::CreateUInt64AddOperator();

  AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(3U));
  AddKeyVal("a", 25, kTypeMerge, test::EncodeInt(1U));
  // Corrupt key
  AddKeyVal("a", 20, kTypeDeletion, "", true);  // <- iter_ after merge

  ASSERT_TRUE(Run(15, false).IsMergeInProgress());
  ASSERT_EQ(ks_[2], iter_->key());
  // Check the sizes first so that indexing [0] below cannot go out of bounds.
  ASSERT_EQ(1U, merge_helper_->keys().size());
  ASSERT_EQ(1U, merge_helper_->values().size());
  ASSERT_EQ(test::KeyStr("a", 30, kTypeMerge), merge_helper_->keys()[0]);
  ASSERT_EQ(test::EncodeInt(4U), merge_helper_->values()[0]);
}
// The compaction filter is called on every merge operand
TEST_F(MergeHelperTest, FilterMergeOperands) {
  merge_op_ = MergeOperators::CreateUInt64AddOperator();
  filter_.reset(new test::FilterNumber(5U));

  AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(3U));
  AddKeyVal("a", 29, kTypeMerge, test::EncodeInt(5U));  // Filtered
  AddKeyVal("a", 28, kTypeMerge, test::EncodeInt(3U));
  AddKeyVal("a", 27, kTypeMerge, test::EncodeInt(1U));
  AddKeyVal("a", 26, kTypeMerge, test::EncodeInt(5U));  // Filtered
  AddKeyVal("a", 25, kTypeValue, test::EncodeInt(1U));

  ASSERT_OK(Run(15, false));
  ASSERT_FALSE(iter_->Valid());
  MergeOutputIterator merge_output_iter(merge_helper_.get());
  merge_output_iter.SeekToFirst();
  // Fail cleanly if there is no output rather than reading an invalid
  // iterator.
  ASSERT_TRUE(merge_output_iter.Valid());
  ASSERT_EQ(test::KeyStr("a", 30, kTypeValue),
            merge_output_iter.key().ToString());
  ASSERT_EQ(test::EncodeInt(8U), merge_output_iter.value().ToString());
  merge_output_iter.Next();
  ASSERT_FALSE(merge_output_iter.Valid());
}
// When the compaction filter removes every merge operand, the merge produces
// no output; if a Put/Delete follows the operands, the input iterator is left
// pointing at it.
TEST_F(MergeHelperTest, FilterAllMergeOperands) {
  merge_op_ = MergeOperators::CreateUInt64AddOperator();
  filter_.reset(new test::FilterNumber(5U));

  AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(5U));
  AddKeyVal("a", 29, kTypeMerge, test::EncodeInt(5U));
  AddKeyVal("a", 28, kTypeMerge, test::EncodeInt(5U));
  AddKeyVal("a", 27, kTypeMerge, test::EncodeInt(5U));
  AddKeyVal("a", 26, kTypeMerge, test::EncodeInt(5U));
  AddKeyVal("a", 25, kTypeMerge, test::EncodeInt(5U));

  // filtered out all
  ASSERT_OK(Run(15, false));
  ASSERT_FALSE(iter_->Valid());
  MergeOutputIterator merge_output_iter(merge_helper_.get());
  merge_output_iter.SeekToFirst();
  ASSERT_FALSE(merge_output_iter.Valid());

  // we have one operand that will survive because it's a delete
  AddKeyVal("a", 24, kTypeDeletion, test::EncodeInt(5U));
  AddKeyVal("b", 23, kTypeValue, test::EncodeInt(5U));
  ASSERT_OK(Run(15, true));
  merge_output_iter = MergeOutputIterator(merge_helper_.get());
  ASSERT_TRUE(iter_->Valid());
  merge_output_iter.SeekToFirst();
  ASSERT_FALSE(merge_output_iter.Valid());

  // when all merge operands are filtered out, we leave the iterator pointing to
  // the Put/Delete that survived
  ASSERT_EQ(test::KeyStr("a", 24, kTypeDeletion), iter_->key().ToString());
  ASSERT_EQ(test::EncodeInt(5U), iter_->value().ToString());
}
// Make sure that merge operands are filtered at the beginning
TEST_F(MergeHelperTest, FilterFirstMergeOperand) {
  merge_op_ = MergeOperators::CreateUInt64AddOperator();
  filter_.reset(new test::FilterNumber(5U));

  AddKeyVal("a", 31, kTypeMerge, test::EncodeInt(5U));  // Filtered
  AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(5U));  // Filtered
  AddKeyVal("a", 29, kTypeMerge, test::EncodeInt(2U));
  AddKeyVal("a", 28, kTypeMerge, test::EncodeInt(1U));
  AddKeyVal("a", 27, kTypeMerge, test::EncodeInt(3U));
  AddKeyVal("a", 26, kTypeMerge, test::EncodeInt(5U));  // Filtered
  AddKeyVal("a", 25, kTypeMerge, test::EncodeInt(5U));  // Filtered
  AddKeyVal("b", 24, kTypeValue, test::EncodeInt(5U));  // next user key

  ASSERT_OK(Run(15, true));
  ASSERT_TRUE(iter_->Valid());
  MergeOutputIterator merge_output_iter(merge_helper_.get());
  merge_output_iter.SeekToFirst();
  // Fail cleanly if there is no output rather than reading an invalid
  // iterator.
  ASSERT_TRUE(merge_output_iter.Valid());
  // sequence number is 29 here, because the first merge operand got filtered
  // out
  ASSERT_EQ(test::KeyStr("a", 29, kTypeValue),
            merge_output_iter.key().ToString());
  ASSERT_EQ(test::EncodeInt(6U), merge_output_iter.value().ToString());
  merge_output_iter.Next();
  ASSERT_FALSE(merge_output_iter.Valid());

  // make sure that we're passing user keys into the filter
  ASSERT_EQ("a", filter_->last_merge_operand_key());
}
// When a snapshot still points at the merge operands, the compaction filter
// must leave all of them in place.
TEST_F(MergeHelperTest, DontFilterMergeOperandsBeforeSnapshotTest) {
  merge_op_ = MergeOperators::CreateUInt64AddOperator();
  filter_.reset(new test::FilterNumber(5U));

  // Queues one merge operand for user key "a".
  const auto add_operand = [&](uint64_t seq, uint64_t operand) {
    AddKeyVal("a", seq, kTypeMerge, test::EncodeInt(operand));
  };
  add_operand(31, 5U);
  add_operand(30, 5U);
  add_operand(29, 2U);
  add_operand(28, 1U);
  add_operand(27, 3U);
  add_operand(26, 5U);
  add_operand(25, 5U);
  AddKeyVal("b", 24, kTypeValue, test::EncodeInt(5U));

  ASSERT_OK(Run(15, true, 32));
  ASSERT_TRUE(iter_->Valid());

  MergeOutputIterator output(merge_helper_.get());
  output.SeekToFirst();
  // Nothing was filtered: the result keeps the newest sequence number (31)
  // and sums every operand, 5 + 5 + 2 + 1 + 3 + 5 + 5.
  ASSERT_EQ(test::KeyStr("a", 31, kTypeValue), output.key().ToString());
  ASSERT_EQ(test::EncodeInt(26U), output.value().ToString());
  output.Next();
  ASSERT_FALSE(output.Valid());
}
} // namespace ROCKSDB_NAMESPACE
// Test entry point: installs the stack trace handler, initialises gtest from
// the command line and returns the result of running every registered test.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

@ -1,621 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
#include <assert.h>
#include <iostream>
#include <memory>
#include "db/db_impl/db_impl.h"
#include "db/dbformat.h"
#include "db/write_batch_internal.h"
#include "port/stack_trace.h"
#include "rocksdb/cache.h"
#include "rocksdb/comparator.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/merge_operator.h"
#include "rocksdb/utilities/db_ttl.h"
#include "test_util/testharness.h"
#include "util/coding.h"
#include "utilities/merge_operators.h"
namespace ROCKSDB_NAMESPACE {
// Set by main(): true when at least one extra command-line argument is given.
// runTest() forwards it as the `test_compaction` argument of testCounters().
bool use_compression;
// Empty gtest fixture that groups the merge tests in this file.
class MergeTest : public testing::Test {};
// Number of CountMergeOperator::Merge() invocations since the last reset.
size_t num_merge_operator_calls;

// Zeroes the Merge() invocation counter.
void resetNumMergeOperatorCalls() {
  num_merge_operator_calls = size_t{0};
}

// Number of CountMergeOperator::PartialMergeMulti() invocations since the
// last reset.
size_t num_partial_merge_calls;

// Zeroes the PartialMergeMulti() invocation counter.
void resetNumPartialMergeCalls() {
  num_partial_merge_calls = size_t{0};
}
// Merge operator that delegates to the UInt64Add operator while counting how
// often Merge() and PartialMergeMulti() are invoked (see the global counters
// num_merge_operator_calls and num_partial_merge_calls).
class CountMergeOperator : public AssociativeMergeOperator {
 public:
  CountMergeOperator()
      : mergeOperator_(MergeOperators::CreateUInt64AddOperator()) {}

  // Counts the call, then either adopts `value` verbatim (no existing value)
  // or lets the wrapped operator combine `*existing_value` with `value`.
  bool Merge(const Slice& key, const Slice* existing_value, const Slice& value,
             std::string* new_value, Logger* logger) const override {
    assert(new_value->empty());
    ++num_merge_operator_calls;
    if (existing_value != nullptr) {
      return mergeOperator_->PartialMerge(key, *existing_value, value,
                                          new_value, logger);
    }
    new_value->assign(value.data(), value.size());
    return true;
  }

  // Counts the call and forwards the whole operand list to the wrapped
  // operator.
  bool PartialMergeMulti(const Slice& key,
                         const std::deque<Slice>& operand_list,
                         std::string* new_value,
                         Logger* logger) const override {
    assert(new_value->empty());
    ++num_partial_merge_calls;
    const bool merged = mergeOperator_->PartialMergeMulti(key, operand_list,
                                                          new_value, logger);
    return merged;
  }

  const char* Name() const override { return "UInt64AddOperator"; }

 private:
  // The operator that performs the actual arithmetic.
  std::shared_ptr<MergeOperator> mergeOperator_;
};
// Env wrapper around Env::Default() that counts NowNanos() calls so tests can
// assert that the merge path does not query the clock.
class EnvMergeTest : public EnvWrapper {
 public:
  EnvMergeTest() : EnvWrapper(Env::Default()) {}

  static const char* kClassName() { return "MergeEnv"; }
  const char* Name() const override { return kClassName(); }

  // Records the call and defers to the wrapped Env.
  uint64_t NowNanos() override {
    now_nanos_count_ += 1;
    return target()->NowNanos();
  }

  // Number of NowNanos() calls; OpenDb() resets it after opening the DB.
  static uint64_t now_nanos_count_;

  // Lazily created process-wide instance returned by GetInstance().
  static std::unique_ptr<EnvMergeTest> singleton_;

  // Returns the shared instance, creating it on first use.
  static EnvMergeTest* GetInstance() {
    if (!singleton_) {
      singleton_.reset(new EnvMergeTest);
    }
    return singleton_.get();
  }
};

uint64_t EnvMergeTest::now_nanos_count_ = 0;
std::unique_ptr<EnvMergeTest> EnvMergeTest::singleton_;
// Destroys any database at `dbname`, then opens a fresh one configured with
// CountMergeOperator and the call-counting EnvMergeTest environment.
//
// dbname:                path of the database to (re)create.
// ttl:                   open through DBWithTTL::Open instead of DB::Open.
// max_successive_merges: copied into Options::max_successive_merges.
//
// Returns a shared_ptr that owns the opened DB. Failures are reported through
// EXPECT_OK/assert; if the open fails and assert() is compiled out, the
// returned pointer is null.
std::shared_ptr<DB> OpenDb(const std::string& dbname, const bool ttl = false,
                           const size_t max_successive_merges = 0) {
  // Start from nullptr so that a failed open never hands an indeterminate
  // pointer to the shared_ptr below when assert() is disabled.
  DB* db = nullptr;
  Options options;
  options.create_if_missing = true;
  options.merge_operator = std::make_shared<CountMergeOperator>();
  options.max_successive_merges = max_successive_merges;
  options.env = EnvMergeTest::GetInstance();
  EXPECT_OK(DestroyDB(dbname, Options()));
  Status s;
  if (ttl) {
    DBWithTTL* db_with_ttl = nullptr;
    s = DBWithTTL::Open(options, dbname, &db_with_ttl);
    db = db_with_ttl;
  } else {
    s = DB::Open(options, dbname, &db);
  }
  EXPECT_OK(s);
  assert(s.ok());
  // Allowed to call NowNanos during DB creation (in GenerateRawUniqueId() for
  // session ID)
  EnvMergeTest::now_nanos_count_ = 0;
  return std::shared_ptr<DB>(db);
}
// Imagine we are maintaining a set of uint64 counters.
// Each counter has a distinct name, and we would like
// to support four high-level operations:
// set, add, get and remove.
// This is a quick implementation without a Merge operation.
class Counters {
 protected:
  std::shared_ptr<DB> db_;

  WriteOptions put_option_;
  ReadOptions get_option_;
  WriteOptions delete_option_;

  // Value reported by get() for keys that are not found in the DB.
  uint64_t default_;

 public:
  explicit Counters(std::shared_ptr<DB> db, uint64_t defaultCount = 0)
      : db_(db),
        put_option_(),
        get_option_(),
        delete_option_(),
        default_(defaultCount) {
    assert(db_);
  }

  virtual ~Counters() {}

  // Public interface of Counters.
  // All four functions return false
  // if the underlying DB operation failed.

  // Mapped to a rocksdb Put: stores `value` under `key` as a fixed 64-bit
  // encoding.
  bool set(const std::string& key, uint64_t value) {
    // just treat the internal rep of int64 as the string
    char buf[sizeof(value)];
    EncodeFixed64(buf, value);
    Slice slice(buf, sizeof(value));
    auto s = db_->Put(put_option_, key, slice);

    if (s.ok()) {
      return true;
    } else {
      std::cerr << s.ToString() << std::endl;
      return false;
    }
  }

  // Mapped to a rocksdb Delete.
  bool remove(const std::string& key) {
    auto s = db_->Delete(delete_option_, key);

    if (s.ok()) {
      return true;
    } else {
      std::cerr << s.ToString() << std::endl;
      return false;
    }
  }

  // Mapped to a rocksdb Get. On success writes the decoded counter (or the
  // default value when the key is not found) to `*value`.
  bool get(const std::string& key, uint64_t* value) {
    std::string str;
    auto s = db_->Get(get_option_, key, &str);

    if (s.IsNotFound()) {
      // return default value if not found;
      *value = default_;
      return true;
    } else if (s.ok()) {
      // deserialization
      if (str.size() != sizeof(uint64_t)) {
        std::cerr << "value corruption\n";
        return false;
      }
      *value = DecodeFixed64(&str[0]);
      return true;
    } else {
      std::cerr << s.ToString() << std::endl;
      return false;
    }
  }

  // 'add' is implemented as get -> modify -> set
  // An alternative is a single merge operation, see MergeBasedCounters
  virtual bool add(const std::string& key, uint64_t value) {
    uint64_t base = default_;
    return get(key, &base) && set(key, base + value);
  }

  // Convenience functions for testing. Each one performs the operation
  // unconditionally and then checks the result. The call is deliberately kept
  // outside assert(): when NDEBUG is defined the argument of assert() is not
  // evaluated, which would silently skip the operation itself.
  void assert_set(const std::string& key, uint64_t value) {
    const bool ok = set(key, value);
    assert(ok);
    if (!ok) exit(1);  // Also fail when assert() is compiled out.
  }

  void assert_remove(const std::string& key) {
    const bool ok = remove(key);
    assert(ok);
    if (!ok) exit(1);  // Also fail when assert() is compiled out.
  }

  uint64_t assert_get(const std::string& key) {
    uint64_t value = default_;
    const bool ok = get(key, &value);
    assert(ok);
    if (!ok) exit(1);  // Also fail when assert() is compiled out.
    return value;
  }

  void assert_add(const std::string& key, uint64_t value) {
    const bool ok = add(key, value);
    assert(ok);
    if (!ok) exit(1);  // Also fail when assert() is compiled out.
  }
};
// Counters variant whose add() issues a single Merge instead of the
// get -> modify -> set sequence used by the base class.
class MergeBasedCounters : public Counters {
 private:
  WriteOptions merge_option_;  // write options used for every Merge

 public:
  explicit MergeBasedCounters(std::shared_ptr<DB> db, uint64_t defaultCount = 0)
      : Counters(db, defaultCount), merge_option_() {}

  // Mapped to a rocksdb Merge: sends `value`, encoded as a fixed 64-bit
  // integer, as a merge operand for `key`. Returns false (after printing the
  // status to stderr) if the Merge fails.
  bool add(const std::string& key, uint64_t value) override {
    char buf[sizeof(uint64_t)];
    EncodeFixed64(buf, value);
    auto s = db_->Merge(merge_option_, key, Slice(buf, sizeof(uint64_t)));
    if (!s.ok()) {
      std::cerr << s.ToString() << std::endl;
      return false;
    }
    return true;
  }
};
// Walks every entry of `db` with a fresh iterator and asserts that the scan
// finished without an iterator error. The entries themselves are not
// inspected; the printing code is kept below, disabled.
void dumpDb(DB* db) {
  std::unique_ptr<Iterator> cursor(db->NewIterator(ReadOptions()));
  cursor->SeekToFirst();
  while (cursor->Valid()) {
    // uint64_t value = DecodeFixed64(cursor->value().data());
    // std::cout << cursor->key().ToString() << ": " << value << std::endl;
    cursor->Next();
  }
  assert(cursor->status().ok());  // Check for any errors found during the scan
}
// Exercises set/get/remove/add on `counters` and checks the resulting values.
//
// counters:        counter implementation under test.
// db:              the database backing `counters`, used for flush, compaction
//                  and dumpDb().
// test_compaction: when true, flush after the writes and finish with a full
//                  CompactRange followed by a re-check of both counters.
void testCounters(Counters& counters, DB* db, bool test_compaction) {
  FlushOptions flush_opts;
  flush_opts.wait = true;

  counters.assert_set("a", 1);
  if (test_compaction) {
    ASSERT_OK(db->Flush(flush_opts));
  }
  ASSERT_EQ(counters.assert_get("a"), 1);

  counters.assert_remove("b");
  // A counter that does not exist reads back as the default value, 0.
  ASSERT_EQ(counters.assert_get("b"), 0);

  counters.assert_add("a", 2);
  if (test_compaction) {
    ASSERT_OK(db->Flush(flush_opts));
  }
  // 1 + 2 = 3
  ASSERT_EQ(counters.assert_get("a"), 3);
  dumpDb(db);

  // Add 1, 2, ..., 49 to "b" and track the expected total alongside.
  uint64_t sum = 0;
  for (int step = 1; step <= 49; ++step) {
    counters.assert_add("b", step);
    sum += step;
  }
  ASSERT_EQ(counters.assert_get("b"), sum);
  dumpDb(db);

  if (!test_compaction) {
    return;
  }
  ASSERT_OK(db->Flush(flush_opts));
  ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  dumpDb(db);
  ASSERT_EQ(counters.assert_get("a"), 3);
  ASSERT_EQ(counters.assert_get("b"), sum);
}
// Runs a SetOptions call, a manual CompactRange and a non-blocking Flush
// concurrently, using sync points to force a particular interleaving of their
// VersionSet::LogAndApply steps, and then checks that a counter incremented in
// between reads back as 1.
//
// counters: used for a single add("test-key", 1).
// db:       database under test; cast to DBImpl for SetOptions/CompactRange.
void testCountersWithFlushAndCompaction(Counters& counters, DB* db) {
ASSERT_OK(db->Put({}, "1", "1"));
ASSERT_OK(db->Flush(FlushOptions()));
// Hands out 0, 1, 2, ... in the order in which threads first call it; the
// thread_local keeps the id stable for each thread afterwards.
// NOTE(review): the callbacks below treat id 0 as the SetOptions thread, 1 as
// the compaction thread and 2 as the flush thread (going by the sync point
// names) — confirm that the dependencies guarantee this arrival order.
std::atomic<int> cnt{0};
const auto get_thread_id = [&cnt]() {
thread_local int thread_id{cnt++};
return thread_id;
};
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
// Threads 1 and 2 announce themselves just before waiting as writers.
SyncPoint::GetInstance()->SetCallBack(
"VersionSet::LogAndApply:BeforeWriterWaiting", [&](void* /*arg*/) {
int thread_id = get_thread_id();
if (1 == thread_id) {
TEST_SYNC_POINT(
"testCountersWithFlushAndCompaction::bg_compact_thread:0");
} else if (2 == thread_id) {
TEST_SYNC_POINT(
"testCountersWithFlushAndCompaction::bg_flush_thread:0");
}
});
// Thread 0 passes through two sync points at the manifest-write step.
SyncPoint::GetInstance()->SetCallBack(
"VersionSet::LogAndApply:WriteManifest", [&](void* /*arg*/) {
int thread_id = get_thread_id();
if (0 == thread_id) {
TEST_SYNC_POINT(
"testCountersWithFlushAndCompaction::set_options_thread:0");
TEST_SYNC_POINT(
"testCountersWithFlushAndCompaction::set_options_thread:1");
}
});
// Expected to run on thread 2 only. `arg` is the mutex held by the caller; it
// is released around the two sync points and re-acquired before returning.
SyncPoint::GetInstance()->SetCallBack(
"VersionSet::LogAndApply:WakeUpAndDone", [&](void* arg) {
auto* mutex = reinterpret_cast<InstrumentedMutex*>(arg);
mutex->AssertHeld();
int thread_id = get_thread_id();
ASSERT_EQ(2, thread_id);
mutex->Unlock();
TEST_SYNC_POINT(
"testCountersWithFlushAndCompaction::bg_flush_thread:1");
TEST_SYNC_POINT(
"testCountersWithFlushAndCompaction::bg_flush_thread:2");
mutex->Lock();
});
// NOTE(review): each pair presumably makes the second sync point wait until
// the first has been reached — confirm against SyncPoint::LoadDependency.
// The "testCountersWithCompactionAndFlush:BeforeCompact" name differs from the
// function name but matches the TEST_SYNC_POINT below, so the two strings must
// be kept identical.
SyncPoint::GetInstance()->LoadDependency({
{"testCountersWithFlushAndCompaction::set_options_thread:0",
"testCountersWithCompactionAndFlush:BeforeCompact"},
{"testCountersWithFlushAndCompaction::bg_compact_thread:0",
"testCountersWithFlushAndCompaction:BeforeIncCounters"},
{"testCountersWithFlushAndCompaction::bg_flush_thread:0",
"testCountersWithFlushAndCompaction::set_options_thread:1"},
{"testCountersWithFlushAndCompaction::bg_flush_thread:1",
"testCountersWithFlushAndCompaction:BeforeVerification"},
{"testCountersWithFlushAndCompaction:AfterGet",
"testCountersWithFlushAndCompaction::bg_flush_thread:2"},
});
SyncPoint::GetInstance()->EnableProcessing();
port::Thread set_options_thread([&]() {
ASSERT_OK(reinterpret_cast<DBImpl*>(db)->SetOptions(
{{"disable_auto_compactions", "false"}}));
});
TEST_SYNC_POINT("testCountersWithCompactionAndFlush:BeforeCompact");
port::Thread compact_thread([&]() {
ASSERT_OK(reinterpret_cast<DBImpl*>(db)->CompactRange(
CompactRangeOptions(), db->DefaultColumnFamily(), nullptr, nullptr));
});
TEST_SYNC_POINT("testCountersWithFlushAndCompaction:BeforeIncCounters");
// The return value of add() is not checked here; the Get below verifies the
// outcome instead.
counters.add("test-key", 1);
// wait = false: Flush() is asked not to block until the flush completes.
FlushOptions flush_opts;
flush_opts.wait = false;
ASSERT_OK(db->Flush(flush_opts));
TEST_SYNC_POINT("testCountersWithFlushAndCompaction:BeforeVerification");
std::string expected;
PutFixed64(&expected, 1);
std::string actual;
Status s = db->Get(ReadOptions(), "test-key", &actual);
TEST_SYNC_POINT("testCountersWithFlushAndCompaction:AfterGet");
// Join both helper threads before asserting on the Get result.
set_options_thread.join();
compact_thread.join();
ASSERT_OK(s);
ASSERT_EQ(expected, actual);
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
}
// Adds 1..num_merges to counter "z" and checks, after every add and every
// get, how many times the merge operator was invoked.
//
// counters:       counter implementation under test.
// max_num_merges: the max_successive_merges value the DB was opened with.
// num_merges:     number of add() calls to issue.
void testSuccessiveMerge(Counters& counters, size_t max_num_merges,
                         size_t num_merges) {
  counters.assert_remove("z");

  const size_t cycle = max_num_merges + 1;
  uint64_t sum = 0;
  for (size_t i = 1; i <= num_merges; ++i) {
    const size_t position_in_cycle = i % cycle;

    // The write itself is expected to invoke the operator only on every
    // `cycle`-th add, and then `cycle` times.
    resetNumMergeOperatorCalls();
    counters.assert_add("z", i);
    sum += i;
    const size_t expected_calls_on_add = (position_in_cycle == 0) ? cycle : 0;
    ASSERT_EQ(num_merge_operator_calls, expected_calls_on_add);

    // The read is expected to invoke the operator once per operand added
    // since the last such point.
    resetNumMergeOperatorCalls();
    ASSERT_EQ(counters.assert_get("z"), sum);
    ASSERT_EQ(num_merge_operator_calls, position_in_cycle);
  }
}
// Checks how often PartialMergeMulti is invoked when `count` merge operands
// are flushed and compacted, with and without a preceding Put.
//
// counters:  counter implementation used to issue the merges.
// db:        database backing `counters`.
// max_merge: the max_successive_merges value the DB was opened with.
// min_merge: minimum number of operands for which a partial merge is expected.
// count:     number of merge operands written per test case.
void testPartialMerge(Counters* counters, DB* db, size_t max_merge,
                      size_t min_merge, size_t count) {
  FlushOptions flush_opts;
  flush_opts.wait = true;

  // Test case 1: partial merge should be called when the number of merge
  // operands exceeds the threshold.
  uint64_t expected_sum = 0;
  resetNumPartialMergeCalls();
  for (size_t operand = 1; operand <= count; ++operand) {
    counters->assert_add("b", operand);
    expected_sum += operand;
  }
  ASSERT_OK(db->Flush(flush_opts));
  ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_EQ(expected_sum, counters->assert_get("b"));
  if (count <= max_merge) {
    // if count >= min_merge, then partial merge should be called once.
    ASSERT_EQ((count >= min_merge), (num_partial_merge_calls == 1));
  } else {
    // in this case, FullMerge should be called instead.
    ASSERT_EQ(num_partial_merge_calls, 0U);
  }

  // Test case 2: partial merge should not be called when a put is found.
  resetNumPartialMergeCalls();
  expected_sum = 0;
  ASSERT_OK(db->Put(ROCKSDB_NAMESPACE::WriteOptions(), "c", "10"));
  for (size_t operand = 1; operand <= count; ++operand) {
    counters->assert_add("c", operand);
    expected_sum += operand;
  }
  ASSERT_OK(db->Flush(flush_opts));
  ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_EQ(expected_sum, counters->assert_get("c"));
  ASSERT_EQ(num_partial_merge_calls, 0U);

  // NowNanos was previously called in MergeHelper::FilterMerge(), which
  // harmed performance.
  ASSERT_EQ(EnvMergeTest::now_nanos_count_, 0U);
}
// Writes `num_merges` merge operands for one key in a single WriteBatch and
// checks how many merge operator calls happen on the write and on the
// following read.
//
// db:             database under test.
// max_num_merges: the max_successive_merges value the DB was opened with.
// num_merges:     number of operands in the batch; must exceed max_num_merges.
void testSingleBatchSuccessiveMerge(DB* db, size_t max_num_merges,
                                    size_t num_merges) {
  ASSERT_GT(num_merges, max_num_merges);

  Slice key("BatchSuccessiveMerge");
  uint64_t merge_value = 1;
  char buf[sizeof(merge_value)];
  EncodeFixed64(buf, merge_value);
  Slice merge_value_slice(buf, sizeof(merge_value));

  // Create the batch
  WriteBatch batch;
  for (size_t i = 0; i < num_merges; ++i) {
    ASSERT_OK(batch.Merge(key, merge_value_slice));
  }

  // Apply to memtable and count the number of merges
  resetNumMergeOperatorCalls();
  ASSERT_OK(db->Write(WriteOptions(), &batch));
  ASSERT_EQ(
      num_merge_operator_calls,
      static_cast<size_t>(num_merges - (num_merges % (max_num_merges + 1))));

  // Get the value
  resetNumMergeOperatorCalls();
  std::string get_value_str;
  ASSERT_OK(db->Get(ReadOptions(), key, &get_value_str));
  // Checked with a test assertion rather than assert(): assert() disappears
  // under NDEBUG, and decoding a value of the wrong size would read past the
  // end of the string.
  ASSERT_EQ(get_value_str.size(), sizeof(uint64_t));
  uint64_t get_value = DecodeFixed64(&get_value_str[0]);
  ASSERT_EQ(get_value, num_merges * merge_value);
  ASSERT_EQ(num_merge_operator_calls,
            static_cast<size_t>((num_merges % (max_num_merges + 1))));
}
void runTest(const std::string& dbname, const bool use_ttl = false) {
{
auto db = OpenDb(dbname, use_ttl);
{
Counters counters(db, 0);
testCounters(counters, db.get(), true);
}
{
MergeBasedCounters counters(db, 0);
testCounters(counters, db.get(), use_compression);
}
}
ASSERT_OK(DestroyDB(dbname, Options()));
{
size_t max_merge = 5;
auto db = OpenDb(dbname, use_ttl, max_merge);
MergeBasedCounters counters(db, 0);
testCounters(counters, db.get(), use_compression);
testSuccessiveMerge(counters, max_merge, max_merge * 2);
testSingleBatchSuccessiveMerge(db.get(), 5, 7);
ASSERT_OK(db->Close());
ASSERT_OK(DestroyDB(dbname, Options()));
}
{
size_t max_merge = 100;
// Min merge is hard-coded to 2.
uint32_t min_merge = 2;
for (uint32_t count = min_merge - 1; count <= min_merge + 1; count++) {
auto db = OpenDb(dbname, use_ttl, max_merge);
MergeBasedCounters counters(db, 0);
testPartialMerge(&counters, db.get(), max_merge, min_merge, count);
ASSERT_OK(db->Close());
ASSERT_OK(DestroyDB(dbname, Options()));
}
{
auto db = OpenDb(dbname, use_ttl, max_merge);
MergeBasedCounters counters(db, 0);
testPartialMerge(&counters, db.get(), max_merge, min_merge,
min_merge * 10);
ASSERT_OK(db->Close());
ASSERT_OK(DestroyDB(dbname, Options()));
}
}
{
{
auto db = OpenDb(dbname);
MergeBasedCounters counters(db, 0);
counters.add("test-key", 1);
counters.add("test-key", 1);
counters.add("test-key", 1);
ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr));
}
DB* reopen_db;
ASSERT_OK(DB::Open(Options(), dbname, &reopen_db));
std::string value;
ASSERT_NOK(reopen_db->Get(ReadOptions(), "test-key", &value));
delete reopen_db;
ASSERT_OK(DestroyDB(dbname, Options()));
}
/* Temporary remove this test
{
std::cout << "Test merge-operator not set after reopen (recovery case)\n";
{
auto db = OpenDb(dbname);
MergeBasedCounters counters(db, 0);
counters.add("test-key", 1);
counters.add("test-key", 1);
counters.add("test-key", 1);
}
DB* reopen_db;
ASSERT_TRUE(DB::Open(Options(), dbname, &reopen_db).IsInvalidArgument());
}
*/
}
// Runs the full merge suite against a regular (non-TTL) database.
TEST_F(MergeTest, MergeDbTest) {
  const std::string db_path = test::PerThreadDBPath("merge_testdb");
  runTest(db_path);
}
// Runs the full merge suite against a TTL database.
TEST_F(MergeTest, MergeDbTtlTest) {
  const std::string db_path = test::PerThreadDBPath("merge_testdbttl");
  const bool use_ttl = true;
  runTest(db_path, use_ttl);
}
// Runs the concurrent SetOptions / CompactRange / Flush scenario on a fresh
// database and destroys the database afterwards.
TEST_F(MergeTest, MergeWithCompactionAndFlush) {
  const std::string dbname =
      test::PerThreadDBPath("merge_with_compaction_and_flush");

  auto db = OpenDb(dbname);
  {
    MergeBasedCounters counters(db, 0);
    testCountersWithFlushAndCompaction(counters, db.get());
  }
  // Drop the last reference to the DB before destroying it on disk.
  db.reset();

  ASSERT_OK(DestroyDB(dbname, Options()));
}
} // namespace ROCKSDB_NAMESPACE
// Test entry point. Passing any extra command-line argument turns on
// use_compression; the flag is set before gtest parses the arguments.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::use_compression = (argc > 1);

  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

@ -1,317 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <stdlib.h>
#include <algorithm>
#include <map>
#include <string>
#include <vector>
#include "db/db_impl/db_impl.h"
#include "db/db_test_util.h"
#include "db/version_set.h"
#include "db/write_batch_internal.h"
#include "file/filename.h"
#include "port/stack_trace.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/transaction_log.h"
#include "test_util/sync_point.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
// Fixture for the obsolete-file tests. The DB keeps its WAL files in a
// dedicated directory (wal_dir_) so that they can be counted separately.
class ObsoleteFilesTest : public DBTestBase {
 public:
  ObsoleteFilesTest()
      : DBTestBase("obsolete_files_test", /*env_do_fsync=*/true),
        wal_dir_(dbname_ + "/wal_files") {}

  // Puts `numkeys` entries with consecutive integer keys starting at
  // `startkey`; each value equals its key.
  void AddKeys(int numkeys, int startkey) {
    WriteOptions options;
    options.sync = false;
    for (int i = startkey; i < (numkeys + startkey); i++) {
      std::string temp = std::to_string(i);
      Slice key(temp);
      Slice value(temp);
      ASSERT_OK(db_->Put(options, key, value));
    }
  }

  // Writes `numFiles` batches of `numKeysPerFile` keys, flushing the memtable
  // and waiting for background work after each batch.
  void createLevel0Files(int numFiles, int numKeysPerFile) {
    int startKey = 0;
    for (int i = 0; i < numFiles; i++) {
      AddKeys(numKeysPerFile, startKey);
      startKey += numKeysPerFile;
      ASSERT_OK(dbfull()->TEST_FlushMemTable());
      ASSERT_OK(
          dbfull()->TEST_WaitForCompact());  // wait for background flush (flush
                                             // is also a kind of compaction).
    }
  }

  // Asserts that `dir` contains exactly the given numbers of WAL, table and
  // manifest files. Names that ParseFileName rejects are ignored.
  void CheckFileTypeCounts(const std::string& dir, int required_log,
                           int required_sst, int required_manifest) {
    std::vector<std::string> filenames;
    ASSERT_OK(env_->GetChildren(dir, &filenames));

    int log_cnt = 0;
    int sst_cnt = 0;
    int manifest_cnt = 0;
    // Iterate by const reference to avoid copying every file name.
    for (const auto& file : filenames) {
      uint64_t number;
      FileType type;
      if (ParseFileName(file, &number, &type)) {
        log_cnt += (type == kWalFile);
        sst_cnt += (type == kTableFile);
        manifest_cnt += (type == kDescriptorFile);
      }
    }
    ASSERT_EQ(required_log, log_cnt);
    ASSERT_EQ(required_sst, sst_cnt);
    ASSERT_EQ(required_manifest, manifest_cnt);
  }

  // Destroys the DB and reopens it with the options used by every test in
  // this fixture.
  void ReopenDB() {
    Options options = CurrentOptions();
    options.create_if_missing = true;
    // Trigger compaction when the number of level 0 files reaches 2.
    options.level0_file_num_compaction_trigger = 2;
    options.disable_auto_compactions = false;
    options.delete_obsolete_files_period_micros = 0;  // always do full purge
    options.enable_thread_tracking = true;
    options.write_buffer_size = 1024 * 1024 * 1000;
    options.target_file_size_base = 1024 * 1024 * 1000;
    options.max_bytes_for_level_base = 1024 * 1024 * 1000;
    options.WAL_ttl_seconds = 300;     // Used to test log files
    options.WAL_size_limit_MB = 1024;  // Used to test log files
    options.wal_dir = wal_dir_;

    // Note: the following prevents an otherwise harmless data race between the
    // test setup code (AddBlobFile) in ObsoleteFilesTest.BlobFiles and the
    // periodic stat dumping thread.
    options.stats_dump_period_sec = 0;

    Destroy(options);
    Reopen(options);
  }

  // Directory that holds the WAL files: "<dbname_>/wal_files".
  const std::string wal_dir_;
};
// Runs a user-thread FindObsoleteFiles/PurgeObsoleteFiles pass interleaved
// with the background compaction's own obsolete-file handling, and checks via
// callbacks that every deletion reports OK and that no files remain grabbed
// for purge when the DB is closed.
TEST_F(ObsoleteFilesTest, RaceForObsoleteFileDeletion) {
ReopenDB();
SyncPoint::GetInstance()->DisableProcessing();
// NOTE(review): each pair presumably makes the second sync point wait until
// the first has been reached — confirm against SyncPoint::LoadDependency. If
// so, the user thread only scans after the background job has found its
// obsolete files, and only purges after the background job has purged.
SyncPoint::GetInstance()->LoadDependency({
{"DBImpl::BackgroundCallCompaction:FoundObsoleteFiles",
"ObsoleteFilesTest::RaceForObsoleteFileDeletion:1"},
{"DBImpl::BackgroundCallCompaction:PurgedObsoleteFiles",
"ObsoleteFilesTest::RaceForObsoleteFileDeletion:2"},
});
// `arg` is treated as the Status of the deletion; it must be OK.
SyncPoint::GetInstance()->SetCallBack(
"DBImpl::DeleteObsoleteFileImpl:AfterDeletion", [&](void* arg) {
Status* p_status = reinterpret_cast<Status*>(arg);
ASSERT_OK(*p_status);
});
// `arg` is treated as the set of file numbers grabbed for purge; it must be
// empty at this point.
SyncPoint::GetInstance()->SetCallBack(
"DBImpl::CloseHelper:PendingPurgeFinished", [&](void* arg) {
std::unordered_set<uint64_t>* files_grabbed_for_purge_ptr =
reinterpret_cast<std::unordered_set<uint64_t>*>(arg);
ASSERT_TRUE(files_grabbed_for_purge_ptr->empty());
});
SyncPoint::GetInstance()->EnableProcessing();
createLevel0Files(2, 50000);
// Exactly one WAL file and nothing else is expected in the WAL directory.
CheckFileTypeCounts(wal_dir_, 1, 0, 0);
port::Thread user_thread([this]() {
JobContext jobCxt(0);
TEST_SYNC_POINT("ObsoleteFilesTest::RaceForObsoleteFileDeletion:1");
// FindObsoleteFiles is called with the DB mutex held.
dbfull()->TEST_LockMutex();
dbfull()->FindObsoleteFiles(&jobCxt, true /* force=true */,
false /* no_full_scan=false */);
dbfull()->TEST_UnlockMutex();
TEST_SYNC_POINT("ObsoleteFilesTest::RaceForObsoleteFileDeletion:2");
dbfull()->PurgeObsoleteFiles(jobCxt);
jobCxt.Clean();
});
user_thread.join();
}
// Changes an option four times while file deletions are disabled, re-enables
// deletions, closes the DB and expects exactly two options files to remain in
// the DB directory.
TEST_F(ObsoleteFilesTest, DeleteObsoleteOptionsFile) {
  ReopenDB();

  createLevel0Files(2, 50000);
  CheckFileTypeCounts(wal_dir_, 1, 0, 0);

  ASSERT_OK(dbfull()->DisableFileDeletions());
  // Toggle paranoid_file_checks: "true" on even rounds, "false" on odd ones.
  for (int round = 0; round < 4; ++round) {
    const bool enable = (round % 2 == 0);
    ASSERT_OK(dbfull()->SetOptions(
        dbfull()->DefaultColumnFamily(),
        {{"paranoid_file_checks", enable ? "true" : "false"}}));
  }
  ASSERT_OK(dbfull()->EnableFileDeletions(true /* force */));

  Close();

  // Count the options files left behind in the DB directory.
  std::vector<std::string> children;
  ASSERT_OK(env_->GetChildren(dbname_, &children));
  int opts_file_count = 0;
  for (const auto& name : children) {
    uint64_t file_num;
    Slice dummy_info_log_name_prefix;
    FileType type;
    WalFileType log_type;
    const bool parsed = ParseFileName(
        name, &file_num, dummy_info_log_name_prefix, &type, &log_type);
    if (parsed && type == kOptionsFile) {
      ++opts_file_count;
    }
  }
  ASSERT_EQ(2, opts_file_count);
}
// Registers one obsolete and one live blob file directly with the version
// set, then checks that FindObsoleteFiles/PurgeObsoleteFiles select exactly
// the obsolete one (plus an older file injected into the full-scan list) for
// deletion while keeping the live file and a pending one.
TEST_F(ObsoleteFilesTest, BlobFiles) {
ReopenDB();
VersionSet* const versions = dbfull()->GetVersionSet();
assert(versions);
assert(versions->GetColumnFamilySet());
ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
assert(cfd);
const ImmutableCFOptions* const ioptions = cfd->ioptions();
assert(ioptions);
assert(!ioptions->cf_paths.empty());
// All blob file names below are built relative to the first column family
// path.
const std::string& path = ioptions->cf_paths.front().path;
// Add an obsolete blob file.
constexpr uint64_t first_blob_file_number = 234;
versions->AddObsoleteBlobFile(first_blob_file_number, path);
// Add a live blob file.
Version* const version = cfd->current();
assert(version);
VersionStorageInfo* const storage_info = version->storage_info();
assert(storage_info);
constexpr uint64_t second_blob_file_number = 456;
constexpr uint64_t second_total_blob_count = 100;
constexpr uint64_t second_total_blob_bytes = 2000000;
constexpr char second_checksum_method[] = "CRC32B";
constexpr char second_checksum_value[] = "\x6d\xbd\xf2\x3a";
auto shared_meta = SharedBlobFileMetaData::Create(
second_blob_file_number, second_total_blob_count, second_total_blob_bytes,
second_checksum_method, second_checksum_value);
constexpr uint64_t second_garbage_blob_count = 0;
constexpr uint64_t second_garbage_blob_bytes = 0;
auto meta = BlobFileMetaData::Create(
std::move(shared_meta), BlobFileMetaData::LinkedSsts(),
second_garbage_blob_count, second_garbage_blob_bytes);
storage_info->AddBlobFile(std::move(meta));
// Check for obsolete files and make sure the first blob file is picked up
// and grabbed for purge. The second blob file should be on the live list.
constexpr int job_id = 0;
JobContext job_context{job_id};
// FindObsoleteFiles is called with the DB mutex held.
dbfull()->TEST_LockMutex();
constexpr bool force_full_scan = false;
dbfull()->FindObsoleteFiles(&job_context, force_full_scan);
dbfull()->TEST_UnlockMutex();
ASSERT_TRUE(job_context.HaveSomethingToDelete());
ASSERT_EQ(job_context.blob_delete_files.size(), 1);
ASSERT_EQ(job_context.blob_delete_files[0].GetBlobFileNumber(),
first_blob_file_number);
// Held by reference, so the check after the purge below observes the updated
// set.
const auto& files_grabbed_for_purge =
dbfull()->TEST_GetFilesGrabbedForPurge();
ASSERT_NE(files_grabbed_for_purge.find(first_blob_file_number),
files_grabbed_for_purge.end());
ASSERT_EQ(job_context.blob_live.size(), 1);
ASSERT_EQ(job_context.blob_live[0], second_blob_file_number);
// Hack the job context a bit by adding a few files to the full scan
// list and adjusting the pending file number. We add the two files
// above as well as two additional ones, where one is old
// and should be cleaned up, and the other is still pending.
constexpr uint64_t old_blob_file_number = 123;
constexpr uint64_t pending_blob_file_number = 567;
job_context.full_scan_candidate_files.emplace_back(
BlobFileName(old_blob_file_number), path);
job_context.full_scan_candidate_files.emplace_back(
BlobFileName(first_blob_file_number), path);
job_context.full_scan_candidate_files.emplace_back(
BlobFileName(second_blob_file_number), path);
job_context.full_scan_candidate_files.emplace_back(
BlobFileName(pending_blob_file_number), path);
job_context.min_pending_output = pending_blob_file_number;
// Purge obsolete files and make sure we purge the old file and the first file
// (and keep the second file and the pending file).
std::vector<std::string> deleted_files;
// Record the name of every file about to be deleted whose name contains
// ".blob".
SyncPoint::GetInstance()->SetCallBack(
"DBImpl::DeleteObsoleteFileImpl::BeforeDeletion", [&](void* arg) {
const std::string* file = static_cast<std::string*>(arg);
assert(file);
constexpr char blob_extension[] = ".blob";
if (file->find(blob_extension) != std::string::npos) {
deleted_files.emplace_back(*file);
}
});
SyncPoint::GetInstance()->EnableProcessing();
dbfull()->PurgeObsoleteFiles(job_context);
job_context.Clean();
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
// After the purge the first blob file must no longer be marked as grabbed.
ASSERT_EQ(files_grabbed_for_purge.find(first_blob_file_number),
files_grabbed_for_purge.end());
// Sort so the comparison does not depend on deletion order; the expected list
// is written in ascending file-number order (123, then 234).
std::sort(deleted_files.begin(), deleted_files.end());
const std::vector<std::string> expected_deleted_files{
BlobFileName(path, old_blob_file_number),
BlobFileName(path, first_blob_file_number)};
ASSERT_EQ(deleted_files, expected_deleted_files);
}
} // namespace ROCKSDB_NAMESPACE
// Test entry point: installs the stack trace handler, initialises gtest,
// registers custom objects from the command line and runs every test.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  RegisterCustomObjects(argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

@ -1,110 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include <string>
#include "db/db_impl/db_impl.h"
#include "db/db_test_util.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"
#include "test_util/testharness.h"
namespace ROCKSDB_NAMESPACE {
class OptionsFileTest : public testing::Test {
public:
OptionsFileTest() : dbname_(test::PerThreadDBPath("options_file_test")) {}
std::string dbname_;
};
namespace {
// Scans the directory of `db` for options files.
//
// db:                  database whose directory (db->GetName()) is listed.
// filename_history:    receives the name of every options file found; names
//                      already present are kept.
// options_files_count: set to the number of options files found by this call.
void UpdateOptionsFiles(DB* db,
                        std::unordered_set<std::string>* filename_history,
                        int* options_files_count) {
  std::vector<std::string> filenames;
  EXPECT_OK(db->GetEnv()->GetChildren(db->GetName(), &filenames));
  uint64_t number;
  FileType type;
  *options_files_count = 0;
  // Iterate by const reference to avoid copying every file name.
  for (const auto& filename : filenames) {
    if (ParseFileName(filename, &number, &type) && type == kOptionsFile) {
      filename_history->insert(filename);
      (*options_files_count)++;
    }
  }
}
// Verifies that the options files currently on disk are the latest ones:
// every name in `past_filenames` that no longer exists in the directory of
// `db` must compare less than each options file name that does exist.
void VerifyOptionsFileName(
    DB* db, const std::unordered_set<std::string>& past_filenames) {
  std::vector<std::string> filenames;
  std::unordered_set<std::string> current_filenames;
  EXPECT_OK(db->GetEnv()->GetChildren(db->GetName(), &filenames));
  uint64_t number;
  FileType type;
  // All loops iterate by const reference to avoid copying the file names.
  for (const auto& filename : filenames) {
    if (ParseFileName(filename, &number, &type) && type == kOptionsFile) {
      current_filenames.insert(filename);
    }
  }
  for (const auto& past_filename : past_filenames) {
    // Names that still exist need no ordering check.
    if (current_filenames.find(past_filename) != current_filenames.end()) {
      continue;
    }

    for (const auto& filename : current_filenames) {
      ASSERT_GT(filename, past_filename);
    }
  }
}
} // anonymous namespace
// Reopens the database repeatedly and checks that each open leaves one or two
// options files on disk, and that those files are always the newest ones.
TEST_F(OptionsFileTest, NumberOfOptionsFiles) {
  constexpr int kReopenCount = 20;

  Options opt;
  opt.create_if_missing = true;
  ASSERT_OK(DestroyDB(dbname_, opt));

  std::unordered_set<std::string> filename_history;
  for (int round = 0; round != kReopenCount; ++round) {
    DB* db = nullptr;
    ASSERT_OK(DB::Open(opt, dbname_, &db));

    int num_options_files = 0;
    UpdateOptionsFiles(db, &filename_history, &num_options_files);
    ASSERT_GT(num_options_files, 0);
    ASSERT_LE(num_options_files, 2);

    // Make sure we always keep the latest option files.
    VerifyOptionsFileName(db, filename_history);
    delete db;
  }
}
// Round-trips options file names: a name built by OptionsFileName or
// TempOptionsFileName must parse back to the same number and the matching
// file type.
TEST_F(OptionsFileTest, OptionsFileName) {
  uint64_t number;
  FileType type;

  // Regular options file.
  const uint64_t kOptionsFileNum = 12345;
  const auto options_file_name = OptionsFileName("", kOptionsFileNum);
  ASSERT_TRUE(ParseFileName(options_file_name, &number, &type, nullptr));
  ASSERT_EQ(type, kOptionsFile);
  ASSERT_EQ(number, kOptionsFileNum);

  // Temporary options file: also has to carry the temp-file suffix.
  const uint64_t kTempOptionsFileNum = 54352;
  const auto temp_options_file_name =
      TempOptionsFileName("", kTempOptionsFileNum);
  ASSERT_TRUE(ParseFileName(temp_options_file_name, &number, &type, nullptr));
  ASSERT_NE(temp_options_file_name.find(kTempFileNameSuffix),
            std::string::npos);
  ASSERT_EQ(type, kTempFile);
  ASSERT_EQ(number, kTempOptionsFileNum);
}
} // namespace ROCKSDB_NAMESPACE
// Test entry point. The tests are skipped (exit code 0) only when both NDEBUG
// and OS_WIN are defined; every other configuration runs them.
int main(int argc, char** argv) {
#if defined(NDEBUG) && defined(OS_WIN)
  return 0;
#else
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
#endif  // defined(NDEBUG) && defined(OS_WIN)
}

File diff suppressed because it is too large Load Diff

@ -1,229 +0,0 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
//
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/periodic_task_scheduler.h"
#include "db/db_test_util.h"
#include "env/composite_env_wrapper.h"
#include "test_util/mock_time_env.h"
namespace ROCKSDB_NAMESPACE {
// Fixture for the periodic task scheduler tests. It wraps the base fixture's
// Env in a CompositeEnvWrapper that uses a MockSystemClock, so tests can
// advance time manually through `mock_clock_`.
class PeriodicTaskSchedulerTest : public DBTestBase {
 public:
  PeriodicTaskSchedulerTest()
      : DBTestBase("periodic_task_scheduler_test", /*env_do_fsync=*/true) {
    mock_clock_ = std::make_shared<MockSystemClock>(env_->GetSystemClock());
    mock_env_.reset(new CompositeEnvWrapper(env_, mock_clock_));
  }

 protected:
  // Env handed to the DBs under test; shares `mock_clock_`.
  std::unique_ptr<Env> mock_env_;
  // Manually advanced clock used by `mock_env_` and by the overridden timer.
  std::shared_ptr<MockSystemClock> mock_clock_;

  // Registers a sync point callback that points the scheduler passed as the
  // callback argument at the mock clock. The callback only fires in tests
  // that enable sync point processing.
  void SetUp() override {
    mock_clock_->InstallTimedWaitFixCallback();
    SyncPoint::GetInstance()->SetCallBack(
        "DBImpl::StartPeriodicTaskScheduler:Init", [this](void* arg) {
          // `arg` is a void*; static_cast is the appropriate cast back to
          // the object type.
          auto* scheduler = static_cast<PeriodicTaskScheduler*>(arg);
          scheduler->TEST_OverrideTimer(mock_clock_.get());
        });
  }
};
// Single-DB scenario: counts, through sync point callbacks, how often the
// stats-dump, stats-persist and info-log-flush tasks start while the mock
// clock is advanced, then disables and re-enables tasks via SetDBOptions().
TEST_F(PeriodicTaskSchedulerTest, Basic) {
constexpr unsigned int kPeriodSec = 10;
Close();
Options options;
options.stats_dump_period_sec = kPeriodSec;
options.stats_persist_period_sec = kPeriodSec;
options.create_if_missing = true;
options.env = mock_env_.get();
// Each callback below bumps a counter when its sync point is hit.
int dump_st_counter = 0;
SyncPoint::GetInstance()->SetCallBack("DBImpl::DumpStats:StartRunning",
[&](void*) { dump_st_counter++; });
int pst_st_counter = 0;
SyncPoint::GetInstance()->SetCallBack("DBImpl::PersistStats:StartRunning",
[&](void*) { pst_st_counter++; });
int flush_info_log_counter = 0;
SyncPoint::GetInstance()->SetCallBack(
"DBImpl::FlushInfoLog:StartRunning",
[&](void*) { flush_info_log_counter++; });
SyncPoint::GetInstance()->EnableProcessing();
Reopen(options);
ASSERT_EQ(kPeriodSec, dbfull()->GetDBOptions().stats_dump_period_sec);
ASSERT_EQ(kPeriodSec, dbfull()->GetDBOptions().stats_persist_period_sec);
// The first sleep below is kPeriodSec - 1 seconds, so the period must be > 1.
ASSERT_GT(kPeriodSec, 1u);
dbfull()->TEST_WaitForPeriodicTaskRun([&] {
mock_clock_->MockSleepForSeconds(static_cast<int>(kPeriodSec) - 1);
});
const PeriodicTaskScheduler& scheduler =
dbfull()->TEST_GetPeriodicTaskScheduler();
// Three tasks are registered and each is expected to have run once.
ASSERT_EQ(3, scheduler.TEST_GetValidTaskNum());
ASSERT_EQ(1, dump_st_counter);
ASSERT_EQ(1, pst_st_counter);
ASSERT_EQ(1, flush_info_log_counter);
// Every further full period is expected to add one run per task.
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(kPeriodSec)); });
ASSERT_EQ(2, dump_st_counter);
ASSERT_EQ(2, pst_st_counter);
ASSERT_EQ(2, flush_info_log_counter);
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(kPeriodSec)); });
ASSERT_EQ(3, dump_st_counter);
ASSERT_EQ(3, pst_st_counter);
ASSERT_EQ(3, flush_info_log_counter);
// Disable scheduler with SetOption
ASSERT_OK(dbfull()->SetDBOptions(
{{"stats_dump_period_sec", "0"}, {"stats_persist_period_sec", "0"}}));
ASSERT_EQ(0u, dbfull()->GetDBOptions().stats_dump_period_sec);
ASSERT_EQ(0u, dbfull()->GetDBOptions().stats_persist_period_sec);
// Info log flush should still run.
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(kPeriodSec)); });
ASSERT_EQ(3, dump_st_counter);
ASSERT_EQ(3, pst_st_counter);
ASSERT_EQ(4, flush_info_log_counter);
ASSERT_EQ(1u, scheduler.TEST_GetValidTaskNum());
// Re-enable one task
ASSERT_OK(dbfull()->SetDBOptions({{"stats_dump_period_sec", "5"}}));
ASSERT_EQ(5u, dbfull()->GetDBOptions().stats_dump_period_sec);
ASSERT_EQ(0u, dbfull()->GetDBOptions().stats_persist_period_sec);
ASSERT_EQ(2, scheduler.TEST_GetValidTaskNum());
// Only the dump and info-log-flush counters are expected to move now.
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(kPeriodSec)); });
ASSERT_EQ(4, dump_st_counter);
ASSERT_EQ(3, pst_st_counter);
ASSERT_EQ(5, flush_info_log_counter);
Close();
}
// Opens kInstanceNum DBs on the same mock Env and checks that the dump and
// persist counters grow by one per open instance per period, including after
// half of the instances have been deleted.
TEST_F(PeriodicTaskSchedulerTest, MultiInstances) {
constexpr int kPeriodSec = 5;
const int kInstanceNum = 10;
Close();
Options options;
options.stats_dump_period_sec = kPeriodSec;
options.stats_persist_period_sec = kPeriodSec;
options.create_if_missing = true;
options.env = mock_env_.get();
// Counters are bumped from sync point callbacks.
int dump_st_counter = 0;
SyncPoint::GetInstance()->SetCallBack("DBImpl::DumpStats:2",
[&](void*) { dump_st_counter++; });
int pst_st_counter = 0;
SyncPoint::GetInstance()->SetCallBack("DBImpl::PersistStats:StartRunning",
[&](void*) { pst_st_counter++; });
SyncPoint::GetInstance()->EnableProcessing();
auto dbs = std::vector<DB*>(kInstanceNum);
for (int i = 0; i < kInstanceNum; i++) {
ASSERT_OK(
DB::Open(options, test::PerThreadDBPath(std::to_string(i)), &(dbs[i])));
}
// The last instance is used to drive waits; it stays open until the end.
auto dbi = static_cast_with_check<DBImpl>(dbs[kInstanceNum - 1]);
const PeriodicTaskScheduler& scheduler = dbi->TEST_GetPeriodicTaskScheduler();
// Three tasks per instance are expected to be visible through this scheduler.
ASSERT_EQ(kInstanceNum * 3, scheduler.TEST_GetValidTaskNum());
int expected_run = kInstanceNum;
dbi->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
ASSERT_EQ(expected_run, dump_st_counter);
ASSERT_EQ(expected_run, pst_st_counter);
expected_run += kInstanceNum;
dbi->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
ASSERT_EQ(expected_run, dump_st_counter);
ASSERT_EQ(expected_run, pst_st_counter);
expected_run += kInstanceNum;
dbi->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
ASSERT_EQ(expected_run, dump_st_counter);
ASSERT_EQ(expected_run, pst_st_counter);
// Delete the first half of the instances; only the rest should keep running.
int half = kInstanceNum / 2;
for (int i = 0; i < half; i++) {
delete dbs[i];
}
// Two more periods for each of the remaining instances.
expected_run += (kInstanceNum - half) * 2;
dbi->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
dbi->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
ASSERT_EQ(expected_run, dump_st_counter);
ASSERT_EQ(expected_run, pst_st_counter);
// Close and free the remaining instances.
for (int i = half; i < kInstanceNum; i++) {
ASSERT_OK(dbs[i]->Close());
delete dbs[i];
}
}
// Opens two DBs with periodic stats tasks enabled, each on its own
// CompositeEnvWrapper instance (both sharing the fixture's mock clock), and
// checks that both can be opened and closed cleanly.
TEST_F(PeriodicTaskSchedulerTest, MultiEnv) {
  constexpr int kDumpPeriodSec = 5;
  constexpr int kPersistPeriodSec = 10;
  Close();

  // First DB: the fixture's DB, reopened on the fixture's mock Env.
  Options options1;
  options1.stats_dump_period_sec = kDumpPeriodSec;
  options1.stats_persist_period_sec = kPersistPeriodSec;
  options1.create_if_missing = true;
  options1.env = mock_env_.get();
  Reopen(options1);

  // Second DB: a separate Env object wrapping Env::Default() with the same
  // mock clock.
  std::unique_ptr<Env> mock_env2(
      new CompositeEnvWrapper(Env::Default(), mock_clock_));
  Options options2;
  options2.stats_dump_period_sec = kDumpPeriodSec;
  options2.stats_persist_period_sec = kPersistPeriodSec;
  options2.create_if_missing = true;
  // This used to assign options1.env, which had no effect because options1
  // was already consumed by Reopen(); the second DB then silently ran on the
  // default Env and the test never covered a second Env.
  options2.env = mock_env2.get();

  std::string dbname = test::PerThreadDBPath("multi_env_test");
  DB* db;
  ASSERT_OK(DB::Open(options2, dbname, &db));
  ASSERT_OK(db->Close());
  delete db;
  Close();
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack trace handler, initializes
// GoogleTest with the command line, and returns the result of running all
// tests.
int main(int argc, char** argv) {
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

File diff suppressed because it is too large Load Diff

@ -1,894 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#ifndef GFLAGS
#include <cstdio>
// Fallback entry point compiled only when GFLAGS is not defined: prints a
// notice to stderr and exits with 0 so the binary counts as skipped rather
// than failed.
int main() {
fprintf(stderr, "Please install gflags to run this test... Skipping...\n");
return 0;
}
#else
#include <algorithm>
#include <iostream>
#include <vector>
#include "db/db_impl/db_impl.h"
#include "monitoring/histogram.h"
#include "rocksdb/comparator.h"
#include "rocksdb/db.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/memtablerep.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/system_clock.h"
#include "rocksdb/table.h"
#include "test_util/testharness.h"
#include "util/cast_util.h"
#include "util/coding.h"
#include "util/gflags_compat.h"
#include "util/random.h"
#include "util/stop_watch.h"
#include "util/string_util.h"
#include "utilities/merge_operators.h"
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
// Command-line flags for this test binary; they are parsed in main() via
// ParseCommandLineFlags().
DEFINE_bool(trigger_deadlock, false,
"issue delete in range scan to trigger PrefixHashMap deadlock");
DEFINE_int32(bucket_count, 100000, "number of buckets");
DEFINE_uint64(num_locks, 10001, "number of locks");
DEFINE_bool(random_prefix, false, "randomize prefix");
DEFINE_uint64(total_prefixes, 100000, "total number of prefixes");
DEFINE_uint64(items_per_prefix, 1, "total number of values per prefix");
// The flags below are copied into Options by PrefixTest::OpenDb() or used
// when building memtable factories and values in the tests.
DEFINE_int64(write_buffer_size, 33554432, "");
DEFINE_int32(max_write_buffer_number, 2, "");
DEFINE_int32(min_write_buffer_number_to_merge, 1, "");
DEFINE_int32(skiplist_height, 4, "");
DEFINE_double(memtable_prefix_bloom_size_ratio, 0.1, "");
DEFINE_int32(memtable_huge_page_size, 2 * 1024 * 1024, "");
DEFINE_int32(value_size, 40, "");
DEFINE_bool(enable_print, false, "Print options generated to console.");
// Path to the database on file system
const std::string kDbName =
ROCKSDB_NAMESPACE::test::PerThreadDBPath("prefix_test");
namespace ROCKSDB_NAMESPACE {
// Logical key used throughout these tests: a 64-bit prefix followed by a
// 64-bit ordering component within that prefix.
struct TestKey {
  uint64_t prefix;  // first component; compared first
  uint64_t sorted;  // second component; orders keys sharing a prefix

  TestKey(uint64_t _prefix, uint64_t _sorted)
      : prefix{_prefix}, sorted{_sorted} {}
};
// Serializes `test_key` into `s` (overwriting its previous contents) as two
// fixed 64-bit fields, prefix first, and returns a Slice viewing `s`. The
// returned Slice is only valid while `s` is alive and unmodified.
inline Slice TestKeyToSlice(std::string& s, const TestKey& test_key) {
  s.clear();
  for (const uint64_t field : {test_key.prefix, test_key.sorted}) {
    PutFixed64(&s, field);
  }
  const char* const bytes = s.c_str();
  const size_t length = s.size();
  return Slice(bytes, length);
}
// Decodes a TestKey from the first 16 bytes of `slice`: bytes [0, 8) are the
// prefix and bytes [8, 16) the sorted component. The slice size is not
// checked here, so the caller must pass a full-length key.
inline const TestKey SliceToTestKey(const Slice& slice) {
  const char* const raw = slice.data();
  const uint64_t decoded_prefix = DecodeFixed64(raw);
  const uint64_t decoded_sorted = DecodeFixed64(raw + 8);
  return TestKey(decoded_prefix, decoded_sorted);
}
class TestKeyComparator : public Comparator {
public:
// Compare needs to be aware of the possibility of a and/or b is
// prefix only
int Compare(const Slice& a, const Slice& b) const override {
const TestKey kkey_a = SliceToTestKey(a);
const TestKey kkey_b = SliceToTestKey(b);
const TestKey* key_a = &kkey_a;
const TestKey* key_b = &kkey_b;
if (key_a->prefix != key_b->prefix) {
if (key_a->prefix < key_b->prefix) return -1;
if (key_a->prefix > key_b->prefix) return 1;
} else {
EXPECT_TRUE(key_a->prefix == key_b->prefix);
// note, both a and b could be prefix only
if (a.size() != b.size()) {
// one of them is prefix
EXPECT_TRUE(
(a.size() == sizeof(uint64_t) && b.size() == sizeof(TestKey)) ||
(b.size() == sizeof(uint64_t) && a.size() == sizeof(TestKey)));
if (a.size() < b.size()) return -1;
if (a.size() > b.size()) return 1;
} else {
// both a and b are prefix
if (a.size() == sizeof(uint64_t)) {
return 0;
}
// both a and b are whole key
EXPECT_TRUE(a.size() == sizeof(TestKey) && b.size() == sizeof(TestKey));
if (key_a->sorted < key_b->sorted) return -1;
if (key_a->sorted > key_b->sorted) return 1;
if (key_a->sorted == key_b->sorted) return 0;
}
}
return 0;
}
bool operator()(const TestKey& a, const TestKey& b) const {
std::string sa, sb;
return Compare(TestKeyToSlice(sa, a), TestKeyToSlice(sb, b)) < 0;
}
const char* Name() const override { return "TestKeyComparator"; }
void FindShortestSeparator(std::string* /*start*/,
const Slice& /*limit*/) const override {}
void FindShortSuccessor(std::string* /*key*/) const override {}
};
namespace {
// Writes `value` under the serialized key (prefix, suffix). A non-OK Put
// status is reported through ASSERT_OK.
void PutKey(DB* db, WriteOptions write_options, uint64_t prefix,
            uint64_t suffix, const Slice& value) {
  std::string encoded;
  const TestKey logical_key(prefix, suffix);
  ASSERT_OK(
      db->Put(write_options, TestKeyToSlice(encoded, logical_key), value));
}
// Writes `value` under the serialized form of `test_key`. A non-OK Put
// status is reported through ASSERT_OK.
void PutKey(DB* db, WriteOptions write_options, const TestKey& test_key,
            const Slice& value) {
  std::string encoded;
  ASSERT_OK(db->Put(write_options, TestKeyToSlice(encoded, test_key), value));
}
// Issues a Merge of `value` on the serialized form of `test_key`. A non-OK
// Merge status is reported through ASSERT_OK.
void MergeKey(DB* db, WriteOptions write_options, const TestKey& test_key,
              const Slice& value) {
  std::string encoded;
  ASSERT_OK(
      db->Merge(write_options, TestKeyToSlice(encoded, test_key), value));
}
// Deletes the entry stored under the serialized form of `test_key`. A non-OK
// Delete status is reported through ASSERT_OK.
void DeleteKey(DB* db, WriteOptions write_options, const TestKey& test_key) {
  std::string encoded;
  ASSERT_OK(db->Delete(write_options, TestKeyToSlice(encoded, test_key)));
}
// Positions `iter` with Seek() on the serialized key (prefix, suffix).
void SeekIterator(Iterator* iter, uint64_t prefix, uint64_t suffix) {
  std::string encoded;
  const TestKey target(prefix, suffix);
  iter->Seek(TestKeyToSlice(encoded, target));
}
// Sentinel returned by Get() below when the key does not exist.
const std::string kNotFoundResult = "NOT_FOUND";

// Point lookup of the key (prefix, suffix). Returns the stored value on
// success, kNotFoundResult when the status is NotFound, and the status
// string for any other error.
std::string Get(DB* db, const ReadOptions& read_options, uint64_t prefix,
                uint64_t suffix) {
  std::string encoded_key;
  const Slice key = TestKeyToSlice(encoded_key, TestKey(prefix, suffix));

  std::string value;
  Status status = db->Get(read_options, key, &value);
  if (status.IsNotFound()) {
    return kNotFoundResult;
  }
  if (!status.ok()) {
    return status.ToString();
  }
  return value;
}
// Prefix extractor whose domain is exactly the keys starting with a fixed
// prefix; every in-domain key maps to that same prefix.
//
// The prefix bytes are copied into the object. Storing only a Slice view (as
// before) would dangle whenever the caller's buffer does not outlive the
// transform.
class SamePrefixTransform : public SliceTransform {
 private:
  const std::string prefix_;  // owned copy of the fixed prefix
  std::string name_;          // "rocksdb.SamePrefix." + prefix

 public:
  explicit SamePrefixTransform(const Slice& prefix)
      : prefix_(prefix.ToString()),
        name_("rocksdb.SamePrefix." + prefix.ToString()) {}

  const char* Name() const override { return name_.c_str(); }

  // Returns the fixed prefix; `src` must be in the domain. The returned
  // Slice views memory owned by this object.
  Slice Transform(const Slice& src) const override {
    assert(InDomain(src));
    return Slice(prefix_);
  }

  // True iff `src` is at least as long as the prefix and starts with it.
  bool InDomain(const Slice& src) const override {
    if (src.size() >= prefix_.size()) {
      return Slice(src.data(), prefix_.size()) == Slice(prefix_);
    }
    return false;
  }

  // The only value Transform() can produce is the prefix itself.
  bool InRange(const Slice& dst) const override {
    return dst == Slice(prefix_);
  }

  bool FullLengthEnabled(size_t* /*len*/) const override { return false; }
};
} // anonymous namespace
// Fixture for prefix-seek tests. It owns an Options object configured with
// TestKeyComparator and can step through a series of hash-based memtable
// factories with FirstOption()/NextOptions().
class PrefixTest : public testing::Test {
 public:
  // Opens kDbName with `options` after applying the command-line flags, a
  // fixed 8-byte prefix extractor, and a block-based table with a bloom
  // filter and whole-key filtering disabled. An open failure is reported
  // through EXPECT_OK.
  std::shared_ptr<DB> OpenDb() {
    // Start from nullptr so that a failed open can never hand an
    // indeterminate pointer to the shared_ptr below.
    DB* db = nullptr;

    options.create_if_missing = true;
    options.write_buffer_size = FLAGS_write_buffer_size;
    options.max_write_buffer_number = FLAGS_max_write_buffer_number;
    options.min_write_buffer_number_to_merge =
        FLAGS_min_write_buffer_number_to_merge;

    options.memtable_prefix_bloom_size_ratio =
        FLAGS_memtable_prefix_bloom_size_ratio;
    options.memtable_huge_page_size = FLAGS_memtable_huge_page_size;

    options.prefix_extractor.reset(NewFixedPrefixTransform(8));
    BlockBasedTableOptions bbto;
    bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
    bbto.whole_key_filtering = false;
    options.table_factory.reset(NewBlockBasedTableFactory(bbto));
    options.allow_concurrent_memtable_write = false;

    Status s = DB::Open(options, kDbName, &db);
    EXPECT_OK(s);
    return std::shared_ptr<DB>(db);
  }

  // Rewinds the configuration cursor; follow with NextOptions().
  void FirstOption() { option_config_ = kBegin; }

  // Advances to the next memtable configuration and installs its factory in
  // `options`. Returns false once all configurations have been visited.
  bool NextOptions(int bucket_count) {
    // skip some options
    option_config_++;
    if (option_config_ < kEnd) {
      options.prefix_extractor.reset(NewFixedPrefixTransform(8));
      switch (option_config_) {
        case kHashSkipList:
          options.memtable_factory.reset(
              NewHashSkipListRepFactory(bucket_count, FLAGS_skiplist_height));
          return true;
        case kHashLinkList:
          options.memtable_factory.reset(
              NewHashLinkListRepFactory(bucket_count));
          return true;
        case kHashLinkListHugePageTlb:
          options.memtable_factory.reset(
              NewHashLinkListRepFactory(bucket_count, 2 * 1024 * 1024));
          return true;
        case kHashLinkListTriggerSkipList:
          options.memtable_factory.reset(
              NewHashLinkListRepFactory(bucket_count, 0, 3));
          return true;
        default:
          return false;
      }
    }
    return false;
  }

  // The comparator is allocated here and released in the destructor;
  // `options.comparator` is a raw, non-owning pointer.
  PrefixTest() : option_config_(kBegin) {
    options.comparator = new TestKeyComparator();
  }
  ~PrefixTest() override { delete options.comparator; }

 protected:
  // Memtable configurations visited by NextOptions(), in order. kBegin and
  // kEnd are sentinels.
  enum OptionConfig {
    kBegin,
    kHashSkipList,
    kHashLinkList,
    kHashLinkListHugePageTlb,
    kHashLinkListTriggerSkipList,
    kEnd
  };
  int option_config_;  // current position in OptionConfig
  Options options;     // options used by OpenDb()
};
// Uses SamePrefixTransform("HHKB") as the prefix extractor and checks that
// seeking to keys that do not start with "HHKB" (i.e. for which InDomain()
// returns false) still yields a valid iterator after a flush.
TEST(SamePrefixTest, InDomainTest) {
DB* db;
Options options;
options.create_if_missing = true;
options.prefix_extractor.reset(new SamePrefixTransform("HHKB"));
BlockBasedTableOptions bbto;
bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
bbto.whole_key_filtering = false;
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
WriteOptions write_options;
ReadOptions read_options;
// Case 1: a mix of keys with and without the "HHKB" prefix; seek to the one
// without it and verify key and value.
{
ASSERT_OK(DestroyDB(kDbName, Options()));
ASSERT_OK(DB::Open(options, kDbName, &db));
ASSERT_OK(db->Put(write_options, "HHKB pro2", "Mar 24, 2006"));
ASSERT_OK(db->Put(write_options, "HHKB pro2 Type-S", "June 29, 2011"));
ASSERT_OK(db->Put(write_options, "Realforce 87u", "idk"));
ASSERT_OK(db->Flush(FlushOptions()));
std::string result;
auto db_iter = db->NewIterator(ReadOptions());
db_iter->Seek("Realforce 87u");
ASSERT_TRUE(db_iter->Valid());
ASSERT_OK(db_iter->status());
ASSERT_EQ(db_iter->key(), "Realforce 87u");
ASSERT_EQ(db_iter->value(), "idk");
delete db_iter;
delete db;
ASSERT_OK(DestroyDB(kDbName, Options()));
}
// Case 2: no key starts with "HHKB"; the seek must still land on a valid
// entry.
{
ASSERT_OK(DB::Open(options, kDbName, &db));
ASSERT_OK(db->Put(write_options, "pikachu", "1"));
ASSERT_OK(db->Put(write_options, "Meowth", "1"));
ASSERT_OK(db->Put(write_options, "Mewtwo", "idk"));
ASSERT_OK(db->Flush(FlushOptions()));
std::string result;
auto db_iter = db->NewIterator(ReadOptions());
db_iter->Seek("Mewtwo");
ASSERT_TRUE(db_iter->Valid());
ASSERT_OK(db_iter->status());
delete db_iter;
delete db;
ASSERT_OK(DestroyDB(kDbName, Options()));
}
}
// For bucket counts 1 and 2 and every memtable configuration offered by
// NextOptions(), inserts keys step by step and checks Seek/Next positioning
// and point lookups after each step. Values are named after their key, e.g.
// "v16" is stored under (prefix 1, sorted 6).
TEST_F(PrefixTest, TestResult) {
for (int num_buckets = 1; num_buckets <= 2; num_buckets++) {
FirstOption();
while (NextOptions(num_buckets)) {
std::cout << "*** Mem table: " << options.memtable_factory->Name()
<< " number of buckets: " << num_buckets << std::endl;
ASSERT_OK(DestroyDB(kDbName, Options()));
auto db = OpenDb();
WriteOptions write_options;
ReadOptions read_options;
// 1. Insert one row.
Slice v16("v16");
PutKey(db.get(), write_options, 1, 6, v16);
std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
SeekIterator(iter.get(), 1, 6);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v16 == iter->value());
// Seeking to a smaller key in the same prefix lands on the only row.
SeekIterator(iter.get(), 1, 5);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v16 == iter->value());
SeekIterator(iter.get(), 1, 5);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v16 == iter->value());
iter->Next();
ASSERT_TRUE(!iter->Valid());
ASSERT_OK(iter->status());
// Seeking to a larger prefix with no data is invalid but not an error.
SeekIterator(iter.get(), 2, 0);
ASSERT_TRUE(!iter->Valid());
ASSERT_OK(iter->status());
// Point lookups: only the inserted key is found.
ASSERT_EQ(v16.ToString(), Get(db.get(), read_options, 1, 6));
ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 1, 5));
ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 1, 7));
ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 0, 6));
ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 2, 6));
// 2. Insert an entry for the same prefix as the last entry in the bucket.
Slice v17("v17");
PutKey(db.get(), write_options, 1, 7, v17);
// A fresh iterator is created after every write step.
iter.reset(db->NewIterator(read_options));
SeekIterator(iter.get(), 1, 7);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v17 == iter->value());
SeekIterator(iter.get(), 1, 6);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v16 == iter->value());
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v17 == iter->value());
iter->Next();
ASSERT_TRUE(!iter->Valid());
ASSERT_OK(iter->status());
SeekIterator(iter.get(), 2, 0);
ASSERT_TRUE(!iter->Valid());
ASSERT_OK(iter->status());
// 3. Insert an entry for the same prefix as the head of the bucket.
Slice v15("v15");
PutKey(db.get(), write_options, 1, 5, v15);
iter.reset(db->NewIterator(read_options));
SeekIterator(iter.get(), 1, 7);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v17 == iter->value());
SeekIterator(iter.get(), 1, 5);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v15 == iter->value());
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v16 == iter->value());
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v17 == iter->value());
SeekIterator(iter.get(), 1, 5);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v15 == iter->value());
ASSERT_EQ(v15.ToString(), Get(db.get(), read_options, 1, 5));
ASSERT_EQ(v16.ToString(), Get(db.get(), read_options, 1, 6));
ASSERT_EQ(v17.ToString(), Get(db.get(), read_options, 1, 7));
// 4. Insert an entry with a larger prefix
Slice v22("v22");
PutKey(db.get(), write_options, 2, 2, v22);
iter.reset(db->NewIterator(read_options));
SeekIterator(iter.get(), 2, 2);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v22 == iter->value());
SeekIterator(iter.get(), 2, 0);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v22 == iter->value());
SeekIterator(iter.get(), 1, 5);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v15 == iter->value());
SeekIterator(iter.get(), 1, 7);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v17 == iter->value());
// 5. Insert an entry with a smaller prefix
Slice v02("v02");
PutKey(db.get(), write_options, 0, 2, v02);
iter.reset(db->NewIterator(read_options));
SeekIterator(iter.get(), 0, 2);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v02 == iter->value());
SeekIterator(iter.get(), 0, 0);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v02 == iter->value());
SeekIterator(iter.get(), 2, 0);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v22 == iter->value());
SeekIterator(iter.get(), 1, 5);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v15 == iter->value());
SeekIterator(iter.get(), 1, 7);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v17 == iter->value());
// 6. Insert to the beginning and the end of the first prefix
Slice v13("v13");
Slice v18("v18");
PutKey(db.get(), write_options, 1, 3, v13);
PutKey(db.get(), write_options, 1, 8, v18);
iter.reset(db->NewIterator(read_options));
SeekIterator(iter.get(), 1, 7);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v17 == iter->value());
// Walk the whole prefix 1 in order: 13, 15, 16, 17, 18.
SeekIterator(iter.get(), 1, 3);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v13 == iter->value());
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v15 == iter->value());
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v16 == iter->value());
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v17 == iter->value());
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v18 == iter->value());
SeekIterator(iter.get(), 0, 0);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v02 == iter->value());
SeekIterator(iter.get(), 2, 0);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v22 == iter->value());
// Final point lookups for every key inserted above.
ASSERT_EQ(v22.ToString(), Get(db.get(), read_options, 2, 2));
ASSERT_EQ(v02.ToString(), Get(db.get(), read_options, 0, 2));
ASSERT_EQ(v13.ToString(), Get(db.get(), read_options, 1, 3));
ASSERT_EQ(v15.ToString(), Get(db.get(), read_options, 1, 5));
ASSERT_EQ(v16.ToString(), Get(db.get(), read_options, 1, 6));
ASSERT_EQ(v17.ToString(), Get(db.get(), read_options, 1, 7));
ASSERT_EQ(v18.ToString(), Get(db.get(), read_options, 1, 8));
}
}
}
// With ReadOptions::prefix_same_as_start set, iteration started inside
// prefix 12345 must return exactly the keys of that prefix and then become
// invalid, and a seek past the last key of the prefix must be invalid too.
TEST_F(PrefixTest, PrefixValid) {
for (int num_buckets = 1; num_buckets <= 2; num_buckets++) {
FirstOption();
while (NextOptions(num_buckets)) {
std::cout << "*** Mem table: " << options.memtable_factory->Name()
<< " number of buckets: " << num_buckets << std::endl;
ASSERT_OK(DestroyDB(kDbName, Options()));
auto db = OpenDb();
WriteOptions write_options;
ReadOptions read_options;
// Insert keys with common prefix and one key with different
Slice v16("v16");
Slice v17("v17");
Slice v18("v18");
Slice v19("v19");
PutKey(db.get(), write_options, 12345, 6, v16);
PutKey(db.get(), write_options, 12345, 7, v17);
PutKey(db.get(), write_options, 12345, 8, v18);
PutKey(db.get(), write_options, 12345, 9, v19);
PutKey(db.get(), write_options, 12346, 8, v16);
ASSERT_OK(db->Flush(FlushOptions()));
// Delete the single key of the other prefix and flush again, so the
// deletion is flushed separately from the original write.
TestKey test_key(12346, 8);
std::string s;
ASSERT_OK(db->Delete(write_options, TestKeyToSlice(s, test_key)));
ASSERT_OK(db->Flush(FlushOptions()));
read_options.prefix_same_as_start = true;
std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
SeekIterator(iter.get(), 12345, 6);
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v16 == iter->value());
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v17 == iter->value());
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v18 == iter->value());
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_TRUE(v19 == iter->value());
// Stepping past the last key of the prefix invalidates the iterator.
iter->Next();
ASSERT_FALSE(iter->Valid());
ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 12346, 8));
// Verify seeking past the prefix won't return a result.
SeekIterator(iter.get(), 12345, 10);
ASSERT_TRUE(!iter->Valid());
ASSERT_OK(iter->status());
}
}
}
// For every memtable configuration: inserts FLAGS_total_prefixes prefixes
// with FLAGS_items_per_prefix keys each, then seeks existing and
// non-existing prefixes. Put/Seek latency and key-comparison counts are
// collected in histograms and printed; the assertions check the number of
// keys seen per prefix and that seeks to absent prefixes are invalid.
TEST_F(PrefixTest, DynamicPrefixIterator) {
// No FirstOption() call here: option_config_ starts at kBegin from the
// fixture constructor.
while (NextOptions(FLAGS_bucket_count)) {
std::cout << "*** Mem table: " << options.memtable_factory->Name()
<< std::endl;
ASSERT_OK(DestroyDB(kDbName, Options()));
auto db = OpenDb();
WriteOptions write_options;
ReadOptions read_options;
std::vector<uint64_t> prefixes;
for (uint64_t i = 0; i < FLAGS_total_prefixes; ++i) {
prefixes.push_back(i);
}
if (FLAGS_random_prefix) {
RandomShuffle(prefixes.begin(), prefixes.end());
}
HistogramImpl hist_put_time;
HistogramImpl hist_put_comparison;
// insert x random prefix, each with y continuous element.
for (auto prefix : prefixes) {
for (uint64_t sorted = 0; sorted < FLAGS_items_per_prefix; sorted++) {
TestKey test_key(prefix, sorted);
std::string s;
Slice key = TestKeyToSlice(s, test_key);
std::string value(FLAGS_value_size, 0);
// Reset the perf context so the comparison count covers this Put only.
get_perf_context()->Reset();
StopWatchNano timer(SystemClock::Default().get(), true);
ASSERT_OK(db->Put(write_options, key, value));
hist_put_time.Add(timer.ElapsedNanos());
hist_put_comparison.Add(get_perf_context()->user_key_comparison_count);
}
}
std::cout << "Put key comparison: \n"
<< hist_put_comparison.ToString() << "Put time: \n"
<< hist_put_time.ToString();
// test seek existing keys
HistogramImpl hist_seek_time;
HistogramImpl hist_seek_comparison;
std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
for (auto prefix : prefixes) {
// Start in the middle of the prefix and scan to its end.
TestKey test_key(prefix, FLAGS_items_per_prefix / 2);
std::string s;
Slice key = TestKeyToSlice(s, test_key);
std::string value = "v" + std::to_string(0);
get_perf_context()->Reset();
StopWatchNano timer(SystemClock::Default().get(), true);
auto key_prefix = options.prefix_extractor->Transform(key);
uint64_t total_keys = 0;
for (iter->Seek(key);
iter->Valid() && iter->key().starts_with(key_prefix); iter->Next()) {
if (FLAGS_trigger_deadlock) {
// NOTE(review): the Status returned by Delete() is ignored here.
std::cout << "Behold the deadlock!\n";
db->Delete(write_options, iter->key());
}
total_keys++;
}
hist_seek_time.Add(timer.ElapsedNanos());
hist_seek_comparison.Add(get_perf_context()->user_key_comparison_count);
// Keys from the starting position to the end of the prefix.
ASSERT_EQ(total_keys,
FLAGS_items_per_prefix - FLAGS_items_per_prefix / 2);
}
std::cout << "Seek key comparison: \n"
<< hist_seek_comparison.ToString() << "Seek time: \n"
<< hist_seek_time.ToString();
// test non-existing keys
HistogramImpl hist_no_seek_time;
HistogramImpl hist_no_seek_comparison;
// Prefixes at or above FLAGS_total_prefixes were never inserted.
for (auto prefix = FLAGS_total_prefixes;
prefix < FLAGS_total_prefixes + 10000; prefix++) {
TestKey test_key(prefix, 0);
std::string s;
Slice key = TestKeyToSlice(s, test_key);
get_perf_context()->Reset();
StopWatchNano timer(SystemClock::Default().get(), true);
iter->Seek(key);
hist_no_seek_time.Add(timer.ElapsedNanos());
hist_no_seek_comparison.Add(
get_perf_context()->user_key_comparison_count);
ASSERT_TRUE(!iter->Valid());
ASSERT_OK(iter->status());
}
std::cout << "non-existing Seek key comparison: \n"
<< hist_no_seek_comparison.ToString()
<< "non-existing Seek time: \n"
<< hist_no_seek_time.ToString();
}
}
// Randomized check of Next()/Prev() after a Seek: a std::map ordered by
// TestKeyComparator mirrors the expected DB contents, and a DB iterator and a
// map iterator are moved in lockstep and compared while the DB iterator stays
// inside the starting prefix. The outcome depends on the exact sequence of
// values drawn from `rnd`, which is seeded with 1.
TEST_F(PrefixTest, PrefixSeekModePrev) {
// Only for SkipListFactory
options.memtable_factory.reset(new SkipListFactory);
options.merge_operator = MergeOperators::CreatePutOperator();
options.write_buffer_size = 1024 * 1024;
Random rnd(1);
for (size_t m = 1; m < 100; m++) {
std::cout << "[" + std::to_string(m) + "]" + "*** Mem table: "
<< options.memtable_factory->Name() << std::endl;
ASSERT_OK(DestroyDB(kDbName, Options()));
auto db = OpenDb();
WriteOptions write_options;
ReadOptions read_options;
// entry_maps[k] holds the keys written in write round k; whole_map holds
// the expected visible contents of the DB.
std::map<TestKey, std::string, TestKeyComparator> entry_maps[3], whole_map;
for (uint64_t i = 0; i < 10; i++) {
int div = i % 3 + 1;
for (uint64_t j = 0; j < 10; j++) {
whole_map[TestKey(i, j)] = entry_maps[rnd.Uniform(div)][TestKey(i, j)] =
'v' + std::to_string(i) + std::to_string(j);
}
}
// Write each round with a random mix of Put and Merge, flushing after the
// first two rounds.
std::map<TestKey, std::string, TestKeyComparator> type_map;
for (size_t i = 0; i < 3; i++) {
for (auto& kv : entry_maps[i]) {
if (rnd.OneIn(3)) {
PutKey(db.get(), write_options, kv.first, kv.second);
type_map[kv.first] = "value";
} else {
MergeKey(db.get(), write_options, kv.first, kv.second);
type_map[kv.first] = "merge";
}
}
if (i < 2) {
ASSERT_OK(db->Flush(FlushOptions()));
}
}
// Randomly delete some keys from the first two rounds and drop them from
// the expected map.
for (size_t i = 0; i < 2; i++) {
for (auto& kv : entry_maps[i]) {
if (rnd.OneIn(10)) {
whole_map.erase(kv.first);
DeleteKey(db.get(), write_options, kv.first);
entry_maps[2][kv.first] = "delete";
}
}
}
if (FLAGS_enable_print) {
for (size_t i = 0; i < 3; i++) {
for (auto& kv : entry_maps[i]) {
std::cout << "[" << i << "]" << kv.first.prefix << kv.first.sorted
<< " " << kv.second + " " + type_map[kv.first] << std::endl;
}
}
}
std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
for (uint64_t prefix = 0; prefix < 10; prefix++) {
uint64_t start_suffix = rnd.Uniform(9);
SeekIterator(iter.get(), prefix, start_suffix);
auto it = whole_map.find(TestKey(prefix, start_suffix));
// Skip this prefix if the chosen start key was deleted above.
if (it == whole_map.end()) {
continue;
}
ASSERT_NE(it, whole_map.end());
ASSERT_TRUE(iter->Valid());
if (FLAGS_enable_print) {
std::cout << "round " << prefix
<< " iter: " << SliceToTestKey(iter->key()).prefix
<< SliceToTestKey(iter->key()).sorted
<< " | map: " << it->first.prefix << it->first.sorted << " | "
<< iter->value().ToString() << " " << it->second << std::endl;
}
ASSERT_EQ(iter->value(), it->second);
uint64_t stored_prefix = prefix;
// Take up to 9 random steps, forward or backward; always step forward
// when the map iterator is at the first element.
for (size_t k = 0; k < 9; k++) {
if (rnd.OneIn(2) || it == whole_map.begin()) {
iter->Next();
++it;
if (FLAGS_enable_print) {
std::cout << "Next >> ";
}
} else {
iter->Prev();
it--;
if (FLAGS_enable_print) {
std::cout << "Prev >> ";
}
}
// Stop comparing once the DB iterator is exhausted or leaves the prefix.
if (!iter->Valid() ||
SliceToTestKey(iter->key()).prefix != stored_prefix) {
break;
}
ASSERT_OK(iter->status());
stored_prefix = SliceToTestKey(iter->key()).prefix;
ASSERT_TRUE(iter->Valid());
ASSERT_NE(it, whole_map.end());
ASSERT_EQ(iter->value(), it->second);
if (FLAGS_enable_print) {
std::cout << "iter: " << SliceToTestKey(iter->key()).prefix
<< SliceToTestKey(iter->key()).sorted
<< " | map: " << it->first.prefix << it->first.sorted
<< " | " << iter->value().ToString() << " " << it->second
<< std::endl;
}
}
}
}
}
// Prev() after a Seek past the end of a prefix, with the data split across
// two flushed files. The test expects the iterator to land on "v13".
TEST_F(PrefixTest, PrefixSeekModePrev2) {
// Only for SkipListFactory
// test the case
// iter1 iter2
// | prefix | suffix | | prefix | suffix |
// | 1 | 1 | | 1 | 2 |
// | 1 | 3 | | 1 | 4 |
// | 2 | 1 | | 3 | 3 |
// | 2 | 2 | | 3 | 4 |
// after seek(15), iter1 will be at 21 and iter2 will be 33.
// Then if call Prev() in prefix mode where SeekForPrev(21) gets called,
// iter2 should turn to invalid state because of bloom filter.
options.memtable_factory.reset(new SkipListFactory);
options.write_buffer_size = 1024 * 1024;
std::string v13("v13");
ASSERT_OK(DestroyDB(kDbName, Options()));
auto db = OpenDb();
WriteOptions write_options;
ReadOptions read_options;
// First flushed batch: the "iter2" column of the table above.
PutKey(db.get(), write_options, TestKey(1, 2), "v12");
PutKey(db.get(), write_options, TestKey(1, 4), "v14");
PutKey(db.get(), write_options, TestKey(3, 3), "v33");
PutKey(db.get(), write_options, TestKey(3, 4), "v34");
ASSERT_OK(db->Flush(FlushOptions()));
ASSERT_OK(
static_cast_with_check<DBImpl>(db.get())->TEST_WaitForFlushMemTable());
// Second flushed batch: the "iter1" column of the table above.
PutKey(db.get(), write_options, TestKey(1, 1), "v11");
PutKey(db.get(), write_options, TestKey(1, 3), "v13");
PutKey(db.get(), write_options, TestKey(2, 1), "v21");
PutKey(db.get(), write_options, TestKey(2, 2), "v22");
ASSERT_OK(db->Flush(FlushOptions()));
ASSERT_OK(
static_cast_with_check<DBImpl>(db.get())->TEST_WaitForFlushMemTable());
std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
// Seek to (1, 5), which is past every key of prefix 1, then step back.
SeekIterator(iter.get(), 1, 5);
iter->Prev();
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(iter->value(), v13);
}
// SeekToLast() with iterate_upper_bound set to key (1, 5): in both data
// layouts below the last key under the bound is (1, 4), so the iterator must
// return "v14".
TEST_F(PrefixTest, PrefixSeekModePrev3) {
// Only for SkipListFactory
// test SeekToLast() with iterate_upper_bound_ in prefix_seek_mode
options.memtable_factory.reset(new SkipListFactory);
options.write_buffer_size = 1024 * 1024;
std::string v14("v14");
// `s` backs `upper_bound` and must stay alive while the iterators are used.
TestKey upper_bound_key = TestKey(1, 5);
std::string s;
Slice upper_bound = TestKeyToSlice(s, upper_bound_key);
// Layout 1: keys above the bound (prefix 2) are in the second flushed batch.
{
ASSERT_OK(DestroyDB(kDbName, Options()));
auto db = OpenDb();
WriteOptions write_options;
ReadOptions read_options;
read_options.iterate_upper_bound = &upper_bound;
PutKey(db.get(), write_options, TestKey(1, 2), "v12");
PutKey(db.get(), write_options, TestKey(1, 4), "v14");
ASSERT_OK(db->Flush(FlushOptions()));
ASSERT_OK(
static_cast_with_check<DBImpl>(db.get())->TEST_WaitForFlushMemTable());
PutKey(db.get(), write_options, TestKey(1, 1), "v11");
PutKey(db.get(), write_options, TestKey(1, 3), "v13");
PutKey(db.get(), write_options, TestKey(2, 1), "v21");
PutKey(db.get(), write_options, TestKey(2, 2), "v22");
ASSERT_OK(db->Flush(FlushOptions()));
ASSERT_OK(
static_cast_with_check<DBImpl>(db.get())->TEST_WaitForFlushMemTable());
std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
iter->SeekToLast();
ASSERT_EQ(iter->value(), v14);
}
// Layout 2: keys above the bound (prefix 3) are in the first flushed batch,
// together with (1, 4).
{
ASSERT_OK(DestroyDB(kDbName, Options()));
auto db = OpenDb();
WriteOptions write_options;
ReadOptions read_options;
read_options.iterate_upper_bound = &upper_bound;
PutKey(db.get(), write_options, TestKey(1, 2), "v12");
PutKey(db.get(), write_options, TestKey(1, 4), "v14");
PutKey(db.get(), write_options, TestKey(3, 3), "v33");
PutKey(db.get(), write_options, TestKey(3, 4), "v34");
ASSERT_OK(db->Flush(FlushOptions()));
ASSERT_OK(
static_cast_with_check<DBImpl>(db.get())->TEST_WaitForFlushMemTable());
PutKey(db.get(), write_options, TestKey(1, 1), "v11");
PutKey(db.get(), write_options, TestKey(1, 3), "v13");
ASSERT_OK(db->Flush(FlushOptions()));
ASSERT_OK(
static_cast_with_check<DBImpl>(db.get())->TEST_WaitForFlushMemTable());
std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
iter->SeekToLast();
ASSERT_EQ(iter->value(), v14);
}
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack trace handler, lets GoogleTest
// and then the flag parser consume their command-line arguments, and runs
// every registered test.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  ParseCommandLineFlags(&argc, &argv, true);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}
#endif // GFLAGS

@ -1,713 +0,0 @@
// Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/range_del_aggregator.h"
#include <memory>
#include <string>
#include <vector>
#include "db/db_test_util.h"
#include "db/dbformat.h"
#include "db/range_tombstone_fragmenter.h"
#include "test_util/testutil.h"
#include "util/vector_iterator.h"
namespace ROCKSDB_NAMESPACE {
// Fixture for the TEST_F cases in this file. It adds no state or
// setup/teardown of its own on top of testing::Test.
class RangeDelAggregatorTest : public testing::Test {};
namespace {
// Internal-key comparator wrapping BytewiseComparator(); used by the helpers
// and tests in this file wherever an InternalKeyComparator is required.
static auto bytewise_icmp = InternalKeyComparator(BytewiseComparator());
std::unique_ptr<InternalIterator> MakeRangeDelIter(
const std::vector<RangeTombstone>& range_dels) {
std::vector<std::string> keys, values;
for (const auto& range_del : range_dels) {
auto key_and_value = range_del.Serialize();
keys.push_back(key_and_value.first.Encode().ToString());
values.push_back(key_and_value.second.ToString());
}
return std::unique_ptr<VectorIterator>(
new VectorIterator(keys, values, &bytewise_icmp));
}
std::vector<std::unique_ptr<FragmentedRangeTombstoneList>>
MakeFragmentedTombstoneLists(
const std::vector<std::vector<RangeTombstone>>& range_dels_list) {
std::vector<std::unique_ptr<FragmentedRangeTombstoneList>> fragment_lists;
for (const auto& range_dels : range_dels_list) {
auto range_del_iter = MakeRangeDelIter(range_dels);
fragment_lists.emplace_back(new FragmentedRangeTombstoneList(
std::move(range_del_iter), bytewise_icmp));
}
return fragment_lists;
}
// One expected position during a full scan in VerifyIterator(). Instances are
// brace-initialized positionally, so the member order is significant.
struct TruncatedIterScanTestCase {
// Expected iter->start_key() at this position.
ParsedInternalKey start;
// Expected iter->end_key() at this position.
ParsedInternalKey end;
// Expected iter->seq() at this position.
SequenceNumber seq;
};
// One Seek()/SeekForPrev() probe and its expected outcome, consumed by
// VerifySeek() and VerifySeekForPrev(). Instances are brace-initialized
// positionally, so the member order is significant.
struct TruncatedIterSeekTestCase {
// Key passed to Seek() or SeekForPrev().
Slice target;
// Expected iter->start_key() after the seek (not checked when `invalid`).
ParsedInternalKey start;
// Expected iter->end_key() after the seek (not checked when `invalid`).
ParsedInternalKey end;
// Expected iter->seq() after the seek (not checked when `invalid`).
SequenceNumber seq;
// When true, the iterator is expected to be !Valid() after the seek.
// Test cases that omit this member get false from aggregate initialization.
bool invalid;
};
// One ShouldDelete() probe and its expected answer, consumed by
// VerifyShouldDelete().
struct ShouldDeleteTestCase {
// Key passed to RangeDelAggregator::ShouldDelete().
ParsedInternalKey lookup_key;
// Expected return value of ShouldDelete() for `lookup_key`.
bool result;
};
// One IsRangeOverlapped() probe and its expected answer, consumed by
// VerifyIsRangeOverlapped().
struct IsRangeOverlappedTestCase {
// First argument passed to IsRangeOverlapped().
Slice start;
// Second argument passed to IsRangeOverlapped().
Slice end;
// Expected return value of IsRangeOverlapped(start, end).
bool result;
};
// Returns the ParsedInternalKey (s, kMaxSequenceNumber, kTypeRangeDeletion).
// The tests use it for tombstone end keys that are expected to be reported
// without having been cut by a file boundary.
ParsedInternalKey UncutEndpoint(const Slice& s) {
  ParsedInternalKey endpoint(s, kMaxSequenceNumber, kTypeRangeDeletion);
  return endpoint;
}
// Convenience constructor for a ParsedInternalKey made of (key, seq, type).
// `type` defaults to kTypeValue.
ParsedInternalKey InternalValue(const Slice& key, SequenceNumber seq,
                                ValueType type = kTypeValue) {
  ParsedInternalKey parsed(key, seq, type);
  return parsed;
}
// Scans `iter` forward from SeekToFirst() and then backward from SeekToLast(),
// checking that the tombstones visited match `expected_range_dels` (in order
// for the forward pass, in reverse order for the backward pass) and that the
// iterator becomes invalid right after the last expected entry. Start and end
// keys are compared with `icmp`.
void VerifyIterator(
    TruncatedRangeDelIterator* iter, const InternalKeyComparator& icmp,
    const std::vector<TruncatedIterScanTestCase>& expected_range_dels) {
  // Forward pass.
  iter->SeekToFirst();
  for (const TruncatedIterScanTestCase& expected : expected_range_dels) {
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(0, icmp.Compare(iter->start_key(), expected.start));
    EXPECT_EQ(0, icmp.Compare(iter->end_key(), expected.end));
    EXPECT_EQ(expected.seq, iter->seq());
    iter->Next();
  }
  EXPECT_FALSE(iter->Valid());

  // Backward pass: walk the expectations from last to first.
  iter->SeekToLast();
  for (auto expected = expected_range_dels.rbegin();
       expected != expected_range_dels.rend(); ++expected) {
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(0, icmp.Compare(iter->start_key(), expected->start));
    EXPECT_EQ(0, icmp.Compare(iter->end_key(), expected->end));
    EXPECT_EQ(expected->seq, iter->seq());
    iter->Prev();
  }
  EXPECT_FALSE(iter->Valid());
}
// For every test case, calls iter->Seek(target) and checks the outcome:
// either the iterator must be invalid, or it must be positioned on a
// tombstone whose start key, end key and sequence number match the case.
void VerifySeek(TruncatedRangeDelIterator* iter,
                const InternalKeyComparator& icmp,
                const std::vector<TruncatedIterSeekTestCase>& test_cases) {
  for (const TruncatedIterSeekTestCase& tc : test_cases) {
    iter->Seek(tc.target);
    if (tc.invalid) {
      ASSERT_FALSE(iter->Valid());
      continue;
    }
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(0, icmp.Compare(iter->start_key(), tc.start));
    EXPECT_EQ(0, icmp.Compare(iter->end_key(), tc.end));
    EXPECT_EQ(tc.seq, iter->seq());
  }
}
// For every test case, calls iter->SeekForPrev(target) and checks the
// outcome: either the iterator must be invalid, or it must be positioned on a
// tombstone whose start key, end key and sequence number match the case.
void VerifySeekForPrev(
    TruncatedRangeDelIterator* iter, const InternalKeyComparator& icmp,
    const std::vector<TruncatedIterSeekTestCase>& test_cases) {
  for (const TruncatedIterSeekTestCase& tc : test_cases) {
    iter->SeekForPrev(tc.target);
    if (tc.invalid) {
      ASSERT_FALSE(iter->Valid());
      continue;
    }
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(0, icmp.Compare(iter->start_key(), tc.start));
    EXPECT_EQ(0, icmp.Compare(iter->end_key(), tc.end));
    EXPECT_EQ(tc.seq, iter->seq());
  }
}
// Probes range_del_agg->ShouldDelete() with every test case twice: first in
// the given order using kForwardTraversal, then in reverse order using
// kBackwardTraversal. Each call must return the case's expected result.
void VerifyShouldDelete(RangeDelAggregator* range_del_agg,
                        const std::vector<ShouldDeleteTestCase>& test_cases) {
  const size_t num_cases = test_cases.size();
  for (size_t i = 0; i < num_cases; ++i) {
    const ShouldDeleteTestCase& tc = test_cases[i];
    EXPECT_EQ(tc.result,
              range_del_agg->ShouldDelete(
                  tc.lookup_key, RangeDelPositioningMode::kForwardTraversal));
  }
  for (size_t remaining = num_cases; remaining > 0; --remaining) {
    const ShouldDeleteTestCase& tc = test_cases[remaining - 1];
    EXPECT_EQ(tc.result,
              range_del_agg->ShouldDelete(
                  tc.lookup_key, RangeDelPositioningMode::kBackwardTraversal));
  }
}
// Checks that range_del_agg->IsRangeOverlapped(start, end) returns the
// expected result for every test case, in the given order.
void VerifyIsRangeOverlapped(
    ReadRangeDelAggregator* range_del_agg,
    const std::vector<IsRangeOverlappedTestCase>& test_cases) {
  for (size_t i = 0; i < test_cases.size(); ++i) {
    const IsRangeOverlappedTestCase& tc = test_cases[i];
    EXPECT_EQ(tc.result, range_del_agg->IsRangeOverlapped(tc.start, tc.end));
  }
}
// Checks that `iter` currently points at `tombstone`, once through the
// generic accessors (key()/value()/seq()) and once through the
// tombstone-specific accessors (start_key()/end_key()) plus the sequence
// number embedded in key().
void CheckIterPosition(const RangeTombstone& tombstone,
                       const FragmentedRangeTombstoneIterator* iter) {
  const auto& want_start = tombstone.start_key_;
  const auto& want_end = tombstone.end_key_;
  const auto& want_seq = tombstone.seq_;

  // InternalIterator interface.
  EXPECT_EQ(want_start, ExtractUserKey(iter->key()));
  EXPECT_EQ(want_end, iter->value());
  EXPECT_EQ(want_seq, iter->seq());

  // FragmentedRangeTombstoneIterator interface.
  EXPECT_EQ(want_start, iter->start_key());
  EXPECT_EQ(want_end, iter->end_key());
  EXPECT_EQ(want_seq, GetInternalKeySeqno(iter->key()));
}
// Scans `iter` from SeekToFirst() with Next() and checks that it yields
// exactly `expected_tombstones`, in order, and is invalid afterwards.
void VerifyFragmentedRangeDels(
    FragmentedRangeTombstoneIterator* iter,
    const std::vector<RangeTombstone>& expected_tombstones) {
  iter->SeekToFirst();
  for (const RangeTombstone& expected : expected_tombstones) {
    ASSERT_TRUE(iter->Valid());
    CheckIterPosition(expected, iter);
    iter->Next();
  }
  EXPECT_FALSE(iter->Valid());
}
} // anonymous namespace
// A truncated iterator built over an empty tombstone list must be invalid
// after both SeekToFirst() and SeekToLast().
TEST_F(RangeDelAggregatorTest, EmptyTruncatedIter) {
  std::unique_ptr<InternalIterator> no_tombstones = MakeRangeDelIter({});
  FragmentedRangeTombstoneList empty_list(std::move(no_tombstones),
                                          bytewise_icmp);
  std::unique_ptr<FragmentedRangeTombstoneIterator> fragment_iter(
      new FragmentedRangeTombstoneIterator(&empty_list, bytewise_icmp,
                                           kMaxSequenceNumber));
  // Both boundary keys are null.
  TruncatedRangeDelIterator truncated(std::move(fragment_iter), &bytewise_icmp,
                                      nullptr, nullptr);

  truncated.SeekToFirst();
  ASSERT_FALSE(truncated.Valid());

  truncated.SeekToLast();
  ASSERT_FALSE(truncated.Valid());
}
// With null boundary keys and kMaxSequenceNumber as the upper bound, the
// iterator is expected to report all three tombstones with their original
// endpoints, for full scans as well as for Seek() and SeekForPrev().
TEST_F(RangeDelAggregatorTest, UntruncatedIter) {
  std::unique_ptr<InternalIterator> raw_tombstones =
      MakeRangeDelIter({{"a", "e", 10}, {"e", "g", 8}, {"j", "n", 4}});
  FragmentedRangeTombstoneList tombstone_list(std::move(raw_tombstones),
                                              bytewise_icmp);
  std::unique_ptr<FragmentedRangeTombstoneIterator> fragment_iter(
      new FragmentedRangeTombstoneIterator(&tombstone_list, bytewise_icmp,
                                           kMaxSequenceNumber));
  TruncatedRangeDelIterator truncated(std::move(fragment_iter), &bytewise_icmp,
                                      nullptr, nullptr);

  const std::vector<TruncatedIterScanTestCase> scan_cases = {
      {InternalValue("a", 10, kTypeRangeDeletion), UncutEndpoint("e"), 10},
      {InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8},
      {InternalValue("j", 4, kTypeRangeDeletion), UncutEndpoint("n"), 4}};
  VerifyIterator(&truncated, bytewise_icmp, scan_cases);

  const std::vector<TruncatedIterSeekTestCase> seek_cases = {
      {"d", InternalValue("a", 10, kTypeRangeDeletion), UncutEndpoint("e"),
       10},
      {"e", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8},
      {"ia", InternalValue("j", 4, kTypeRangeDeletion), UncutEndpoint("n"), 4},
      {"n", InternalValue("", 0, kTypeRangeDeletion), UncutEndpoint(""), 0,
       true /* invalid */},
      {"", InternalValue("a", 10, kTypeRangeDeletion), UncutEndpoint("e"),
       10}};
  VerifySeek(&truncated, bytewise_icmp, seek_cases);

  const std::vector<TruncatedIterSeekTestCase> seek_for_prev_cases = {
      {"d", InternalValue("a", 10, kTypeRangeDeletion), UncutEndpoint("e"),
       10},
      {"e", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8},
      {"ia", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8},
      {"n", InternalValue("j", 4, kTypeRangeDeletion), UncutEndpoint("n"), 4},
      {"", InternalValue("", 0, kTypeRangeDeletion), UncutEndpoint(""), 0,
       true /* invalid */}};
  VerifySeekForPrev(&truncated, bytewise_icmp, seek_for_prev_cases);
}
// Same tombstones as UntruncatedIter, but the fragment iterator is created
// with 9 as its upper bound. The expectations no longer contain the
// tombstone with sequence number 10.
TEST_F(RangeDelAggregatorTest, UntruncatedIterWithSnapshot) {
  std::unique_ptr<InternalIterator> raw_tombstones =
      MakeRangeDelIter({{"a", "e", 10}, {"e", "g", 8}, {"j", "n", 4}});
  FragmentedRangeTombstoneList tombstone_list(std::move(raw_tombstones),
                                              bytewise_icmp);
  std::unique_ptr<FragmentedRangeTombstoneIterator> fragment_iter(
      new FragmentedRangeTombstoneIterator(&tombstone_list, bytewise_icmp,
                                           9 /* snapshot */));
  TruncatedRangeDelIterator truncated(std::move(fragment_iter), &bytewise_icmp,
                                      nullptr, nullptr);

  const std::vector<TruncatedIterScanTestCase> scan_cases = {
      {InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8},
      {InternalValue("j", 4, kTypeRangeDeletion), UncutEndpoint("n"), 4}};
  VerifyIterator(&truncated, bytewise_icmp, scan_cases);

  const std::vector<TruncatedIterSeekTestCase> seek_cases = {
      {"d", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8},
      {"e", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8},
      {"ia", InternalValue("j", 4, kTypeRangeDeletion), UncutEndpoint("n"), 4},
      {"n", InternalValue("", 0, kTypeRangeDeletion), UncutEndpoint(""), 0,
       true /* invalid */},
      {"", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8}};
  VerifySeek(&truncated, bytewise_icmp, seek_cases);

  const std::vector<TruncatedIterSeekTestCase> seek_for_prev_cases = {
      {"d", InternalValue("", 0, kTypeRangeDeletion), UncutEndpoint(""), 0,
       true /* invalid */},
      {"e", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8},
      {"ia", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8},
      {"n", InternalValue("j", 4, kTypeRangeDeletion), UncutEndpoint("n"), 4},
      {"", InternalValue("", 0, kTypeRangeDeletion), UncutEndpoint(""), 0,
       true /* invalid */}};
  VerifySeekForPrev(&truncated, bytewise_icmp, seek_for_prev_cases);
}
// The iterator is given the boundary keys ("d", 7) and ("m", 9). The
// expectations show the first tombstone starting at ("d", 7) instead of "a"
// and the last one ending at ("m", 8) instead of "n"; the middle tombstone
// keeps its original endpoints.
TEST_F(RangeDelAggregatorTest, TruncatedIterPartiallyCutTombstones) {
  std::unique_ptr<InternalIterator> raw_tombstones =
      MakeRangeDelIter({{"a", "e", 10}, {"e", "g", 8}, {"j", "n", 4}});
  FragmentedRangeTombstoneList tombstone_list(std::move(raw_tombstones),
                                              bytewise_icmp);
  std::unique_ptr<FragmentedRangeTombstoneIterator> fragment_iter(
      new FragmentedRangeTombstoneIterator(&tombstone_list, bytewise_icmp,
                                           kMaxSequenceNumber));
  InternalKey smallest("d", 7, kTypeValue);
  InternalKey largest("m", 9, kTypeValue);
  TruncatedRangeDelIterator truncated(std::move(fragment_iter), &bytewise_icmp,
                                      &smallest, &largest);

  const std::vector<TruncatedIterScanTestCase> scan_cases = {
      {InternalValue("d", 7, kTypeMaxValid), UncutEndpoint("e"), 10},
      {InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8},
      {InternalValue("j", 4, kTypeRangeDeletion),
       InternalValue("m", 8, kTypeMaxValid), 4}};
  VerifyIterator(&truncated, bytewise_icmp, scan_cases);

  const std::vector<TruncatedIterSeekTestCase> seek_cases = {
      {"d", InternalValue("d", 7, kTypeMaxValid), UncutEndpoint("e"), 10},
      {"e", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8},
      {"ia", InternalValue("j", 4, kTypeRangeDeletion),
       InternalValue("m", 8, kTypeMaxValid), 4, false /* invalid */},
      {"n", InternalValue("", 0, kTypeRangeDeletion), UncutEndpoint(""), 0,
       true /* invalid */},
      {"", InternalValue("d", 7, kTypeMaxValid), UncutEndpoint("e"), 10}};
  VerifySeek(&truncated, bytewise_icmp, seek_cases);

  const std::vector<TruncatedIterSeekTestCase> seek_for_prev_cases = {
      {"d", InternalValue("d", 7, kTypeMaxValid), UncutEndpoint("e"), 10},
      {"e", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8},
      {"ia", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8},
      {"n", InternalValue("j", 4, kTypeRangeDeletion),
       InternalValue("m", 8, kTypeMaxValid), 4, false /* invalid */},
      {"", InternalValue("", 0, kTypeRangeDeletion), UncutEndpoint(""), 0,
       true /* invalid */}};
  VerifySeekForPrev(&truncated, bytewise_icmp, seek_for_prev_cases);
}
// The iterator is given the boundary keys ("f", 7) and ("i", 9). Only one
// tombstone is expected to remain visible: the {"e", "g", 8} tombstone,
// reported as starting at ("f", 7).
TEST_F(RangeDelAggregatorTest, TruncatedIterFullyCutTombstones) {
  std::unique_ptr<InternalIterator> raw_tombstones =
      MakeRangeDelIter({{"a", "e", 10}, {"e", "g", 8}, {"j", "n", 4}});
  FragmentedRangeTombstoneList tombstone_list(std::move(raw_tombstones),
                                              bytewise_icmp);
  std::unique_ptr<FragmentedRangeTombstoneIterator> fragment_iter(
      new FragmentedRangeTombstoneIterator(&tombstone_list, bytewise_icmp,
                                           kMaxSequenceNumber));
  InternalKey smallest("f", 7, kTypeValue);
  InternalKey largest("i", 9, kTypeValue);
  TruncatedRangeDelIterator truncated(std::move(fragment_iter), &bytewise_icmp,
                                      &smallest, &largest);

  const std::vector<TruncatedIterScanTestCase> scan_cases = {
      {InternalValue("f", 7, kTypeMaxValid), UncutEndpoint("g"), 8}};
  VerifyIterator(&truncated, bytewise_icmp, scan_cases);

  const std::vector<TruncatedIterSeekTestCase> seek_cases = {
      {"d", InternalValue("f", 7, kTypeMaxValid), UncutEndpoint("g"), 8},
      {"f", InternalValue("f", 7, kTypeMaxValid), UncutEndpoint("g"), 8},
      {"j", InternalValue("", 0, kTypeRangeDeletion), UncutEndpoint(""), 0,
       true /* invalid */}};
  VerifySeek(&truncated, bytewise_icmp, seek_cases);

  const std::vector<TruncatedIterSeekTestCase> seek_for_prev_cases = {
      {"d", InternalValue("", 0, kTypeRangeDeletion), UncutEndpoint(""), 0,
       true /* invalid */},
      {"f", InternalValue("f", 7, kTypeMaxValid), UncutEndpoint("g"), 8},
      {"j", InternalValue("f", 7, kTypeMaxValid), UncutEndpoint("g"), 8}};
  VerifySeekForPrev(&truncated, bytewise_icmp, seek_for_prev_cases);
}
// Adds one tombstone iterator to a ReadRangeDelAggregator and checks the
// answers of ShouldDelete() and IsRangeOverlapped().
TEST_F(RangeDelAggregatorTest, SingleIterInAggregator) {
  std::unique_ptr<InternalIterator> raw_tombstones =
      MakeRangeDelIter({{"a", "e", 10}, {"c", "g", 8}});
  FragmentedRangeTombstoneList tombstone_list(std::move(raw_tombstones),
                                              bytewise_icmp);
  std::unique_ptr<FragmentedRangeTombstoneIterator> fragment_iter(
      new FragmentedRangeTombstoneIterator(&tombstone_list, bytewise_icmp,
                                           kMaxSequenceNumber));

  ReadRangeDelAggregator aggregator(&bytewise_icmp, kMaxSequenceNumber);
  aggregator.AddTombstones(std::move(fragment_iter));

  const std::vector<ShouldDeleteTestCase> delete_cases = {
      {InternalValue("a", 19), false},
      {InternalValue("b", 9), true},
      {InternalValue("d", 9), true},
      {InternalValue("e", 7), true},
      {InternalValue("g", 7), false}};
  VerifyShouldDelete(&aggregator, delete_cases);

  const std::vector<IsRangeOverlappedTestCase> overlap_cases = {
      {"", "_", false},
      {"_", "a", true},
      {"a", "c", true},
      {"d", "f", true},
      {"g", "l", false}};
  VerifyIsRangeOverlapped(&aggregator, overlap_cases);
}
// Adds iterators over two separate tombstone lists to one
// ReadRangeDelAggregator (upper bound kMaxSequenceNumber) and checks
// ShouldDelete() and IsRangeOverlapped() against the combined set.
TEST_F(RangeDelAggregatorTest, MultipleItersInAggregator) {
  auto tombstone_lists = MakeFragmentedTombstoneLists(
      {{{"a", "e", 10}, {"c", "g", 8}},
       {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}});

  ReadRangeDelAggregator aggregator(&bytewise_icmp, kMaxSequenceNumber);
  for (size_t i = 0; i < tombstone_lists.size(); ++i) {
    std::unique_ptr<FragmentedRangeTombstoneIterator> fragment_iter(
        new FragmentedRangeTombstoneIterator(
            tombstone_lists[i].get(), bytewise_icmp, kMaxSequenceNumber));
    aggregator.AddTombstones(std::move(fragment_iter));
  }

  const std::vector<ShouldDeleteTestCase> delete_cases = {
      {InternalValue("a", 19), true},
      {InternalValue("b", 19), false},
      {InternalValue("b", 9), true},
      {InternalValue("d", 9), true},
      {InternalValue("e", 7), true},
      {InternalValue("g", 7), false},
      {InternalValue("h", 24), true},
      {InternalValue("i", 24), false},
      {InternalValue("ii", 14), true},
      {InternalValue("j", 14), false}};
  VerifyShouldDelete(&aggregator, delete_cases);

  const std::vector<IsRangeOverlappedTestCase> overlap_cases = {
      {"", "_", false},
      {"_", "a", true},
      {"a", "c", true},
      {"d", "f", true},
      {"g", "l", true},
      {"x", "y", false}};
  VerifyIsRangeOverlapped(&aggregator, overlap_cases);
}
// Same tombstones as MultipleItersInAggregator, but both the aggregator and
// the fragment iterators are created with 19 as the upper bound. The
// expectations differ for the probes ("a", 19) and ("h", 24), which are now
// expected not to be deleted.
TEST_F(RangeDelAggregatorTest, MultipleItersInAggregatorWithUpperBound) {
  auto tombstone_lists = MakeFragmentedTombstoneLists(
      {{{"a", "e", 10}, {"c", "g", 8}},
       {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}});

  ReadRangeDelAggregator aggregator(&bytewise_icmp, 19);
  for (size_t i = 0; i < tombstone_lists.size(); ++i) {
    std::unique_ptr<FragmentedRangeTombstoneIterator> fragment_iter(
        new FragmentedRangeTombstoneIterator(
            tombstone_lists[i].get(), bytewise_icmp, 19 /* snapshot */));
    aggregator.AddTombstones(std::move(fragment_iter));
  }

  const std::vector<ShouldDeleteTestCase> delete_cases = {
      {InternalValue("a", 19), false},
      {InternalValue("a", 9), true},
      {InternalValue("b", 9), true},
      {InternalValue("d", 9), true},
      {InternalValue("e", 7), true},
      {InternalValue("g", 7), false},
      {InternalValue("h", 24), false},
      {InternalValue("i", 24), false},
      {InternalValue("ii", 14), true},
      {InternalValue("j", 14), false}};
  VerifyShouldDelete(&aggregator, delete_cases);

  const std::vector<IsRangeOverlappedTestCase> overlap_cases = {
      {"", "_", false},
      {"_", "a", true},
      {"a", "c", true},
      {"d", "f", true},
      {"g", "l", true},
      {"x", "y", false}};
  VerifyIsRangeOverlapped(&aggregator, overlap_cases);
}
// Adds three copies of the tombstone {"a", "z", 10} to the aggregator, each
// with its own pair of boundary keys, and checks ShouldDelete() and
// IsRangeOverlapped() on the result.
TEST_F(RangeDelAggregatorTest, MultipleTruncatedItersInAggregator) {
  auto tombstone_lists = MakeFragmentedTombstoneLists(
      {{{"a", "z", 10}}, {{"a", "z", 10}}, {{"a", "z", 10}}});
  // (smallest, largest) boundary pair for the list with the same index.
  std::vector<std::pair<InternalKey, InternalKey>> boundaries = {
      {InternalKey("a", 4, kTypeValue),
       InternalKey("m", kMaxSequenceNumber, kTypeRangeDeletion)},
      {InternalKey("m", 20, kTypeValue),
       InternalKey("x", kMaxSequenceNumber, kTypeRangeDeletion)},
      {InternalKey("x", 5, kTypeValue), InternalKey("zz", 30, kTypeValue)}};

  ReadRangeDelAggregator aggregator(&bytewise_icmp, 19);
  for (size_t idx = 0; idx < tombstone_lists.size(); ++idx) {
    const auto& boundary = boundaries[idx];
    std::unique_ptr<FragmentedRangeTombstoneIterator> fragment_iter(
        new FragmentedRangeTombstoneIterator(
            tombstone_lists[idx].get(), bytewise_icmp, 19 /* snapshot */));
    aggregator.AddTombstones(std::move(fragment_iter), &boundary.first,
                             &boundary.second);
  }

  const std::vector<ShouldDeleteTestCase> delete_cases = {
      {InternalValue("a", 10), false},
      {InternalValue("a", 9), false},
      {InternalValue("a", 4), true},
      {InternalValue("m", 10), false},
      {InternalValue("m", 9), true},
      {InternalValue("x", 10), false},
      {InternalValue("x", 9), false},
      {InternalValue("x", 5), true},
      {InternalValue("z", 9), false}};
  VerifyShouldDelete(&aggregator, delete_cases);

  const std::vector<IsRangeOverlappedTestCase> overlap_cases = {
      {"", "_", false},
      {"_", "a", true},
      {"a", "n", true},
      {"l", "x", true},
      {"w", "z", true},
      {"zzz", "zz", false},
      {"zz", "zzz", false}};
  VerifyIsRangeOverlapped(&aggregator, overlap_cases);
}
// Same data as MultipleTruncatedItersInAggregator, but the three truncated
// iterators are added one at a time, with ShouldDelete() probed after each
// addition and IsRangeOverlapped() probed once at the end.
TEST_F(RangeDelAggregatorTest, MultipleTruncatedItersInAggregatorSameLevel) {
  auto tombstone_lists = MakeFragmentedTombstoneLists(
      {{{"a", "z", 10}}, {{"a", "z", 10}}, {{"a", "z", 10}}});
  // (smallest, largest) boundary pair for the list with the same index.
  std::vector<std::pair<InternalKey, InternalKey>> boundaries = {
      {InternalKey("a", 4, kTypeValue),
       InternalKey("m", kMaxSequenceNumber, kTypeRangeDeletion)},
      {InternalKey("m", 20, kTypeValue),
       InternalKey("x", kMaxSequenceNumber, kTypeRangeDeletion)},
      {InternalKey("x", 5, kTypeValue), InternalKey("zz", 30, kTypeValue)}};

  ReadRangeDelAggregator aggregator(&bytewise_icmp, 19);
  // Adds the iterator for list `idx`, truncated to boundaries[idx].
  auto add_truncated_iter = [&](size_t idx) {
    std::unique_ptr<FragmentedRangeTombstoneIterator> fragment_iter(
        new FragmentedRangeTombstoneIterator(
            tombstone_lists[idx].get(), bytewise_icmp, 19 /* snapshot */));
    aggregator.AddTombstones(std::move(fragment_iter), &boundaries[idx].first,
                             &boundaries[idx].second);
  };

  add_truncated_iter(0);
  const std::vector<ShouldDeleteTestCase> after_first = {
      {InternalValue("a", 10), false},
      {InternalValue("a", 9), false},
      {InternalValue("a", 4), true}};
  VerifyShouldDelete(&aggregator, after_first);

  add_truncated_iter(1);
  const std::vector<ShouldDeleteTestCase> after_second = {
      {InternalValue("m", 10), false}, {InternalValue("m", 9), true}};
  VerifyShouldDelete(&aggregator, after_second);

  add_truncated_iter(2);
  const std::vector<ShouldDeleteTestCase> after_third = {
      {InternalValue("x", 10), false},
      {InternalValue("x", 9), false},
      {InternalValue("x", 5), true},
      {InternalValue("z", 9), false}};
  VerifyShouldDelete(&aggregator, after_third);

  const std::vector<IsRangeOverlappedTestCase> overlap_cases = {
      {"", "_", false},
      {"_", "a", true},
      {"a", "n", true},
      {"l", "x", true},
      {"w", "z", true},
      {"zzz", "zz", false},
      {"zz", "zzz", false}};
  VerifyIsRangeOverlapped(&aggregator, overlap_cases);
}
// Feeds two tombstone lists into a CompactionRangeDelAggregator created with
// an empty snapshot list, then checks ShouldDelete() and the fragments
// produced by NewIterator().
TEST_F(RangeDelAggregatorTest, CompactionAggregatorNoSnapshots) {
  auto tombstone_lists = MakeFragmentedTombstoneLists(
      {{{"a", "e", 10}, {"c", "g", 8}},
       {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}});
  std::vector<SequenceNumber> snapshots;

  CompactionRangeDelAggregator aggregator(&bytewise_icmp, snapshots);
  for (size_t i = 0; i < tombstone_lists.size(); ++i) {
    std::unique_ptr<FragmentedRangeTombstoneIterator> fragment_iter(
        new FragmentedRangeTombstoneIterator(
            tombstone_lists[i].get(), bytewise_icmp, kMaxSequenceNumber));
    aggregator.AddTombstones(std::move(fragment_iter));
  }

  const std::vector<ShouldDeleteTestCase> delete_cases = {
      {InternalValue("a", 19), true},
      {InternalValue("b", 19), false},
      {InternalValue("b", 9), true},
      {InternalValue("d", 9), true},
      {InternalValue("e", 7), true},
      {InternalValue("g", 7), false},
      {InternalValue("h", 24), true},
      {InternalValue("i", 24), false},
      {InternalValue("ii", 14), true},
      {InternalValue("j", 14), false}};
  VerifyShouldDelete(&aggregator, delete_cases);

  auto compaction_iter = aggregator.NewIterator();
  const std::vector<RangeTombstone> expected_fragments = {
      {"a", "b", 20},
      {"b", "c", 10},
      {"c", "e", 10},
      {"e", "g", 8},
      {"h", "i", 25},
      {"ii", "j", 15}};
  VerifyFragmentedRangeDels(compaction_iter.get(), expected_fragments);
}
// Same tombstones as CompactionAggregatorNoSnapshots, but the aggregator is
// created with snapshots {9, 19}. The trailing comment on each ShouldDelete
// case names the sequence-number interval the probe falls into.
TEST_F(RangeDelAggregatorTest, CompactionAggregatorWithSnapshots) {
  auto tombstone_lists = MakeFragmentedTombstoneLists(
      {{{"a", "e", 10}, {"c", "g", 8}},
       {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}});
  std::vector<SequenceNumber> snapshots{9, 19};

  CompactionRangeDelAggregator aggregator(&bytewise_icmp, snapshots);
  for (size_t i = 0; i < tombstone_lists.size(); ++i) {
    std::unique_ptr<FragmentedRangeTombstoneIterator> fragment_iter(
        new FragmentedRangeTombstoneIterator(
            tombstone_lists[i].get(), bytewise_icmp, kMaxSequenceNumber));
    aggregator.AddTombstones(std::move(fragment_iter));
  }

  const std::vector<ShouldDeleteTestCase> delete_cases = {
      {InternalValue("a", 19), false},  // [10, 19]
      {InternalValue("a", 9), false},   // [0, 9]
      {InternalValue("b", 9), false},   // [0, 9]
      {InternalValue("d", 9), false},   // [0, 9]
      {InternalValue("d", 7), true},    // [0, 9]
      {InternalValue("e", 7), true},    // [0, 9]
      {InternalValue("g", 7), false},   // [0, 9]
      {InternalValue("h", 24), true},   // [20, kMaxSequenceNumber]
      {InternalValue("i", 24), false},  // [20, kMaxSequenceNumber]
      {InternalValue("ii", 14), true},  // [10, 19]
      {InternalValue("j", 14), false}   // [10, 19]
  };
  VerifyShouldDelete(&aggregator, delete_cases);

  auto compaction_iter = aggregator.NewIterator();
  const std::vector<RangeTombstone> expected_fragments = {
      {"a", "b", 20},
      {"a", "b", 10},
      {"b", "c", 10},
      {"c", "e", 10},
      {"c", "e", 8},
      {"e", "g", 8},
      {"h", "i", 25},
      {"ii", "j", 15}};
  VerifyFragmentedRangeDels(compaction_iter.get(), expected_fragments);
}
// Requests a bounded compaction iterator whose bounds lie entirely to the
// left of every tombstone and expects it to yield nothing. Mirrors
// CompactionAggregatorEmptyIteratorRight, which does the same for bounds to
// the right of every tombstone.
TEST_F(RangeDelAggregatorTest, CompactionAggregatorEmptyIteratorLeft) {
  auto fragment_lists = MakeFragmentedTombstoneLists(
      {{{"a", "e", 10}, {"c", "g", 8}},
       {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}});
  std::vector<SequenceNumber> snapshots{9, 19};
  CompactionRangeDelAggregator range_del_agg(&bytewise_icmp, snapshots);
  for (const auto& fragment_list : fragment_lists) {
    std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter(
        new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp,
                                             kMaxSequenceNumber));
    range_del_agg.AddTombstones(std::move(input_iter));
  }
  // '_' (0x5F) orders before 'a' (0x61) bytewise, so the bounds "_" and "__"
  // both precede "a", the smallest tombstone start key added above. The bounds
  // are passed as encoded internal keys, as in the sibling bounded-iterator
  // tests.
  InternalKey start_buf("_", 0, kTypeRangeDeletion);
  InternalKey end_buf("__", 0, kTypeRangeDeletion);
  Slice start = start_buf.Encode();
  Slice end = end_buf.Encode();
  auto range_del_compaction_iter = range_del_agg.NewIterator(&start, &end);
  VerifyFragmentedRangeDels(range_del_compaction_iter.get(), {});
}
// Requests a bounded compaction iterator with the bounds "p" and "q" and
// expects it to yield no tombstones.
TEST_F(RangeDelAggregatorTest, CompactionAggregatorEmptyIteratorRight) {
  auto tombstone_lists = MakeFragmentedTombstoneLists(
      {{{"a", "e", 10}, {"c", "g", 8}},
       {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}});
  std::vector<SequenceNumber> snapshots{9, 19};

  CompactionRangeDelAggregator aggregator(&bytewise_icmp, snapshots);
  for (size_t i = 0; i < tombstone_lists.size(); ++i) {
    std::unique_ptr<FragmentedRangeTombstoneIterator> fragment_iter(
        new FragmentedRangeTombstoneIterator(
            tombstone_lists[i].get(), bytewise_icmp, kMaxSequenceNumber));
    aggregator.AddTombstones(std::move(fragment_iter));
  }

  // The bounds are handed over as encoded internal keys; the InternalKey
  // objects must stay alive while the Slices are in use.
  InternalKey lower_key("p", 0, kTypeRangeDeletion);
  InternalKey upper_key("q", 0, kTypeRangeDeletion);
  Slice lower = lower_key.Encode();
  Slice upper = upper_key.Encode();
  auto compaction_iter = aggregator.NewIterator(&lower, &upper);
  VerifyFragmentedRangeDels(compaction_iter.get(), {});
}
// Requests a compaction iterator bounded by ("bb", 0) and ("e", 9) and
// checks the fragments it yields.
TEST_F(RangeDelAggregatorTest, CompactionAggregatorBoundedIterator) {
  auto tombstone_lists = MakeFragmentedTombstoneLists(
      {{{"a", "e", 10}, {"c", "g", 8}},
       {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}});
  std::vector<SequenceNumber> snapshots{9, 19};

  CompactionRangeDelAggregator aggregator(&bytewise_icmp, snapshots);
  for (size_t i = 0; i < tombstone_lists.size(); ++i) {
    std::unique_ptr<FragmentedRangeTombstoneIterator> fragment_iter(
        new FragmentedRangeTombstoneIterator(
            tombstone_lists[i].get(), bytewise_icmp, kMaxSequenceNumber));
    aggregator.AddTombstones(std::move(fragment_iter));
  }

  // The bounds are handed over as encoded internal keys; the InternalKey
  // objects must stay alive while the Slices are in use.
  InternalKey lower_key("bb", 0, kTypeRangeDeletion);
  InternalKey upper_key("e", 9, kTypeRangeDeletion);
  Slice lower = lower_key.Encode();
  Slice upper = upper_key.Encode();
  auto compaction_iter = aggregator.NewIterator(&lower, &upper);

  const std::vector<RangeTombstone> expected_fragments = {
      {"a", "c", 10}, {"c", "e", 10}, {"c", "e", 8}};
  VerifyFragmentedRangeDels(compaction_iter.get(), expected_fragments);
}
// Requests a compaction iterator bounded by ("bb", 0) and ("e", 0) over a
// different pair of tombstone lists and checks the fragments it yields. The
// expected fragments include start and end keys outside the requested
// bounds.
TEST_F(RangeDelAggregatorTest,
       CompactionAggregatorBoundedIteratorExtraFragments) {
  auto tombstone_lists = MakeFragmentedTombstoneLists(
      {{{"a", "d", 10}, {"c", "g", 8}},
       {{"b", "c", 20}, {"d", "f", 30}, {"h", "i", 25}, {"ii", "j", 15}}});
  std::vector<SequenceNumber> snapshots{9, 19};

  CompactionRangeDelAggregator aggregator(&bytewise_icmp, snapshots);
  for (size_t i = 0; i < tombstone_lists.size(); ++i) {
    std::unique_ptr<FragmentedRangeTombstoneIterator> fragment_iter(
        new FragmentedRangeTombstoneIterator(
            tombstone_lists[i].get(), bytewise_icmp, kMaxSequenceNumber));
    aggregator.AddTombstones(std::move(fragment_iter));
  }

  // The bounds are handed over as encoded internal keys; the InternalKey
  // objects must stay alive while the Slices are in use.
  InternalKey lower_key("bb", 0, kTypeRangeDeletion);
  InternalKey upper_key("e", 0, kTypeRangeDeletion);
  Slice lower = lower_key.Encode();
  Slice upper = upper_key.Encode();
  auto compaction_iter = aggregator.NewIterator(&lower, &upper);

  const std::vector<RangeTombstone> expected_fragments = {
      {"a", "b", 10},
      {"b", "c", 20},
      {"b", "c", 10},
      {"c", "d", 10},
      {"c", "d", 8},
      {"d", "f", 30},
      {"d", "f", 8},
      {"f", "g", 8}};
  VerifyFragmentedRangeDels(compaction_iter.get(), expected_fragments);
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack trace handler, lets GoogleTest
// consume its command-line arguments, and runs every registered test.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

@ -1,555 +0,0 @@
// Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/range_tombstone_fragmenter.h"
#include "db/db_test_util.h"
#include "db/dbformat.h"
#include "rocksdb/comparator.h"
#include "test_util/testutil.h"
#include "util/vector_iterator.h"
namespace ROCKSDB_NAMESPACE {
// Fixture for the TEST_F cases in this file. It adds no state or
// setup/teardown of its own on top of testing::Test.
class RangeTombstoneFragmenterTest : public testing::Test {};
namespace {
// Internal-key comparator wrapping BytewiseComparator(); used by the helpers
// and tests in this file wherever an InternalKeyComparator is required.
static auto bytewise_icmp = InternalKeyComparator(BytewiseComparator());
std::unique_ptr<InternalIterator> MakeRangeDelIter(
const std::vector<RangeTombstone>& range_dels) {
std::vector<std::string> keys, values;
for (const auto& range_del : range_dels) {
auto key_and_value = range_del.Serialize();
keys.push_back(key_and_value.first.Encode().ToString());
values.push_back(key_and_value.second.ToString());
}
return std::unique_ptr<VectorIterator>(
new VectorIterator(keys, values, &bytewise_icmp));
}
// Checks that `iter` currently points at `tombstone`, once through the
// generic accessors (key()/value()/seq()) and once through the
// tombstone-specific accessors (start_key()/end_key()) plus the sequence
// number embedded in key().
void CheckIterPosition(const RangeTombstone& tombstone,
                       const FragmentedRangeTombstoneIterator* iter) {
  const auto& want_start = tombstone.start_key_;
  const auto& want_end = tombstone.end_key_;
  const auto& want_seq = tombstone.seq_;

  // InternalIterator interface.
  EXPECT_EQ(want_start, ExtractUserKey(iter->key()));
  EXPECT_EQ(want_end, iter->value());
  EXPECT_EQ(want_seq, iter->seq());

  // FragmentedRangeTombstoneIterator interface.
  EXPECT_EQ(want_start, iter->start_key());
  EXPECT_EQ(want_end, iter->end_key());
  EXPECT_EQ(want_seq, GetInternalKeySeqno(iter->key()));
}
// Scans `iter` from SeekToFirst() with Next() and checks that it yields
// exactly `expected_tombstones`, in order, and is invalid afterwards.
void VerifyFragmentedRangeDels(
    FragmentedRangeTombstoneIterator* iter,
    const std::vector<RangeTombstone>& expected_tombstones) {
  iter->SeekToFirst();
  for (const RangeTombstone& expected : expected_tombstones) {
    ASSERT_TRUE(iter->Valid());
    CheckIterPosition(expected, iter);
    iter->Next();
  }
  EXPECT_FALSE(iter->Valid());
}
// Scans `iter` from SeekToTopFirst() with TopNext() and checks that it yields
// exactly `expected_tombstones`, in order, and is invalid afterwards.
void VerifyVisibleTombstones(
    FragmentedRangeTombstoneIterator* iter,
    const std::vector<RangeTombstone>& expected_tombstones) {
  iter->SeekToTopFirst();
  for (const RangeTombstone& expected : expected_tombstones) {
    ASSERT_TRUE(iter->Valid());
    CheckIterPosition(expected, iter);
    iter->TopNext();
  }
  EXPECT_FALSE(iter->Valid());
}
// One Seek()/SeekForPrev() probe and its expected outcome, consumed by
// VerifySeek() and VerifySeekForPrev(). Instances are brace-initialized
// positionally, so the member order is significant.
struct SeekTestCase {
// Key passed to Seek() or SeekForPrev().
Slice seek_target;
// Tombstone the iterator is expected to point at after the seek (not checked
// when `out_of_range` is true).
RangeTombstone expected_position;
// When true, the iterator is expected to be !Valid() after the seek.
bool out_of_range;
};
// For every case, calls iter->Seek(seek_target) and checks that the iterator
// is either invalid (out_of_range cases) or positioned on the expected
// tombstone.
void VerifySeek(FragmentedRangeTombstoneIterator* iter,
                const std::vector<SeekTestCase>& cases) {
  for (const SeekTestCase& tc : cases) {
    iter->Seek(tc.seek_target);
    if (tc.out_of_range) {
      ASSERT_FALSE(iter->Valid());
      continue;
    }
    ASSERT_TRUE(iter->Valid());
    CheckIterPosition(tc.expected_position, iter);
  }
}
// For every case, calls iter->SeekForPrev(seek_target) and checks that the
// iterator is either invalid (out_of_range cases) or positioned on the
// expected tombstone.
void VerifySeekForPrev(FragmentedRangeTombstoneIterator* iter,
                       const std::vector<SeekTestCase>& cases) {
  for (const SeekTestCase& tc : cases) {
    iter->SeekForPrev(tc.seek_target);
    if (tc.out_of_range) {
      ASSERT_FALSE(iter->Valid());
      continue;
    }
    ASSERT_TRUE(iter->Valid());
    CheckIterPosition(tc.expected_position, iter);
  }
}
// One MaxCoveringTombstoneSeqnum() probe and its expected answer, consumed by
// VerifyMaxCoveringTombstoneSeqnum().
struct MaxCoveringTombstoneSeqnumTestCase {
// Key passed to MaxCoveringTombstoneSeqnum().
Slice user_key;
// Expected return value for `user_key`.
SequenceNumber result;
};
// Checks that iter->MaxCoveringTombstoneSeqnum(user_key) returns the expected
// sequence number for every case, in the given order.
void VerifyMaxCoveringTombstoneSeqnum(
    FragmentedRangeTombstoneIterator* iter,
    const std::vector<MaxCoveringTombstoneSeqnumTestCase>& cases) {
  for (size_t i = 0; i < cases.size(); ++i) {
    const MaxCoveringTombstoneSeqnumTestCase& tc = cases[i];
    EXPECT_EQ(tc.result, iter->MaxCoveringTombstoneSeqnum(tc.user_key));
  }
}
} // anonymous namespace
// Two tombstones that do not overlap are expected to come out of the
// fragmenter unchanged.
TEST_F(RangeTombstoneFragmenterTest, NonOverlappingTombstones) {
  std::unique_ptr<InternalIterator> raw_tombstones =
      MakeRangeDelIter({{"a", "b", 10}, {"c", "d", 5}});
  FragmentedRangeTombstoneList tombstone_list(std::move(raw_tombstones),
                                              bytewise_icmp);
  FragmentedRangeTombstoneIterator fragment_iter(&tombstone_list, bytewise_icmp,
                                                 kMaxSequenceNumber);
  ASSERT_EQ(0, fragment_iter.lower_bound());
  ASSERT_EQ(kMaxSequenceNumber, fragment_iter.upper_bound());

  const std::vector<RangeTombstone> expected_fragments = {{"a", "b", 10},
                                                          {"c", "d", 5}};
  VerifyFragmentedRangeDels(&fragment_iter, expected_fragments);

  const std::vector<MaxCoveringTombstoneSeqnumTestCase> seqnum_cases = {
      {"", 0}, {"a", 10}, {"b", 0}, {"c", 5}};
  VerifyMaxCoveringTombstoneSeqnum(&fragment_iter, seqnum_cases);
}
// Two overlapping tombstones are expected to be split at "c" and "e", with
// both sequence numbers present on the shared ["c", "e") fragment.
TEST_F(RangeTombstoneFragmenterTest, OverlappingTombstones) {
  std::unique_ptr<InternalIterator> raw_tombstones =
      MakeRangeDelIter({{"a", "e", 10}, {"c", "g", 15}});
  FragmentedRangeTombstoneList tombstone_list(std::move(raw_tombstones),
                                              bytewise_icmp);
  FragmentedRangeTombstoneIterator fragment_iter(&tombstone_list, bytewise_icmp,
                                                 kMaxSequenceNumber);
  ASSERT_EQ(0, fragment_iter.lower_bound());
  ASSERT_EQ(kMaxSequenceNumber, fragment_iter.upper_bound());

  const std::vector<RangeTombstone> expected_fragments = {
      {"a", "c", 10}, {"c", "e", 15}, {"c", "e", 10}, {"e", "g", 15}};
  VerifyFragmentedRangeDels(&fragment_iter, expected_fragments);

  const std::vector<MaxCoveringTombstoneSeqnumTestCase> seqnum_cases = {
      {"a", 10}, {"c", 15}, {"e", 15}, {"g", 0}};
  VerifyMaxCoveringTombstoneSeqnum(&fragment_iter, seqnum_cases);
}
// Tombstones that only touch at their endpoints are expected to come out of
// the fragmenter unchanged.
TEST_F(RangeTombstoneFragmenterTest, ContiguousTombstones) {
  std::unique_ptr<InternalIterator> raw_tombstones = MakeRangeDelIter(
      {{"a", "c", 10}, {"c", "e", 20}, {"c", "e", 5}, {"e", "g", 15}});
  FragmentedRangeTombstoneList tombstone_list(std::move(raw_tombstones),
                                              bytewise_icmp);
  FragmentedRangeTombstoneIterator fragment_iter(&tombstone_list, bytewise_icmp,
                                                 kMaxSequenceNumber);
  ASSERT_EQ(0, fragment_iter.lower_bound());
  ASSERT_EQ(kMaxSequenceNumber, fragment_iter.upper_bound());

  const std::vector<RangeTombstone> expected_fragments = {
      {"a", "c", 10}, {"c", "e", 20}, {"c", "e", 5}, {"e", "g", 15}};
  VerifyFragmentedRangeDels(&fragment_iter, expected_fragments);

  const std::vector<MaxCoveringTombstoneSeqnumTestCase> seqnum_cases = {
      {"a", 10}, {"c", 20}, {"e", 15}, {"g", 0}};
  VerifyMaxCoveringTombstoneSeqnum(&fragment_iter, seqnum_cases);
}
// Three tombstones over the same ["a", "c") range with different sequence
// numbers are expected to be kept as three fragments.
TEST_F(RangeTombstoneFragmenterTest, RepeatedStartAndEndKey) {
  std::unique_ptr<InternalIterator> raw_tombstones =
      MakeRangeDelIter({{"a", "c", 10}, {"a", "c", 7}, {"a", "c", 3}});
  FragmentedRangeTombstoneList tombstone_list(std::move(raw_tombstones),
                                              bytewise_icmp);
  FragmentedRangeTombstoneIterator fragment_iter(&tombstone_list, bytewise_icmp,
                                                 kMaxSequenceNumber);
  ASSERT_EQ(0, fragment_iter.lower_bound());
  ASSERT_EQ(kMaxSequenceNumber, fragment_iter.upper_bound());

  const std::vector<RangeTombstone> expected_fragments = {
      {"a", "c", 10}, {"a", "c", 7}, {"a", "c", 3}};
  VerifyFragmentedRangeDels(&fragment_iter, expected_fragments);

  const std::vector<MaxCoveringTombstoneSeqnumTestCase> seqnum_cases = {
      {"a", 10}, {"b", 10}, {"c", 0}};
  VerifyMaxCoveringTombstoneSeqnum(&fragment_iter, seqnum_cases);
}
// Three tombstones sharing the start key "a" but ending at "e", "g" and "c"
// are expected to be split at "c" and "e".
TEST_F(RangeTombstoneFragmenterTest, RepeatedStartKeyDifferentEndKeys) {
  std::unique_ptr<InternalIterator> raw_tombstones =
      MakeRangeDelIter({{"a", "e", 10}, {"a", "g", 7}, {"a", "c", 3}});
  FragmentedRangeTombstoneList tombstone_list(std::move(raw_tombstones),
                                              bytewise_icmp);
  FragmentedRangeTombstoneIterator fragment_iter(&tombstone_list, bytewise_icmp,
                                                 kMaxSequenceNumber);
  ASSERT_EQ(0, fragment_iter.lower_bound());
  ASSERT_EQ(kMaxSequenceNumber, fragment_iter.upper_bound());

  const std::vector<RangeTombstone> expected_fragments = {
      {"a", "c", 10},
      {"a", "c", 7},
      {"a", "c", 3},
      {"c", "e", 10},
      {"c", "e", 7},
      {"e", "g", 7}};
  VerifyFragmentedRangeDels(&fragment_iter, expected_fragments);

  const std::vector<MaxCoveringTombstoneSeqnumTestCase> seqnum_cases = {
      {"a", 10}, {"c", 10}, {"e", 7}, {"g", 0}};
  VerifyMaxCoveringTombstoneSeqnum(&fragment_iter, seqnum_cases);
}
// Five tombstones sharing the start key "a", with end keys that are not
// ordered the same way as their sequence numbers, are expected to be split at
// "c" and "e".
TEST_F(RangeTombstoneFragmenterTest, RepeatedStartKeyMixedEndKeys) {
  std::unique_ptr<InternalIterator> raw_tombstones =
      MakeRangeDelIter({{"a", "c", 30},
                        {"a", "g", 20},
                        {"a", "e", 10},
                        {"a", "g", 7},
                        {"a", "c", 3}});
  FragmentedRangeTombstoneList tombstone_list(std::move(raw_tombstones),
                                              bytewise_icmp);
  FragmentedRangeTombstoneIterator fragment_iter(&tombstone_list, bytewise_icmp,
                                                 kMaxSequenceNumber);
  ASSERT_EQ(0, fragment_iter.lower_bound());
  ASSERT_EQ(kMaxSequenceNumber, fragment_iter.upper_bound());

  const std::vector<RangeTombstone> expected_fragments = {
      {"a", "c", 30},
      {"a", "c", 20},
      {"a", "c", 10},
      {"a", "c", 7},
      {"a", "c", 3},
      {"c", "e", 20},
      {"c", "e", 10},
      {"c", "e", 7},
      {"e", "g", 20},
      {"e", "g", 7}};
  VerifyFragmentedRangeDels(&fragment_iter, expected_fragments);

  const std::vector<MaxCoveringTombstoneSeqnumTestCase> seqnum_cases = {
      {"a", 30}, {"c", 20}, {"e", 20}, {"g", 0}};
  VerifyMaxCoveringTombstoneSeqnum(&fragment_iter, seqnum_cases);
}
// Builds one fragment list from overlapping tombstones, two of which share
// the start key "c" and two the start key "j", then inspects it through five
// iterators whose upper bounds are kMaxSequenceNumber, 9, 7, 5 and 3.
TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKey) {
auto range_del_iter = MakeRangeDelIter({{"a", "e", 10},
{"c", "g", 8},
{"c", "i", 6},
{"j", "n", 4},
{"j", "l", 2}});
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
bytewise_icmp);
FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp,
kMaxSequenceNumber);
FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp,
9 /* upper_bound */);
FragmentedRangeTombstoneIterator iter3(&fragment_list, bytewise_icmp,
7 /* upper_bound */);
FragmentedRangeTombstoneIterator iter4(&fragment_list, bytewise_icmp,
5 /* upper_bound */);
FragmentedRangeTombstoneIterator iter5(&fragment_list, bytewise_icmp,
3 /* upper_bound */);
// The same full fragment list is expected from every iterator here,
// whatever its upper bound.
for (auto* iter : {&iter1, &iter2, &iter3, &iter4, &iter5}) {
VerifyFragmentedRangeDels(iter, {{"a", "c", 10},
{"c", "e", 10},
{"c", "e", 8},
{"c", "e", 6},
{"e", "g", 8},
{"e", "g", 6},
{"g", "i", 6},
{"j", "l", 4},
{"j", "l", 2},
{"l", "n", 4}});
}
// Upper bound kMaxSequenceNumber.
ASSERT_EQ(0, iter1.lower_bound());
ASSERT_EQ(kMaxSequenceNumber, iter1.upper_bound());
VerifyVisibleTombstones(&iter1, {{"a", "c", 10},
{"c", "e", 10},
{"e", "g", 8},
{"g", "i", 6},
{"j", "l", 4},
{"l", "n", 4}});
VerifyMaxCoveringTombstoneSeqnum(
&iter1, {{"a", 10}, {"c", 10}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}});
// Upper bound 9: the entries with value 10 are no longer expected.
ASSERT_EQ(0, iter2.lower_bound());
ASSERT_EQ(9, iter2.upper_bound());
VerifyVisibleTombstones(&iter2, {{"c", "e", 8},
{"e", "g", 8},
{"g", "i", 6},
{"j", "l", 4},
{"l", "n", 4}});
VerifyMaxCoveringTombstoneSeqnum(
&iter2, {{"a", 0}, {"c", 8}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}});
// Upper bound 7: only values 6 and below are expected.
ASSERT_EQ(0, iter3.lower_bound());
ASSERT_EQ(7, iter3.upper_bound());
VerifyVisibleTombstones(&iter3, {{"c", "e", 6},
{"e", "g", 6},
{"g", "i", 6},
{"j", "l", 4},
{"l", "n", 4}});
VerifyMaxCoveringTombstoneSeqnum(
&iter3, {{"a", 0}, {"c", 6}, {"e", 6}, {"i", 0}, {"j", 4}, {"m", 4}});
// Upper bound 5: only the "j".."n" entries with value 4 are expected.
ASSERT_EQ(0, iter4.lower_bound());
ASSERT_EQ(5, iter4.upper_bound());
VerifyVisibleTombstones(&iter4, {{"j", "l", 4}, {"l", "n", 4}});
VerifyMaxCoveringTombstoneSeqnum(
&iter4, {{"a", 0}, {"c", 0}, {"e", 0}, {"i", 0}, {"j", 4}, {"m", 4}});
// Upper bound 3: only {"j", "l", 2} is expected.
ASSERT_EQ(0, iter5.lower_bound());
ASSERT_EQ(3, iter5.upper_bound());
VerifyVisibleTombstones(&iter5, {{"j", "l", 2}});
VerifyMaxCoveringTombstoneSeqnum(
&iter5, {{"a", 0}, {"c", 0}, {"e", 0}, {"i", 0}, {"j", 2}, {"m", 0}});
}
// Same five tombstones as OverlapAndRepeatedStartKey, supplied in a different
// order. The expected fragment list is the same as in that test; the iterator
// uses upper bound 9.
TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyUnordered) {
auto range_del_iter = MakeRangeDelIter({{"a", "e", 10},
{"j", "n", 4},
{"c", "i", 6},
{"c", "g", 8},
{"j", "l", 2}});
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
bytewise_icmp);
FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp,
9 /* upper_bound */);
ASSERT_EQ(0, iter.lower_bound());
ASSERT_EQ(9, iter.upper_bound());
VerifyFragmentedRangeDels(&iter, {{"a", "c", 10},
{"c", "e", 10},
{"c", "e", 8},
{"c", "e", 6},
{"e", "g", 8},
{"e", "g", 6},
{"g", "i", 6},
{"j", "l", 4},
{"j", "l", 2},
{"l", "n", 4}});
// With upper bound 9, probe "a" expects 0 and probes "c"/"e" expect 8.
VerifyMaxCoveringTombstoneSeqnum(
&iter, {{"a", 0}, {"c", 8}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}});
}
// Builds the fragment list with for_compaction = true and an empty snapshot
// list. The expected output holds a single entry per key range.
TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyForCompaction) {
auto range_del_iter = MakeRangeDelIter({{"a", "e", 10},
{"j", "n", 4},
{"c", "i", 6},
{"c", "g", 8},
{"j", "l", 2}});
FragmentedRangeTombstoneList fragment_list(
std::move(range_del_iter), bytewise_icmp, true /* for_compaction */,
{} /* snapshots */);
FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp,
kMaxSequenceNumber /* upper_bound */);
// Each range appears once, with the largest value among its inputs.
VerifyFragmentedRangeDels(&iter, {{"a", "c", 10},
{"c", "e", 10},
{"e", "g", 8},
{"g", "i", 6},
{"j", "l", 4},
{"l", "n", 4}});
}
// Same as OverlapAndRepeatedStartKeyForCompaction but with snapshots {9, 20}.
// Compared with the no-snapshot expectation, the output additionally keeps
// {"c", "e", 8}.
TEST_F(RangeTombstoneFragmenterTest,
OverlapAndRepeatedStartKeyForCompactionWithSnapshot) {
auto range_del_iter = MakeRangeDelIter({{"a", "e", 10},
{"j", "n", 4},
{"c", "i", 6},
{"c", "g", 8},
{"j", "l", 2}});
FragmentedRangeTombstoneList fragment_list(
std::move(range_del_iter), bytewise_icmp, true /* for_compaction */,
{9, 20} /* snapshots */);
FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp,
kMaxSequenceNumber /* upper_bound */);
VerifyFragmentedRangeDels(&iter, {{"a", "c", 10},
{"c", "e", 10},
{"c", "e", 8},
{"e", "g", 8},
{"g", "i", 6},
{"j", "l", 4},
{"l", "n", 4}});
}
// SplitBySnapshot() with an empty snapshot list is expected to return exactly
// one iterator, stored under kMaxSequenceNumber, with bounds [0, max].
TEST_F(RangeTombstoneFragmenterTest, IteratorSplitNoSnapshots) {
auto range_del_iter = MakeRangeDelIter({{"a", "e", 10},
{"j", "n", 4},
{"c", "i", 6},
{"c", "g", 8},
{"j", "l", 2}});
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
bytewise_icmp);
FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp,
kMaxSequenceNumber /* upper_bound */);
auto split_iters = iter.SplitBySnapshot({} /* snapshots */);
ASSERT_EQ(1, split_iters.size());
// The single split iterator is looked up by kMaxSequenceNumber.
auto* split_iter = split_iters[kMaxSequenceNumber].get();
ASSERT_EQ(0, split_iter->lower_bound());
ASSERT_EQ(kMaxSequenceNumber, split_iter->upper_bound());
VerifyVisibleTombstones(split_iter, {{"a", "c", 10},
{"c", "e", 10},
{"e", "g", 8},
{"g", "i", 6},
{"j", "l", 4},
{"l", "n", 4}});
}
// SplitBySnapshot({3, 5, 7, 9}) is expected to return five iterators stored
// under 3, 5, 7, 9 and kMaxSequenceNumber, with bounds [0,3], [4,5], [6,7],
// [8,9] and [10,max] respectively.
TEST_F(RangeTombstoneFragmenterTest, IteratorSplitWithSnapshots) {
auto range_del_iter = MakeRangeDelIter({{"a", "e", 10},
{"j", "n", 4},
{"c", "i", 6},
{"c", "g", 8},
{"j", "l", 2}});
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
bytewise_icmp);
FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp,
kMaxSequenceNumber /* upper_bound */);
auto split_iters = iter.SplitBySnapshot({3, 5, 7, 9} /* snapshots */);
ASSERT_EQ(5, split_iters.size());
// Stripe [0, 3]: only the entry with value 2.
auto* split_iter1 = split_iters[3].get();
ASSERT_EQ(0, split_iter1->lower_bound());
ASSERT_EQ(3, split_iter1->upper_bound());
VerifyVisibleTombstones(split_iter1, {{"j", "l", 2}});
// Stripe [4, 5]: the entries with value 4.
auto* split_iter2 = split_iters[5].get();
ASSERT_EQ(4, split_iter2->lower_bound());
ASSERT_EQ(5, split_iter2->upper_bound());
VerifyVisibleTombstones(split_iter2, {{"j", "l", 4}, {"l", "n", 4}});
// Stripe [6, 7]: the entries with value 6.
auto* split_iter3 = split_iters[7].get();
ASSERT_EQ(6, split_iter3->lower_bound());
ASSERT_EQ(7, split_iter3->upper_bound());
VerifyVisibleTombstones(split_iter3,
{{"c", "e", 6}, {"e", "g", 6}, {"g", "i", 6}});
// Stripe [8, 9]: the entries with value 8.
auto* split_iter4 = split_iters[9].get();
ASSERT_EQ(8, split_iter4->lower_bound());
ASSERT_EQ(9, split_iter4->upper_bound());
VerifyVisibleTombstones(split_iter4, {{"c", "e", 8}, {"e", "g", 8}});
// Stripe [10, max]: the entries with value 10.
auto* split_iter5 = split_iters[kMaxSequenceNumber].get();
ASSERT_EQ(10, split_iter5->lower_bound());
ASSERT_EQ(kMaxSequenceNumber, split_iter5->upper_bound());
VerifyVisibleTombstones(split_iter5, {{"a", "c", 10}, {"c", "e", 10}});
}
// Seeks to keys that are fragment start keys ("a", "e", "l"), once through an
// unbounded iterator and once through an iterator with upper bound 3.
TEST_F(RangeTombstoneFragmenterTest, SeekStartKey) {
// Same tombstones as OverlapAndRepeatedStartKey.
auto range_del_iter = MakeRangeDelIter({{"a", "e", 10},
{"c", "g", 8},
{"c", "i", 6},
{"j", "n", 4},
{"j", "l", 2}});
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
bytewise_icmp);
FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp,
kMaxSequenceNumber);
// Each entry pairs a seek target with the tombstone expected afterwards.
// For iter1, Seek and SeekForPrev expect the same results.
VerifySeek(
&iter1,
{{"a", {"a", "c", 10}}, {"e", {"e", "g", 8}}, {"l", {"l", "n", 4}}});
VerifySeekForPrev(
&iter1,
{{"a", {"a", "c", 10}}, {"e", {"e", "g", 8}}, {"l", {"l", "n", 4}}});
FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp,
3 /* upper_bound */);
// With upper bound 3 the only tombstone ever expected is {"j", "l", 2};
// every other seek is expected to be out of range.
VerifySeek(&iter2, {{"a", {"j", "l", 2}},
{"e", {"j", "l", 2}},
{"l", {}, true /* out of range */}});
VerifySeekForPrev(&iter2, {{"a", {}, true /* out of range */},
{"e", {}, true /* out of range */},
{"l", {"j", "l", 2}}});
}
// Seeks to keys that fall strictly inside a fragment ("b", "f", "m"), once
// through an unbounded iterator and once through one with upper bound 3.
TEST_F(RangeTombstoneFragmenterTest, SeekCovered) {
// Same tombstones as OverlapAndRepeatedStartKey.
auto range_del_iter = MakeRangeDelIter({{"a", "e", 10},
{"c", "g", 8},
{"c", "i", 6},
{"j", "n", 4},
{"j", "l", 2}});
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
bytewise_icmp);
FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp,
kMaxSequenceNumber);
// For iter1, both seek directions expect the fragment containing the target.
VerifySeek(
&iter1,
{{"b", {"a", "c", 10}}, {"f", {"e", "g", 8}}, {"m", {"l", "n", 4}}});
VerifySeekForPrev(
&iter1,
{{"b", {"a", "c", 10}}, {"f", {"e", "g", 8}}, {"m", {"l", "n", 4}}});
FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp,
3 /* upper_bound */);
// With upper bound 3 only {"j", "l", 2} is ever expected.
VerifySeek(&iter2, {{"b", {"j", "l", 2}},
{"f", {"j", "l", 2}},
{"m", {}, true /* out of range */}});
VerifySeekForPrev(&iter2, {{"b", {}, true /* out of range */},
{"f", {}, true /* out of range */},
{"m", {"j", "l", 2}}});
}
// Seeks to keys that are fragment end keys ("c", "g", "i", "n"), once through
// an unbounded iterator and once through one with upper bound 3.
TEST_F(RangeTombstoneFragmenterTest, SeekEndKey) {
// Same tombstones as OverlapAndRepeatedStartKey.
auto range_del_iter = MakeRangeDelIter({{"a", "e", 10},
{"c", "g", 8},
{"c", "i", 6},
{"j", "n", 4},
{"j", "l", 2}});
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
bytewise_icmp);
FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp,
kMaxSequenceNumber);
// Seek("i") expects the next fragment {"j", "l", 4} and Seek("n") expects
// out of range.
VerifySeek(&iter1, {{"c", {"c", "e", 10}},
{"g", {"g", "i", 6}},
{"i", {"j", "l", 4}},
{"n", {}, true /* out of range */}});
// SeekForPrev("i") and SeekForPrev("n") expect the preceding fragments.
VerifySeekForPrev(&iter1, {{"c", {"c", "e", 10}},
{"g", {"g", "i", 6}},
{"i", {"g", "i", 6}},
{"n", {"l", "n", 4}}});
FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp,
3 /* upper_bound */);
// With upper bound 3 only {"j", "l", 2} is ever expected.
VerifySeek(&iter2, {{"c", {"j", "l", 2}},
{"g", {"j", "l", 2}},
{"i", {"j", "l", 2}},
{"n", {}, true /* out of range */}});
VerifySeekForPrev(&iter2, {{"c", {}, true /* out of range */},
{"g", {}, true /* out of range */},
{"i", {}, true /* out of range */},
{"n", {"j", "l", 2}}});
}
// Seeks to "" and "z", which lie before the first and after the last
// tombstone key used in this test.
TEST_F(RangeTombstoneFragmenterTest, SeekOutOfBounds) {
// Same tombstones as OverlapAndRepeatedStartKey.
auto range_del_iter = MakeRangeDelIter({{"a", "e", 10},
{"c", "g", 8},
{"c", "i", 6},
{"j", "n", 4},
{"j", "l", 2}});
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
bytewise_icmp);
FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp,
kMaxSequenceNumber);
// Seek("") expects the first fragment; Seek("z") expects out of range.
VerifySeek(&iter, {{"", {"a", "c", 10}}, {"z", {}, true /* out of range */}});
// SeekForPrev("") expects out of range; SeekForPrev("z") expects the last
// fragment.
VerifySeekForPrev(&iter,
{{"", {}, true /* out of range */}, {"z", {"l", "n", 4}}});
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack-trace handler, hands the
// command line to GoogleTest, then runs every registered test and returns
// the resulting status to the caller.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int test_status = RUN_ALL_TESTS();
  return test_status;
}

@ -1,484 +0,0 @@
// Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "rocksdb/options.h"
#include <algorithm>
#include <string>
#include <vector>
#include "db/db_impl/db_impl.h"
#include "db/db_test_util.h"
#include "file/file_util.h"
#include "rocksdb/comparator.h"
#include "rocksdb/db.h"
#include "rocksdb/transaction_log.h"
#include "table/unique_id_impl.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
// Fixture for the RepairDB() tests. Builds on DBTestBase (database name
// "repair_test", env_do_fsync enabled) and adds helpers for locating an SST
// file, reopening with unique-id verification, and inspecting per-level file
// metadata.
class RepairTest : public DBTestBase {
 public:
  RepairTest() : DBTestBase("repair_test", /*env_do_fsync=*/true) {}

  // Writes to *first_sst_path the path (dbname_ + live-file name) of the first
  // entry returned by GetLiveFiles() that parses as a table file, or "" when
  // there is none. Returns the GetLiveFiles() status; on failure the output
  // string is left empty.
  Status GetFirstSstPath(std::string* first_sst_path) {
    assert(first_sst_path != nullptr);
    first_sst_path->clear();
    uint64_t manifest_size;
    std::vector<std::string> files;
    Status s = db_->GetLiveFiles(files, &manifest_size);
    if (s.ok()) {
      auto sst_iter =
          std::find_if(files.begin(), files.end(), [](const std::string& file) {
            uint64_t number;
            FileType type;
            bool ok = ParseFileName(file, &number, &type);
            return ok && type == kTableFile;
          });
      *first_sst_path = sst_iter == files.end() ? "" : dbname_ + *sst_iter;
    }
    return s;
  }

  // Reopens the DB with verify_sst_unique_id_in_manifest enabled and asserts
  // that the "PassedVerifyUniqueId" sync point fired at least once.
  void ReopenWithSstIdVerify() {
    static const char* const kPassedVerifyPoint =
        "BlockBasedTable::Open::PassedVerifyUniqueId";
    std::atomic_int verify_passed{0};
    SyncPoint::GetInstance()->SetCallBack(kPassedVerifyPoint, [&](void* arg) {
      // Count each verification that passed; the id handed to the sync point
      // must not be the null id.
      auto id = static_cast<UniqueId64x2*>(arg);
      assert(*id != kNullUniqueId64x2);
      verify_passed++;
    });
    SyncPoint::GetInstance()->EnableProcessing();
    auto options = CurrentOptions();
    options.verify_sst_unique_id_in_manifest = true;
    Reopen(options);
    // Tear the sync point down BEFORE asserting. ASSERT_GT returns from this
    // function on failure, and the callback captures the local
    // `verify_passed` by reference, so it must not stay registered (or
    // enabled) once this frame is gone.
    SyncPoint::GetInstance()->DisableProcessing();
    SyncPoint::GetInstance()->ClearCallBack(kPassedVerifyPoint);
    ASSERT_GT(verify_passed, 0);
  }

  // Returns a copy of the file-metadata pointers of `level` in the current
  // version of column family `cf` (default: column family 0).
  std::vector<FileMetaData*> GetLevelFileMetadatas(int level, int cf = 0) {
    VersionSet* const versions = dbfull()->GetVersionSet();
    assert(versions);
    ColumnFamilyData* const cfd =
        versions->GetColumnFamilySet()->GetColumnFamily(cf);
    assert(cfd);
    Version* const current = cfd->current();
    assert(current);
    VersionStorageInfo* const storage_info = current->storage_info();
    assert(storage_info);
    return storage_info->LevelFiles(level);
  }
};
// Writes four versions of "k1" so that, before repair, there is one L1 file
// (epoch 1), one L0 file (epoch 2) and one unflushed write. After the manifest
// is deleted and RepairDB() runs, the test expects three L0 files ordered by
// epoch number 3, 2, 1, an empty L1, and the newest value to be readable.
TEST_F(RepairTest, SortRepairedDBL0ByEpochNumber) {
Options options = CurrentOptions();
DestroyAndReopen(options);
ASSERT_OK(Put("k1", "oldest"));
ASSERT_OK(Put("k1", "older"));
ASSERT_OK(Flush());
MoveFilesToLevel(1);
ASSERT_OK(Put("k1", "old"));
ASSERT_OK(Flush());
// "new" is deliberately left unflushed.
ASSERT_OK(Put("k1", "new"));
std::vector<FileMetaData*> level0_files = GetLevelFileMetadatas(0 /* level*/);
ASSERT_EQ(level0_files.size(), 1);
ASSERT_EQ(level0_files[0]->epoch_number, 2);
std::vector<FileMetaData*> level1_files = GetLevelFileMetadatas(1 /* level*/);
ASSERT_EQ(level1_files.size(), 1);
ASSERT_EQ(level1_files[0]->epoch_number, 1);
// Capture the manifest path while the DB is open; delete it after Close().
std::string manifest_path =
DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo());
Close();
ASSERT_OK(env_->FileExists(manifest_path));
ASSERT_OK(env_->DeleteFile(manifest_path));
ASSERT_OK(RepairDB(dbname_, CurrentOptions()));
ReopenWithSstIdVerify();
EXPECT_EQ(Get("k1"), "new");
// After repair every file is expected in L0, newest epoch first.
level0_files = GetLevelFileMetadatas(0 /* level*/);
ASSERT_EQ(level0_files.size(), 3);
EXPECT_EQ(level0_files[0]->epoch_number, 3);
EXPECT_EQ(level0_files[1]->epoch_number, 2);
EXPECT_EQ(level0_files[2]->epoch_number, 1);
level1_files = GetLevelFileMetadatas(1 /* level*/);
ASSERT_EQ(level1_files.size(), 0);
}
// Deletes the manifest of a DB holding two flushed keys and expects
// RepairDB() to make both keys readable again.
TEST_F(RepairTest, LostManifest) {
// Add a couple SST files, delete the manifest, and verify RepairDB() saves
// the day.
ASSERT_OK(Put("key", "val"));
ASSERT_OK(Flush());
ASSERT_OK(Put("key2", "val2"));
ASSERT_OK(Flush());
// Need to get path before Close() deletes db_, but delete it after Close() to
// ensure Close() didn't change the manifest.
std::string manifest_path =
DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo());
Close();
ASSERT_OK(env_->FileExists(manifest_path));
ASSERT_OK(env_->DeleteFile(manifest_path));
ASSERT_OK(RepairDB(dbname_, CurrentOptions()));
ReopenWithSstIdVerify();
ASSERT_EQ(Get("key"), "val");
ASSERT_EQ(Get("key2"), "val2");
}
// Like LostManifest, but the second flush follows only a DeleteRange() call
// from "key2" to "key3z". After repair, "key2" and "key3" must read as
// NOT_FOUND while "key" and "key4" keep their values.
TEST_F(RepairTest, LostManifestMoreDbFeatures) {
// Write four keys into one SST file, flush a range deletion on top of them,
// delete the manifest, and verify RepairDB() restores the resulting state.
ASSERT_OK(Put("key", "val"));
ASSERT_OK(Put("key2", "val2"));
ASSERT_OK(Put("key3", "val3"));
ASSERT_OK(Put("key4", "val4"));
ASSERT_OK(Flush());
// Test an SST file containing only a range tombstone
ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "key2",
"key3z"));
ASSERT_OK(Flush());
// Need to get path before Close() deletes db_, but delete it after Close() to
// ensure Close() didn't change the manifest.
std::string manifest_path =
DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo());
Close();
ASSERT_OK(env_->FileExists(manifest_path));
ASSERT_OK(env_->DeleteFile(manifest_path));
ASSERT_OK(RepairDB(dbname_, CurrentOptions()));
// repair from sst should work with unique_id verification
ReopenWithSstIdVerify();
ASSERT_EQ(Get("key"), "val");
ASSERT_EQ(Get("key2"), "NOT_FOUND");
ASSERT_EQ(Get("key3"), "NOT_FOUND");
ASSERT_EQ(Get("key4"), "val4");
}
// Overwrites the manifest with the bytes "blah" and expects RepairDB() to
// make both flushed keys readable again.
TEST_F(RepairTest, CorruptManifest) {
// Manifest is in an invalid format. Expect a full recovery.
ASSERT_OK(Put("key", "val"));
ASSERT_OK(Flush());
ASSERT_OK(Put("key2", "val2"));
ASSERT_OK(Flush());
// Need to get path before Close() deletes db_, but overwrite it after Close()
// to ensure Close() didn't change the manifest.
std::string manifest_path =
DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo());
Close();
ASSERT_OK(env_->FileExists(manifest_path));
// Replace the manifest contents with a short invalid payload.
ASSERT_OK(CreateFile(env_->GetFileSystem(), manifest_path, "blah",
false /* use_fsync */));
ASSERT_OK(RepairDB(dbname_, CurrentOptions()));
ReopenWithSstIdVerify();
ASSERT_EQ(Get("key"), "val");
ASSERT_EQ(Get("key2"), "val2");
}
// Saves a copy of the manifest after the first flush, writes a second SST
// file, then puts the saved copy in place of the current manifest. RepairDB()
// is expected to make both keys readable.
TEST_F(RepairTest, IncompleteManifest) {
// In this case, the manifest is valid but does not reference all of the SST
// files. Expect a full recovery.
ASSERT_OK(Put("key", "val"));
ASSERT_OK(Flush());
std::string orig_manifest_path =
DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo());
// NOTE(review): no result of CopyFile() is checked here or below; confirm
// that the helper reports failures itself.
CopyFile(orig_manifest_path, orig_manifest_path + ".tmp");
ASSERT_OK(Put("key2", "val2"));
ASSERT_OK(Flush());
// Need to get path before Close() deletes db_, but overwrite it after Close()
// to ensure Close() didn't change the manifest.
std::string new_manifest_path =
DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo());
Close();
ASSERT_OK(env_->FileExists(new_manifest_path));
// Replace the manifest with one that is only aware of the first SST file.
CopyFile(orig_manifest_path + ".tmp", new_manifest_path);
ASSERT_OK(RepairDB(dbname_, CurrentOptions()));
ReopenWithSstIdVerify();
ASSERT_EQ(Get("key"), "val");
ASSERT_EQ(Get("key2"), "val2");
}
// Checks that the next-file-number counter does not move backwards across a
// RepairDB() run, so files created after the repair cannot reuse a number
// that was handed out before it.
TEST_F(RepairTest, PostRepairSstFileNumbering) {
  // Produce two SST files.
  ASSERT_OK(Put("key", "val"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("key2", "val2"));
  ASSERT_OK(Flush());

  // Sample the counter while the DB is still open.
  const uint64_t next_file_no_before = dbfull()->TEST_Current_Next_FileNo();

  Close();
  ASSERT_OK(RepairDB(dbname_, CurrentOptions()));
  ReopenWithSstIdVerify();

  // Sample it again on the repaired DB and compare.
  const uint64_t next_file_no_after = dbfull()->TEST_Current_Next_FileNo();
  ASSERT_GE(next_file_no_after, next_file_no_before);
}
// Removes one SST file while leaving the manifest that references it in
// place, repairs, and expects exactly one of the two written keys to remain
// readable.
TEST_F(RepairTest, LostSst) {
  // Two keys, each flushed into its own SST file.
  ASSERT_OK(Put("key", "val"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("key2", "val2"));
  ASSERT_OK(Flush());

  // Delete the first table file reported as live.
  std::string victim_path;
  ASSERT_OK(GetFirstSstPath(&victim_path));
  ASSERT_FALSE(victim_path.empty());
  ASSERT_OK(env_->DeleteFile(victim_path));

  Close();
  ASSERT_OK(RepairDB(dbname_, CurrentOptions()));
  ReopenWithSstIdVerify();

  // One key must have survived and the other must be gone.
  const bool first_key_present = (Get("key") == "val");
  const bool second_key_present = (Get("key2") == "val2");
  ASSERT_TRUE(first_key_present != second_key_present);
}
// Overwrites one SST file with the bytes "blah" while leaving the manifest
// that references it in place, repairs, and expects exactly one of the two
// written keys to remain readable.
TEST_F(RepairTest, CorruptSst) {
  // Two keys, each flushed into its own SST file.
  ASSERT_OK(Put("key", "val"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("key2", "val2"));
  ASSERT_OK(Flush());

  // Clobber the first table file reported as live.
  std::string victim_path;
  ASSERT_OK(GetFirstSstPath(&victim_path));
  ASSERT_FALSE(victim_path.empty());
  ASSERT_OK(CreateFile(env_->GetFileSystem(), victim_path, "blah",
                       false /* use_fsync */));

  Close();
  ASSERT_OK(RepairDB(dbname_, CurrentOptions()));
  ReopenWithSstIdVerify();

  // One key must have survived and the other must be gone.
  const bool first_key_present = (Get("key") == "val");
  const bool second_key_present = (Get("key2") == "val2");
  ASSERT_TRUE(first_key_present != second_key_present);
}
// Repairs a DB whose only write was never flushed. Before repair the test
// expects one WAL file and zero bytes of SST data; after repair it expects no
// WAL files, a non-zero SST size, and the key to be readable.
TEST_F(RepairTest, UnflushedSst) {
// This test case invokes repair while some data is unflushed, then verifies
// that data is in the db.
ASSERT_OK(Put("key", "val"));
VectorLogPtr wal_files;
ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files));
ASSERT_EQ(wal_files.size(), 1);
{
// No table files should exist yet.
uint64_t total_ssts_size;
std::unordered_map<std::string, uint64_t> sst_files;
ASSERT_OK(GetAllDataFiles(kTableFile, &sst_files, &total_ssts_size));
ASSERT_EQ(total_ssts_size, 0);
}
// Need to get path before Close() deletes db_, but delete it after Close() to
// ensure Close() didn't change the manifest.
std::string manifest_path =
DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo());
Close();
ASSERT_OK(env_->FileExists(manifest_path));
ASSERT_OK(env_->DeleteFile(manifest_path));
ASSERT_OK(RepairDB(dbname_, CurrentOptions()));
ReopenWithSstIdVerify();
// After repair the WAL list is expected to be empty.
ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files));
ASSERT_EQ(wal_files.size(), 0);
{
// The data is now expected to live in table files.
uint64_t total_ssts_size;
std::unordered_map<std::string, uint64_t> sst_files;
ASSERT_OK(GetAllDataFiles(kTableFile, &sst_files, &total_ssts_size));
ASSERT_GT(total_ssts_size, 0);
}
ASSERT_EQ(Get("key"), "val");
}
// Runs the unflushed-data repair scenario once per configuration produced by
// ChangeWalOptions(): two unflushed keys, manifest deleted, RepairDB() called
// with the same options the DB was opened with. Afterwards it expects no WAL
// files, a non-zero SST size, and both keys to be readable.
TEST_F(RepairTest, SeparateWalDir) {
do {
Options options = CurrentOptions();
DestroyAndReopen(options);
ASSERT_OK(Put("key", "val"));
ASSERT_OK(Put("foo", "bar"));
VectorLogPtr wal_files;
ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files));
ASSERT_EQ(wal_files.size(), 1);
{
// Nothing has been flushed, so no table data is expected.
uint64_t total_ssts_size;
std::unordered_map<std::string, uint64_t> sst_files;
ASSERT_OK(GetAllDataFiles(kTableFile, &sst_files, &total_ssts_size));
ASSERT_EQ(total_ssts_size, 0);
}
std::string manifest_path =
DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo());
Close();
ASSERT_OK(env_->FileExists(manifest_path));
ASSERT_OK(env_->DeleteFile(manifest_path));
ASSERT_OK(RepairDB(dbname_, options));
// make sure that all WALs are converted to SSTables.
// NOTE(review): this assignment only modifies the local `options`, which is
// not read again in this iteration; ReopenWithSstIdVerify() builds its own
// options from CurrentOptions(). Confirm whether the reopen was meant to
// use an empty wal_dir.
options.wal_dir = "";
ReopenWithSstIdVerify();
ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files));
ASSERT_EQ(wal_files.size(), 0);
{
uint64_t total_ssts_size;
std::unordered_map<std::string, uint64_t> sst_files;
ASSERT_OK(GetAllDataFiles(kTableFile, &sst_files, &total_ssts_size));
ASSERT_GT(total_ssts_size, 0);
}
ASSERT_EQ(Get("key"), "val");
ASSERT_EQ(Get("foo"), "bar");
} while (ChangeWalOptions());
}
// Populates three column families (the default one plus "pikachu1" and
// "pikachu2"), deletes the manifest, repairs, and checks that every key is
// still readable from the column family it was written to.
TEST_F(RepairTest, RepairMultipleColumnFamilies) {
  const int kNumCfs = 3;
  const int kEntriesPerCf = 2;

  DestroyAndReopen(CurrentOptions());
  CreateAndReopenWithCF({"pikachu1", "pikachu2"}, CurrentOptions());

  for (int cf = 0; cf < kNumCfs; ++cf) {
    for (int entry = 0; entry < kEntriesPerCf; ++entry) {
      ASSERT_OK(Put(cf, "key" + std::to_string(entry),
                    "val" + std::to_string(entry)));
      // The very last entry of the very last column family stays unflushed so
      // the repair also has to attribute a WAL entry to the right column
      // family. Everything else is flushed right away.
      const bool is_final_entry =
          (entry == kEntriesPerCf - 1) && (cf == kNumCfs - 1);
      if (!is_final_entry) {
        ASSERT_OK(Flush(cf));
      }
    }
  }

  // The manifest path has to be read while db_ is alive, but the file is
  // removed only after Close() so that Close() cannot re-create it.
  const std::string manifest_file =
      DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo());
  Close();
  ASSERT_OK(env_->FileExists(manifest_file));
  ASSERT_OK(env_->DeleteFile(manifest_file));

  ASSERT_OK(RepairDB(dbname_, CurrentOptions()));
  ReopenWithColumnFamilies({"default", "pikachu1", "pikachu2"},
                           CurrentOptions());

  for (int cf = 0; cf < kNumCfs; ++cf) {
    for (int entry = 0; entry < kEntriesPerCf; ++entry) {
      ASSERT_EQ(Get(cf, "key" + std::to_string(entry)),
                "val" + std::to_string(entry));
    }
  }
}
// Repairs a DB whose two column families use different comparators
// ("default": bytewise, "reverse": reverse bytewise). The repair is run twice:
// first with explicit options for both column families, then with the
// "reverse" options supplied only through the unknown_cf_opts argument.
TEST_F(RepairTest, RepairColumnFamilyOptions) {
// Verify repair logic uses correct ColumnFamilyOptions when repairing a
// database with different options for column families.
const int kNumCfs = 2;
const int kEntriesPerCf = 2;
Options opts(CurrentOptions()), rev_opts(CurrentOptions());
opts.comparator = BytewiseComparator();
rev_opts.comparator = ReverseBytewiseComparator();
DestroyAndReopen(opts);
CreateColumnFamilies({"reverse"}, rev_opts);
ReopenWithColumnFamilies({"default", "reverse"},
std::vector<Options>{opts, rev_opts});
for (int i = 0; i < kNumCfs; ++i) {
for (int j = 0; j < kEntriesPerCf; ++j) {
ASSERT_OK(Put(i, "key" + std::to_string(j), "val" + std::to_string(j)));
if (i == kNumCfs - 1 && j == kEntriesPerCf - 1) {
// Leave one unflushed so we can verify RepairDB's flush logic
continue;
}
ASSERT_OK(Flush(i));
}
}
Close();
// RepairDB() records the comparator in the manifest, and DB::Open would fail
// if a different comparator were used.
ASSERT_OK(RepairDB(dbname_, opts, {{"default", opts}, {"reverse", rev_opts}},
opts /* unknown_cf_opts */));
ASSERT_OK(TryReopenWithColumnFamilies({"default", "reverse"},
std::vector<Options>{opts, rev_opts}));
for (int i = 0; i < kNumCfs; ++i) {
for (int j = 0; j < kEntriesPerCf; ++j) {
ASSERT_EQ(Get(i, "key" + std::to_string(j)), "val" + std::to_string(j));
}
}
// Examine table properties to verify RepairDB() used the right options when
// converting WAL->SST
TablePropertiesCollection fname_to_props;
ASSERT_OK(db_->GetPropertiesOfAllTables(handles_[1], &fname_to_props));
// Two tables are expected for the "reverse" column family, and each must
// record the reverse comparator's name.
ASSERT_EQ(fname_to_props.size(), 2U);
for (const auto& fname_and_props : fname_to_props) {
std::string comparator_name(rev_opts.comparator->Name());
ASSERT_EQ(comparator_name, fname_and_props.second->comparator_name);
}
Close();
// Also check comparator when it's provided via "unknown" CF options
ASSERT_OK(RepairDB(dbname_, opts, {{"default", opts}},
rev_opts /* unknown_cf_opts */));
ASSERT_OK(TryReopenWithColumnFamilies({"default", "reverse"},
std::vector<Options>{opts, rev_opts}));
for (int i = 0; i < kNumCfs; ++i) {
for (int j = 0; j < kEntriesPerCf; ++j) {
ASSERT_EQ(Get(i, "key" + std::to_string(j)), "val" + std::to_string(j));
}
}
}
// Runs RepairDB() on the database path with a trailing "/" appended and
// checks that the flushed key is still readable afterwards. The test skips
// itself when the Env reports AreFilesSame() as not supported.
TEST_F(RepairTest, DbNameContainsTrailingSlash) {
  // Probe for AreFilesSame() support; the output flag itself is not used.
  bool ignored_same = false;
  const bool are_files_same_unsupported =
      env_->AreFilesSame("", "", &ignored_same).IsNotSupported();
  if (are_files_same_unsupported) {
    fprintf(stderr,
            "skipping RepairTest.DbNameContainsTrailingSlash due to "
            "unsupported Env::AreFilesSame\n");
    return;
  }

  ASSERT_OK(Put("key", "val"));
  ASSERT_OK(Flush());
  Close();

  const std::string dbname_with_slash = dbname_ + "/";
  ASSERT_OK(RepairDB(dbname_with_slash, CurrentOptions()));

  ReopenWithSstIdVerify();
  ASSERT_EQ(Get("key"), "val");
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack-trace handler, hands the
// command line to GoogleTest, then runs every registered test and returns
// the resulting status to the caller.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int test_status = RUN_ALL_TESTS();
  return test_status;
}

@ -1,994 +0,0 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
//
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/db_test_util.h"
#include "db/periodic_task_scheduler.h"
#include "db/seqno_to_time_mapping.h"
#include "port/stack_trace.h"
#include "rocksdb/iostats_context.h"
#include "rocksdb/utilities/debug.h"
#include "test_util/mock_time_env.h"
namespace ROCKSDB_NAMESPACE {
// Fixture for the seqno-to-time tests. It wraps the base Env's system clock
// in a MockSystemClock and exposes a CompositeEnvWrapper built on top of it,
// so tests can advance time explicitly.
class SeqnoTimeTest : public DBTestBase {
 public:
  SeqnoTimeTest() : DBTestBase("seqno_time_test", /*env_do_fsync=*/false) {
    // Assigned in the body (clock first, then the env that uses it) rather
    // than in the initializer list, because mock_env_ is declared before
    // mock_clock_.
    mock_clock_ = std::make_shared<MockSystemClock>(env_->GetSystemClock());
    mock_env_ = std::make_unique<CompositeEnvWrapper>(env_, mock_clock_);
  }

 protected:
  std::unique_ptr<Env> mock_env_;
  std::shared_ptr<MockSystemClock> mock_clock_;

  // Installs the timed-wait fix callback on the mock clock and registers a
  // sync-point callback that points the periodic task scheduler's timer at
  // the mock clock.
  void SetUp() override {
    mock_clock_->InstallTimedWaitFixCallback();
    SyncPoint::GetInstance()->SetCallBack(
        "DBImpl::StartPeriodicTaskScheduler:Init", [&](void* arg) {
          auto* scheduler = reinterpret_cast<PeriodicTaskScheduler*>(arg);
          scheduler->TEST_OverrideTimer(mock_clock_.get());
        });
  }

  // Reads Key(key_id) and checks, through the IO stats counters, which
  // temperature the read was served from. The file must not be in cache,
  // otherwise the read produces no IO info.
  void AssertKeyTemperature(int key_id, Temperature expected_temperature) {
    get_iostats_context()->Reset();
    IOStatsContext* iostats = get_iostats_context();

    std::string result = Get(Key(key_id));
    ASSERT_FALSE(result.empty());
    ASSERT_GT(iostats->bytes_read, 0);

    const auto& stats_by_temp = iostats->file_io_stats_by_temperature;
    if (expected_temperature == Temperature::kUnknown) {
      // No cold-file reads may have been recorded.
      ASSERT_EQ(stats_by_temp.cold_file_read_count, 0);
      ASSERT_EQ(stats_by_temp.cold_file_bytes_read, 0);
    } else if (expected_temperature == Temperature::kCold) {
      // The read must have hit a cold file.
      ASSERT_GT(stats_by_temp.cold_file_read_count, 0);
      ASSERT_GT(stats_by_temp.cold_file_bytes_read, 0);
    } else {
      // Only kCold is supported as the bottommost temperature in this test.
      FAIL();
    }
  }
};
// Universal-compaction scenario with preclude_last_level_data_seconds = 10000
// and bottommost_temperature = kCold, driven by the mock clock. Keys are
// written while the mock clock advances kKeyPerSec seconds per key; the test
// checks that data starts out entirely non-cold, becomes a mix of non-cold and
// cold, and that the cold share grows as more mock time passes.
TEST_F(SeqnoTimeTest, TemperatureBasicUniversal) {
const int kNumTrigger = 4;
const int kNumLevels = 7;
const int kNumKeys = 100;
const int kKeyPerSec = 10;
Options options = CurrentOptions();
options.compaction_style = kCompactionStyleUniversal;
options.preclude_last_level_data_seconds = 10000;
options.env = mock_env_.get();
options.bottommost_temperature = Temperature::kCold;
options.num_levels = kNumLevels;
DestroyAndReopen(options);
// Let some time pass first; otherwise the write time of the first few keys
// would be zero, and internally zero has a special meaning: kUnknownSeqnoTime
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(kKeyPerSec)); });
int sst_num = 0;
// Write overlapping files, enough of them to trigger compaction
for (; sst_num < kNumTrigger; sst_num++) {
for (int i = 0; i < kNumKeys; i++) {
ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value"));
dbfull()->TEST_WaitForPeriodicTaskRun([&] {
mock_clock_->MockSleepForSeconds(static_cast<int>(kKeyPerSec));
});
}
ASSERT_OK(Flush());
}
ASSERT_OK(dbfull()->WaitForCompact(true));
// All data is hot, only output to penultimate level
ASSERT_EQ("0,0,0,0,0,1", FilesPerLevel());
ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0);
ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0);
// read a random key, which should be hot (kUnknown)
AssertKeyTemperature(20, Temperature::kUnknown);
// Write more data, but still all hot until the 10th SST, as:
// write a key every 10 seconds, 100 keys per SST, each SST takes 1000 seconds
// The preclude_last_level_data_seconds is 10k
for (; sst_num < kNumTrigger * 2; sst_num++) {
for (int i = 0; i < kNumKeys; i++) {
ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value"));
dbfull()->TEST_WaitForPeriodicTaskRun([&] {
mock_clock_->MockSleepForSeconds(static_cast<int>(kKeyPerSec));
});
}
ASSERT_OK(Flush());
ASSERT_OK(dbfull()->WaitForCompact(true));
ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0);
ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0);
}
// Now we have both hot data and cold data
for (; sst_num < kNumTrigger * 3; sst_num++) {
for (int i = 0; i < kNumKeys; i++) {
ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value"));
dbfull()->TEST_WaitForPeriodicTaskRun([&] {
mock_clock_->MockSleepForSeconds(static_cast<int>(kKeyPerSec));
});
}
ASSERT_OK(Flush());
ASSERT_OK(dbfull()->WaitForCompact(true));
}
// Force a full compaction, including the bottommost level.
CompactRangeOptions cro;
cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
uint64_t hot_data_size = GetSstSizeHelper(Temperature::kUnknown);
uint64_t cold_data_size = GetSstSizeHelper(Temperature::kCold);
ASSERT_GT(hot_data_size, 0);
ASSERT_GT(cold_data_size, 0);
// the first few keys should be cold
AssertKeyTemperature(20, Temperature::kCold);
// Advance mock time in steps of 20 * kKeyPerSec seconds and re-compact; the
// hot/cold boundary is expected to move forward by 20 keys per step.
for (int i = 0; i < 30; i++) {
dbfull()->TEST_WaitForPeriodicTaskRun([&] {
mock_clock_->MockSleepForSeconds(static_cast<int>(20 * kKeyPerSec));
});
ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
// the hot/cold data cut off range should be between i * 20 + 200 -> 250
AssertKeyTemperature(i * 20 + 250, Temperature::kUnknown);
AssertKeyTemperature(i * 20 + 200, Temperature::kCold);
}
ASSERT_LT(GetSstSizeHelper(Temperature::kUnknown), hot_data_size);
ASSERT_GT(GetSstSizeHelper(Temperature::kCold), cold_data_size);
// Wait again; most of the data should be cold after that.
// It may not all be cold, because if no new data is written to an SST,
// the compaction does not get a new seqno->time sample to decide the time of
// the last few keys.
for (int i = 0; i < 5; i++) {
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(1000)); });
ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
}
// any random data close to the end should be cold
AssertKeyTemperature(1000, Temperature::kCold);
// Close explicitly: options.env points at mock_env_, which is a member of
// this fixture and is therefore destroyed before the DBTestBase base-class
// destructor runs (presumably that is where the DB would otherwise be
// closed; confirm).
Close();
}
// Exercises hot/cold placement with preclude_last_level_data_seconds = 10000
// and bottommost_temperature = kCold, with auto compactions disabled:
//  1. data written recently stays out of the cold tier after a forced
//     CompactRange,
//  2. MoveFilesToLevel(6) splits the data into kUnknown (hot) and kCold files,
//  3. as the mock clock advances, repeated CompactRange calls shrink the hot
//     size and grow the cold size.
// Every step advances the mock clock through TEST_WaitForPeriodicTaskRun, so
// the statement order matters.
TEST_F(SeqnoTimeTest, TemperatureBasicLevel) {
const int kNumLevels = 7;
const int kNumKeys = 100;
Options options = CurrentOptions();
options.preclude_last_level_data_seconds = 10000;
options.env = mock_env_.get();
options.bottommost_temperature = Temperature::kCold;
options.num_levels = kNumLevels;
options.level_compaction_dynamic_level_bytes = true;
// TODO(zjay): for level compaction, auto-compaction may get stuck in an
// endless loop if the penultimate level score > 1 but the hot data is not yet
// cold enough to compact to the last level, which keeps triggering compaction.
options.disable_auto_compactions = true;
DestroyAndReopen(options);
// Pass some time first, otherwise the write time of the first few keys would
// be zero, and internally zero has a special meaning: kUnknownSeqnoTime
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(10)); });
int sst_num = 0;
// Write 4 files with overlapping key ranges, one key every 10 mock seconds
for (; sst_num < 4; sst_num++) {
for (int i = 0; i < kNumKeys; i++) {
ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value"));
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(10)); });
}
ASSERT_OK(Flush());
}
CompactRangeOptions cro;
cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
// All data is hot, only output to penultimate level
ASSERT_EQ("0,0,0,0,0,1", FilesPerLevel());
ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0);
ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0);
// read a random key, which should be hot (kUnknown)
AssertKeyTemperature(20, Temperature::kUnknown);
// Add more data (10 more files) to get a mix of hot and cold data
for (; sst_num < 14; sst_num++) {
for (int i = 0; i < kNumKeys; i++) {
ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value"));
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(10)); });
}
ASSERT_OK(Flush());
}
ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
// The manual compaction alone still produces no cold file
ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0);
ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0);
// Compact the files to the last level which should split the hot/cold data
MoveFilesToLevel(6);
uint64_t hot_data_size = GetSstSizeHelper(Temperature::kUnknown);
uint64_t cold_data_size = GetSstSizeHelper(Temperature::kCold);
ASSERT_GT(hot_data_size, 0);
ASSERT_GT(cold_data_size, 0);
// the first few keys should be cold
AssertKeyTemperature(20, Temperature::kCold);
// Wait some time; with each wait the cold data is increasing and the hot data
// is decreasing
for (int i = 0; i < 30; i++) {
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(200)); });
ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
// Keep the previous sizes so this round can be compared against the last one
uint64_t pre_hot = hot_data_size;
uint64_t pre_cold = cold_data_size;
hot_data_size = GetSstSizeHelper(Temperature::kUnknown);
cold_data_size = GetSstSizeHelper(Temperature::kCold);
ASSERT_LT(hot_data_size, pre_hot);
ASSERT_GT(cold_data_size, pre_cold);
// the hot/cold cut_off key should be around i * 20 + 400 -> 450
AssertKeyTemperature(i * 20 + 450, Temperature::kUnknown);
AssertKeyTemperature(i * 20 + 400, Temperature::kCold);
}
// Wait again; most of the data should be cold after that.
// Hot data might not be empty, because if we don't write new data, there's
// no seqno->time sampling available to the compaction
for (int i = 0; i < 5; i++) {
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(1000)); });
ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
}
// any random data close to the end should be cold
AssertKeyTemperature(1000, Temperature::kCold);
Close();
}
// Test parameter selecting which option(s) SetTrackTimeDurationOptions() uses
// to turn on seqno->time tracking.
enum class SeqnoTimeTestType : char {
// only preserve_internal_time_seconds is set
kTrackInternalTimeSeconds = 0,
// only preclude_last_level_data_seconds is set
kPrecludeLastLevel = 1,
// both are set, preserve_internal_time_seconds to 1/10 of the duration
kBothSetTrackSmaller = 2,
};
// Value-parameterized flavour of SeqnoTimeTest. The parameter decides which of
// the two time-tracking options a test configures.
class SeqnoTimeTablePropTest
    : public SeqnoTimeTest,
      public ::testing::WithParamInterface<SeqnoTimeTestType> {
 public:
  SeqnoTimeTablePropTest() : SeqnoTimeTest() {}

  // Writes `track_time_duration` into `options` according to the current test
  // parameter. Setting either option enables the time tracking feature; the
  // option that is not selected is explicitly reset to 0.
  void SetTrackTimeDurationOptions(uint64_t track_time_duration,
                                   Options& options) const {
    const SeqnoTimeTestType mode = GetParam();
    if (mode == SeqnoTimeTestType::kTrackInternalTimeSeconds) {
      options.preserve_internal_time_seconds = track_time_duration;
      options.preclude_last_level_data_seconds = 0;
    } else if (mode == SeqnoTimeTestType::kPrecludeLastLevel) {
      options.preserve_internal_time_seconds = 0;
      options.preclude_last_level_data_seconds = track_time_duration;
    } else if (mode == SeqnoTimeTestType::kBothSetTrackSmaller) {
      // The preserve window is deliberately the smaller of the two.
      options.preserve_internal_time_seconds = track_time_duration / 10;
      options.preclude_last_level_data_seconds = track_time_duration;
    }
  }
};
// Run every TEST_P(SeqnoTimeTablePropTest, ...) once per SeqnoTimeTestType
// value.
INSTANTIATE_TEST_CASE_P(
SeqnoTimeTablePropTest, SeqnoTimeTablePropTest,
::testing::Values(SeqnoTimeTestType::kTrackInternalTimeSeconds,
SeqnoTimeTestType::kPrecludeLastLevel,
SeqnoTimeTestType::kBothSetTrackSmaller));
// Writes four batches of 200 keys at different mock-time intervals (10s, 1s,
// 200s, 100s), flushing after each batch, and checks the seqno->time mapping
// stored in the table properties of each newly created file: the number of
// entries and the approximate time reported for each sequence number.
// Finally re-enables auto compaction and checks the mapping of the compaction
// output. `checked_file_nums` remembers files already inspected so each step
// can locate the new file.
TEST_P(SeqnoTimeTablePropTest, BasicSeqnoToTimeMapping) {
Options options = CurrentOptions();
SetTrackTimeDurationOptions(10000, options);
options.env = mock_env_.get();
options.disable_auto_compactions = true;
DestroyAndReopen(options);
std::set<uint64_t> checked_file_nums;
SequenceNumber start_seq = dbfull()->GetLatestSequenceNumber();
// Write a key every 10 seconds
for (int i = 0; i < 200; i++) {
ASSERT_OK(Put(Key(i), "value"));
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(10)); });
}
ASSERT_OK(Flush());
TablePropertiesCollection tables_props;
ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
ASSERT_EQ(tables_props.size(), 1);
auto it = tables_props.begin();
SeqnoToTimeMapping tp_mapping;
ASSERT_OK(tp_mapping.Add(it->second->seqno_to_time_mapping));
ASSERT_OK(tp_mapping.Sort());
ASSERT_FALSE(tp_mapping.Empty());
auto seqs = tp_mapping.TEST_GetInternalMapping();
// about ~20 seqs->time entries, because the sample rate is 10000/100, and it
// passes 2k time.
ASSERT_GE(seqs.size(), 19);
ASSERT_LE(seqs.size(), 21);
SequenceNumber seq_end = dbfull()->GetLatestSequenceNumber();
// The first 10 sequence numbers only get an upper bound check
for (auto i = start_seq; i < start_seq + 10; i++) {
ASSERT_LE(tp_mapping.GetOldestApproximateTime(i), (i + 1) * 10);
}
start_seq += 10;
for (auto i = start_seq; i < seq_end; i++) {
// The result is within the range
ASSERT_GE(tp_mapping.GetOldestApproximateTime(i), (i - 10) * 10);
ASSERT_LE(tp_mapping.GetOldestApproximateTime(i), (i + 10) * 10);
}
checked_file_nums.insert(it->second->orig_file_number);
start_seq = seq_end;
// Write a key every 1 seconds
for (int i = 0; i < 200; i++) {
ASSERT_OK(Put(Key(i + 190), "value"));
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(1)); });
}
seq_end = dbfull()->GetLatestSequenceNumber();
ASSERT_OK(Flush());
tables_props.clear();
ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
ASSERT_EQ(tables_props.size(), 2);
// Locate the file that has not been checked yet
it = tables_props.begin();
while (it != tables_props.end()) {
if (!checked_file_nums.count(it->second->orig_file_number)) {
break;
}
it++;
}
ASSERT_TRUE(it != tables_props.end());
tp_mapping.Clear();
ASSERT_OK(tp_mapping.Add(it->second->seqno_to_time_mapping));
ASSERT_OK(tp_mapping.Sort());
seqs = tp_mapping.TEST_GetInternalMapping();
// There are only a few time samples
ASSERT_GE(seqs.size(), 1);
ASSERT_LE(seqs.size(), 3);
for (auto i = start_seq; i < seq_end; i++) {
// The result is not very accurate, as there is more data written within a
// small range of time
ASSERT_GE(tp_mapping.GetOldestApproximateTime(i), (i - start_seq) + 1000);
ASSERT_LE(tp_mapping.GetOldestApproximateTime(i), (i - start_seq) + 3000);
}
checked_file_nums.insert(it->second->orig_file_number);
start_seq = seq_end;
// Write a key every 200 seconds
for (int i = 0; i < 200; i++) {
ASSERT_OK(Put(Key(i + 380), "value"));
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(200)); });
}
seq_end = dbfull()->GetLatestSequenceNumber();
ASSERT_OK(Flush());
tables_props.clear();
ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
ASSERT_EQ(tables_props.size(), 3);
// Locate the file that has not been checked yet
it = tables_props.begin();
while (it != tables_props.end()) {
if (!checked_file_nums.count(it->second->orig_file_number)) {
break;
}
it++;
}
ASSERT_TRUE(it != tables_props.end());
tp_mapping.Clear();
ASSERT_OK(tp_mapping.Add(it->second->seqno_to_time_mapping));
ASSERT_OK(tp_mapping.Sort());
seqs = tp_mapping.TEST_GetInternalMapping();
// The sequence number -> time entries should be maxed
ASSERT_GE(seqs.size(), 99);
ASSERT_LE(seqs.size(), 101);
for (auto i = start_seq; i < seq_end - 99; i++) {
// likely the first 100 entries report 0
ASSERT_LE(tp_mapping.GetOldestApproximateTime(i), (i - start_seq) + 3000);
}
start_seq += 101;
for (auto i = start_seq; i < seq_end; i++) {
ASSERT_GE(tp_mapping.GetOldestApproximateTime(i),
(i - start_seq) * 200 + 22200);
ASSERT_LE(tp_mapping.GetOldestApproximateTime(i),
(i - start_seq) * 200 + 22600);
}
checked_file_nums.insert(it->second->orig_file_number);
start_seq = seq_end;
// Write a key every 100 seconds
for (int i = 0; i < 200; i++) {
ASSERT_OK(Put(Key(i + 570), "value"));
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
}
seq_end = dbfull()->GetLatestSequenceNumber();
ASSERT_OK(Flush());
tables_props.clear();
ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
ASSERT_EQ(tables_props.size(), 4);
// Locate the file that has not been checked yet
it = tables_props.begin();
while (it != tables_props.end()) {
if (!checked_file_nums.count(it->second->orig_file_number)) {
break;
}
it++;
}
ASSERT_TRUE(it != tables_props.end());
tp_mapping.Clear();
ASSERT_OK(tp_mapping.Add(it->second->seqno_to_time_mapping));
ASSERT_OK(tp_mapping.Sort());
seqs = tp_mapping.TEST_GetInternalMapping();
ASSERT_GE(seqs.size(), 99);
ASSERT_LE(seqs.size(), 101);
checked_file_nums.insert(it->second->orig_file_number);
// re-enable compaction
ASSERT_OK(dbfull()->SetOptions({
{"disable_auto_compactions", "false"},
}));
ASSERT_OK(dbfull()->TEST_WaitForCompact());
tables_props.clear();
ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
ASSERT_GE(tables_props.size(), 1);
// Locate a file that was not seen before re-enabling compaction
it = tables_props.begin();
while (it != tables_props.end()) {
if (!checked_file_nums.count(it->second->orig_file_number)) {
break;
}
it++;
}
ASSERT_TRUE(it != tables_props.end());
tp_mapping.Clear();
ASSERT_OK(tp_mapping.Add(it->second->seqno_to_time_mapping));
ASSERT_OK(tp_mapping.Sort());
seqs = tp_mapping.TEST_GetInternalMapping();
ASSERT_GE(seqs.size(), 99);
ASSERT_LE(seqs.size(), 101);
for (auto i = start_seq; i < seq_end - 99; i++) {
// likely the first 100 entries report 0
ASSERT_LE(tp_mapping.GetOldestApproximateTime(i),
(i - start_seq) * 100 + 50000);
}
start_seq += 101;
for (auto i = start_seq; i < seq_end; i++) {
ASSERT_GE(tp_mapping.GetOldestApproximateTime(i),
(i - start_seq) * 100 + 52200);
ASSERT_LE(tp_mapping.GetOldestApproximateTime(i),
(i - start_seq) * 100 + 52400);
}
ASSERT_OK(db_->Close());
}
// Checks seqno->time tracking across column families with different settings:
// the default CF has tracking disabled, CF "one" tracks 10000 seconds and CF
// "two" tracks 1000000 seconds. Verifies that the kRecordSeqnoTime periodic
// task exists only while some CF has tracking enabled, that only files of
// tracking CFs carry a seqno_to_time_mapping property, and how the in-memory
// mapping size reacts when CFs are dropped.
TEST_P(SeqnoTimeTablePropTest, MultiCFs) {
Options options = CurrentOptions();
options.preclude_last_level_data_seconds = 0;
options.preserve_internal_time_seconds = 0;
options.env = mock_env_.get();
options.stats_dump_period_sec = 0;
options.stats_persist_period_sec = 0;
ReopenWithColumnFamilies({"default"}, options);
const PeriodicTaskScheduler& scheduler =
dbfull()->TEST_GetPeriodicTaskScheduler();
// With both options at 0 the recording task must not be scheduled
ASSERT_FALSE(scheduler.TEST_HasTask(PeriodicTaskType::kRecordSeqnoTime));
// Write some data and increase the current time
for (int i = 0; i < 200; i++) {
ASSERT_OK(Put(Key(i), "value"));
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
}
ASSERT_OK(Flush());
TablePropertiesCollection tables_props;
ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
ASSERT_EQ(tables_props.size(), 1);
auto it = tables_props.begin();
ASSERT_TRUE(it->second->seqno_to_time_mapping.empty());
ASSERT_TRUE(dbfull()->TEST_GetSeqnoToTimeMapping().Empty());
// Create CF "one" with time tracking enabled
Options options_1 = options;
SetTrackTimeDurationOptions(10000, options_1);
CreateColumnFamilies({"one"}, options_1);
ASSERT_TRUE(scheduler.TEST_HasTask(PeriodicTaskType::kRecordSeqnoTime));
// Write some data to the default CF (without preclude_last_level feature)
for (int i = 0; i < 200; i++) {
ASSERT_OK(Put(Key(i), "value"));
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
}
ASSERT_OK(Flush());
// Write some data to the CF one
for (int i = 0; i < 20; i++) {
ASSERT_OK(Put(1, Key(i), "value"));
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(10)); });
}
ASSERT_OK(Flush(1));
tables_props.clear();
ASSERT_OK(dbfull()->GetPropertiesOfAllTables(handles_[1], &tables_props));
ASSERT_EQ(tables_props.size(), 1);
it = tables_props.begin();
SeqnoToTimeMapping tp_mapping;
ASSERT_OK(tp_mapping.Add(it->second->seqno_to_time_mapping));
ASSERT_OK(tp_mapping.Sort());
ASSERT_FALSE(tp_mapping.Empty());
auto seqs = tp_mapping.TEST_GetInternalMapping();
ASSERT_GE(seqs.size(), 1);
ASSERT_LE(seqs.size(), 4);
// Create one more CF with larger preclude_last_level time
Options options_2 = options;
SetTrackTimeDurationOptions(1000000, options_2); // 1m
CreateColumnFamilies({"two"}, options_2);
// Add more data to CF "two" to fill the in memory mapping
for (int i = 0; i < 2000; i++) {
ASSERT_OK(Put(2, Key(i), "value"));
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
}
seqs = dbfull()->TEST_GetSeqnoToTimeMapping().TEST_GetInternalMapping();
ASSERT_GE(seqs.size(), 1000 - 1);
ASSERT_LE(seqs.size(), 1000 + 1);
ASSERT_OK(Flush(2));
tables_props.clear();
ASSERT_OK(dbfull()->GetPropertiesOfAllTables(handles_[2], &tables_props));
ASSERT_EQ(tables_props.size(), 1);
it = tables_props.begin();
tp_mapping.Clear();
ASSERT_OK(tp_mapping.Add(it->second->seqno_to_time_mapping));
ASSERT_OK(tp_mapping.Sort());
seqs = tp_mapping.TEST_GetInternalMapping();
// the max encoded entries is 100
ASSERT_GE(seqs.size(), 100 - 1);
ASSERT_LE(seqs.size(), 100 + 1);
// Write some data to default CF, as all memtable with preclude_last_level
// enabled have flushed, the in-memory seqno->time mapping should be cleared
for (int i = 0; i < 10; i++) {
ASSERT_OK(Put(0, Key(i), "value"));
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
}
// NOTE(review): `seqs` is fetched here but overwritten before any assertion
// reads it, so the "should be cleared" expectation above is not actually
// checked -- confirm whether an assertion on seqs.size() is missing.
seqs = dbfull()->TEST_GetSeqnoToTimeMapping().TEST_GetInternalMapping();
ASSERT_OK(Flush(0));
// trigger compaction for CF "two" and make sure the compaction output has
// seqno_to_time_mapping
for (int j = 0; j < 3; j++) {
for (int i = 0; i < 200; i++) {
ASSERT_OK(Put(2, Key(i), "value"));
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
}
ASSERT_OK(Flush(2));
}
ASSERT_OK(dbfull()->TEST_WaitForCompact());
tables_props.clear();
ASSERT_OK(dbfull()->GetPropertiesOfAllTables(handles_[2], &tables_props));
ASSERT_EQ(tables_props.size(), 1);
it = tables_props.begin();
tp_mapping.Clear();
ASSERT_OK(tp_mapping.Add(it->second->seqno_to_time_mapping));
ASSERT_OK(tp_mapping.Sort());
seqs = tp_mapping.TEST_GetInternalMapping();
ASSERT_GE(seqs.size(), 99);
ASSERT_LE(seqs.size(), 101);
// Same exercise on the default CF: its compaction output must carry no
// seqno_to_time_mapping
for (int j = 0; j < 2; j++) {
for (int i = 0; i < 200; i++) {
ASSERT_OK(Put(0, Key(i), "value"));
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
}
ASSERT_OK(Flush(0));
}
ASSERT_OK(dbfull()->TEST_WaitForCompact());
tables_props.clear();
ASSERT_OK(dbfull()->GetPropertiesOfAllTables(handles_[0], &tables_props));
ASSERT_EQ(tables_props.size(), 1);
it = tables_props.begin();
ASSERT_TRUE(it->second->seqno_to_time_mapping.empty());
// Write some data to CF "two", but don't flush to accumulate
for (int i = 0; i < 1000; i++) {
ASSERT_OK(Put(2, Key(i), "value"));
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
}
ASSERT_GE(
dbfull()->TEST_GetSeqnoToTimeMapping().TEST_GetInternalMapping().size(),
500);
// After dropping CF "one", the in-memory mapping will be changed to only
// follow CF "two" options.
ASSERT_OK(db_->DropColumnFamily(handles_[1]));
ASSERT_LE(
dbfull()->TEST_GetSeqnoToTimeMapping().TEST_GetInternalMapping().size(),
100 + 5);
// After dropping CF "two", the in-memory mapping is also cleared.
ASSERT_OK(db_->DropColumnFamily(handles_[2]));
ASSERT_EQ(
dbfull()->TEST_GetSeqnoToTimeMapping().TEST_GetInternalMapping().size(),
0);
// And the timer worker is stopped
ASSERT_FALSE(scheduler.TEST_HasTask(PeriodicTaskType::kRecordSeqnoTime));
Close();
}
// Opens two DB instances with time tracking enabled and checks that the second
// one accumulates seqno->time samples while keys are written to it.
TEST_P(SeqnoTimeTablePropTest, MultiInstancesBasic) {
  const int kInstanceNum = 2;

  Options options = CurrentOptions();
  SetTrackTimeDurationOptions(10000, options);
  options.env = mock_env_.get();
  options.stats_dump_period_sec = 0;
  options.stats_persist_period_sec = 0;

  std::vector<DB*> dbs(kInstanceNum);
  for (int idx = 0; idx < kInstanceNum; idx++) {
    ASSERT_OK(DB::Open(options, test::PerThreadDBPath(std::to_string(idx)),
                       &(dbs[idx])));
  }

  // Make sure the second instance has the worker enabled
  auto second_db = static_cast_with_check<DBImpl>(dbs[1]);
  WriteOptions write_opts;
  for (int key = 0; key < 200; key++) {
    ASSERT_OK(second_db->Put(write_opts, Key(key), "value"));
    dbfull()->TEST_WaitForPeriodicTaskRun(
        [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
  }
  SeqnoToTimeMapping seqno_to_time_mapping =
      second_db->TEST_GetSeqnoToTimeMapping();
  ASSERT_GT(seqno_to_time_mapping.Size(), 10);

  // The instances were opened manually, so close and free them manually too.
  for (DB* instance : dbs) {
    ASSERT_OK(instance->Close());
    delete instance;
  }
}
// Universal-compaction test for the seqno->time mapping: checks that flushed
// and compacted files carry the mapping, that sequence numbers are not zeroed
// while the data is within the tracked time window, and that they get zeroed
// (and the data reaches the last level) once enough mock time has passed.
// The number of zeroing events is counted through the
// "CompactionIterator::PrepareOutput:ZeroingSeq" sync point.
TEST_P(SeqnoTimeTablePropTest, SeqnoToTimeMappingUniversal) {
const int kNumTrigger = 4;
const int kNumLevels = 7;
const int kNumKeys = 100;
Options options = CurrentOptions();
SetTrackTimeDurationOptions(10000, options);
options.compaction_style = kCompactionStyleUniversal;
options.num_levels = kNumLevels;
options.env = mock_env_.get();
DestroyAndReopen(options);
// Incremented by the sync point callback each time a seqno is zeroed
std::atomic_uint64_t num_seqno_zeroing{0};
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
SyncPoint::GetInstance()->SetCallBack(
"CompactionIterator::PrepareOutput:ZeroingSeq",
[&](void* /*arg*/) { num_seqno_zeroing++; });
SyncPoint::GetInstance()->EnableProcessing();
int sst_num = 0;
// Write kNumTrigger - 1 files, one key every 10 mock seconds
for (; sst_num < kNumTrigger - 1; sst_num++) {
for (int i = 0; i < kNumKeys; i++) {
ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value"));
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(10)); });
}
ASSERT_OK(Flush());
}
TablePropertiesCollection tables_props;
ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
ASSERT_EQ(tables_props.size(), 3);
// Every flushed file should carry about 10 seqno->time entries
for (const auto& props : tables_props) {
ASSERT_FALSE(props.second->seqno_to_time_mapping.empty());
SeqnoToTimeMapping tp_mapping;
ASSERT_OK(tp_mapping.Add(props.second->seqno_to_time_mapping));
ASSERT_OK(tp_mapping.Sort());
ASSERT_FALSE(tp_mapping.Empty());
auto seqs = tp_mapping.TEST_GetInternalMapping();
ASSERT_GE(seqs.size(), 10 - 1);
ASSERT_LE(seqs.size(), 10 + 1);
}
// Trigger a compaction
for (int i = 0; i < kNumKeys; i++) {
ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value"));
dbfull()->TEST_WaitForPeriodicTaskRun(
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(10)); });
}
sst_num++;
ASSERT_OK(Flush());
ASSERT_OK(dbfull()->TEST_WaitForCompact());
tables_props.clear();
ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
ASSERT_EQ(tables_props.size(), 1);
auto it = tables_props.begin();
SeqnoToTimeMapping tp_mapping;
ASSERT_FALSE(it->second->seqno_to_time_mapping.empty());
ASSERT_OK(tp_mapping.Add(it->second->seqno_to_time_mapping));
// compact to the last level
CompactRangeOptions cro;
cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
// make sure the data is all compacted to penultimate level if the feature is
// on, otherwise, compacted to the last level.
if (options.preclude_last_level_data_seconds > 0) {
ASSERT_GT(NumTableFilesAtLevel(5), 0);
ASSERT_EQ(NumTableFilesAtLevel(6), 0);
} else {
ASSERT_EQ(NumTableFilesAtLevel(5), 0);
ASSERT_GT(NumTableFilesAtLevel(6), 0);
}
// regardless of whether the file is on the last level or not, it should keep
// the time information and the sequence numbers are not zeroed
tables_props.clear();
tp_mapping.Clear();
ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
ASSERT_EQ(tables_props.size(), 1);
ASSERT_EQ(num_seqno_zeroing, 0);
it = tables_props.begin();
ASSERT_FALSE(it->second->seqno_to_time_mapping.empty());
ASSERT_OK(tp_mapping.Add(it->second->seqno_to_time_mapping));
// make half of the data expired
mock_clock_->MockSleepForSeconds(static_cast<int>(8000));
ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
tables_props.clear();
tp_mapping.Clear();
ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
if (options.preclude_last_level_data_seconds > 0) {
ASSERT_EQ(tables_props.size(), 2);
} else {
ASSERT_EQ(tables_props.size(), 1);
}
ASSERT_GT(num_seqno_zeroing, 0);
std::vector<KeyVersion> key_versions;
ASSERT_OK(GetAllKeyVersions(db_, Slice(), Slice(),
std::numeric_limits<size_t>::max(),
&key_versions));
// make sure there are more than 300 keys, the first 100 keys have their seqno
// zeroed out, and the last 100 keys do not
ASSERT_GT(key_versions.size(), 300);
for (int i = 0; i < 100; i++) {
ASSERT_EQ(key_versions[i].sequence, 0);
}
auto rit = key_versions.rbegin();
for (int i = 0; i < 100; i++) {
ASSERT_GT(rit->sequence, 0);
rit++;
}
// make all data expired and compact again to push it to the last level
// regardless if the tiering feature is enabled or not
mock_clock_->MockSleepForSeconds(static_cast<int>(20000));
ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
ASSERT_GT(num_seqno_zeroing, 0);
ASSERT_GT(NumTableFilesAtLevel(6), 0);
Close();
}
// Checks which (seqno, time) pairs SeqnoToTimeMapping::Append() accepts and
// that rejected or merged pairs leave Size() unchanged.
TEST_F(SeqnoTimeTest, MappingAppend) {
  SeqnoToTimeMapping mapping(/*max_time_duration=*/100, /*max_capacity=*/10);

  // seqno == 0 is ignored, as it may mean the seqno is zeroed out
  ASSERT_FALSE(mapping.Append(0, 9));

  ASSERT_TRUE(mapping.Append(3, 10));
  const auto size_after_first = mapping.Size();

  // a regular, increasing pair grows the mapping by one
  ASSERT_TRUE(mapping.Append(10, 11));
  const auto expected_size = size_after_first + 1;
  ASSERT_EQ(expected_size, mapping.Size());

  // out-of-order seqno is rejected
  ASSERT_FALSE(mapping.Append(8, 12));
  ASSERT_EQ(expected_size, mapping.Size());

  // same seqno with a newer time is accepted without adding an entry
  ASSERT_TRUE(mapping.Append(10, 12));
  ASSERT_EQ(expected_size, mapping.Size());

  // same seqno with an older time is ignored
  ASSERT_FALSE(mapping.Append(10, 9));
  ASSERT_EQ(expected_size, mapping.Size());

  // a new seqno paired with an old time is ignored
  ASSERT_FALSE(mapping.Append(12, 8));
  ASSERT_EQ(expected_size, mapping.Size());
}
// Checks the lookup results of GetOldestApproximateTime() for an empty
// mapping, for seqnos before the first entry, and for seqnos on or between
// entries.
TEST_F(SeqnoTimeTest, GetOldestApproximateTime) {
  SeqnoToTimeMapping mapping(/*max_time_duration=*/100, /*max_capacity=*/10);

  // nothing recorded yet
  ASSERT_EQ(mapping.GetOldestApproximateTime(10), kUnknownSeqnoTime);

  mapping.Append(3, 10);
  // a seqno older than the first entry has no known time
  ASSERT_EQ(mapping.GetOldestApproximateTime(2), kUnknownSeqnoTime);
  ASSERT_EQ(mapping.GetOldestApproximateTime(3), 10);
  ASSERT_EQ(mapping.GetOldestApproximateTime(10), 10);

  mapping.Append(10, 100);
  mapping.Append(100, 1000);
  // a seqno maps to the time of the closest entry at or below it
  ASSERT_EQ(mapping.GetOldestApproximateTime(10), 100);
  ASSERT_EQ(mapping.GetOldestApproximateTime(40), 100);
  ASSERT_EQ(mapping.GetOldestApproximateTime(111), 1000);
}
// Checks that Sort() orders the entries and drops duplicates and entries that
// add no information.
TEST_F(SeqnoTimeTest, Sort) {
  // What should be left once everything below has been added and sorted.
  std::deque<SeqnoToTimeMapping::SeqnoTimePair> expected;
  expected.emplace_back(1, 10);
  expected.emplace_back(10, 11);
  expected.emplace_back(100, 100);

  SeqnoToTimeMapping mapping;

  // a single entry survives sorting
  mapping.Add(10, 11);
  ASSERT_OK(mapping.Sort());
  ASSERT_EQ(mapping.Size(), 1);

  // exact duplicate, should be removed by sort
  mapping.Add(10, 11);
  // same seqno, but older time, should be removed
  mapping.Add(10, 9);
  // entries that are not useful, should be removed by sort
  mapping.Add(11, 9);
  mapping.Add(9, 8);
  // useful entries
  mapping.Add(1, 10);
  mapping.Add(100, 100);
  ASSERT_OK(mapping.Sort());

  auto actual = mapping.TEST_GetInternalMapping();
  ASSERT_EQ(expected, actual);
}
// Round-trips a 1000-entry mapping through Encode()/Add() and checks that the
// decoded mapping is capped at kMaxSeqnoTimePairsPerSST entries while staying
// within 200 time units below the original for every seqno.
TEST_F(SeqnoTimeTest, EncodeDecodeBasic) {
  SeqnoToTimeMapping source(0, 1000);
  std::string encoded;

  // encoding an empty mapping produces no output
  source.Encode(encoded, 0, 1000, 100);
  ASSERT_TRUE(encoded.empty());

  for (int seqno = 1; seqno <= 1000; seqno++) {
    ASSERT_TRUE(source.Append(seqno, seqno * 10));
  }
  source.Encode(encoded, 0, 1000, 100);
  ASSERT_FALSE(encoded.empty());

  SeqnoToTimeMapping decoded;
  ASSERT_OK(decoded.Add(encoded));
  ASSERT_OK(decoded.Sort());
  ASSERT_EQ(decoded.Size(), SeqnoToTimeMapping::kMaxSeqnoTimePairsPerSST);
  ASSERT_EQ(source.Size(), 1000);

  for (SequenceNumber seq = 0; seq <= 1000; seq++) {
    // `source` has the more accurate time mapping; encoding only picks
    // kMaxSeqnoTimePairsPerSST entries, which is less accurate
    uint64_t target_time = source.GetOldestApproximateTime(seq);
    uint64_t lowest_allowed = 0;
    if (target_time >= 200) {
      lowest_allowed = target_time - 200;
    }
    ASSERT_GE(decoded.GetOldestApproximateTime(seq), lowest_allowed);
    ASSERT_LE(decoded.GetOldestApproximateTime(seq), target_time);
  }
}
// Checks which entries Encode() keeps when the output is limited to fewer
// pairs than the mapping holds.
TEST_F(SeqnoTimeTest, EncodeDecodePerferNewTime) {
  SeqnoToTimeMapping source(0, 10);
  source.Append(1, 10);
  source.Append(5, 17);
  source.Append(6, 25);
  source.Append(8, 30);

  // Limit the encoding to 3 pairs.
  std::string encoded;
  source.Encode(encoded, 1, 10, 0, 3);

  SeqnoToTimeMapping decoded;
  ASSERT_OK(decoded.Add(encoded));
  ASSERT_OK(decoded.Sort());
  ASSERT_EQ(decoded.Size(), 3);

  auto actual = decoded.TEST_GetInternalMapping();
  std::deque<SeqnoToTimeMapping::SeqnoTimePair> expected;
  expected.emplace_back(1, 10);
  expected.emplace_back(6, 25);
  expected.emplace_back(8, 30);
  ASSERT_EQ(expected, actual);

  // Add a few entries with much larger times, then encode with a limit of 4.
  source.Append(10, 100);
  source.Append(13, 200);
  source.Append(16, 300);

  encoded.clear();
  source.Encode(encoded, 1, 20, 0, 4);

  decoded.Clear();
  ASSERT_OK(decoded.Add(encoded));
  ASSERT_OK(decoded.Sort());
  ASSERT_EQ(decoded.Size(), 4);

  expected.clear();
  expected.emplace_back(1, 10);
  // entries #6 and #8 are skipped as they are too close to #1.
  // The entry with time 100 is also within the skip range, but if it were
  // skipped there would not be enough entries to fill 4 slots, so it is
  // selected.
  expected.emplace_back(10, 100);
  expected.emplace_back(13, 200);
  expected.emplace_back(16, 300);
  actual = decoded.TEST_GetInternalMapping();
  ASSERT_EQ(expected, actual);
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack trace handler, lets GoogleTest
// consume its command-line flags, then runs every registered test and returns
// the result of RUN_ALL_TESTS() as the process exit code.
int main(int argc, char** argv) {
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

@ -1,509 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/table_properties_collector.h"
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "db/db_impl/db_impl.h"
#include "db/dbformat.h"
#include "file/sequence_file_reader.h"
#include "file/writable_file_writer.h"
#include "options/cf_options.h"
#include "rocksdb/flush_block_policy.h"
#include "rocksdb/table.h"
#include "table/block_based/block_based_table_factory.h"
#include "table/meta_blocks.h"
#include "table/plain/plain_table_factory.h"
#include "table/table_builder.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/coding.h"
namespace ROCKSDB_NAMESPACE {
// Boolean-parameterized fixture; the parameter is copied into backward_mode_
// before each test body runs.
class TablePropertiesTest : public testing::Test,
                            public testing::WithParamInterface<bool> {
 public:
  void SetUp() override {
    // Cache the test parameter so test bodies can read it as a plain member.
    backward_mode_ = GetParam();
  }

  bool backward_mode_;
};
// Utilities test functions
namespace {
// Fixed column family id, column family name and level that MakeBuilder()
// passes into TableBuilderOptions; the collector factory below checks that the
// id and level arrive unchanged in its creation context.
static const uint32_t kTestColumnFamilyId = 66;
static const std::string kTestColumnFamilyName = "test_column_fam";
static const int kTestLevel = 1;
void MakeBuilder(
const Options& options, const ImmutableOptions& ioptions,
const MutableCFOptions& moptions,
const InternalKeyComparator& internal_comparator,
const IntTblPropCollectorFactories* int_tbl_prop_collector_factories,
std::unique_ptr<WritableFileWriter>* writable,
std::unique_ptr<TableBuilder>* builder) {
std::unique_ptr<FSWritableFile> wf(new test::StringSink);
writable->reset(
new WritableFileWriter(std::move(wf), "" /* don't care */, EnvOptions()));
TableBuilderOptions tboptions(
ioptions, moptions, internal_comparator, int_tbl_prop_collector_factories,
options.compression, options.compression_opts, kTestColumnFamilyId,
kTestColumnFamilyName, kTestLevel);
builder->reset(NewTableBuilder(tboptions, writable->get()));
}
} // namespace
// Collects keys that start with "A" in a table. It also counts puts, deletes
// and single deletes, and tracks how often the file size reported with each
// key differs from the size reported with the previous key.
class RegularKeysStartWithA : public TablePropertiesCollector {
 public:
  const char* Name() const override { return "RegularKeysStartWithA"; }

  // Publishes every counter as a varint32-encoded string, plus the status
  // message under "TablePropertiesTest". The message stays "Rocksdb" unless a
  // shrinking file size was observed.
  Status Finish(UserCollectedProperties* properties) override {
    std::string encoded;
    std::string encoded_num_puts;
    std::string encoded_num_deletes;
    std::string encoded_num_single_deletes;
    std::string encoded_num_size_changes;
    PutVarint32(&encoded, count_);
    PutVarint32(&encoded_num_puts, num_puts_);
    PutVarint32(&encoded_num_deletes, num_deletes_);
    PutVarint32(&encoded_num_single_deletes, num_single_deletes_);
    PutVarint32(&encoded_num_size_changes, num_size_changes_);
    *properties = UserCollectedProperties{
        {"TablePropertiesTest", message_},
        {"Count", encoded},
        {"NumPuts", encoded_num_puts},
        {"NumDeletes", encoded_num_deletes},
        {"NumSingleDeletes", encoded_num_single_deletes},
        {"NumSizeChanges", encoded_num_size_changes},
    };
    return Status::OK();
  }

  // Updates the counters for one entry. `file_size` is compared against the
  // size seen with the previous entry (initially 0).
  Status AddUserKey(const Slice& user_key, const Slice& /*value*/,
                    EntryType type, SequenceNumber /*seq*/,
                    uint64_t file_size) override {
    // simply assume all user keys are not empty.
    if (user_key.data()[0] == 'A') {
      ++count_;
    }
    if (type == kEntryPut) {
      num_puts_++;
    } else if (type == kEntryDelete) {
      num_deletes_++;
    } else if (type == kEntrySingleDelete) {
      num_single_deletes_++;
    }
    if (file_size < file_size_) {
      message_ = "File size should not decrease.";
    } else if (file_size != file_size_) {
      num_size_changes_++;
    }
    // Remember the size for the next call. Without this, file_size_ stays 0,
    // the decrease check above can never fire, and every non-zero size would
    // be counted as a change.
    file_size_ = file_size;
    return Status::OK();
  }

  UserCollectedProperties GetReadableProperties() const override {
    return UserCollectedProperties{};
  }

 private:
  std::string message_ = "Rocksdb";
  uint32_t count_ = 0;
  uint32_t num_puts_ = 0;
  uint32_t num_deletes_ = 0;
  uint32_t num_single_deletes_ = 0;
  uint32_t num_size_changes_ = 0;
  // File size reported with the most recently added key.
  uint64_t file_size_ = 0;
};
// Collects keys that start with "A" in a table, using the older
// Add(key, value) callback instead of AddUserKey().
// It is also used to test the internal key table property collector.
class RegularKeysStartWithABackwardCompatible
    : public TablePropertiesCollector {
 public:
  const char* Name() const override { return "RegularKeysStartWithA"; }

  // Publishes the varint32-encoded count together with a fixed marker entry.
  Status Finish(UserCollectedProperties* properties) override {
    std::string count_encoding;
    PutVarint32(&count_encoding, count_);
    *properties = UserCollectedProperties{{"TablePropertiesTest", "Rocksdb"},
                                          {"Count", count_encoding}};
    return Status::OK();
  }

  Status Add(const Slice& user_key, const Slice& /*value*/) override {
    // simply assume all user keys are not empty.
    const bool starts_with_a = (user_key.data()[0] == 'A');
    if (starts_with_a) {
      count_ += 1;
    }
    return Status::OK();
  }

  UserCollectedProperties GetReadableProperties() const override {
    return UserCollectedProperties{};
  }

 private:
  // Number of keys seen so far whose first byte is 'A'.
  uint32_t count_ = 0;
};
// IntTblPropCollector counterpart of the collectors above: counts the keys
// whose first byte is 'A' and ignores block notifications.
class RegularKeysStartWithAInternal : public IntTblPropCollector {
 public:
  const char* Name() const override { return "RegularKeysStartWithA"; }

  // Publishes the varint32-encoded count together with a fixed marker entry.
  Status Finish(UserCollectedProperties* properties) override {
    std::string count_encoding;
    PutVarint32(&count_encoding, count_);
    *properties = UserCollectedProperties{{"TablePropertiesTest", "Rocksdb"},
                                          {"Count", count_encoding}};
    return Status::OK();
  }

  Status InternalAdd(const Slice& user_key, const Slice& /*value*/,
                     uint64_t /*file_size*/) override {
    // simply assume all user keys are not empty.
    const bool starts_with_a = (user_key.data()[0] == 'A');
    if (starts_with_a) {
      count_ += 1;
    }
    return Status::OK();
  }

  void BlockAdd(uint64_t /* block_uncomp_bytes */,
                uint64_t /* block_compressed_bytes_fast */,
                uint64_t /* block_compressed_bytes_slow */) override {
    // Block statistics are not needed by this collector.
  }

  UserCollectedProperties GetReadableProperties() const override {
    return UserCollectedProperties{};
  }

 private:
  // Number of keys seen so far whose first byte is 'A'.
  uint32_t count_ = 0;
};
// Factory serving both collector interfaces. For the user-facing interface it
// creates either the AddUserKey()-based or the Add()-based collector depending
// on `backward_mode`; for the internal interface it always creates
// RegularKeysStartWithAInternal.
class RegularKeysStartWithAFactory : public IntTblPropCollectorFactory,
                                     public TablePropertiesCollectorFactory {
 public:
  explicit RegularKeysStartWithAFactory(bool backward_mode)
      : backward_mode_(backward_mode) {}

  // Also verifies that the creation context carries the column family id and
  // level the test passed to the table builder.
  TablePropertiesCollector* CreateTablePropertiesCollector(
      TablePropertiesCollectorFactory::Context context) override {
    EXPECT_EQ(kTestColumnFamilyId, context.column_family_id);
    EXPECT_EQ(kTestLevel, context.level_at_creation);
    if (backward_mode_) {
      return new RegularKeysStartWithABackwardCompatible();
    }
    return new RegularKeysStartWithA();
  }

  IntTblPropCollector* CreateIntTblPropCollector(
      uint32_t /*column_family_id*/, int /* level_at_creation */) override {
    return new RegularKeysStartWithAInternal();
  }

  const char* Name() const override { return "RegularKeysStartWithA"; }

  bool backward_mode_;
};
// Flush policy that answers true on every third Update() call.
class FlushBlockEveryThreePolicy : public FlushBlockPolicy {
 public:
  bool Update(const Slice& /*key*/, const Slice& /*value*/) override {
    ++count_;
    const bool third_call = (count_ % 3U == 0);
    return third_call;
  }

 private:
  uint64_t count_ = 0;
};
// Factory handing out a fresh FlushBlockEveryThreePolicy per request; the
// table options and block builder arguments are ignored.
class FlushBlockEveryThreePolicyFactory : public FlushBlockPolicyFactory {
 public:
  explicit FlushBlockEveryThreePolicyFactory() = default;

  const char* Name() const override {
    return "FlushBlockEveryThreePolicyFactory";
  }

  FlushBlockPolicy* NewFlushBlockPolicy(
      const BlockBasedTableOptions& /*table_options*/,
      const BlockBuilder& /*data_block_builder*/) const override {
    return new FlushBlockEveryThreePolicy();
  }
};
// Table-format magic numbers defined in another translation unit; the tests
// below pass them through to ReadTableProperties().
extern const uint64_t kBlockBasedTableMagicNumber;
extern const uint64_t kPlainTableMagicNumber;
namespace {
// Builds a table from a fixed set of entries with the supplied options, reads
// its properties back via ReadTableProperties() and verifies the values that
// the collectors under test are expected to have written.
//
//   backward_mode: forwarded to RegularKeysStartWithAFactory; the extra
//     "Num*" properties are only verified when this and
//     test_int_tbl_prop_collector are both false.
//   magic_number: passed through to ReadTableProperties().
//   test_int_tbl_prop_collector: when true the factory is registered directly
//     as an internal collector factory; otherwise the factories are obtained
//     through GetIntTblPropCollectorFactory(ioptions, ...).
//   options / internal_comparator: forwarded to MakeBuilder().
void TestCustomizedTablePropertiesCollector(
bool backward_mode, uint64_t magic_number, bool test_int_tbl_prop_collector,
const Options& options, const InternalKeyComparator& internal_comparator) {
// make sure the entries will be inserted with order.
std::map<std::pair<std::string, ValueType>, std::string> kvs = {
{{"About ", kTypeValue}, "val5"}, // starts with 'A'
{{"Abstract", kTypeValue}, "val2"}, // starts with 'A'
{{"Around ", kTypeValue}, "val7"}, // starts with 'A'
{{"Beyond ", kTypeValue}, "val3"},
{{"Builder ", kTypeValue}, "val1"},
{{"Love ", kTypeDeletion}, ""},
{{"Cancel ", kTypeValue}, "val4"},
{{"Find ", kTypeValue}, "val6"},
{{"Rocks ", kTypeDeletion}, ""},
{{"Foo ", kTypeSingleDeletion}, ""},
};
// -- Step 1: build table
std::unique_ptr<TableBuilder> builder;
std::unique_ptr<WritableFileWriter> writer;
const ImmutableOptions ioptions(options);
const MutableCFOptions moptions(options);
IntTblPropCollectorFactories int_tbl_prop_collector_factories;
if (test_int_tbl_prop_collector) {
int_tbl_prop_collector_factories.emplace_back(
new RegularKeysStartWithAFactory(backward_mode));
} else {
GetIntTblPropCollectorFactory(ioptions, &int_tbl_prop_collector_factories);
}
MakeBuilder(options, ioptions, moptions, internal_comparator,
&int_tbl_prop_collector_factories, &writer, &builder);
// Entries are added in the map's iteration order, each with the next
// sequence number.
SequenceNumber seqNum = 0U;
for (const auto& kv : kvs) {
InternalKey ikey(kv.first.first, seqNum++, kv.first.second);
builder->Add(ikey.Encode(), kv.second);
}
ASSERT_OK(builder->Finish());
ASSERT_OK(writer->Flush());
// -- Step 2: Read properties
// NOTE(review): assumes the writer's underlying file is a test::StringSink
// (presumably installed by MakeBuilder) -- confirm.
test::StringSink* fwf =
static_cast<test::StringSink*>(writer->writable_file());
std::unique_ptr<FSRandomAccessFile> source(
new test::StringSource(fwf->contents()));
std::unique_ptr<RandomAccessFileReader> fake_file_reader(
new RandomAccessFileReader(std::move(source), "test"));
std::unique_ptr<TableProperties> props;
Status s = ReadTableProperties(fake_file_reader.get(), fwf->contents().size(),
magic_number, ioptions, &props);
ASSERT_OK(s);
auto user_collected = props->user_collected_properties;
ASSERT_NE(user_collected.find("TablePropertiesTest"), user_collected.end());
ASSERT_EQ("Rocksdb", user_collected.at("TablePropertiesTest"));
uint32_t starts_with_A = 0;
ASSERT_NE(user_collected.find("Count"), user_collected.end());
Slice key(user_collected.at("Count"));
ASSERT_TRUE(GetVarint32(&key, &starts_with_A));
// Three of the keys above start with 'A'.
ASSERT_EQ(3u, starts_with_A);
if (!backward_mode && !test_int_tbl_prop_collector) {
// Expected counts follow from the entry types in kvs: 7 values,
// 2 deletions and 1 single deletion.
uint32_t num_puts;
ASSERT_NE(user_collected.find("NumPuts"), user_collected.end());
Slice key_puts(user_collected.at("NumPuts"));
ASSERT_TRUE(GetVarint32(&key_puts, &num_puts));
ASSERT_EQ(7u, num_puts);
uint32_t num_deletes;
ASSERT_NE(user_collected.find("NumDeletes"), user_collected.end());
Slice key_deletes(user_collected.at("NumDeletes"));
ASSERT_TRUE(GetVarint32(&key_deletes, &num_deletes));
ASSERT_EQ(2u, num_deletes);
uint32_t num_single_deletes;
ASSERT_NE(user_collected.find("NumSingleDeletes"), user_collected.end());
Slice key_single_deletes(user_collected.at("NumSingleDeletes"));
ASSERT_TRUE(GetVarint32(&key_single_deletes, &num_single_deletes));
ASSERT_EQ(1u, num_single_deletes);
uint32_t num_size_changes;
ASSERT_NE(user_collected.find("NumSizeChanges"), user_collected.end());
Slice key_size_changes(user_collected.at("NumSizeChanges"));
ASSERT_TRUE(GetVarint32(&key_size_changes, &num_size_changes));
// Only a lower bound is checked for the number of size changes.
ASSERT_GE(num_size_changes, 2u);
}
}
} // namespace
TEST_P(TablePropertiesTest, CustomizedTablePropertiesCollector) {
  // Run the shared check with its test_int_tbl_prop_collector argument both
  // on and off, first against the block-based table and then the plain table.
  for (bool use_internal_collector : {true, false}) {
    Options options;

    BlockBasedTableOptions block_based_options;
    block_based_options.flush_block_policy_factory =
        std::make_shared<FlushBlockEveryThreePolicyFactory>();
    options.table_factory.reset(
        NewBlockBasedTableFactory(block_based_options));

    test::PlainInternalKeyComparator internal_comparator(options.comparator);

    // Register the factory under test as the only user collector factory.
    std::shared_ptr<TablePropertiesCollectorFactory> factory_under_test(
        new RegularKeysStartWithAFactory(backward_mode_));
    options.table_properties_collector_factories.resize(1);
    options.table_properties_collector_factories[0] = factory_under_test;

    TestCustomizedTablePropertiesCollector(
        backward_mode_, kBlockBasedTableMagicNumber, use_internal_collector,
        options, internal_comparator);

    // Same options, but now backed by a plain table factory.
    PlainTableOptions plain_options;
    plain_options.user_key_len = 8;
    plain_options.bloom_bits_per_key = 8;
    plain_options.hash_table_ratio = 0;
    options.table_factory = std::make_shared<PlainTableFactory>(plain_options);

    TestCustomizedTablePropertiesCollector(
        backward_mode_, kPlainTableMagicNumber, use_internal_collector,
        options, internal_comparator);
  }
}
namespace {
// Builds a table from ten fixed internal keys (3 values, 4 deletions,
// 1 single deletion, 2 merges) using `table_factory`, reads the table
// properties back and checks the deleted-key and merge-operand counts.
//
//   backward_mode: forwarded to RegularKeysStartWithAFactory; the "Num*"
//     properties are only checked when it is false (and `sanitized` is true).
//   magic_number: passed through to ReadTableProperties().
//   sanitized: when true, the user collector factory is registered, the
//     options are run through SanitizeOptions() and the "Count" property is
//     verified as well.
void TestInternalKeyPropertiesCollector(
bool backward_mode, uint64_t magic_number, bool sanitized,
std::shared_ptr<TableFactory> table_factory) {
InternalKey keys[] = {
InternalKey("A ", 0, ValueType::kTypeValue),
InternalKey("B ", 1, ValueType::kTypeValue),
InternalKey("C ", 2, ValueType::kTypeValue),
InternalKey("W ", 3, ValueType::kTypeDeletion),
InternalKey("X ", 4, ValueType::kTypeDeletion),
InternalKey("Y ", 5, ValueType::kTypeDeletion),
InternalKey("Z ", 6, ValueType::kTypeDeletion),
InternalKey("a ", 7, ValueType::kTypeSingleDeletion),
InternalKey("b ", 8, ValueType::kTypeMerge),
InternalKey("c ", 9, ValueType::kTypeMerge),
};
std::unique_ptr<TableBuilder> builder;
std::unique_ptr<WritableFileWriter> writable;
Options options;
test::PlainInternalKeyComparator pikc(options.comparator);
IntTblPropCollectorFactories int_tbl_prop_collector_factories;
options.table_factory = table_factory;
if (sanitized) {
options.table_properties_collector_factories.emplace_back(
new RegularKeysStartWithAFactory(backward_mode));
// with sanitization, even regular properties collector will be able to
// handle internal keys.
// The comparator is saved here and restored after SanitizeOptions().
auto comparator = options.comparator;
// HACK: Set options.info_log to avoid writing log in
// SanitizeOptions().
options.info_log = std::make_shared<test::NullLogger>();
options = SanitizeOptions("db", // just a place holder
options);
ImmutableOptions ioptions(options);
GetIntTblPropCollectorFactory(ioptions, &int_tbl_prop_collector_factories);
options.comparator = comparator;
}
const ImmutableOptions ioptions(options);
MutableCFOptions moptions(options);
// The build/read/verify cycle is executed twice with the same factories.
// NOTE(review): presumably to check that the factories can be reused -- confirm.
for (int iter = 0; iter < 2; ++iter) {
MakeBuilder(options, ioptions, moptions, pikc,
&int_tbl_prop_collector_factories, &writable, &builder);
for (const auto& k : keys) {
builder->Add(k.Encode(), "val");
}
ASSERT_OK(builder->Finish());
ASSERT_OK(writable->Flush());
// NOTE(review): assumes the writer's underlying file is a test::StringSink
// (presumably installed by MakeBuilder) -- confirm.
test::StringSink* fwf =
static_cast<test::StringSink*>(writable->writable_file());
std::unique_ptr<FSRandomAccessFile> source(
new test::StringSource(fwf->contents()));
std::unique_ptr<RandomAccessFileReader> reader(
new RandomAccessFileReader(std::move(source), "test"));
std::unique_ptr<TableProperties> props;
Status s = ReadTableProperties(reader.get(), fwf->contents().size(),
magic_number, ioptions, &props);
ASSERT_OK(s);
auto user_collected = props->user_collected_properties;
uint64_t deleted = GetDeletedKeys(user_collected);
ASSERT_EQ(5u, deleted); // deletes + single-deletes
bool property_present;
uint64_t merges = GetMergeOperands(user_collected, &property_present);
ASSERT_TRUE(property_present);
ASSERT_EQ(2u, merges);
if (sanitized) {
uint32_t starts_with_A = 0;
ASSERT_NE(user_collected.find("Count"), user_collected.end());
Slice key(user_collected.at("Count"));
ASSERT_TRUE(GetVarint32(&key, &starts_with_A));
// "A " is the only key above that starts with an upper-case 'A'.
ASSERT_EQ(1u, starts_with_A);
if (!backward_mode) {
// Expected counts follow from the key types above: 3 values,
// 4 deletions and 1 single deletion.
uint32_t num_puts;
ASSERT_NE(user_collected.find("NumPuts"), user_collected.end());
Slice key_puts(user_collected.at("NumPuts"));
ASSERT_TRUE(GetVarint32(&key_puts, &num_puts));
ASSERT_EQ(3u, num_puts);
uint32_t num_deletes;
ASSERT_NE(user_collected.find("NumDeletes"), user_collected.end());
Slice key_deletes(user_collected.at("NumDeletes"));
ASSERT_TRUE(GetVarint32(&key_deletes, &num_deletes));
ASSERT_EQ(4u, num_deletes);
uint32_t num_single_deletes;
ASSERT_NE(user_collected.find("NumSingleDeletes"),
user_collected.end());
Slice key_single_deletes(user_collected.at("NumSingleDeletes"));
ASSERT_TRUE(GetVarint32(&key_single_deletes, &num_single_deletes));
ASSERT_EQ(1u, num_single_deletes);
}
}
}
}
} // namespace
TEST_P(TablePropertiesTest, InternalKeyPropertiesCollector) {
  constexpr bool kSanitize = true;
  constexpr bool kNoSanitize = false;

  // Block-based table with sanitized options.
  TestInternalKeyPropertiesCollector(
      backward_mode_, kBlockBasedTableMagicNumber, kSanitize,
      std::make_shared<BlockBasedTableFactory>());

  // The unsanitized block-based run only happens in backward mode.
  if (backward_mode_) {
    TestInternalKeyPropertiesCollector(
        backward_mode_, kBlockBasedTableMagicNumber, kNoSanitize,
        std::make_shared<BlockBasedTableFactory>());
  }

  // Plain table, never sanitized.
  PlainTableOptions plain_options;
  plain_options.user_key_len = 8;
  plain_options.bloom_bits_per_key = 8;
  plain_options.hash_table_ratio = 0;
  TestInternalKeyPropertiesCollector(
      backward_mode_, kPlainTableMagicNumber, kNoSanitize,
      std::make_shared<PlainTableFactory>(plain_options));
}
// Instantiate TablePropertiesTest over both boolean parameter values, once
// under each of the two instantiation prefixes below.
// NOTE(review): the parameter presumably feeds backward_mode_ -- confirm
// against the fixture definition.
INSTANTIATE_TEST_CASE_P(InternalKeyPropertiesCollector, TablePropertiesTest,
::testing::Bool());
INSTANTIATE_TEST_CASE_P(CustomizedTablePropertiesCollector, TablePropertiesTest,
::testing::Bool());
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack-trace handler, initialises
// GoogleTest from the command line and returns the result of running all
// registered tests.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

File diff suppressed because it is too large Load Diff

@ -1,732 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "db/version_edit.h"
#include "db/blob/blob_index.h"
#include "rocksdb/advanced_options.h"
#include "table/unique_id_impl.h"
#include "test_util/sync_point.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/coding.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
static void TestEncodeDecode(const VersionEdit& edit) {
std::string encoded, encoded2;
edit.EncodeTo(&encoded);
VersionEdit parsed;
Status s = parsed.DecodeFrom(encoded);
ASSERT_TRUE(s.ok()) << s.ToString();
parsed.EncodeTo(&encoded2);
ASSERT_EQ(encoded, encoded2);
}
// Empty GoogleTest fixture grouping the VersionEdit tests below.
class VersionEditTest : public testing::Test {};
// Round-trips an edit that grows by one added file and one deleted file per
// iteration, then once more after the comparator name, log number, next file
// number and last sequence have been set.
TEST_F(VersionEditTest, EncodeDecode) {
// Large bases so the encoded numbers exceed small-varint ranges.
static const uint64_t kBig = 1ull << 50;
static const uint32_t kBig32Bit = 1ull << 30;
VersionEdit edit;
for (int i = 0; i < 4; i++) {
// Round-trip the edit as accumulated so far (empty on the first pass).
TestEncodeDecode(edit);
edit.AddFile(3, kBig + 300 + i, kBig32Bit + 400 + i, 0,
InternalKey("foo", kBig + 500 + i, kTypeValue),
InternalKey("zoo", kBig + 600 + i, kTypeDeletion),
kBig + 500 + i, kBig + 600 + i, false, Temperature::kUnknown,
kInvalidBlobFileNumber, 888, 678,
kBig + 300 + i /* epoch_number */, "234", "crc32c",
kNullUniqueId64x2, 0);
edit.DeleteFile(4, kBig + 700 + i);
}
edit.SetComparatorName("foo");
edit.SetLogNumber(kBig + 100);
edit.SetNextFile(kBig + 200);
edit.SetLastSequence(kBig + 1000);
TestEncodeDecode(edit);
}
// Encodes an edit with four added files that differ in path id,
// marked-for-compaction flag, timestamps and oldest blob file number, then
// checks that those per-file fields survive decoding.
TEST_F(VersionEditTest, EncodeDecodeNewFile4) {
  static const uint64_t kBig = 1ull << 50;
  VersionEdit edit;
  edit.AddFile(3, 300, 3, 100, InternalKey("foo", kBig + 500, kTypeValue),
               InternalKey("zoo", kBig + 600, kTypeDeletion), kBig + 500,
               kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber,
               kUnknownOldestAncesterTime, kUnknownFileCreationTime,
               300 /* epoch_number */, kUnknownFileChecksum,
               kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
  edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue),
               InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501,
               kBig + 601, false, Temperature::kUnknown, kInvalidBlobFileNumber,
               kUnknownOldestAncesterTime, kUnknownFileCreationTime,
               301 /* epoch_number */, kUnknownFileChecksum,
               kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
  edit.AddFile(5, 302, 0, 100, InternalKey("foo", kBig + 502, kTypeValue),
               InternalKey("zoo", kBig + 602, kTypeDeletion), kBig + 502,
               kBig + 602, true, Temperature::kUnknown, kInvalidBlobFileNumber,
               666, 888, 302 /* epoch_number */, kUnknownFileChecksum,
               kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
  edit.AddFile(5, 303, 0, 100, InternalKey("foo", kBig + 503, kTypeBlobIndex),
               InternalKey("zoo", kBig + 603, kTypeBlobIndex), kBig + 503,
               kBig + 603, true, Temperature::kUnknown, 1001,
               kUnknownOldestAncesterTime, kUnknownFileCreationTime,
               303 /* epoch_number */, kUnknownFileChecksum,
               kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
  edit.DeleteFile(4, 700);
  edit.SetComparatorName("foo");
  edit.SetLogNumber(kBig + 100);
  edit.SetNextFile(kBig + 200);
  edit.SetLastSequence(kBig + 1000);
  TestEncodeDecode(edit);

  // Decode once more here so the individual file fields can be inspected.
  std::string encoded;
  edit.EncodeTo(&encoded);
  VersionEdit parsed;
  Status s = parsed.DecodeFrom(encoded);
  ASSERT_TRUE(s.ok()) << s.ToString();
  auto& new_files = parsed.GetNewFiles();

  // marked_for_compaction: true, false, true, true (as passed to AddFile).
  ASSERT_TRUE(new_files[0].second.marked_for_compaction);
  ASSERT_TRUE(!new_files[1].second.marked_for_compaction);
  ASSERT_TRUE(new_files[2].second.marked_for_compaction);
  ASSERT_TRUE(new_files[3].second.marked_for_compaction);

  // Path ids: 3, 3, 0, 0.
  ASSERT_EQ(3u, new_files[0].second.fd.GetPathId());
  ASSERT_EQ(3u, new_files[1].second.fd.GetPathId());
  ASSERT_EQ(0u, new_files[2].second.fd.GetPathId());
  ASSERT_EQ(0u, new_files[3].second.fd.GetPathId());

  // Only the last file was given a valid oldest blob file number.
  ASSERT_EQ(kInvalidBlobFileNumber,
            new_files[0].second.oldest_blob_file_number);
  ASSERT_EQ(kInvalidBlobFileNumber,
            new_files[1].second.oldest_blob_file_number);
  ASSERT_EQ(kInvalidBlobFileNumber,
            new_files[2].second.oldest_blob_file_number);
  // Unsigned literal, consistent with the other unsigned comparisons above.
  ASSERT_EQ(1001u, new_files[3].second.oldest_blob_file_number);
}
// Uses a sync-point callback to append extra custom fields to each encoded
// new-file record and checks that decoding still succeeds and the known
// fields are recovered.
TEST_F(VersionEditTest, ForwardCompatibleNewFile4) {
static const uint64_t kBig = 1ull << 50;
VersionEdit edit;
edit.AddFile(3, 300, 3, 100, InternalKey("foo", kBig + 500, kTypeValue),
InternalKey("zoo", kBig + 600, kTypeDeletion), kBig + 500,
kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
300 /* epoch_number */, kUnknownFileChecksum,
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue),
InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501,
kBig + 601, false, Temperature::kUnknown, kInvalidBlobFileNumber,
686, 868, 301 /* epoch_number */, "234", "crc32c",
kNullUniqueId64x2, 0);
edit.DeleteFile(4, 700);
edit.SetComparatorName("foo");
edit.SetLogNumber(kBig + 100);
edit.SetNextFile(kBig + 200);
edit.SetLastSequence(kBig + 1000);
std::string encoded;
// Callback that appends extra customized fields: tag 33 with
// "random_string" on every invocation, plus tag 22 with "s" on the first
// invocation only.
bool first = true;
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"VersionEdit::EncodeTo:NewFile4:CustomizeFields", [&](void* arg) {
std::string* str = reinterpret_cast<std::string*>(arg);
PutVarint32(str, 33);
const std::string str1 = "random_string";
PutLengthPrefixedSlice(str, str1);
if (first) {
first = false;
PutVarint32(str, 22);
const std::string str2 = "s";
PutLengthPrefixedSlice(str, str2);
}
});
// Sync-point processing is enabled only around the encode call.
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
edit.EncodeTo(&encoded);
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
VersionEdit parsed;
Status s = parsed.DecodeFrom(encoded);
ASSERT_TRUE(s.ok()) << s.ToString();
// `first` having been cleared proves the callback ran at least once.
ASSERT_TRUE(!first);
auto& new_files = parsed.GetNewFiles();
ASSERT_TRUE(new_files[0].second.marked_for_compaction);
ASSERT_TRUE(!new_files[1].second.marked_for_compaction);
ASSERT_EQ(3u, new_files[0].second.fd.GetPathId());
ASSERT_EQ(3u, new_files[1].second.fd.GetPathId());
ASSERT_EQ(1u, parsed.GetDeletedFiles().size());
}
// Uses a sync-point callback to append a length-prefixed string (with no
// preceding tag varint) to the encoded new-file record and expects decoding
// to fail.
TEST_F(VersionEditTest, NewFile4NotSupportedField) {
static const uint64_t kBig = 1ull << 50;
VersionEdit edit;
edit.AddFile(3, 300, 3, 100, InternalKey("foo", kBig + 500, kTypeValue),
InternalKey("zoo", kBig + 600, kTypeDeletion), kBig + 500,
kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber,
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
300 /* epoch_number */, kUnknownFileChecksum,
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
edit.SetComparatorName("foo");
edit.SetLogNumber(kBig + 100);
edit.SetNextFile(kBig + 200);
edit.SetLastSequence(kBig + 1000);
std::string encoded;
// Callback that appends extra customized data to the encoded record.
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"VersionEdit::EncodeTo:NewFile4:CustomizeFields", [&](void* arg) {
std::string* str = reinterpret_cast<std::string*>(arg);
const std::string str1 = "s";
PutLengthPrefixedSlice(str, str1);
});
// Sync-point processing is enabled only around the encode call.
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
edit.EncodeTo(&encoded);
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
VersionEdit parsed;
Status s = parsed.DecodeFrom(encoded);
ASSERT_NOK(s);
}
// An edit whose only added file has default-constructed boundary keys is
// expected to make EncodeTo() return false.
TEST_F(VersionEditTest, EncodeEmptyFile) {
  VersionEdit edit_with_empty_file;
  edit_with_empty_file.AddFile(
      0, 0, 0, 0, InternalKey(), InternalKey(), 0, 0, false,
      Temperature::kUnknown, kInvalidBlobFileNumber,
      kUnknownOldestAncesterTime, kUnknownFileCreationTime,
      1 /*epoch_number*/, kUnknownFileChecksum, kUnknownFileChecksumFuncName,
      kNullUniqueId64x2, 0);

  std::string encoded;
  const bool encode_succeeded = edit_with_empty_file.EncodeTo(&encoded);
  ASSERT_TRUE(!encode_succeeded);
}
// Round-trips a column-family addition and, after Clear(), a column-family
// drop.
TEST_F(VersionEditTest, ColumnFamilyTest) {
  VersionEdit cf_edit;

  // Add column family 2 and raise the maximum column family id.
  cf_edit.SetColumnFamily(2);
  cf_edit.AddColumnFamily("column_family");
  cf_edit.SetMaxColumnFamily(5);
  TestEncodeDecode(cf_edit);

  // Reuse the same object for dropping column family 3.
  cf_edit.Clear();
  cf_edit.SetColumnFamily(3);
  cf_edit.DropColumnFamily();
  TestEncodeDecode(cf_edit);
}
// Round-trips the min-log-number-to-keep field for two different values,
// clearing the edit in between.
TEST_F(VersionEditTest, MinLogNumberToKeep) {
  VersionEdit log_edit;

  log_edit.SetMinLogNumberToKeep(13);
  TestEncodeDecode(log_edit);

  log_edit.Clear();

  log_edit.SetMinLogNumberToKeep(23);
  TestEncodeDecode(log_edit);
}
// Round-trips an edit that has been marked via MarkAtomicGroup(1).
TEST_F(VersionEditTest, AtomicGroupTest) {
  VersionEdit group_edit;
  group_edit.MarkAtomicGroup(1);
  TestEncodeDecode(group_edit);
}
// Hand-encodes three records around an unknown tag and checks how
// DecodeFrom() reacts: a truncated ignorable field and a non-ignorable
// unknown tag must both fail, while a well-formed ignorable field is skipped.
TEST_F(VersionEditTest, IgnorableField) {
VersionEdit ve;
std::string encoded;
// Size of ignorable field is too large
PutVarint32Varint64(&encoded, 2 /* kLogNumber */, 66);
// This is a customized ignorable tag
PutVarint32Varint64(&encoded,
0x2710 /* A field with kTagSafeIgnoreMask set */,
5 /* fieldlength 5 */);
encoded += "abc"; // Only fills 3 bytes,
ASSERT_NOK(ve.DecodeFrom(encoded));
encoded.clear();
// Error when seeing unidentified tag that is not ignorable
PutVarint32Varint64(&encoded, 2 /* kLogNumber */, 66);
// This is a customized tag that is NOT ignorable
PutVarint32Varint64(&encoded, 666 /* A field with kTagSafeIgnoreMask unset */,
3 /* fieldlength 3 */);
encoded += "abc"; // Fill 3 bytes
PutVarint32Varint64(&encoded, 3 /* next file number */, 88);
ASSERT_NOK(ve.DecodeFrom(encoded));
// Safely ignore an unidentified but safely ignorable entry
encoded.clear();
PutVarint32Varint64(&encoded, 2 /* kLogNumber */, 66);
// This is a customized ignorable tag
PutVarint32Varint64(&encoded,
0x2710 /* A field with kTagSafeIgnoreMask set */,
3 /* fieldlength 3 */);
encoded += "abc"; // Fill 3 bytes
PutVarint32Varint64(&encoded, 3 /* kNextFileNumber */, 88);
ASSERT_OK(ve.DecodeFrom(encoded));
// The known fields on either side of the ignored one must be decoded.
ASSERT_TRUE(ve.HasLogNumber());
ASSERT_TRUE(ve.HasNextFile());
ASSERT_EQ(66, ve.GetLogNumber());
ASSERT_EQ(88, ve.GetNextFile());
}
// Round-trips the DB id field for two different ids, clearing the edit in
// between.
TEST_F(VersionEditTest, DbId) {
  VersionEdit id_edit;

  id_edit.SetDBId("ab34-cd12-435f-er00");
  TestEncodeDecode(id_edit);

  id_edit.Clear();

  id_edit.SetDBId("34ba-cd12-435f-er01");
  TestEncodeDecode(id_edit);
}
// Adds ten blob files, each with a matching garbage record, to a single edit
// and round-trips the result.
TEST_F(VersionEditTest, BlobFileAdditionAndGarbage) {
  VersionEdit edit;

  const std::string method_prefix = "Hash";
  const std::string value_prefix = "Value";

  for (uint64_t file_no = 1; file_no <= 10; ++file_no) {
    // Counts and sizes scale with the file number.
    const uint64_t blob_count = file_no << 10;
    const uint64_t blob_bytes = file_no << 20;

    // Checksum method/value are the prefixes followed by the file number.
    std::string method(method_prefix);
    AppendNumberTo(&method, file_no);
    std::string value(value_prefix);
    AppendNumberTo(&value, file_no);

    edit.AddBlobFile(file_no, blob_count, blob_bytes, method, value);

    // A quarter of the blobs and half of the bytes are reported as garbage.
    const uint64_t garbage_count = blob_count >> 2;
    const uint64_t garbage_bytes = blob_bytes >> 1;
    edit.AddBlobFileGarbage(file_no, garbage_count, garbage_bytes);
  }

  TestEncodeDecode(edit);
}
// Adds twenty WALs, each randomly with or without a synced size, and
// round-trips the edit.
TEST_F(VersionEditTest, AddWalEncodeDecode) {
  VersionEdit edit;
  for (uint64_t wal_number = 1; wal_number <= 20; ++wal_number) {
    WalMetadata metadata;
    // Roughly half of the WALs get a random synced size below 1000.
    if (rand() % 2 == 0) {
      metadata.SetSyncedSizeInBytes(rand() % 1000);
    }
    edit.AddWal(wal_number, metadata);
  }
  TestEncodeDecode(edit);
}
// Wraps an already-encoded WAL addition payload into a record: the
// Tag::kWalAddition2 tag as a varint32 followed by the length-prefixed
// payload.
static std::string PrefixEncodedWalAdditionWithLength(
    const std::string& encoded) {
  std::string framed;
  PutVarint32(&framed, Tag::kWalAddition2);
  PutLengthPrefixedSlice(&framed, encoded);
  return framed;
}
// Decoding a WAL addition with a missing or malformed log number must fail
// with a Corruption status mentioning the log number.
TEST_F(VersionEditTest, AddWalDecodeBadLogNumber) {
std::string encoded;
{
// No log number.
std::string encoded_edit = PrefixEncodedWalAdditionWithLength(encoded);
VersionEdit edit;
Status s = edit.DecodeFrom(encoded_edit);
ASSERT_TRUE(s.IsCorruption());
ASSERT_TRUE(s.ToString().find("Error decoding WAL log number") !=
std::string::npos)
<< s.ToString();
}
{
// log number should be varint64,
// but we only encode 128 which is not a valid representation of varint64.
// The byte is written through an unsigned char pointer so that the value
// 128 is stored whether or not plain char is signed.
char c = 0;
unsigned char* ptr = reinterpret_cast<unsigned char*>(&c);
*ptr = 128;
encoded.append(1, c);
std::string encoded_edit = PrefixEncodedWalAdditionWithLength(encoded);
VersionEdit edit;
Status s = edit.DecodeFrom(encoded_edit);
ASSERT_TRUE(s.IsCorruption());
ASSERT_TRUE(s.ToString().find("Error decoding WAL log number") !=
std::string::npos)
<< s.ToString();
}
}
// Checks how a WAL addition payload is decoded depending on which tags follow
// the log number: no tag and a size tag without terminator are corrupt, a
// lone terminate tag is accepted.
TEST_F(VersionEditTest, AddWalDecodeBadTag) {
constexpr WalNumber kLogNumber = 100;
constexpr uint64_t kSizeInBytes = 100;
// Common prefix for all three cases: just the log number.
std::string encoded;
PutVarint64(&encoded, kLogNumber);
{
// No tag.
std::string encoded_edit = PrefixEncodedWalAdditionWithLength(encoded);
VersionEdit edit;
Status s = edit.DecodeFrom(encoded_edit);
ASSERT_TRUE(s.IsCorruption());
ASSERT_TRUE(s.ToString().find("Error decoding tag") != std::string::npos)
<< s.ToString();
}
{
// Only has size tag, no terminate tag.
std::string encoded_with_size = encoded;
PutVarint32(&encoded_with_size,
static_cast<uint32_t>(WalAdditionTag::kSyncedSize));
PutVarint64(&encoded_with_size, kSizeInBytes);
std::string encoded_edit =
PrefixEncodedWalAdditionWithLength(encoded_with_size);
VersionEdit edit;
Status s = edit.DecodeFrom(encoded_edit);
ASSERT_TRUE(s.IsCorruption());
ASSERT_TRUE(s.ToString().find("Error decoding tag") != std::string::npos)
<< s.ToString();
}
{
// Only has terminate tag.
std::string encoded_with_terminate = encoded;
PutVarint32(&encoded_with_terminate,
static_cast<uint32_t>(WalAdditionTag::kTerminate));
std::string encoded_edit =
PrefixEncodedWalAdditionWithLength(encoded_with_terminate);
VersionEdit edit;
ASSERT_OK(edit.DecodeFrom(encoded_edit));
// The decoded addition carries the log number but no synced size.
auto& wal_addition = edit.GetWalAdditions()[0];
ASSERT_EQ(wal_addition.GetLogNumber(), kLogNumber);
ASSERT_FALSE(wal_addition.GetMetadata().HasSyncedSize());
}
}
// A size tag that is not followed by an actual size must be rejected as
// corruption, with or without a trailing terminate tag.
TEST_F(VersionEditTest, AddWalDecodeNoSize) {
constexpr WalNumber kLogNumber = 100;
std::string encoded;
PutVarint64(&encoded, kLogNumber);
PutVarint32(&encoded, static_cast<uint32_t>(WalAdditionTag::kSyncedSize));
// No real size after the size tag.
{
// Without terminate tag.
std::string encoded_edit = PrefixEncodedWalAdditionWithLength(encoded);
VersionEdit edit;
Status s = edit.DecodeFrom(encoded_edit);
ASSERT_TRUE(s.IsCorruption());
ASSERT_TRUE(s.ToString().find("Error decoding WAL file size") !=
std::string::npos)
<< s.ToString();
}
{
// With terminate tag.
PutVarint32(&encoded, static_cast<uint32_t>(WalAdditionTag::kTerminate));
std::string encoded_edit = PrefixEncodedWalAdditionWithLength(encoded);
VersionEdit edit;
Status s = edit.DecodeFrom(encoded_edit);
ASSERT_TRUE(s.IsCorruption());
// The terminate tag is misunderstood as the size.
// Hence the failure is reported for the tag that should follow the size.
ASSERT_TRUE(s.ToString().find("Error decoding tag") != std::string::npos)
<< s.ToString();
}
}
// Adds two WALs and checks the accessors as well as the exact text produced
// by DebugString() and DebugJSON().
TEST_F(VersionEditTest, AddWalDebug) {
  constexpr int kNumWals = 2;
  constexpr std::array<uint64_t, kNumWals> kLogNumbers{{10, 20}};
  constexpr std::array<uint64_t, kNumWals> kSizeInBytes{{100, 200}};

  VersionEdit edit;
  for (int idx = 0; idx < kNumWals; ++idx) {
    edit.AddWal(kLogNumbers[idx], WalMetadata(kSizeInBytes[idx]));
  }

  // The additions must be retrievable in insertion order.
  const WalAdditions& additions = edit.GetWalAdditions();
  ASSERT_TRUE(edit.IsWalAddition());
  ASSERT_EQ(additions.size(), kNumWals);
  for (int idx = 0; idx < kNumWals; ++idx) {
    const WalAddition& addition = additions[idx];
    ASSERT_EQ(addition.GetLogNumber(), kLogNumbers[idx]);
    ASSERT_EQ(addition.GetMetadata().GetSyncedSizeInBytes(),
              kSizeInBytes[idx]);
  }

  // Expected human-readable dump: one line per WAL, then the column family.
  std::stringstream expected_text;
  expected_text << "VersionEdit {\n";
  for (int idx = 0; idx < kNumWals; ++idx) {
    expected_text << " WalAddition: log_number: " << kLogNumbers[idx]
                  << " synced_size_in_bytes: " << kSizeInBytes[idx] << "\n";
  }
  expected_text << " ColumnFamily: 0\n}\n";
  ASSERT_EQ(edit.DebugString(true), expected_text.str());

  // Expected JSON dump: comma-separated WAL objects inside an array.
  std::stringstream expected_json;
  expected_json << "{\"EditNumber\": 4, \"WalAdditions\": [";
  for (int idx = 0; idx < kNumWals; ++idx) {
    expected_json << "{\"LogNumber\": " << kLogNumbers[idx] << ", "
                  << "\"SyncedSizeInBytes\": " << kSizeInBytes[idx] << "}";
    if (idx < kNumWals - 1) {
      expected_json << ", ";
    }
  }
  expected_json << "], \"ColumnFamily\": 0}";
  ASSERT_EQ(edit.DebugJSON(4, true), expected_json.str());
}
// Round-trips a WAL deletion with a random log number below 100.
TEST_F(VersionEditTest, DeleteWalEncodeDecode) {
  VersionEdit deletion_edit;
  deletion_edit.DeleteWalsBefore(rand() % 100);
  TestEncodeDecode(deletion_edit);
}
// Records a WAL deletion and checks the accessor as well as the exact text
// produced by DebugString() and DebugJSON().
TEST_F(VersionEditTest, DeleteWalDebug) {
  constexpr int kNumWals = 2;
  constexpr std::array<uint64_t, kNumWals> kLogNumbers{{10, 20}};

  // Only the last log number is used as the deletion boundary.
  VersionEdit edit;
  edit.DeleteWalsBefore(kLogNumbers[kNumWals - 1]);

  const WalDeletion& deletion = edit.GetWalDeletion();
  ASSERT_TRUE(edit.IsWalDeletion());
  ASSERT_EQ(deletion.GetLogNumber(), kLogNumbers[kNumWals - 1]);

  // Expected human-readable dump.
  std::stringstream expected_text;
  expected_text << "VersionEdit {\n";
  expected_text << " WalDeletion: log_number: " << kLogNumbers[kNumWals - 1]
                << "\n";
  expected_text << " ColumnFamily: 0\n}\n";
  ASSERT_EQ(edit.DebugString(true), expected_text.str());

  // Expected JSON dump.
  std::stringstream expected_json;
  expected_json << "{\"EditNumber\": 4, \"WalDeletion\": ";
  expected_json << "{\"LogNumber\": " << kLogNumbers[kNumWals - 1] << "}";
  expected_json << ", \"ColumnFamily\": 0}";
  ASSERT_EQ(edit.DebugJSON(4, true), expected_json.str());
}
// A fresh edit has no full-history timestamp lower bound; once one is set,
// the edit must survive an encode/decode round trip.
TEST_F(VersionEditTest, FullHistoryTsLow) {
  VersionEdit ts_edit;
  ASSERT_FALSE(ts_edit.HasFullHistoryTsLow());

  std::string ts_low = test::EncodeInt(0);
  ts_edit.SetFullHistoryTsLow(ts_low);
  TestEncodeDecode(ts_edit);
}
// Tests that if RocksDB is downgraded, the new types of VersionEdits
// that have a tag larger than kTagSafeIgnoreMask can be safely ignored.
//
// A sync-point callback sets the bool it is handed to true while processing
// is enabled. The test then expects the decoded edit to contain none of the
// ignorable entries, while the unignorable ones survive the round trip.
TEST_F(VersionEditTest, IgnorableTags) {
  SyncPoint::GetInstance()->SetCallBack(
      "VersionEdit::EncodeTo:IgnoreIgnorableTags", [&](void* arg) {
        bool* ignore = static_cast<bool*>(arg);
        *ignore = true;
      });
  SyncPoint::GetInstance()->EnableProcessing();

  constexpr uint64_t kPrevLogNumber = 100;
  constexpr uint64_t kLogNumber = 200;
  constexpr uint64_t kNextFileNumber = 300;
  constexpr uint64_t kColumnFamilyId = 400;

  VersionEdit edit;
  // Add some ignorable entries.
  for (int i = 0; i < 2; i++) {
    edit.AddWal(i + 1, WalMetadata(i + 2));
  }
  edit.SetDBId("db_id");
  // Add unignorable entries.
  edit.SetPrevLogNumber(kPrevLogNumber);
  edit.SetLogNumber(kLogNumber);
  // Add more ignorable entries.
  edit.DeleteWalsBefore(100);
  // Add unignorable entry.
  edit.SetNextFile(kNextFileNumber);
  // Add more ignorable entries.
  edit.SetFullHistoryTsLow("ts");
  // Add unignorable entry.
  edit.SetColumnFamily(kColumnFamilyId);

  std::string encoded;
  ASSERT_TRUE(edit.EncodeTo(&encoded));

  VersionEdit decoded;
  ASSERT_OK(decoded.DecodeFrom(encoded));

  // Check that all ignorable entries are ignored.
  ASSERT_FALSE(decoded.HasDbId());
  ASSERT_FALSE(decoded.HasFullHistoryTsLow());
  ASSERT_FALSE(decoded.IsWalAddition());
  ASSERT_FALSE(decoded.IsWalDeletion());
  ASSERT_TRUE(decoded.GetWalAdditions().empty());
  ASSERT_TRUE(decoded.GetWalDeletion().IsEmpty());

  // Check that unignorable entries are still present. These must be read from
  // the decoded edit: inspecting the original `edit` would pass regardless of
  // what the decoder did.
  ASSERT_EQ(decoded.GetPrevLogNumber(), kPrevLogNumber);
  ASSERT_EQ(decoded.GetLogNumber(), kLogNumber);
  ASSERT_EQ(decoded.GetNextFile(), kNextFileNumber);
  ASSERT_EQ(decoded.GetColumnFamily(), kColumnFamilyId);

  SyncPoint::GetInstance()->DisableProcessing();
  // Drop the callback so it cannot leak into tests that run afterwards.
  SyncPoint::GetInstance()->ClearAllCallBacks();
}
// Feeds a sequence of values of different kinds to
// FileMetaData::UpdateBoundaries() and checks after each step how
// oldest_blob_file_number is (or is not) affected. The steps build on one
// another, so their order matters.
TEST(FileMetaDataTest, UpdateBoundariesBlobIndex) {
FileMetaData meta;
{
constexpr uint64_t file_number = 10;
constexpr uint32_t path_id = 0;
constexpr uint64_t file_size = 0;
meta.fd = FileDescriptor(file_number, path_id, file_size);
}
constexpr char key[] = "foo";
// Value oldest_blob_file_number is expected to settle on from the third step
// onwards.
constexpr uint64_t expected_oldest_blob_file_number = 20;
// Plain old value (does not affect oldest_blob_file_number)
{
constexpr char value[] = "value";
constexpr SequenceNumber seq = 200;
ASSERT_OK(meta.UpdateBoundaries(key, value, seq, kTypeValue));
ASSERT_EQ(meta.oldest_blob_file_number, kInvalidBlobFileNumber);
}
// Non-inlined, non-TTL blob index (sets oldest_blob_file_number)
{
constexpr uint64_t blob_file_number = 25;
static_assert(blob_file_number > expected_oldest_blob_file_number,
"unexpected");
constexpr uint64_t offset = 1000;
constexpr uint64_t size = 100;
std::string blob_index;
BlobIndex::EncodeBlob(&blob_index, blob_file_number, offset, size,
kNoCompression);
constexpr SequenceNumber seq = 201;
ASSERT_OK(meta.UpdateBoundaries(key, blob_index, seq, kTypeBlobIndex));
ASSERT_EQ(meta.oldest_blob_file_number, blob_file_number);
}
// Another one, with the oldest blob file number (updates
// oldest_blob_file_number)
{
constexpr uint64_t offset = 2000;
constexpr uint64_t size = 300;
std::string blob_index;
BlobIndex::EncodeBlob(&blob_index, expected_oldest_blob_file_number, offset,
size, kNoCompression);
constexpr SequenceNumber seq = 202;
ASSERT_OK(meta.UpdateBoundaries(key, blob_index, seq, kTypeBlobIndex));
ASSERT_EQ(meta.oldest_blob_file_number, expected_oldest_blob_file_number);
}
// Inlined TTL blob index (does not affect oldest_blob_file_number)
{
constexpr uint64_t expiration = 9876543210;
constexpr char value[] = "value";
std::string blob_index;
BlobIndex::EncodeInlinedTTL(&blob_index, expiration, value);
constexpr SequenceNumber seq = 203;
ASSERT_OK(meta.UpdateBoundaries(key, blob_index, seq, kTypeBlobIndex));
ASSERT_EQ(meta.oldest_blob_file_number, expected_oldest_blob_file_number);
}
// Non-inlined TTL blob index (does not affect oldest_blob_file_number, even
// though file number is smaller)
{
constexpr uint64_t expiration = 9876543210;
constexpr uint64_t blob_file_number = 15;
static_assert(blob_file_number < expected_oldest_blob_file_number,
"unexpected");
constexpr uint64_t offset = 2000;
constexpr uint64_t size = 500;
std::string blob_index;
BlobIndex::EncodeBlobTTL(&blob_index, expiration, blob_file_number, offset,
size, kNoCompression);
constexpr SequenceNumber seq = 204;
ASSERT_OK(meta.UpdateBoundaries(key, blob_index, seq, kTypeBlobIndex));
ASSERT_EQ(meta.oldest_blob_file_number, expected_oldest_blob_file_number);
}
// Corrupt blob index
// (rejected as corruption; oldest_blob_file_number stays unchanged)
{
constexpr char corrupt_blob_index[] = "!corrupt!";
constexpr SequenceNumber seq = 205;
ASSERT_TRUE(
meta.UpdateBoundaries(key, corrupt_blob_index, seq, kTypeBlobIndex)
.IsCorruption());
ASSERT_EQ(meta.oldest_blob_file_number, expected_oldest_blob_file_number);
}
// Invalid blob file number
// (rejected as corruption; oldest_blob_file_number stays unchanged)
{
constexpr uint64_t offset = 10000;
constexpr uint64_t size = 1000;
std::string blob_index;
BlobIndex::EncodeBlob(&blob_index, kInvalidBlobFileNumber, offset, size,
kNoCompression);
constexpr SequenceNumber seq = 206;
ASSERT_TRUE(meta.UpdateBoundaries(key, blob_index, seq, kTypeBlobIndex)
.IsCorruption());
ASSERT_EQ(meta.oldest_blob_file_number, expected_oldest_blob_file_number);
}
}
} // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack-trace handler, initialises
// GoogleTest from the command line and returns the result of running all
// registered tests.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save